mirror of
https://github.com/vinta/awesome-python.git
synced 2026-05-28 22:31:31 +08:00
Merge pull request #2972 from vinta/feature/fetch-stats-workflow
ci: consolidate star fetch into deploy workflow with Actions cache
This commit is contained in:
@@ -4,6 +4,8 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
|
schedule:
|
||||||
|
- cron: "0 0 * * *"
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
@@ -26,10 +28,40 @@ jobs:
|
|||||||
enable-cache: true
|
enable-cache: true
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: uv sync --no-dev
|
run: uv sync --group build
|
||||||
|
|
||||||
|
- name: Restore star data cache
|
||||||
|
id: cache-stars
|
||||||
|
uses: actions/cache/restore@v4
|
||||||
|
with:
|
||||||
|
path: website/data/github_stars.json
|
||||||
|
key: github-stars-${{ github.run_id }}
|
||||||
|
restore-keys: github-stars-
|
||||||
|
|
||||||
|
- name: Fetch GitHub stars
|
||||||
|
id: fetch-stars
|
||||||
|
continue-on-error: true
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
run: make fetch_github_stars
|
||||||
|
|
||||||
|
- name: Save star data cache
|
||||||
|
if: steps.fetch-stars.outcome == 'success'
|
||||||
|
uses: actions/cache/save@v4
|
||||||
|
with:
|
||||||
|
path: website/data/github_stars.json
|
||||||
|
key: github-stars-${{ github.run_id }}
|
||||||
|
|
||||||
|
- name: Verify star data exists
|
||||||
|
run: |
|
||||||
|
if [ ! -f website/data/github_stars.json ]; then
|
||||||
|
echo "::error::github_stars.json not found. No cache and fetch failed or was skipped."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Star data found: $(wc -l < website/data/github_stars.json) lines"
|
||||||
|
|
||||||
- name: Build site
|
- name: Build site
|
||||||
run: uv run python website/build.py
|
run: make build
|
||||||
|
|
||||||
- name: Upload artifact
|
- name: Upload artifact
|
||||||
uses: actions/upload-pages-artifact@v4
|
uses: actions/upload-pages-artifact@v4
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
# website
|
# website
|
||||||
website/output/
|
website/output/
|
||||||
|
website/data/
|
||||||
|
|
||||||
# claude code
|
# claude code
|
||||||
.claude/skills/
|
.claude/skills/
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ export
|
|||||||
install:
|
install:
|
||||||
uv sync
|
uv sync
|
||||||
|
|
||||||
fetch_stats:
|
fetch_github_stars:
|
||||||
uv run python website/fetch_github_stars.py
|
uv run python website/fetch_github_stars.py
|
||||||
|
|
||||||
test:
|
test:
|
||||||
|
|||||||
+165
-75
@@ -8,62 +8,144 @@ from pathlib import Path
|
|||||||
from typing import TypedDict
|
from typing import TypedDict
|
||||||
|
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
|
||||||
from readme_parser import parse_readme, slugify
|
from readme_parser import parse_readme, slugify
|
||||||
|
|
||||||
# Thematic grouping of categories. Each category name must match exactly
|
# Thematic grouping of categories. Each category name must match exactly
|
||||||
# as it appears in README.md (the ## heading text).
|
# as it appears in README.md (the ## heading text).
|
||||||
SECTION_GROUPS: list[tuple[str, list[str]]] = [
|
SECTION_GROUPS: list[tuple[str, list[str]]] = [
|
||||||
("Web & API", [
|
(
|
||||||
"Web Frameworks", "RESTful API", "GraphQL", "WebSocket",
|
"Web & API",
|
||||||
"ASGI Servers", "WSGI Servers", "HTTP Clients", "Template Engine",
|
[
|
||||||
"Web Asset Management", "Web Content Extracting", "Web Crawling",
|
"Web Frameworks",
|
||||||
]),
|
"RESTful API",
|
||||||
("Data & ML", [
|
"GraphQL",
|
||||||
"Data Analysis", "Data Validation", "Data Visualization",
|
"WebSocket",
|
||||||
"Machine Learning", "Deep Learning", "Computer Vision",
|
"ASGI Servers",
|
||||||
"Natural Language Processing", "Recommender Systems", "Science",
|
"WSGI Servers",
|
||||||
"Quantum Computing",
|
"HTTP Clients",
|
||||||
]),
|
"Template Engine",
|
||||||
("DevOps & Infrastructure", [
|
"Web Asset Management",
|
||||||
"DevOps Tools", "Distributed Computing", "Task Queues",
|
"Web Content Extracting",
|
||||||
"Job Scheduler", "Serverless Frameworks", "Logging", "Processes",
|
"Web Crawling",
|
||||||
"Shell", "Network Virtualization", "RPC Servers",
|
],
|
||||||
]),
|
),
|
||||||
("Database & Storage", [
|
(
|
||||||
"Database", "Database Drivers", "ORM", "Caching", "Search",
|
"Data & ML",
|
||||||
"Serialization",
|
[
|
||||||
]),
|
"Data Analysis",
|
||||||
("Development Tools", [
|
"Data Validation",
|
||||||
"Testing", "Debugging Tools", "Code Analysis", "Build Tools",
|
"Data Visualization",
|
||||||
"Refactoring", "Documentation", "Editor Plugins and IDEs",
|
"Machine Learning",
|
||||||
"Interactive Interpreter",
|
"Deep Learning",
|
||||||
]),
|
"Computer Vision",
|
||||||
("CLI & GUI", [
|
"Natural Language Processing",
|
||||||
"Command-line Interface Development", "Command-line Tools",
|
"Recommender Systems",
|
||||||
"GUI Development",
|
"Science",
|
||||||
]),
|
"Quantum Computing",
|
||||||
("Content & Media", [
|
],
|
||||||
"Audio", "Video", "Image Processing", "HTML Manipulation",
|
),
|
||||||
"Text Processing", "Specific Formats Processing",
|
(
|
||||||
"File Manipulation", "Downloader",
|
"DevOps & Infrastructure",
|
||||||
]),
|
[
|
||||||
("System & Runtime", [
|
"DevOps Tools",
|
||||||
"Asynchronous Programming", "Environment Management",
|
"Distributed Computing",
|
||||||
"Package Management", "Package Repositories", "Distribution",
|
"Task Queues",
|
||||||
"Implementations", "Built-in Classes Enhancement",
|
"Job Scheduler",
|
||||||
"Functional Programming", "Configuration Files",
|
"Serverless Frameworks",
|
||||||
]),
|
"Logging",
|
||||||
("Security & Auth", [
|
"Processes",
|
||||||
"Authentication", "Cryptography", "Penetration Testing",
|
"Shell",
|
||||||
"Permissions",
|
"Network Virtualization",
|
||||||
]),
|
"RPC Servers",
|
||||||
("Specialized", [
|
],
|
||||||
"CMS", "Admin Panels", "Email", "Game Development", "Geolocation",
|
),
|
||||||
"Hardware", "Internationalization", "Date and Time",
|
(
|
||||||
"URL Manipulation", "Robotics", "Microsoft Windows", "Miscellaneous",
|
"Database & Storage",
|
||||||
"Algorithms and Design Patterns", "Static Site Generator",
|
[
|
||||||
]),
|
"Database",
|
||||||
|
"Database Drivers",
|
||||||
|
"ORM",
|
||||||
|
"Caching",
|
||||||
|
"Search",
|
||||||
|
"Serialization",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Development Tools",
|
||||||
|
[
|
||||||
|
"Testing",
|
||||||
|
"Debugging Tools",
|
||||||
|
"Code Analysis",
|
||||||
|
"Build Tools",
|
||||||
|
"Refactoring",
|
||||||
|
"Documentation",
|
||||||
|
"Editor Plugins and IDEs",
|
||||||
|
"Interactive Interpreter",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"CLI & GUI",
|
||||||
|
[
|
||||||
|
"Command-line Interface Development",
|
||||||
|
"Command-line Tools",
|
||||||
|
"GUI Development",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Content & Media",
|
||||||
|
[
|
||||||
|
"Audio",
|
||||||
|
"Video",
|
||||||
|
"Image Processing",
|
||||||
|
"HTML Manipulation",
|
||||||
|
"Text Processing",
|
||||||
|
"Specific Formats Processing",
|
||||||
|
"File Manipulation",
|
||||||
|
"Downloader",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"System & Runtime",
|
||||||
|
[
|
||||||
|
"Asynchronous Programming",
|
||||||
|
"Environment Management",
|
||||||
|
"Package Management",
|
||||||
|
"Package Repositories",
|
||||||
|
"Distribution",
|
||||||
|
"Implementations",
|
||||||
|
"Built-in Classes Enhancement",
|
||||||
|
"Functional Programming",
|
||||||
|
"Configuration Files",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Security & Auth",
|
||||||
|
[
|
||||||
|
"Authentication",
|
||||||
|
"Cryptography",
|
||||||
|
"Penetration Testing",
|
||||||
|
"Permissions",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Specialized",
|
||||||
|
[
|
||||||
|
"CMS",
|
||||||
|
"Admin Panels",
|
||||||
|
"Email",
|
||||||
|
"Game Development",
|
||||||
|
"Geolocation",
|
||||||
|
"Hardware",
|
||||||
|
"Internationalization",
|
||||||
|
"Date and Time",
|
||||||
|
"URL Manipulation",
|
||||||
|
"Robotics",
|
||||||
|
"Microsoft Windows",
|
||||||
|
"Miscellaneous",
|
||||||
|
"Algorithms and Design Patterns",
|
||||||
|
"Static Site Generator",
|
||||||
|
],
|
||||||
|
),
|
||||||
("Resources", []), # Filled dynamically from parsed resources
|
("Resources", []), # Filled dynamically from parsed resources
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -85,20 +167,24 @@ def group_categories(
|
|||||||
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
|
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
|
||||||
|
|
||||||
if group_cats:
|
if group_cats:
|
||||||
groups.append({
|
groups.append(
|
||||||
"name": group_name,
|
{
|
||||||
"slug": slugify(group_name),
|
"name": group_name,
|
||||||
"categories": group_cats,
|
"slug": slugify(group_name),
|
||||||
})
|
"categories": group_cats,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Any categories not in a group go into "Other"
|
# Any categories not in a group go into "Other"
|
||||||
ungrouped = [c for c in categories if c["name"] not in grouped_names]
|
ungrouped = [c for c in categories if c["name"] not in grouped_names]
|
||||||
if ungrouped:
|
if ungrouped:
|
||||||
groups.append({
|
groups.append(
|
||||||
"name": "Other",
|
{
|
||||||
"slug": "other",
|
"name": "Other",
|
||||||
"categories": ungrouped,
|
"slug": "other",
|
||||||
})
|
"categories": ungrouped,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return groups
|
return groups
|
||||||
|
|
||||||
@@ -121,9 +207,7 @@ class StarData(TypedDict):
|
|||||||
fetched_at: str
|
fetched_at: str
|
||||||
|
|
||||||
|
|
||||||
GITHUB_REPO_URL_RE = re.compile(
|
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
|
||||||
r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_github_repo(url: str) -> str | None:
|
def extract_github_repo(url: str) -> str | None:
|
||||||
@@ -144,12 +228,14 @@ def load_stars(path: Path) -> dict[str, StarData]:
|
|||||||
|
|
||||||
def sort_entries(entries: list[dict]) -> list[dict]:
|
def sort_entries(entries: list[dict]) -> list[dict]:
|
||||||
"""Sort entries by stars descending, then name ascending. No-star entries go last."""
|
"""Sort entries by stars descending, then name ascending. No-star entries go last."""
|
||||||
|
|
||||||
def sort_key(entry: dict) -> tuple[int, int, str]:
|
def sort_key(entry: dict) -> tuple[int, int, str]:
|
||||||
stars = entry["stars"]
|
stars = entry["stars"]
|
||||||
name = entry["name"].lower()
|
name = entry["name"].lower()
|
||||||
if stars is None:
|
if stars is None:
|
||||||
return (1, 0, name)
|
return (1, 0, name)
|
||||||
return (0, -stars, name)
|
return (0, -stars, name)
|
||||||
|
|
||||||
return sorted(entries, key=sort_key)
|
return sorted(entries, key=sort_key)
|
||||||
|
|
||||||
|
|
||||||
@@ -167,17 +253,19 @@ def extract_entries(
|
|||||||
for cat in categories:
|
for cat in categories:
|
||||||
group_name = cat_to_group.get(cat["name"], "Other")
|
group_name = cat_to_group.get(cat["name"], "Other")
|
||||||
for entry in cat["entries"]:
|
for entry in cat["entries"]:
|
||||||
entries.append({
|
entries.append(
|
||||||
"name": entry["name"],
|
{
|
||||||
"url": entry["url"],
|
"name": entry["name"],
|
||||||
"description": entry["description"],
|
"url": entry["url"],
|
||||||
"category": cat["name"],
|
"description": entry["description"],
|
||||||
"group": group_name,
|
"category": cat["name"],
|
||||||
"stars": None,
|
"group": group_name,
|
||||||
"owner": None,
|
"stars": None,
|
||||||
"last_commit_at": None,
|
"owner": None,
|
||||||
"also_see": entry["also_see"],
|
"last_commit_at": None,
|
||||||
})
|
"also_see": entry["also_see"],
|
||||||
|
}
|
||||||
|
)
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
|
||||||
@@ -241,6 +329,8 @@ def build(repo_root: str) -> None:
|
|||||||
if static_src.exists():
|
if static_src.exists():
|
||||||
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
|
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
|
||||||
|
|
||||||
|
shutil.copy(repo / "README.md", site_dir / "llms.txt")
|
||||||
|
|
||||||
print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
|
print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
|
||||||
print(f"Total entries: {total_entries}")
|
print(f"Total entries: {total_entries}")
|
||||||
print(f"Output: {site_dir}")
|
print(f"Output: {site_dir}")
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -12,7 +12,7 @@ import httpx
|
|||||||
|
|
||||||
from build import extract_github_repo, load_stars
|
from build import extract_github_repo, load_stars
|
||||||
|
|
||||||
CACHE_MAX_AGE_DAYS = 7
|
CACHE_MAX_AGE_HOURS = 12
|
||||||
DATA_DIR = Path(__file__).parent / "data"
|
DATA_DIR = Path(__file__).parent / "data"
|
||||||
CACHE_FILE = DATA_DIR / "github_stars.json"
|
CACHE_FILE = DATA_DIR / "github_stars.json"
|
||||||
README_PATH = Path(__file__).parent.parent / "README.md"
|
README_PATH = Path(__file__).parent.parent / "README.md"
|
||||||
@@ -120,8 +120,8 @@ def main() -> None:
|
|||||||
entry = cache.get(repo)
|
entry = cache.get(repo)
|
||||||
if entry and "fetched_at" in entry:
|
if entry and "fetched_at" in entry:
|
||||||
fetched = datetime.fromisoformat(entry["fetched_at"])
|
fetched = datetime.fromisoformat(entry["fetched_at"])
|
||||||
age_days = (now - fetched).days
|
age_hours = (now - fetched).total_seconds() / 3600
|
||||||
if age_days < CACHE_MAX_AGE_DAYS:
|
if age_hours < CACHE_MAX_AGE_HOURS:
|
||||||
continue
|
continue
|
||||||
to_fetch.append(repo)
|
to_fetch.append(repo)
|
||||||
|
|
||||||
|
|||||||
@@ -137,3 +137,95 @@ class TestParseGraphqlResponse:
|
|||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result["a/x"]["stars"] == 100
|
assert result["a/x"]["stars"] == 100
|
||||||
assert result["b/y"]["stars"] == 200
|
assert result["b/y"]["stars"] == 200
|
||||||
|
|
||||||
|
|
||||||
|
class TestMainSkipsFreshCache:
|
||||||
|
"""Verify that main() skips fetching when all cache entries are fresh."""
|
||||||
|
|
||||||
|
def test_skips_fetch_when_cache_is_fresh(self, tmp_path, monkeypatch, capsys):
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
|
||||||
|
from fetch_github_stars import main
|
||||||
|
|
||||||
|
# Set up a minimal README with one repo
|
||||||
|
readme = tmp_path / "README.md"
|
||||||
|
readme.write_text("* [req](https://github.com/psf/requests) - HTTP.\n")
|
||||||
|
monkeypatch.setattr("fetch_github_stars.README_PATH", readme)
|
||||||
|
|
||||||
|
# Pre-populate cache with a fresh entry (1 hour ago)
|
||||||
|
data_dir = tmp_path / "data"
|
||||||
|
data_dir.mkdir()
|
||||||
|
cache_file = data_dir / "github_stars.json"
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
fresh_cache = {
|
||||||
|
"psf/requests": {
|
||||||
|
"stars": 52000,
|
||||||
|
"owner": "psf",
|
||||||
|
"last_commit_at": "2025-01-01T00:00:00+00:00",
|
||||||
|
"fetched_at": (now - timedelta(hours=1)).isoformat(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cache_file.write_text(json.dumps(fresh_cache), encoding="utf-8")
|
||||||
|
monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_file)
|
||||||
|
monkeypatch.setattr("fetch_github_stars.DATA_DIR", data_dir)
|
||||||
|
monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
|
||||||
|
|
||||||
|
main()
|
||||||
|
|
||||||
|
output = capsys.readouterr().out
|
||||||
|
assert "0 repos to fetch" in output
|
||||||
|
assert "Cache is up to date" in output
|
||||||
|
|
||||||
|
def test_fetches_when_cache_is_stale(self, tmp_path, monkeypatch, capsys):
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
from fetch_github_stars import main
|
||||||
|
|
||||||
|
# Set up a minimal README with one repo
|
||||||
|
readme = tmp_path / "README.md"
|
||||||
|
readme.write_text("* [req](https://github.com/psf/requests) - HTTP.\n")
|
||||||
|
monkeypatch.setattr("fetch_github_stars.README_PATH", readme)
|
||||||
|
|
||||||
|
# Pre-populate cache with a stale entry (24 hours ago)
|
||||||
|
data_dir = tmp_path / "data"
|
||||||
|
data_dir.mkdir()
|
||||||
|
cache_file = data_dir / "github_stars.json"
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
stale_cache = {
|
||||||
|
"psf/requests": {
|
||||||
|
"stars": 52000,
|
||||||
|
"owner": "psf",
|
||||||
|
"last_commit_at": "2025-01-01T00:00:00+00:00",
|
||||||
|
"fetched_at": (now - timedelta(hours=24)).isoformat(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cache_file.write_text(json.dumps(stale_cache), encoding="utf-8")
|
||||||
|
monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_file)
|
||||||
|
monkeypatch.setattr("fetch_github_stars.DATA_DIR", data_dir)
|
||||||
|
monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
|
||||||
|
|
||||||
|
# Mock httpx.Client to avoid real API calls
|
||||||
|
mock_response = MagicMock()
|
||||||
|
mock_response.json.return_value = {
|
||||||
|
"data": {
|
||||||
|
"repo_0": {
|
||||||
|
"stargazerCount": 53000,
|
||||||
|
"owner": {"login": "psf"},
|
||||||
|
"defaultBranchRef": {"target": {"committedDate": "2025-06-01T00:00:00Z"}},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mock_response.raise_for_status = MagicMock()
|
||||||
|
mock_client = MagicMock()
|
||||||
|
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||||
|
mock_client.__exit__ = MagicMock(return_value=False)
|
||||||
|
mock_client.post.return_value = mock_response
|
||||||
|
monkeypatch.setattr("fetch_github_stars.httpx.Client", lambda **kwargs: mock_client)
|
||||||
|
|
||||||
|
main()
|
||||||
|
|
||||||
|
output = capsys.readouterr().out
|
||||||
|
assert "1 repos to fetch" in output
|
||||||
|
assert "Done. Fetched 1 repos" in output
|
||||||
|
mock_client.post.assert_called_once()
|
||||||
|
|||||||
Reference in New Issue
Block a user