Files
awesome-python/website/build.py
Vinta Chen d36f1ed8d1 refactor(website): remove unused Entry TypedDict, write llms.txt from parsed text
Entry was dead code with no callers. Switching from shutil.copy to
write_text uses the already-loaded readme_text variable directly instead
of re-reading the file from disk.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-22 01:32:17 +08:00

184 lines
5.6 KiB
Python

#!/usr/bin/env python3
"""Build a single-page HTML site from README.md for the awesome-python website."""
import json
import re
import shutil
from pathlib import Path
from typing import TypedDict
from jinja2 import Environment, FileSystemLoader
from readme_parser import parse_readme, slugify
def group_categories(
parsed_groups: list[dict],
resources: list[dict],
) -> list[dict]:
"""Combine parsed groups with resources for template rendering."""
groups = list(parsed_groups)
if resources:
groups.append(
{
"name": "Resources",
"slug": slugify("Resources"),
"categories": list(resources),
}
)
return groups
class StarData(TypedDict):
stars: int
owner: str
last_commit_at: str
fetched_at: str
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
def extract_github_repo(url: str) -> str | None:
"""Extract owner/repo from a GitHub repo URL. Returns None for non-GitHub URLs."""
m = GITHUB_REPO_URL_RE.match(url)
return m.group(1) if m else None
def load_stars(path: Path) -> dict[str, StarData]:
"""Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt."""
if path.exists():
try:
return json.loads(path.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return {}
return {}
def sort_entries(entries: list[dict]) -> list[dict]:
"""Sort entries by stars descending, then name ascending. No-star entries go last."""
def sort_key(entry: dict) -> tuple[int, int, str]:
stars = entry["stars"]
name = entry["name"].lower()
if stars is None:
return (1, 0, name)
return (0, -stars, name)
return sorted(entries, key=sort_key)
def extract_entries(
categories: list[dict],
groups: list[dict],
) -> list[dict]:
"""Flatten categories into individual library entries for table display.
Entries appearing in multiple categories are merged into a single entry
with lists of categories and groups.
"""
cat_to_group: dict[str, str] = {}
for group in groups:
for cat in group["categories"]:
cat_to_group[cat["name"]] = group["name"]
seen: dict[str, dict] = {} # url -> entry
entries: list[dict] = []
for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other")
for entry in cat["entries"]:
url = entry["url"]
if url in seen:
existing = seen[url]
if cat["name"] not in existing["categories"]:
existing["categories"].append(cat["name"])
if group_name not in existing["groups"]:
existing["groups"].append(group_name)
else:
merged = {
"name": entry["name"],
"url": url,
"description": entry["description"],
"categories": [cat["name"]],
"groups": [group_name],
"stars": None,
"owner": None,
"last_commit_at": None,
"also_see": entry["also_see"],
}
seen[url] = merged
entries.append(merged)
return entries
def build(repo_root: str) -> None:
"""Main build: parse README, render single-page HTML via Jinja2 templates."""
repo = Path(repo_root)
website = repo / "website"
readme_text = (repo / "README.md").read_text(encoding="utf-8")
subtitle = ""
for line in readme_text.split("\n"):
stripped = line.strip()
if stripped and not stripped.startswith("#"):
subtitle = stripped
break
parsed_groups, resources = parse_readme(readme_text)
categories = [cat for g in parsed_groups for cat in g["categories"]]
total_entries = sum(c["entry_count"] for c in categories)
groups = group_categories(parsed_groups, resources)
entries = extract_entries(categories, groups)
stars_data = load_stars(website / "data" / "github_stars.json")
for entry in entries:
repo_key = extract_github_repo(entry["url"])
if repo_key and repo_key in stars_data:
sd = stars_data[repo_key]
entry["stars"] = sd["stars"]
entry["owner"] = sd["owner"]
entry["last_commit_at"] = sd.get("last_commit_at", "")
entries = sort_entries(entries)
env = Environment(
loader=FileSystemLoader(website / "templates"),
autoescape=True,
)
site_dir = website / "output"
if site_dir.exists():
shutil.rmtree(site_dir)
site_dir.mkdir(parents=True)
tpl_index = env.get_template("index.html")
(site_dir / "index.html").write_text(
tpl_index.render(
categories=categories,
resources=resources,
groups=groups,
subtitle=subtitle,
entries=entries,
total_entries=total_entries,
total_categories=len(categories),
),
encoding="utf-8",
)
static_src = website / "static"
static_dst = site_dir / "static"
if static_src.exists():
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
(site_dir / "llms.txt").write_text(readme_text, encoding="utf-8")
print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories + {len(resources)} resources")
print(f"Total entries: {total_entries}")
print(f"Output: {site_dir}")
if __name__ == "__main__":
build(str(Path(__file__).parent.parent))