#!/usr/bin/env python3
"""Build a single-page HTML site from README.md for the awesome-python website."""

import json
import re
import shutil
from pathlib import Path
from typing import TypedDict

from jinja2 import Environment, FileSystemLoader

from readme_parser import parse_readme, slugify


class Entry(TypedDict):
    """Declared shape of a library entry.

    NOTE(review): extract_entries() actually emits plural "categories" /
    "groups" list fields plus an "also_see" key, never the singular
    "category"/"group" declared below -- reconcile this type with its
    consumers before relying on it.
    """

    name: str
    url: str
    description: str
    category: str
    group: str
    stars: int | None
    owner: str | None
    last_commit_at: str | None


class StarData(TypedDict):
    """Per-repository GitHub metadata as stored in github_stars.json."""

    stars: int
    owner: str
    last_commit_at: str
    fetched_at: str


# Matches the root URL of a GitHub repository (optionally with ".git" or a
# trailing slash); deeper paths (issues, wiki, ...) intentionally do not match.
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")


def group_categories(
    parsed_groups: list[dict],
    resources: list[dict],
) -> list[dict]:
    """Combine parsed groups with resources for template rendering.

    Returns a new list (the input is not mutated) with a synthetic
    "Resources" group appended when any resources exist.
    """
    groups = list(parsed_groups)
    if resources:
        groups.append(
            {
                "name": "Resources",
                "slug": slugify("Resources"),
                "categories": list(resources),
            }
        )
    return groups


def extract_github_repo(url: str) -> str | None:
    """Extract owner/repo from a GitHub repo URL. Returns None for non-GitHub URLs."""
    m = GITHUB_REPO_URL_RE.match(url)
    return m.group(1) if m else None


def load_stars(path: Path) -> dict[str, StarData]:
    """Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt."""
    # EAFP: attempt the read and handle failure, instead of the racy
    # exists()-then-read pattern (file could vanish between check and read).
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (FileNotFoundError, json.JSONDecodeError):
        return {}
    # Guard against a structurally wrong file (e.g. a top-level JSON list),
    # which would otherwise leak through despite the declared return type.
    return data if isinstance(data, dict) else {}


def sort_entries(entries: list[dict]) -> list[dict]:
    """Sort entries by stars descending, then name ascending. No-star entries go last."""

    def sort_key(entry: dict) -> tuple[int, int, str]:
        stars = entry["stars"]
        name = entry["name"].lower()
        if stars is None:
            # Bucket 1 sorts after every bucket-0 (known-star) entry.
            return (1, 0, name)
        return (0, -stars, name)

    return sorted(entries, key=sort_key)


def extract_entries(
    categories: list[dict],
    groups: list[dict],
) -> list[dict]:
    """Flatten categories into individual library entries for table display.

    Entries appearing in multiple categories are merged into a single entry
    with lists of categories and groups.
    """
    # Map each category name to its owning group for fast lookup below.
    cat_to_group: dict[str, str] = {}
    for group in groups:
        for cat in group["categories"]:
            cat_to_group[cat["name"]] = group["name"]

    seen: dict[str, dict] = {}  # url -> already-emitted entry
    entries: list[dict] = []
    for cat in categories:
        group_name = cat_to_group.get(cat["name"], "Other")
        for entry in cat["entries"]:
            url = entry["url"]
            if url in seen:
                # Duplicate URL: merge category/group membership into the
                # existing row instead of emitting a second one.
                existing = seen[url]
                if cat["name"] not in existing["categories"]:
                    existing["categories"].append(cat["name"])
                if group_name not in existing["groups"]:
                    existing["groups"].append(group_name)
            else:
                merged = {
                    "name": entry["name"],
                    "url": url,
                    "description": entry["description"],
                    "categories": [cat["name"]],
                    "groups": [group_name],
                    "stars": None,  # filled in later from github_stars.json
                    "owner": None,
                    "last_commit_at": None,
                    "also_see": entry["also_see"],
                }
                seen[url] = merged
                entries.append(merged)
    return entries


def build(repo_root: str) -> None:
    """Main build: parse README, render single-page HTML via Jinja2 templates."""
    repo = Path(repo_root)
    website = repo / "website"
    readme_text = (repo / "README.md").read_text(encoding="utf-8")

    # Subtitle = first non-empty, non-heading line of the README.
    subtitle = ""
    for line in readme_text.split("\n"):
        stripped = line.strip()
        if stripped and not stripped.startswith("#"):
            subtitle = stripped
            break

    parsed_groups, resources = parse_readme(readme_text)
    categories = [cat for g in parsed_groups for cat in g["categories"]]
    total_entries = sum(c["entry_count"] for c in categories)
    groups = group_categories(parsed_groups, resources)
    entries = extract_entries(categories, groups)

    # Decorate entries with GitHub star metadata where available.
    stars_data = load_stars(website / "data" / "github_stars.json")
    for entry in entries:
        repo_key = extract_github_repo(entry["url"])
        if repo_key and repo_key in stars_data:
            sd = stars_data[repo_key]
            entry["stars"] = sd["stars"]
            entry["owner"] = sd["owner"]
            entry["last_commit_at"] = sd.get("last_commit_at", "")
    entries = sort_entries(entries)

    env = Environment(
        loader=FileSystemLoader(website / "templates"),
        autoescape=True,
    )

    # Start every build from a clean output directory.
    site_dir = website / "output"
    if site_dir.exists():
        shutil.rmtree(site_dir)
    site_dir.mkdir(parents=True)

    tpl_index = env.get_template("index.html")
    (site_dir / "index.html").write_text(
        tpl_index.render(
            categories=categories,
            resources=resources,
            groups=groups,
            subtitle=subtitle,
            entries=entries,
            total_entries=total_entries,
            total_categories=len(categories),
        ),
        encoding="utf-8",
    )

    # Copy static assets alongside the rendered page, if any exist.
    static_src = website / "static"
    static_dst = site_dir / "static"
    if static_src.exists():
        shutil.copytree(static_src, static_dst, dirs_exist_ok=True)

    # Publish the raw README as llms.txt for machine consumers.
    shutil.copy(repo / "README.md", site_dir / "llms.txt")

    print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories + {len(resources)} resources")
    print(f"Total entries: {total_entries}")
    print(f"Output: {site_dir}")


if __name__ == "__main__":
    build(str(Path(__file__).parent.parent))