fix(website): type build template entries

This commit is contained in:
Vinta Chen
2026-05-03 12:08:15 +08:00
parent 38b54caabb
commit a068219684
2 changed files with 110 additions and 103 deletions
+70 -48
View File
@@ -9,10 +9,10 @@ from collections import Counter
from collections.abc import Sequence
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from typing import TypedDict
from jinja2 import Environment, FileSystemLoader
from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors, slugify
from readme_parser import AlsoSee, ParsedGroup, ParsedSection, parse_readme, parse_sponsors, slugify
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
MARKDOWN_LINK_RE = re.compile(r"\[[^\]]+\]\(([^)\s]+)\)")
@@ -36,6 +36,37 @@ SOURCE_TYPE_DOMAINS = {
}
class TemplateSubcategory(TypedDict):
name: str
value: str
slug: str
url: str
class TemplateEntry(TypedDict):
name: str
url: str
description: str
categories: list[str]
groups: list[str]
subcategories: list[TemplateSubcategory]
stars: int | None
owner: str | None
last_commit_at: str | None
source_type: str | None
also_see: list[AlsoSee]
class SyntheticCategory(TypedDict):
name: str
slug: str
description: str
description_html: str
TemplateCategory = ParsedSection | SyntheticCategory
def detect_source_type(url: str) -> str | None:
"""Detect source type from URL domain. Returns None for GitHub URLs."""
if GITHUB_REPO_URL_RE.match(url):
@@ -64,13 +95,13 @@ def load_stars(path: Path) -> dict[str, dict]:
return {}
def sort_entries(entries: list[dict]) -> list[dict]:
def sort_entries(entries: Sequence[TemplateEntry]) -> list[TemplateEntry]:
"""Sort entries by stars descending, then name ascending.
Three tiers: starred entries first, stdlib second, other non-starred last.
"""
def sort_key(entry: dict) -> tuple[int, int, int, str]:
def sort_key(entry: TemplateEntry) -> tuple[int, int, int, str]:
stars = entry["stars"]
name = entry["name"].lower()
if stars is not None:
@@ -84,13 +115,7 @@ def sort_entries(entries: list[dict]) -> list[dict]:
def build_robots_txt() -> str:
return (
"User-agent: *\n"
"Content-Signal: search=yes, ai-input=yes, ai-train=yes\n"
"Allow: /\n"
"\n"
f"Sitemap: {SITEMAP_URL}\n"
)
return f"User-agent: *\nContent-Signal: search=yes, ai-input=yes, ai-train=yes\nAllow: /\n\nSitemap: {SITEMAP_URL}\n"
def category_path(category: ParsedSection) -> str:
@@ -117,7 +142,7 @@ def subcategory_public_url(category_slug: str, subcategory_slug: str) -> str:
return f"{SITE_URL}categories/{category_slug}/{subcategory_slug}/"
def synthetic_category(name: str, slug: str) -> dict[str, str]:
def synthetic_category(name: str, slug: str) -> SyntheticCategory:
return {"name": name, "slug": slug, "description": "", "description_html": ""}
@@ -202,7 +227,7 @@ def annotate_entries_with_stars(
if not entry or "stars" not in entry:
continue
stripped = line.rstrip("\n")
ending = line[len(stripped):]
ending = line[len(stripped) :]
annotated = f"{stripped} ({format_stars(entry['stars'])}){ending}"
break
out.append(annotated)
@@ -233,7 +258,7 @@ def remove_sponsors_section(markdown: str) -> str:
def extract_entries(
categories: list[ParsedSection],
groups: list[ParsedGroup],
) -> list[dict]:
) -> list[TemplateEntry]:
"""Flatten categories into individual library entries for table display.
Entries appearing in multiple categories are merged into a single entry
@@ -241,27 +266,27 @@ def extract_entries(
"""
cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]}
seen: dict[tuple[str, str], dict[str, Any]] = {} # (url, name) -> entry
entries: list[dict[str, Any]] = []
seen: dict[tuple[str, str], TemplateEntry] = {} # (url, name) -> entry
entries: list[TemplateEntry] = []
for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other")
for entry in cat["entries"]:
key = (entry["url"], entry["name"])
existing: dict[str, Any] | None = seen.get(key)
existing = seen.get(key)
if existing is None:
existing = {
"name": entry["name"],
"url": entry["url"],
"description": entry["description"],
"categories": [],
"groups": [],
"subcategories": [],
"stars": None,
"owner": None,
"last_commit_at": None,
"source_type": detect_source_type(entry["url"]),
"also_see": entry["also_see"],
}
existing = TemplateEntry(
name=entry["name"],
url=entry["url"],
description=entry["description"],
categories=[],
groups=[],
subcategories=[],
stars=None,
owner=None,
last_commit_at=None,
source_type=detect_source_type(entry["url"]),
also_see=entry["also_see"],
)
seen[key] = existing
entries.append(existing)
if cat["name"] not in existing["categories"]:
@@ -273,12 +298,14 @@ def extract_entries(
scoped = f"{cat['name']} > {subcat}"
if not any(s["value"] == scoped for s in existing["subcategories"]):
sub_slug = slugify(subcat)
existing["subcategories"].append({
"name": subcat,
"value": scoped,
"slug": sub_slug,
"url": f"/categories/{cat['slug']}/{sub_slug}/",
})
existing["subcategories"].append(
TemplateSubcategory(
name=subcat,
value=scoped,
slug=sub_slug,
url=f"/categories/{cat['slug']}/{sub_slug}/",
)
)
return entries
@@ -303,10 +330,7 @@ def build(repo_root: Path) -> None:
all_top_level_slugs = cat_slugs + group_slugs + [BUILTIN_SLUG]
duplicates = {s for s, n in Counter(all_top_level_slugs).items() if n > 1}
if duplicates:
raise ValueError(
f"slug collision in /categories/ namespace: {sorted(duplicates)}. "
"Rename a category or group so their slugs differ."
)
raise ValueError(f"slug collision in /categories/ namespace: {sorted(duplicates)}. Rename a category or group so their slugs differ.")
total_entries = sum(c["entry_count"] for c in categories)
entries = extract_entries(categories, parsed_groups)
build_date = datetime.now(UTC)
@@ -377,14 +401,14 @@ def build(repo_root: Path) -> None:
categories_dir = site_dir / "categories"
def render_category(
category: dict,
category: TemplateCategory,
*,
category_url: str,
entries: list[dict],
entries: Sequence[TemplateEntry],
current_path: str,
page_dir: Path,
parent_category: dict | None = None,
group_categories: list | None = None,
parent_category: ParsedSection | None = None,
group_categories: Sequence[ParsedSection] | None = None,
) -> None:
page_dir.mkdir(parents=True, exist_ok=True)
(page_dir / "index.html").write_text(
@@ -443,7 +467,7 @@ def build(repo_root: Path) -> None:
encoding="utf-8",
)
subcat_to_entries: dict[str, list[dict]] = {}
subcat_to_entries: dict[str, list[TemplateEntry]] = {}
subcat_meta: dict[str, tuple[str, str, str]] = {} # value -> (cat_slug, sub_slug, sub_name)
cat_slug_by_url_prefix = {f"/categories/{c['slug']}/": c["slug"] for c in categories}
cat_by_slug = {c["slug"]: c for c in categories}
@@ -472,9 +496,7 @@ def build(repo_root: Path) -> None:
if static_src.exists():
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
markdown_index = annotate_entries_with_stars(
remove_sponsors_section(readme_text), stars_data
)
markdown_index = annotate_entries_with_stars(remove_sponsors_section(readme_text), stars_data)
llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8")
llms_txt = build_llms_txt(llms_template, readme_text, stars_data)
(site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8")