From b897676e01e4f24a9995dfda48d923eaea726ab3 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:53:39 +0800 Subject: [PATCH 01/23] refactor(fetch_github_stars): remove redundant early-return guard in build_graphql_query The empty-parts check after the loop makes the upfront `if not repos: return ""` guard redundant. Co-Authored-By: Claude --- website/fetch_github_stars.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py index 741ac07d..2ac0ef65 100644 --- a/website/fetch_github_stars.py +++ b/website/fetch_github_stars.py @@ -46,8 +46,6 @@ def save_cache(cache: dict) -> None: def build_graphql_query(repos: list[str]) -> str: """Build a GraphQL query with aliases for up to 100 repos.""" - if not repos: - return "" parts = [] for i, repo in enumerate(repos): owner, name = repo.split("/", 1) From e47d22952862630b1636bb8a6fa322d93ee3d973 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:54:16 +0800 Subject: [PATCH 02/23] refactor(readme_parser): consolidate state reset to tail of flush_group State reset (current_group_name = None, current_group_cats = []) was duplicated in both branches of the early-return guard. Move it after the conditional so it runs exactly once regardless of path. 
Co-Authored-By: Claude --- website/readme_parser.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/website/readme_parser.py b/website/readme_parser.py index c736b7cc..b527c24e 100644 --- a/website/readme_parser.py +++ b/website/readme_parser.py @@ -324,16 +324,13 @@ def _parse_grouped_sections( def flush_group() -> None: nonlocal current_group_name, current_group_cats - if not current_group_cats: - current_group_name = None - current_group_cats = [] - return - name = current_group_name or "Other" - groups.append(ParsedGroup( - name=name, - slug=slugify(name), - categories=list(current_group_cats), - )) + if current_group_cats: + name = current_group_name or "Other" + groups.append(ParsedGroup( + name=name, + slug=slugify(name), + categories=list(current_group_cats), + )) current_group_name = None current_group_cats = [] From a358d45ca48073932ce085a34ab691c8b36124d2 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:54:55 +0800 Subject: [PATCH 03/23] refactor: use datetime.UTC alias instead of timezone.utc Python 3.11 introduced datetime.UTC as a cleaner alias for datetime.timezone.utc. Both build.py and fetch_github_stars.py are updated to use the shorter form. 
Co-Authored-By: Claude --- website/build.py | 4 ++-- website/fetch_github_stars.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/website/build.py b/website/build.py index f1d00c93..9e141a39 100644 --- a/website/build.py +++ b/website/build.py @@ -4,7 +4,7 @@ import json import re import shutil -from datetime import datetime, timezone +from datetime import UTC, datetime from pathlib import Path from typing import TypedDict @@ -189,7 +189,7 @@ def build(repo_root: str) -> None: total_entries=total_entries, total_categories=len(categories), repo_stars=repo_stars, - build_date=datetime.now(timezone.utc).strftime("%B %d, %Y"), + build_date=datetime.now(UTC).strftime("%B %d, %Y"), sponsors=sponsors, ), encoding="utf-8", diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py index 2ac0ef65..bf4666a1 100644 --- a/website/fetch_github_stars.py +++ b/website/fetch_github_stars.py @@ -5,7 +5,7 @@ import json import os import re import sys -from datetime import datetime, timezone +from datetime import UTC, datetime from pathlib import Path import httpx @@ -110,7 +110,7 @@ def main() -> None: print(f"Found {len(current_repos)} GitHub repos in README.md") cache = load_stars(CACHE_FILE) - now = datetime.now(timezone.utc) + now = datetime.now(UTC) # Prune entries not in current README pruned = {k: v for k, v in cache.items() if k in current_repos} From b9236c4925840d78bec05ea07783422fc7b34fac Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:55:32 +0800 Subject: [PATCH 04/23] refactor(fetch_github_stars): drop unnecessary keyword-only marker on fetch_batch client is the only non-first param and is always required, so the * separator adds no clarity. Update the call site accordingly. 
Co-Authored-By: Claude --- website/fetch_github_stars.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py index bf4666a1..f918ae42 100644 --- a/website/fetch_github_stars.py +++ b/website/fetch_github_stars.py @@ -80,9 +80,7 @@ def parse_graphql_response( return result -def fetch_batch( - repos: list[str], *, client: httpx.Client, -) -> dict[str, dict]: +def fetch_batch(repos: list[str], client: httpx.Client) -> dict[str, dict]: """Fetch star data for a batch of repos via GitHub GraphQL API.""" query = build_graphql_query(repos) if not query: @@ -154,7 +152,7 @@ def main() -> None: print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...") try: - results = fetch_batch(batch, client=client) + results = fetch_batch(batch, client) except httpx.HTTPStatusError as e: print(f"HTTP error {e.response.status_code}", file=sys.stderr) if e.response.status_code == 401: From c85f81bb24530061373ae35adcbd46400f4912a4 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:56:06 +0800 Subject: [PATCH 05/23] refactor(build): accept Path directly in build() signature Remove internal str->Path conversion; callers and tests now pass Path objects directly. 
Co-Authored-By: Claude --- website/build.py | 9 ++++----- website/tests/test_build.py | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/website/build.py b/website/build.py index 9e141a39..5f532f3c 100644 --- a/website/build.py +++ b/website/build.py @@ -133,11 +133,10 @@ def format_stars_short(stars: int) -> str: return str(stars) -def build(repo_root: str) -> None: +def build(repo_root: Path) -> None: """Main build: parse README, render single-page HTML via Jinja2 templates.""" - repo = Path(repo_root) - website = repo / "website" - readme_text = (repo / "README.md").read_text(encoding="utf-8") + website = repo_root / "website" + readme_text = (repo_root / "README.md").read_text(encoding="utf-8") subtitle = "" for line in readme_text.split("\n"): @@ -208,4 +207,4 @@ def build(repo_root: str) -> None: if __name__ == "__main__": - build(str(Path(__file__).parent.parent)) + build(Path(__file__).parent.parent) diff --git a/website/tests/test_build.py b/website/tests/test_build.py index 3b406607..878e84b6 100644 --- a/website/tests/test_build.py +++ b/website/tests/test_build.py @@ -108,7 +108,7 @@ class TestBuild: Help! """) self._make_repo(tmp_path, readme) - build(str(tmp_path)) + build(tmp_path) site = tmp_path / "website" / "output" assert (site / "index.html").exists() @@ -135,7 +135,7 @@ class TestBuild: stale.mkdir(parents=True) (stale / "index.html").write_text("old", encoding="utf-8") - build(str(tmp_path)) + build(tmp_path) assert not (tmp_path / "website" / "output" / "categories" / "stale").exists() @@ -162,7 +162,7 @@ class TestBuild: Done. """) self._make_repo(tmp_path, readme) - build(str(tmp_path)) + build(tmp_path) index_html = (tmp_path / "website" / "output" / "index.html").read_text() assert "Alpha" in index_html @@ -186,7 +186,7 @@ class TestBuild: Done. 
""") self._make_repo(tmp_path, readme) - build(str(tmp_path)) + build(tmp_path) index_html = (tmp_path / "website" / "output" / "index.html").read_text() assert "django" in index_html @@ -224,7 +224,7 @@ class TestBuild: } (data_dir / "github_stars.json").write_text(json.dumps(stars), encoding="utf-8") - build(str(tmp_path)) + build(tmp_path) html = (tmp_path / "website" / "output" / "index.html").read_text(encoding="utf-8") # Star-sorted: high-stars (5000) before low-stars (100) before no-stars (None) From 7f4a163534b0fae0eb95947607c4aaa803372a5d Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:56:46 +0800 Subject: [PATCH 06/23] refactor(build): tighten extract_entries parameter types to ParsedSection/ParsedGroup Replace loose list[dict] annotations with concrete TypedDicts imported from readme_parser so ty can verify call-site compatibility. Co-Authored-By: Claude --- website/build.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/build.py b/website/build.py index 5f532f3c..f9deb480 100644 --- a/website/build.py +++ b/website/build.py @@ -9,7 +9,7 @@ from pathlib import Path from typing import TypedDict from jinja2 import Environment, FileSystemLoader -from readme_parser import parse_readme, parse_sponsors +from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors class StarData(TypedDict): @@ -76,8 +76,8 @@ def sort_entries(entries: list[dict]) -> list[dict]: def extract_entries( - categories: list[dict], - groups: list[dict], + categories: list[ParsedSection], + groups: list[ParsedGroup], ) -> list[dict]: """Flatten categories into individual library entries for table display. 
From 7e7de19ef6c3bcbebc4d2227755767ceff2430e2 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:57:30 +0800 Subject: [PATCH 07/23] refactor(build): remove StarData TypedDict, loosen load_stars return to dict[str, dict] Cache-write shape mismatches the TypedDict and callers mix .get() and direct access, so the stricter type was providing false safety. Using dict[str, dict] accurately reflects the actual runtime contract. Co-Authored-By: Claude --- website/build.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/website/build.py b/website/build.py index f9deb480..6f864a11 100644 --- a/website/build.py +++ b/website/build.py @@ -6,19 +6,10 @@ import re import shutil from datetime import UTC, datetime from pathlib import Path -from typing import TypedDict from jinja2 import Environment, FileSystemLoader from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors - -class StarData(TypedDict): - stars: int - owner: str - last_commit_at: str - fetched_at: str - - GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$") SOURCE_TYPE_DOMAINS = { @@ -46,7 +37,7 @@ def extract_github_repo(url: str) -> str | None: return m.group(1) if m else None -def load_stars(path: Path) -> dict[str, StarData]: +def load_stars(path: Path) -> dict[str, dict]: """Load star data from JSON. Returns empty dict if file doesn't exist or is corrupt.""" if path.exists(): try: From e0b0dc9c0af3177cb6397a1930a3cdeeb5d89f53 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:58:27 +0800 Subject: [PATCH 08/23] refactor(readme_parser): add _href helper to narrow attrGet return type Extracts a _href(link) helper that returns link.attrGet('href') narrowed to str (falling back to '') instead of the raw str|int|float|None union. Replaces all four attrGet('href') or '' call sites with _href(), fixing ty errors where the widened union leaked into TypedDict url fields. 
Co-Authored-By: Claude --- website/readme_parser.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/website/readme_parser.py b/website/readme_parser.py index b527c24e..ca650985 100644 --- a/website/readme_parser.py +++ b/website/readme_parser.py @@ -72,7 +72,7 @@ def render_inline_html(children: list[SyntaxTreeNode]) -> str: case "softbreak": parts.append(" ") case "link": - href = str(escape(child.attrGet("href") or "")) + href = str(escape(_href(child))) inner = render_inline_html(child.children) parts.append( f'{inner}' @@ -147,6 +147,12 @@ def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None: return None +def _href(link: SyntaxTreeNode) -> str: + """Return the link's href attribute as a string, or '' if missing.""" + href = link.attrGet("href") + return href if isinstance(href, str) else "" + + def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None: """Find the inline node in a list_item's paragraph.""" para = _find_child(node, "paragraph") @@ -223,7 +229,7 @@ def _parse_list_entries( # Entry with a link name = render_inline_text(first_link.children) - url = first_link.attrGet("href") or "" + url = _href(first_link) desc_html = _extract_description_html(inline, first_link) # Collect also_see from nested bullet_list @@ -239,7 +245,7 @@ def _parse_list_entries( if sub_link: also_see.append(AlsoSee( name=render_inline_text(sub_link.children), - url=sub_link.attrGet("href") or "", + url=_href(sub_link), )) entries.append(ParsedEntry( @@ -373,7 +379,7 @@ def _parse_sponsor_item(inline: SyntaxTreeNode) -> ParsedSponsor | None: if link is None: return None name = render_inline_text(link.children) - url = link.attrGet("href") or "" + url = _href(link) split_idx = None for i, child in enumerate(inline.children): From 486fbf218505669474206d73ada2003100855418 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:59:16 +0800 Subject: [PATCH 09/23] refactor(readme_parser): replace 
_find_first_link with _find_child(inline, "link") The private helper duplicated _find_child with a hardcoded type filter. Remove it and call the general helper directly at both call sites. Co-Authored-By: Claude --- website/readme_parser.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/website/readme_parser.py b/website/readme_parser.py index ca650985..0aa5e2da 100644 --- a/website/readme_parser.py +++ b/website/readme_parser.py @@ -161,14 +161,6 @@ def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None: return _find_child(para, "inline") -def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None: - """Find the first link node among inline children.""" - for child in inline.children: - if child.type == "link": - return child - return None - - def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool: """Check if the link is the first child of inline (a real entry, not a subcategory label).""" return bool(inline.children) and inline.children[0] is link @@ -212,7 +204,7 @@ def _parse_list_entries( if inline is None: continue - first_link = _find_first_link(inline) + first_link = _find_child(inline, "link") if first_link is None or not _is_leading_link(inline, first_link): # Subcategory label: take text before the first link, strip trailing separators @@ -241,7 +233,7 @@ def _parse_list_entries( continue sub_inline = _find_inline(sub_item) if sub_inline: - sub_link = _find_first_link(sub_inline) + sub_link = _find_child(sub_inline, "link") if sub_link: also_see.append(AlsoSee( name=render_inline_text(sub_link.children), From 85b55efb2830471aa922648c6eefa6cbea58eaeb Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 21:59:59 +0800 Subject: [PATCH 10/23] refactor(readme_parser): inline _is_leading_link at its call site The helper was only called once and the bool(inline.children) guard was redundant: first_link being non-None already implies inline.children is non-empty. 
Co-Authored-By: Claude --- website/readme_parser.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/website/readme_parser.py b/website/readme_parser.py index 0aa5e2da..1c067d6c 100644 --- a/website/readme_parser.py +++ b/website/readme_parser.py @@ -161,11 +161,6 @@ def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None: return _find_child(para, "inline") -def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool: - """Check if the link is the first child of inline (a real entry, not a subcategory label).""" - return bool(inline.children) and inline.children[0] is link - - def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str: """Extract description HTML from inline content after the first link. @@ -206,7 +201,7 @@ def _parse_list_entries( first_link = _find_child(inline, "link") - if first_link is None or not _is_leading_link(inline, first_link): + if first_link is None or inline.children[0] is not first_link: # Subcategory label: take text before the first link, strip trailing separators pre_link = [] for child in inline.children: From 39b65bc9941de8b61f4710780472301fb9210313 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:00:45 +0800 Subject: [PATCH 11/23] refactor(build): inline format_stars_short into its call site The helper only appeared once and the logic is two lines, so the named function added indirection without clarity. Removed the four dedicated unit tests that covered the function directly. 
Co-Authored-By: Claude --- website/build.py | 12 ++++-------- website/tests/test_build.py | 20 -------------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/website/build.py b/website/build.py index 6f864a11..8689ca90 100644 --- a/website/build.py +++ b/website/build.py @@ -117,13 +117,6 @@ def extract_entries( return entries -def format_stars_short(stars: int) -> str: - """Format star count as compact string like '230k'.""" - if stars >= 1000: - return f"{stars // 1000}k" - return str(stars) - - def build(repo_root: Path) -> None: """Main build: parse README, render single-page HTML via Jinja2 templates.""" website = repo_root / "website" @@ -146,7 +139,10 @@ def build(repo_root: Path) -> None: stars_data = load_stars(website / "data" / "github_stars.json") repo_self = stars_data.get("vinta/awesome-python", {}) - repo_stars = format_stars_short(repo_self["stars"]) if "stars" in repo_self else None + repo_stars = None + if "stars" in repo_self: + stars_val = repo_self["stars"] + repo_stars = f"{stars_val // 1000}k" if stars_val >= 1000 else str(stars_val) for entry in entries: repo_key = extract_github_repo(entry["url"]) diff --git a/website/tests/test_build.py b/website/tests/test_build.py index 878e84b6..0b22609a 100644 --- a/website/tests/test_build.py +++ b/website/tests/test_build.py @@ -10,7 +10,6 @@ from build import ( detect_source_type, extract_entries, extract_github_repo, - format_stars_short, load_stars, sort_entries, ) @@ -363,25 +362,6 @@ class TestDetectSourceType: assert detect_source_type("https://github.com/org/repo/wiki") is None -# --------------------------------------------------------------------------- -# format_stars_short -# --------------------------------------------------------------------------- - - -class TestFormatStarsShort: - def test_under_1000(self): - assert format_stars_short(500) == "500" - - def test_exactly_1000(self): - assert format_stars_short(1000) == "1k" - - def test_large_number(self): - assert 
format_stars_short(52000) == "52k" - - def test_zero(self): - assert format_stars_short(0) == "0" - - # --------------------------------------------------------------------------- # extract_entries # --------------------------------------------------------------------------- From 95115f794905037e4d0b24725bf2abea487945e3 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:01:35 +0800 Subject: [PATCH 12/23] refactor(fetch_github_stars): replace manual slice loop with itertools.batched Use itertools.batched (stdlib since Python 3.12, targeted by this project) instead of manual range(0, N, BATCH_SIZE) slicing. Loosen fetch_batch, build_graphql_query, and parse_graphql_response signatures from list[str] to Sequence[str] since batched yields tuples. Co-Authored-By: Claude --- website/fetch_github_stars.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py index f918ae42..6c33adc7 100644 --- a/website/fetch_github_stars.py +++ b/website/fetch_github_stars.py @@ -5,7 +5,9 @@ import json import os import re import sys +from collections.abc import Sequence from datetime import UTC, datetime +from itertools import batched from pathlib import Path import httpx @@ -44,7 +46,7 @@ def save_cache(cache: dict) -> None: ) -def build_graphql_query(repos: list[str]) -> str: +def build_graphql_query(repos: Sequence[str]) -> str: """Build a GraphQL query with aliases for up to 100 repos.""" parts = [] for i, repo in enumerate(repos): @@ -62,7 +64,7 @@ def build_graphql_query(repos: list[str]) -> str: def parse_graphql_response( data: dict, - repos: list[str], + repos: Sequence[str], ) -> dict[str, dict]: """Parse GraphQL response into {owner/repo: {stars, owner}} dict.""" result = {} @@ -80,7 +82,7 @@ def parse_graphql_response( return result -def fetch_batch(repos: list[str], client: httpx.Client) -> dict[str, dict]: +def fetch_batch(repos: Sequence[str], client: httpx.Client) -> 
dict[str, dict]: """Fetch star data for a batch of repos via GitHub GraphQL API.""" query = build_graphql_query(repos) if not query: @@ -146,9 +148,7 @@ def main() -> None: transport=httpx.HTTPTransport(retries=2), timeout=30, ) as client: - for i in range(0, len(to_fetch), BATCH_SIZE): - batch = to_fetch[i : i + BATCH_SIZE] - batch_num = i // BATCH_SIZE + 1 + for batch_num, batch in enumerate(batched(to_fetch, BATCH_SIZE), 1): print(f"Fetching batch {batch_num}/{total_batches} ({len(batch)} repos)...") try: From 6ae7c89688cd3f635d7c1927d34ca7069bef87c9 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:02:14 +0800 Subject: [PATCH 13/23] refactor: replace manual total_seconds()/3600 with timedelta comparison Use timedelta(hours=CACHE_MAX_AGE_HOURS) so the cache-age check reads at the intended hours unit directly, removing the conversion arithmetic. Co-Authored-By: Claude --- website/fetch_github_stars.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py index 6c33adc7..c93ef4ec 100644 --- a/website/fetch_github_stars.py +++ b/website/fetch_github_stars.py @@ -6,7 +6,7 @@ import os import re import sys from collections.abc import Sequence -from datetime import UTC, datetime +from datetime import UTC, datetime, timedelta from itertools import batched from pathlib import Path @@ -119,13 +119,13 @@ def main() -> None: cache = pruned # Determine which repos need fetching (missing or stale) + max_age = timedelta(hours=CACHE_MAX_AGE_HOURS) to_fetch = [] for repo in sorted(current_repos): entry = cache.get(repo) if entry and "fetched_at" in entry: fetched = datetime.fromisoformat(entry["fetched_at"]) - age_hours = (now - fetched).total_seconds() / 3600 - if age_hours < CACHE_MAX_AGE_HOURS: + if now - fetched < max_age: continue to_fetch.append(repo) From 0630ee973be495768cadded90faa2f45333b27e1 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:04:14 +0800 
Subject: [PATCH 14/23] refactor(build): flatten extract_entries and annotate result dict Collapse the if-seen/else-new branches so the category/group/subcategory merge logic runs once per entry unconditionally, appending to empty lists on first sight instead of duplicating the append logic in the else branch. Annotate seen and entries as dict[str, Any] so ty can resolve the mixed value types (str, list, None) in each entry dict. Co-Authored-By: Claude --- website/build.py | 52 ++++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/website/build.py b/website/build.py index 8689ca90..c223ef18 100644 --- a/website/build.py +++ b/website/build.py @@ -6,6 +6,7 @@ import re import shutil from datetime import UTC, datetime from pathlib import Path +from typing import Any from jinja2 import Environment, FileSystemLoader from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors @@ -75,45 +76,40 @@ def extract_entries( Entries appearing in multiple categories are merged into a single entry with lists of categories and groups. 
""" - cat_to_group: dict[str, str] = {} - for group in groups: - for cat in group["categories"]: - cat_to_group[cat["name"]] = group["name"] + cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]} - seen: dict[tuple[str, str], dict] = {} # (url, name) -> entry - entries: list[dict] = [] + seen: dict[tuple[str, str], dict[str, Any]] = {} # (url, name) -> entry + entries: list[dict[str, Any]] = [] for cat in categories: group_name = cat_to_group.get(cat["name"], "Other") for entry in cat["entries"]: - url = entry["url"] - key = (url, entry["name"]) - if key in seen: - existing = seen[key] - if cat["name"] not in existing["categories"]: - existing["categories"].append(cat["name"]) - if group_name not in existing["groups"]: - existing["groups"].append(group_name) - subcat = entry["subcategory"] - if subcat: - scoped = f"{cat['name']} > {subcat}" - if not any(s["value"] == scoped for s in existing["subcategories"]): - existing["subcategories"].append({"name": subcat, "value": scoped}) - else: - merged = { + key = (entry["url"], entry["name"]) + existing: dict[str, Any] | None = seen.get(key) + if existing is None: + existing = { "name": entry["name"], - "url": url, + "url": entry["url"], "description": entry["description"], - "categories": [cat["name"]], - "groups": [group_name], - "subcategories": [{"name": entry["subcategory"], "value": f"{cat['name']} > {entry['subcategory']}"}] if entry["subcategory"] else [], + "categories": [], + "groups": [], + "subcategories": [], "stars": None, "owner": None, "last_commit_at": None, - "source_type": detect_source_type(url), + "source_type": detect_source_type(entry["url"]), "also_see": entry["also_see"], } - seen[key] = merged - entries.append(merged) + seen[key] = existing + entries.append(existing) + if cat["name"] not in existing["categories"]: + existing["categories"].append(cat["name"]) + if group_name not in existing["groups"]: + existing["groups"].append(group_name) + subcat = 
entry["subcategory"] + if subcat: + scoped = f"{cat['name']} > {subcat}" + if not any(s["value"] == scoped for s in existing["subcategories"]): + existing["subcategories"].append({"name": subcat, "value": scoped}) return entries From 420bf8cd9d873139b8161f7537014b6bd75d4160 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:05:35 +0800 Subject: [PATCH 15/23] refactor(readme_parser): collapse render_inline_html/text into _render_inline helper Merge the two inline-renderer implementations into a single _render_inline(children, *, html) function that handles both output modes. The original public functions become one-line wrappers so all dispatch logic lives in one place. html_inline handling is now explicit and unchanged in behavior: the html=True path escapes the raw content, while the plain-text path silently drops it. Co-Authored-By: Claude --- website/readme_parser.py | 54 +++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/website/readme_parser.py b/website/readme_parser.py index 1c067d6c..cbbc30a2 100644 --- a/website/readme_parser.py +++ b/website/readme_parser.py @@ -62,46 +62,44 @@ def slugify(name: str) -> str: # --- Inline renderers ------------------------------------------------------- -def render_inline_html(children: list[SyntaxTreeNode]) -> str: - """Render inline AST nodes to HTML with proper escaping.""" +def _render_inline(children: list[SyntaxTreeNode], *, html: bool) -> str: + """Render inline AST nodes to HTML or plain text.""" parts: list[str] = [] for child in children: match child.type: case "text": - parts.append(str(escape(child.content))) + parts.append(str(escape(child.content)) if html else child.content) + case "html_inline": + if html: + parts.append(str(escape(child.content))) case "softbreak": parts.append(" ") - case "link": - href = str(escape(_href(child))) - inner = render_inline_html(child.children) - parts.append( - f'{inner}' - ) - case "em": - 
parts.append(f"{render_inline_html(child.children)}") - case "strong": - parts.append(f"{render_inline_html(child.children)}") case "code_inline": - parts.append(f"{escape(child.content)}") - case "html_inline": - parts.append(str(escape(child.content))) + parts.append(f"{escape(child.content)}" if html else child.content) + case "link": + inner = _render_inline(child.children, html=html) + if html: + href = str(escape(_href(child))) + parts.append(f'{inner}') + else: + parts.append(inner) + case "em": + inner = _render_inline(child.children, html=html) + parts.append(f"{inner}" if html else inner) + case "strong": + inner = _render_inline(child.children, html=html) + parts.append(f"{inner}" if html else inner) return "".join(parts) +def render_inline_html(children: list[SyntaxTreeNode]) -> str: + """Render inline AST nodes to HTML with proper escaping.""" + return _render_inline(children, html=True) + + def render_inline_text(children: list[SyntaxTreeNode]) -> str: """Render inline AST nodes to plain text (links become their text).""" - parts: list[str] = [] - for child in children: - match child.type: - case "text": - parts.append(child.content) - case "softbreak": - parts.append(" ") - case "code_inline": - parts.append(child.content) - case "em" | "strong" | "link": - parts.append(render_inline_text(child.children)) - return "".join(parts) + return _render_inline(children, html=False) # --- AST helpers ------------------------------------------------------------- From 92936964b666bbba7dc03505b4f200f68307614e Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:06:41 +0800 Subject: [PATCH 16/23] refactor(readme_parser): fuse _parse_sponsor_item into single pass Eliminate the redundant _find_link_deep precheck by merging the two walks over inline.children into one loop that simultaneously locates the link and records its top-level index. 
Co-Authored-By: Claude --- website/readme_parser.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/website/readme_parser.py b/website/readme_parser.py index cbbc30a2..10d26886 100644 --- a/website/readme_parser.py +++ b/website/readme_parser.py @@ -360,22 +360,17 @@ def _find_link_deep(node: SyntaxTreeNode) -> SyntaxTreeNode | None: def _parse_sponsor_item(inline: SyntaxTreeNode) -> ParsedSponsor | None: """Parse `**[name](url)**: description` (or `[name](url) - description`).""" - link = _find_link_deep(inline) - if link is None: - return None - name = render_inline_text(link.children) - url = _href(link) - - split_idx = None - for i, child in enumerate(inline.children): - if child is link or _find_link_deep(child) is link: - split_idx = i - break - if split_idx is None: - return None - desc_html = render_inline_html(inline.children[split_idx + 1 :]) - desc_html = _SPONSOR_SEP_RE.sub("", desc_html) - return ParsedSponsor(name=name, url=url, description=desc_html) + for split_idx, child in enumerate(inline.children): + link = child if child.type == "link" else _find_link_deep(child) + if link is None: + continue + desc_html = render_inline_html(inline.children[split_idx + 1 :]) + return ParsedSponsor( + name=render_inline_text(link.children), + url=_href(link), + description=_SPONSOR_SEP_RE.sub("", desc_html), + ) + return None def parse_sponsors(text: str) -> list[ParsedSponsor]: From f10337bb319654f8f45bd2abd2d4cf79eb2c9b3f Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:07:16 +0800 Subject: [PATCH 17/23] refactor(tests): modernize test_readme_parser to use pathlib.Path Replace os.path.join + manual open() with Path(__file__).resolve().parents[2] and Path.read_text() for locating and reading README.md. 
Co-Authored-By: Claude --- website/tests/test_readme_parser.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/website/tests/test_readme_parser.py b/website/tests/test_readme_parser.py index 6999f155..0b4940a8 100644 --- a/website/tests/test_readme_parser.py +++ b/website/tests/test_readme_parser.py @@ -1,7 +1,7 @@ """Tests for the readme_parser module.""" -import os import textwrap +from pathlib import Path import pytest @@ -437,9 +437,8 @@ class TestParseSectionEntries: class TestParseRealReadme: @pytest.fixture(autouse=True) def load_readme(self): - readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md") - with open(readme_path, encoding="utf-8") as f: - self.readme_text = f.read() + readme_path = Path(__file__).resolve().parents[2] / "README.md" + self.readme_text = readme_path.read_text(encoding="utf-8") self.groups = parse_readme(self.readme_text) self.cats = [c for g in self.groups for c in g["categories"]] From 257b69a93289feac8ec5196917ed3682df77f492 Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:40:31 +0800 Subject: [PATCH 18/23] style(sponsors): bump section-label to --text-lg within sponsor scope Override font-size to var(--text-lg) inside .sponsor-meta so the Sponsors heading is larger, while the shared .section-label class remains --text-sm everywhere else. 
Co-Authored-By: Claude --- website/static/style.css | 1 + 1 file changed, 1 insertion(+) diff --git a/website/static/style.css b/website/static/style.css index c8a96fb4..5fa96638 100644 --- a/website/static/style.css +++ b/website/static/style.css @@ -414,6 +414,7 @@ kbd { .sponsor-meta .section-label { margin-bottom: 0; + font-size: var(--text-lg); } .sponsor-become { From f3c8377bd45d675076767e524c457bf50004593f Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 19 Apr 2026 22:43:19 +0800 Subject: [PATCH 19/23] chore: remove arrow from 'Become a sponsor' link and its CSS rules Co-Authored-By: Claude --- website/static/style.css | 8 -------- website/templates/index.html | 1 - 2 files changed, 9 deletions(-) diff --git a/website/static/style.css b/website/static/style.css index 5fa96638..6edbc46c 100644 --- a/website/static/style.css +++ b/website/static/style.css @@ -438,14 +438,6 @@ kbd { border-bottom-color: var(--accent); } -.sponsor-become-arrow { - transition: transform 180ms cubic-bezier(0.22, 1, 0.36, 1); -} - -.sponsor-become:hover .sponsor-become-arrow { - transform: translateX(0.3rem); -} - .sponsor-list { list-style: none; padding: 0; diff --git a/website/templates/index.html b/website/templates/index.html index 87cfbf29..53e968d3 100644 --- a/website/templates/index.html +++ b/website/templates/index.html @@ -77,7 +77,6 @@ rel="noopener" > Become a sponsor -