Merge pull request #2971 from vinta/feature/markdown-it-py-parser

feat: replace regex README parser with markdown-it-py AST parser
2026-05-28 21:15:53 +08:00 · 2026-03-18 20:35:06 +08:00
parent c5caa5a5e1 280f250ce0
commit 539edc4e20
15 changed files with 2529 additions and 1443 deletions
@@ -1,17 +1,24 @@
 -include .env
 export
-site_install:
+install:
-	uv sync --no-dev
+	uv sync
-site_fetch_stats:
+fetch_stats:
 	uv run python website/fetch_github_stars.py
-site_build:
+test:
 	uv run pytest website/tests/ -v
 build:
 	uv run python website/build.py
-site_preview: site_build
+preview: build
-	python -m http.server -d website/output/ 8000
+	@echo "Check the website on http://localhost:8000"
-
+	uv run watchmedo shell-command \
-site_deploy: site_build
+		--patterns='*.md;*.html;*.css;*.js;*.py' \
-	@echo "Deploy via GitHub Actions (push to master)"
+		--recursive \
 		--wait --drop \
 		--command='uv run python website/build.py' \
 		README.md website/templates website/static website/data & \
 	python -m http.server -b 127.0.0.1 -d website/output/ 8000
@@ -2,22 +2,30 @@
 name = "awesome-python"
 version = "0.1.0"
 description = "An opinionated list of awesome Python frameworks, libraries, software and resources."
 authors = [{ name = "Vinta Chen", email = "vinta.chen@gmail.com" }]
 readme = "README.md"
 license = "MIT"
 requires-python = ">=3.13"
-dependencies = [
+dependencies = []
-    "httpx==0.28.1",
+
-    "jinja2==3.1.6",
+[project.urls]
-    "markdown==3.10.2",
+Homepage = "https://awesome-python.com/"
-]
+Repository = "https://github.com/vinta/awesome-python"
 [dependency-groups]
 build = ["httpx==0.28.1", "jinja2==3.1.6", "markdown-it-py==4.0.0"]
 lint = ["ruff==0.15.6"]
 test = ["pytest==9.0.2"]
 dev = [
-    "pytest==9.0.2",
+    { include-group = "build" },
-    "ruff==0.15.6",
+    { include-group = "lint" },
    { include-group = "test" },
    "watchdog==6.0.0",
 ]
 [tool.pytest.ini_options]
 testpaths = ["website/tests"]
 pythonpath = ["website"]
 [tool.ruff]
-target-version = "py313"
+line-length = 200
 line-length = 100
@@ -18,30 +18,46 @@ wheels = [
 name = "awesome-python"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
    { name = "httpx" },
    { name = "jinja2" },
    { name = "markdown" },
 ]
 [package.dev-dependencies]
 build = [
    { name = "httpx" },
    { name = "jinja2" },
    { name = "markdown-it-py" },
 ]
 dev = [
    { name = "httpx" },
    { name = "jinja2" },
    { name = "markdown-it-py" },
    { name = "pytest" },
    { name = "ruff" },
    { name = "watchdog" },
 ]
 lint = [
    { name = "ruff" },
 ]
 test = [
    { name = "pytest" },
 ]
 [package.metadata]
 requires-dist = [
    { name = "httpx", specifier = "==0.28.1" },
    { name = "jinja2", specifier = "==3.1.6" },
    { name = "markdown", specifier = "==3.10.2" },
 ]
 [package.metadata.requires-dev]
 build = [
    { name = "httpx", specifier = "==0.28.1" },
    { name = "jinja2", specifier = "==3.1.6" },
    { name = "markdown-it-py", specifier = "==4.0.0" },
 ]
 dev = [
    { name = "httpx", specifier = "==0.28.1" },
    { name = "jinja2", specifier = "==3.1.6" },
    { name = "markdown-it-py", specifier = "==4.0.0" },
    { name = "pytest", specifier = "==9.0.2" },
    { name = "ruff", specifier = "==0.15.6" },
    { name = "watchdog", specifier = "==6.0.0" },
 ]
 lint = [{ name = "ruff", specifier = "==0.15.6" }]
 test = [{ name = "pytest", specifier = "==9.0.2" }]
 [[package]]
 name = "certifi"
@@ -129,12 +145,15 @@ wheels = [
 ]
 [[package]]
-name = "markdown"
+name = "markdown-it-py"
-version = "3.10.2"
+version = "4.0.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/2b/f4/69fa6ed85ae003c2378ffa8f6d2e3234662abd02c10d216c0ba96081a238/markdown-3.10.2.tar.gz", hash = "sha256:994d51325d25ad8aa7ce4ebaec003febcce822c3f8c911e3b17c52f7f589f950", size = 368805, upload-time = "2026-02-09T14:57:26.942Z" }
+dependencies = [
    { name = "mdurl" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/de/1f/77fa3081e4f66ca3576c896ae5d31c3002ac6607f9747d2e3aa49227e464/markdown-3.10.2-py3-none-any.whl", hash = "sha256:e91464b71ae3ee7afd3017d9f358ef0baf158fd9a298db92f1d4761133824c36", size = 108180, upload-time = "2026-02-09T14:57:25.787Z" },
+    { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
 ]
 [[package]]
@@ -189,6 +208,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
 ]
 [[package]]
 name = "mdurl"
 version = "0.1.2"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
 wheels = [
    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
 ]
 [[package]]
 name = "packaging"
 version = "26.0"
@@ -256,3 +284,24 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" },
    { url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" },
 ]
 [[package]]
 name = "watchdog"
 version = "6.0.0"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload-time = "2024-11-01T14:07:13.037Z" }
 wheels = [
    { url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480, upload-time = "2024-11-01T14:06:42.952Z" },
    { url = "https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451, upload-time = "2024-11-01T14:06:45.084Z" },
    { url = "https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057, upload-time = "2024-11-01T14:06:47.324Z" },
    { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" },
    { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" },
    { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" },
    { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" },
    { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" },
    { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" },
    { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" },
    { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" },
    { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" },
    { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" },
 ]
@@ -7,9 +7,10 @@ import shutil
 from pathlib import Path
 from typing import TypedDict
 import markdown
 from jinja2 import Environment, FileSystemLoader
 from readme_parser import parse_readme, slugify
 # Thematic grouping of categories. Each category name must match exactly
 # as it appears in README.md (the ## heading text).
 SECTION_GROUPS: list[tuple[str, list[str]]] = [
@@ -67,217 +68,6 @@ SECTION_GROUPS: list[tuple[str, list[str]]] = [
 ]
 def slugify(name: str) -> str:
    """Turn a category name into a lowercase, dash-separated URL slug.

    Characters other than a-z, 0-9, whitespace and "-" are dropped, each
    whitespace run becomes a dash, and repeated dashes are collapsed into one.
    """
    lowered = name.lower()
    kept = re.sub(r"[^a-z0-9\s-]", "", lowered).strip()
    dashed = re.sub(r"[\s]+", "-", kept)
    return re.sub(r"-+", "-", dashed)
 def count_entries(content: str) -> int:
    """Return how many lines of a content block are list items that open a link.

    A line counts when, after optional leading whitespace, it starts with
    "-" or "*", at least one whitespace character, and then "[".
    """
    total = 0
    for row in content.split("\n"):
        if re.match(r"\s*[-*]\s+\[", row) is not None:
            total += 1
    return total
 def extract_preview(content: str, *, max_names: int = 4) -> str:
    """Build a comma-separated preview from the first linked entry names.

    Only list items whose leading whitespace is at most 3 characters long are
    used; deeper sub-entries and items without a link are skipped. Collection
    stops as soon as ``max_names`` names have been gathered.
    """
    entry_re = re.compile(r"^(\s*)[-*]\s+\[([^\]]+)\]", re.MULTILINE)
    picked: list[str] = []
    for found in entry_re.finditer(content):
        indent, label = found.groups()
        if len(indent) <= 3:
            picked.append(label)
            # Checked after appending, exactly like the original loop.
            if len(picked) >= max_names:
                break
    return ", ".join(picked)
 def render_content_html(content: str) -> str:
    """Render a category's markdown list into HTML ``<div>`` rows.

    Each line of ``content`` is classified on its own:

    - A list item containing no "[" (e.g. "- Synchronous") is a subcategory
      label and becomes ``<div class="subcat">``.
    - ``* [name](url) - description`` and ``* [name](url)`` become
      ``<div class="entry">`` when indented by at most 3 characters, or
      ``<div class="entry-sub">`` (link only, description dropped) when
      indented deeper.
    - Any other line is ignored.

    Names, URLs, descriptions and labels are HTML-escaped before being
    interpolated, so README text cannot inject markup or break out of the
    ``href`` attribute.

    Args:
        content: Markdown body of a single category.

    Returns:
        The generated ``<div>`` elements joined by newlines; an empty string
        when no line matches.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from html import escape

    out: list[str] = []
    for line in content.split("\n"):
        stripped = line.strip()
        # Subcategory labels: list items without any link syntax.
        label = re.match(r"^[-*]\s+(.+)$", stripped)
        if label and "[" not in stripped:
            out.append(f'<div class="subcat">{escape(label.group(1))}</div>')
            continue
        # Try the "link + description" form first, then the link-only form.
        m = re.match(
            r"^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*[-\u2013\u2014]\s*(.+)$",
            line,
        )
        desc = m.group(3) if m else None
        if m is None:
            m = re.match(r"^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*$", line)
        if m is None:
            continue
        link = f'<a href="{escape(m.group(2))}">{escape(m.group(1))}</a>'
        indent_len = len(line) - len(line.lstrip())
        if indent_len > 3:
            # Sub-entries show the link only, even when a description exists.
            out.append(f'<div class="entry-sub">{link}</div>')
        elif desc is not None:
            out.append(
                f'<div class="entry">{link}'
                f'<span class="sep">&mdash;</span>{escape(desc)}'
                f"</div>"
            )
        else:
            out.append(f'<div class="entry">{link}</div>')
    return "\n".join(out)
 def parse_readme(text: str) -> tuple[list[dict], list[dict]]:
    """Split README.md text into category sections and resource sections.

    Categories are the ``##`` sections between the first "---" line (line 0
    is ignored) and "# Resources". Resources are the ``##`` sections from
    "# Resources" up to "# Contributing". A missing end marker means "until
    the end of the text".

    Returns:
        (categories, resources) where each is a list of dicts with keys:
        name, slug, description, content. Both lists are empty when the text
        has no "---" separator.
    """
    lines = text.split("\n")
    # First "---" that is not on the very first line.
    separator_idx = next(
        (i for i, line in enumerate(lines) if i > 0 and line.strip() == "---"),
        None,
    )
    if separator_idx is None:
        return [], []
    # The last occurrence of each top-level heading wins.
    resources_idx = None
    contributing_idx = None
    for i, line in enumerate(lines):
        heading = line.strip()
        if heading == "# Resources":
            resources_idx = i
        elif heading == "# Contributing":
            contributing_idx = i
    if resources_idx is None:
        category_lines = lines[separator_idx + 1 :]
        resource_lines = []
    else:
        category_lines = lines[separator_idx + 1 : resources_idx]
        res_end = len(lines) if contributing_idx is None else contributing_idx
        resource_lines = lines[resources_idx:res_end]
    return (
        _extract_sections(category_lines, level=2),
        _extract_sections(resource_lines, level=2),
    )
 def _extract_sections(lines: list[str], *, level: int) -> list[dict]:
    """Group lines under headings of the given level and build one dict per heading.

    A heading is a line starting with ``level`` "#" characters and a space,
    unless a further "#" follows that space. Lines before the first heading
    are discarded.
    """
    marker = "#" * level + " "
    excluded = marker + "#"
    collected = []
    title = None
    body: list[str] = []
    for row in lines:
        is_heading = row.startswith(marker) and not row.startswith(excluded)
        if is_heading:
            # Flush the previous section before starting a new one.
            if title is not None:
                collected.append(_build_section(title, body))
            title = row[len(marker) :].strip()
            body = []
        elif title is not None:
            body.append(row)
    if title is not None:
        collected.append(_build_section(title, body))
    return collected
 def _build_section(name: str, lines: list[str]) -> dict:
    """Assemble the section dict (name, slug, description, content) for a heading.

    Blank lines at both ends are ignored. When the first remaining line is
    wrapped in underscores (``_..._``), its inner text becomes the description
    and is removed from the content, along with any blank lines after it.
    """
    # Trim with indices instead of re-slicing; the caller's list is not mutated.
    start, end = 0, len(lines)
    while start < end and not lines[start].strip():
        start += 1
    while end > start and not lines[end - 1].strip():
        end -= 1
    trimmed = lines[start:end]
    description = ""
    content_lines = trimmed
    if trimmed:
        italic = re.match(r"^_(.+)_$", trimmed[0].strip())
        if italic:
            description = italic.group(1)
            rest = 1
            while rest < len(trimmed) and not trimmed[rest].strip():
                rest += 1
            content_lines = trimmed[rest:]
    return {
        "name": name,
        "slug": slugify(name),
        "description": description,
        "content": "\n".join(content_lines).strip(),
    }
 def render_markdown(text: str) -> str:
    """Convert markdown text to HTML using a fresh converter with the "extra" extension."""
    return markdown.Markdown(extensions=["extra"]).convert(text)
 def strip_markdown_links(text: str) -> str:
    """Reduce every ``[text](url)`` link to its text, for plain-text contexts."""
    link_re = re.compile(r"\[([^\]]+)\]\([^)]+\)")
    return link_re.sub(r"\1", text)
 def render_inline_markdown(text: str) -> str:
    """Render inline markdown (links, bold, italic) to HTML marked as safe markup.

    The wrapping ``<p>...</p>`` is removed because the result is used inline,
    and every ``<a `` tag gets ``target="_blank" rel="noopener"``.
    """
    from markupsafe import Markup

    rendered = markdown.markdown(text).strip()
    # Drop the paragraph wrapper produced by the converter.
    unwrapped = re.sub(r"^<p>(.*)</p>$", r"\1", rendered)
    # Make links open in a new tab.
    linked = unwrapped.replace("<a ", '<a target="_blank" rel="noopener" ')
    return Markup(linked)
 def group_categories(
    categories: list[dict],
    resources: list[dict],
@@ -285,10 +75,11 @@ def group_categories(
    """Organize categories and resources into thematic section groups."""
    cat_by_name = {c["name"]: c for c in categories}
    groups = []
    grouped_names: set[str] = set()
    for group_name, cat_names in SECTION_GROUPS:
        grouped_names.update(cat_names)
        if group_name == "Resources":
            # Resources group uses parsed resources directly
            group_cats = list(resources)
        else:
            group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
@@ -301,9 +92,6 @@ def group_categories(
            })
    # Any categories not in a group go into "Other"
    grouped_names = set()
    for _, cat_names in SECTION_GROUPS:
        grouped_names.update(cat_names)
    ungrouped = [c for c in categories if c["name"] not in grouped_names]
    if ungrouped:
        groups.append({
@@ -323,13 +111,13 @@ class Entry(TypedDict):
    group: str
    stars: int | None
    owner: str | None
-    pushed_at: str | None
+    last_commit_at: str | None
 class StarData(TypedDict):
    stars: int
    owner: str
-    pushed_at: str
+    last_commit_at: str
    fetched_at: str
@@ -367,7 +155,6 @@ def sort_entries(entries: list[dict]) -> list[dict]:
 def extract_entries(
    categories: list[dict],
    resources: list[dict],
    groups: list[dict],
 ) -> list[dict]:
    """Flatten categories into individual library entries for table display."""
@@ -379,37 +166,17 @@ def extract_entries(
    entries: list[dict] = []
    for cat in categories:
        group_name = cat_to_group.get(cat["name"], "Other")
-        last_entry_indent = -1
+        for entry in cat["entries"]:
        for line in cat["content"].split("\n"):
            indent_len = len(line) - len(line.lstrip())
            # Link-only sub-item deeper than parent → "also see"
            m_sub = re.match(r"\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*$", line)
            if m_sub and indent_len > last_entry_indent >= 0 and entries:
                entries[-1]["also_see"].append({
                    "name": m_sub.group(1),
                    "url": m_sub.group(2),
                })
                continue
            if indent_len > 3:
                continue
            m = re.match(
                r"\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*(?:[-\u2013\u2014]\s*(.+))?$",
                line,
            )
            if m:
                last_entry_indent = indent_len
            entries.append({
-                    "name": m.group(1),
+                "name": entry["name"],
-                    "url": m.group(2),
+                "url": entry["url"],
-                    "description": render_inline_markdown(m.group(3)) if m.group(3) else "",
+                "description": entry["description"],
                "category": cat["name"],
                "group": group_name,
                "stars": None,
                "owner": None,
-                    "pushed_at": None,
+                "last_commit_at": None,
-                    "also_see": [],
+                "also_see": entry["also_see"],
            })
    return entries
@@ -420,7 +187,6 @@ def build(repo_root: str) -> None:
    website = repo / "website"
    readme_text = (repo / "README.md").read_text(encoding="utf-8")
    # Extract subtitle from the first non-empty, non-heading line
    subtitle = ""
    for line in readme_text.split("\n"):
        stripped = line.strip()
@@ -429,47 +195,33 @@ def build(repo_root: str) -> None:
            break
    categories, resources = parse_readme(readme_text)
-
+    # All fields pre-computed: entry_count, content_html, preview, description
    # Enrich with entry counts, rendered HTML, previews, and clean descriptions
    for cat in categories + resources:
        cat["entry_count"] = count_entries(cat["content"])
        cat["content_html"] = render_content_html(cat["content"])
        cat["preview"] = extract_preview(cat["content"])
        cat["description"] = strip_markdown_links(cat["description"])
    total_entries = sum(c["entry_count"] for c in categories)
    # Organize into groups
    groups = group_categories(categories, resources)
    entries = extract_entries(categories, groups)
    # Flatten entries for table view
    entries = extract_entries(categories, resources, groups)
    # Load and merge GitHub star data
    stars_data = load_stars(website / "data" / "github_stars.json")
    for entry in entries:
        repo_key = extract_github_repo(entry["url"])
        if repo_key and repo_key in stars_data:
-            entry["stars"] = stars_data[repo_key]["stars"]
+            sd = stars_data[repo_key]
-            entry["owner"] = stars_data[repo_key]["owner"]
+            entry["stars"] = sd["stars"]
-            entry["pushed_at"] = stars_data[repo_key].get("pushed_at", "")
+            entry["owner"] = sd["owner"]
            entry["last_commit_at"] = sd.get("last_commit_at", "")
    # Sort by stars descending
    entries = sort_entries(entries)
    # Set up Jinja2
    env = Environment(
        loader=FileSystemLoader(website / "templates"),
        autoescape=True,
    )
    # Output directory
    site_dir = website / "output"
    if site_dir.exists():
        shutil.rmtree(site_dir)
    site_dir.mkdir(parents=True)
    # Generate single index.html
    tpl_index = env.get_template("index.html")
    (site_dir / "index.html").write_text(
        tpl_index.render(
@@ -484,14 +236,10 @@ def build(repo_root: str) -> None:
        encoding="utf-8",
    )
    # Copy static assets
    static_src = website / "static"
    static_dst = site_dir / "static"
    if static_src.exists():
-        shutil.copytree(static_src, static_dst)
+        shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
    # Write CNAME
    (site_dir / "CNAME").write_text("awesome-python.com\n", encoding="utf-8")
    print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
    print(f"Total entries: {total_entries}")
@@ -10,14 +10,14 @@ from pathlib import Path
 import httpx
-from build import extract_github_repo
+from build import extract_github_repo, load_stars
 CACHE_MAX_AGE_DAYS = 7
 DATA_DIR = Path(__file__).parent / "data"
 CACHE_FILE = DATA_DIR / "github_stars.json"
 README_PATH = Path(__file__).parent.parent / "README.md"
 GRAPHQL_URL = "https://api.github.com/graphql"
-BATCH_SIZE = 100
+BATCH_SIZE = 50
 def extract_github_repos(text: str) -> set[str]:
@@ -30,17 +30,6 @@ def extract_github_repos(text: str) -> set[str]:
    return repos
 def load_cache() -> dict:
    """Read the star cache from CACHE_FILE.

    Returns an empty dict when the file does not exist, or when it is not
    valid JSON (a warning is printed to stderr in that case).
    """
    if not CACHE_FILE.exists():
        return {}
    try:
        raw = CACHE_FILE.read_text(encoding="utf-8")
        return json.loads(raw)
    except json.JSONDecodeError:
        print(f"Warning: corrupt cache at {CACHE_FILE}, starting fresh.", file=sys.stderr)
        return {}
 def save_cache(cache: dict) -> None:
    """Write the star cache to disk, creating data/ dir if needed."""
    DATA_DIR.mkdir(parents=True, exist_ok=True)
@@ -61,7 +50,7 @@ def build_graphql_query(repos: list[str]) -> str:
            continue
        parts.append(
            f'repo_{i}: repository(owner: "{owner}", name: "{name}") '
-            f"{{ stargazerCount pushedAt owner {{ login }} }}"
+            f"{{ stargazerCount owner {{ login }} defaultBranchRef {{ target {{ ... on Commit {{ committedDate }} }} }} }}"
        )
    if not parts:
        return ""
@@ -78,10 +67,12 @@ def parse_graphql_response(
        node = data.get(f"repo_{i}")
        if node is None:
            continue
        default_branch = node.get("defaultBranchRef") or {}
        target = default_branch.get("target") or {}
        result[repo] = {
            "stars": node.get("stargazerCount", 0),
            "owner": node.get("owner", {}).get("login", ""),
-            "pushed_at": node.get("pushedAt", ""),
+            "last_commit_at": target.get("committedDate", ""),
        }
    return result
@@ -114,7 +105,7 @@ def main() -> None:
    current_repos = extract_github_repos(readme_text)
    print(f"Found {len(current_repos)} GitHub repos in README.md")
-    cache = load_cache()
+    cache = load_stars(CACHE_FILE)
    now = datetime.now(timezone.utc)
    # Prune entries not in current README
@@ -173,7 +164,7 @@ def main() -> None:
                    cache[repo] = {
                        "stars": results[repo]["stars"],
                        "owner": results[repo]["owner"],
-                        "pushed_at": results[repo]["pushed_at"],
+                        "last_commit_at": results[repo]["last_commit_at"],
                        "fetched_at": now_iso,
                    }
                    fetched_count += 1
@@ -0,0 +1,388 @@
 """Parse README.md into structured section data using markdown-it-py AST."""
 from __future__ import annotations
 import re
 from typing import TypedDict
 from markdown_it import MarkdownIt
 from markdown_it.tree import SyntaxTreeNode
 from markupsafe import escape
 class AlsoSee(TypedDict):
    """A related link attached to an entry, taken from a nested list item."""
    name: str  # plain-text label of the link
    url: str  # href of the link ("" when the link has no href)
 class ParsedEntry(TypedDict):
    """One linked list item of a section, plus any related links nested under it."""
    name: str  # plain-text label of the entry's leading link
    url: str  # href of the entry's leading link
    description: str  # inline HTML, properly escaped
    also_see: list[AlsoSee]  # links found in the bullet list nested under this entry
 class ParsedSection(TypedDict):
    """Everything extracted for one h2 section of the README."""
    name: str  # heading text
    slug: str  # slugify(name)
    description: str  # plain text, links resolved to text
    entries: list[ParsedEntry]
    entry_count: int  # number of entries plus all of their also_see links
    preview: str  # names of the first four entries, joined with ", "
    content_html: str  # rendered HTML, properly escaped
 # --- Slugify ----------------------------------------------------------------
 _SLUG_NON_ALNUM_RE = re.compile(r"[^a-z0-9\s-]")
 _SLUG_WHITESPACE_RE = re.compile(r"[\s]+")
 _SLUG_MULTI_DASH_RE = re.compile(r"-+")
 def slugify(name: str) -> str:
    """Turn a category name into a lowercase, dash-separated slug.
    Steps, in order: lowercase; drop every character that is not a-z, 0-9,
    whitespace or "-"; trim the ends; replace each whitespace run with one
    dash; collapse repeated dashes.
    """
    cleaned = _SLUG_NON_ALNUM_RE.sub("", name.lower()).strip()
    dashed = _SLUG_WHITESPACE_RE.sub("-", cleaned)
    return _SLUG_MULTI_DASH_RE.sub("-", dashed)
 # --- Inline renderers -------------------------------------------------------
 def render_inline_html(children: list[SyntaxTreeNode]) -> str:
    """Serialize inline AST nodes to an HTML fragment.
    Text, inline code and raw inline HTML are escaped (raw HTML is shown as
    text, never passed through). Links open in a new tab. Node types not
    listed below contribute nothing to the output.
    """
    html: list[str] = []
    for node in children:
        kind = node.type
        if kind == "text" or kind == "html_inline":
            html.append(str(escape(node.content)))
        elif kind == "softbreak":
            # A soft line break in the source is rendered as a single space.
            html.append(" ")
        elif kind == "link":
            target = str(escape(node.attrGet("href") or ""))
            label = render_inline_html(node.children)
            html.append(
                f'<a href="{target}" target="_blank" rel="noopener">{label}</a>'
            )
        elif kind == "em":
            html.append(f"<em>{render_inline_html(node.children)}</em>")
        elif kind == "strong":
            html.append(f"<strong>{render_inline_html(node.children)}</strong>")
        elif kind == "code_inline":
            html.append(f"<code>{escape(node.content)}</code>")
    return "".join(html)
 def render_inline_text(children: list[SyntaxTreeNode]) -> str:
    """Flatten inline AST nodes to plain text.
    Emphasis, strong and link nodes are replaced by the text of their
    children; soft breaks become a space; other node types are dropped.
    """
    pieces: list[str] = []
    for node in children:
        kind = node.type
        if kind in ("text", "code_inline"):
            pieces.append(node.content)
        elif kind == "softbreak":
            pieces.append(" ")
        elif kind in ("em", "strong", "link"):
            pieces.append(render_inline_text(node.children))
    return "".join(pieces)
 # --- AST helpers -------------------------------------------------------------
 def _heading_text(node: SyntaxTreeNode) -> str:
    """Return the plain text of a heading's first inline child, or "" if it has none."""
    inline = next((kid for kid in node.children if kid.type == "inline"), None)
    if inline is None:
        return ""
    return render_inline_text(inline.children)
 def _extract_description(nodes: list[SyntaxTreeNode]) -> str:
    """Return the section description when the first node is an all-italic paragraph.
    Example: a leading paragraph `_Libraries for foo._` yields "Libraries for foo.".
    Anything else (no nodes, a non-paragraph first node, or a paragraph whose
    inline content is not exactly one <em>) yields "".
    """
    if not nodes or nodes[0].type != "paragraph":
        return ""
    for inline in nodes[0].children:
        if inline.type != "inline":
            continue
        kids = inline.children
        if len(kids) == 1 and kids[0].type == "em":
            return render_inline_text(kids[0].children)
    return ""
 # --- Entry extraction --------------------------------------------------------
 # Leading name/description separator: optional whitespace, then one hyphen,
 # en dash (U+2013) or em dash (U+2014), then optional whitespace.
 _DESC_SEP_RE = re.compile(r"^\s*[-\u2013\u2014]\s*")
 def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
    """Find first direct child of a given type."""
    for child in node.children:
        if child.type == child_type:
            return child
    return None
 def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
    """Return the inline node of a list_item's first paragraph, or None if either is missing."""
    paragraph = _find_child(node, "paragraph")
    return _find_child(paragraph, "inline") if paragraph is not None else None
 def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None:
    """Find the first link node among inline children."""
    for child in inline.children:
        if child.type == "link":
            return child
    return None
 def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool:
    """Check if the link is the first child of inline (a real entry, not a subcategory label)."""
    return bool(inline.children) and inline.children[0] is link
 def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str:
    """Render everything after first_link as HTML, minus the leading separator.
    AST: [link("name"), text(" - Description.")]  ->  "Description."
    Returns "" when first_link is not a child of inline or nothing follows it.
    """
    kids = inline.children
    for position, kid in enumerate(kids):
        if kid is first_link:
            break
    else:
        return ""
    trailing = kids[position + 1 :]
    if not trailing:
        return ""
    # Strip the "-" / en-dash / em-dash that separates name from description.
    return _DESC_SEP_RE.sub("", render_inline_html(trailing))
 def _parse_list_entries(bullet_list: SyntaxTreeNode) -> list[ParsedEntry]:
    """Collect the entries of a bullet_list node, flattening subcategories.
    Each list_item falls into one of three shapes:
    - no leading link: a subcategory label; its nested list is parsed recursively
      and those entries are appended in place
    - leading link with a nested list: an entry whose nested links become also_see
    - leading link without nesting: a plain entry
    Items without a paragraph/inline child are skipped.
    """
    collected: list[ParsedEntry] = []
    for item in bullet_list.children:
        if item.type != "list_item":
            continue
        inline = _find_inline(item)
        if inline is None:
            continue
        link = _find_first_link(inline)
        sublist = _find_child(item, "bullet_list")
        if link is None or not _is_leading_link(inline, link):
            # Label only: the label itself produces no entry, its children do.
            if sublist:
                collected.extend(_parse_list_entries(sublist))
            continue
        # Only the first link of each nested item is kept; deeper nesting is ignored.
        related: list[AlsoSee] = []
        if sublist:
            for sub in sublist.children:
                if sub.type != "list_item":
                    continue
                sub_inline = _find_inline(sub)
                sub_link = _find_first_link(sub_inline) if sub_inline else None
                if sub_link:
                    related.append(AlsoSee(
                        name=render_inline_text(sub_link.children),
                        url=sub_link.attrGet("href") or "",
                    ))
        collected.append(ParsedEntry(
            name=render_inline_text(link.children),
            url=link.attrGet("href") or "",
            description=_extract_description_html(inline, link),
            also_see=related,
        ))
    return collected
 def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEntry]:
    """Gather entries from every top-level bullet_list in a section, in document order."""
    return [
        entry
        for node in content_nodes
        if node.type == "bullet_list"
        for entry in _parse_list_entries(node)
    ]
 # --- Content HTML rendering --------------------------------------------------
 def _render_bullet_list_html(
    bullet_list: SyntaxTreeNode,
    *,
    is_sub: bool = False,
 ) -> str:
    """Render a bullet_list node as newline-joined <div> elements.
    Items without a leading link become "subcat" labels; items with one become
    "entry" divs, or "entry-sub" divs (link only, no description) when is_sub
    is true. Lists nested under an entry are rendered with is_sub=True; lists
    nested under a label are rendered with is_sub=False.
    """
    lines: list[str] = []
    for item in bullet_list.children:
        if item.type != "list_item":
            continue
        inline = _find_inline(item)
        if inline is None:
            continue
        link = _find_first_link(inline)
        sublist = _find_child(item, "bullet_list")
        if link is None or not _is_leading_link(inline, link):
            # Subcategory label (plain text or text-before-link)
            label = str(escape(render_inline_text(inline.children)))
            lines.append(f'<div class="subcat">{label}</div>')
            if sublist:
                lines.append(_render_bullet_list_html(sublist, is_sub=False))
            continue
        name = str(escape(render_inline_text(link.children)))
        url = str(escape(link.attrGet("href") or ""))
        if is_sub:
            markup = f'<div class="entry-sub"><a href="{url}">{name}</a></div>'
        else:
            desc = _extract_description_html(inline, link)
            if desc:
                markup = (
                    f'<div class="entry"><a href="{url}">{name}</a>'
                    f'<span class="sep">&mdash;</span>{desc}</div>'
                )
            else:
                markup = f'<div class="entry"><a href="{url}">{name}</a></div>'
        lines.append(markup)
        # Whatever is nested under a linked entry is rendered as sub-entries.
        if sublist:
            lines.append(_render_bullet_list_html(sublist, is_sub=True))
    return "\n".join(lines)
 def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str:
    """Render every top-level bullet_list of a section and join the results with newlines.
    Nodes of any other type are not rendered.
    """
    return "\n".join(
        _render_bullet_list_html(node)
        for node in content_nodes
        if node.type == "bullet_list"
    )
 # --- Section splitting -------------------------------------------------------
 def _group_by_h2(
    nodes: list[SyntaxTreeNode],
 ) -> list[ParsedSection]:
    """Split a flat node list into one ParsedSection per h2 heading.
    Each h2 opens a new section that owns every node up to the next h2.
    Nodes that appear before the first h2 belong to no section and are dropped.
    """
    def build(title: str, body: list[SyntaxTreeNode]) -> ParsedSection:
        # A leading all-italic paragraph is the description, not section content.
        summary = _extract_description(body)
        content = body[1:] if summary else body
        items = _parse_section_entries(content)
        # also_see links count toward the section total.
        total = len(items)
        for item in items:
            total += len(item["also_see"])
        return ParsedSection(
            name=title,
            slug=slugify(title),
            description=summary,
            entries=items,
            entry_count=total,
            preview=", ".join(item["name"] for item in items[:4]),
            content_html=_render_section_html(content),
        )
    groups: list[tuple[str, list[SyntaxTreeNode]]] = []
    for node in nodes:
        if node.type == "heading" and node.tag == "h2":
            groups.append((_heading_text(node), []))
        elif groups:
            groups[-1][1].append(node)
    return [build(title, body) for title, body in groups]
 def parse_readme(text: str) -> tuple[list[ParsedSection], list[ParsedSection]]:
    """Parse README.md text into categories and resources.
    Layout expected in the README:
    - everything before the first thematic break (---) is preamble and ignored
    - h2 sections after the break, up to the first "# Resources" or
      "# Contributing" heading, are categories
    - h2 sections after "# Resources", up to a later "# Contributing" (or the
      end of the document), are resources
    Only h1 markers that come after the thematic break are honoured, so a
    heading with one of those names in the preamble cannot produce an empty
    category slice.
    Returns (categories, resources) where each is a list of ParsedSection dicts;
    ([], []) when the document has no thematic break.
    """
    md = MarkdownIt("commonmark")
    root = SyntaxTreeNode(md.parse(text))
    children = root.children
    # Find the thematic break, then # Resources and # Contributing, in one pass.
    hr_idx = None
    resources_idx = None
    contributing_idx = None
    for i, node in enumerate(children):
        if hr_idx is None:
            if node.type == "hr":
                hr_idx = i
            # Nothing before (or at) the break can be a section marker.
            continue
        if node.type == "heading" and node.tag == "h1":
            title = _heading_text(node)
            if title == "Resources":
                resources_idx = i
            elif title == "Contributing":
                contributing_idx = i
    if hr_idx is None:
        return [], []
    end = len(children)
    # Categories stop at whichever marker comes first; indices are compared
    # against None explicitly instead of relying on truthiness.
    markers = [idx for idx in (resources_idx, contributing_idx) if idx is not None]
    cat_end = min(markers, default=end)
    cat_nodes = children[hr_idx + 1 : cat_end]
    res_nodes: list[SyntaxTreeNode] = []
    if resources_idx is not None:
        # Contributing only bounds the resources when it actually follows them.
        if contributing_idx is not None and contributing_idx > resources_idx:
            res_end = contributing_idx
        else:
            res_end = end
        res_nodes = children[resources_idx + 1 : res_end]
    categories = _group_by_h2(cat_nodes)
    resources = _group_by_h2(res_nodes)
    return categories, resources
@@ -0,0 +1,6 @@
 <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
  <path d="M8 2h16a6 6 0 0 1 6 6v8H2V8a6 6 0 0 1 6-6z" fill="#1d5fa6"/>
  <path d="M2 16h28v8a6 6 0 0 1-6 6H8a6 6 0 0 1-6-6z" fill="#f0c73e"/>
  <circle cx="11.5" cy="9.5" r="2.2" fill="#f0c73e"/>
  <circle cx="20.5" cy="22.5" r="2.2" fill="#1d5fa6"/>
 </svg>
@@ -1,15 +1,44 @@
 // State
 // Active tag filter, or null when no filter is applied.
 var activeFilter = null; // { type: "cat"|"group", value: "..." }
 // Current sort column and direction; stars/desc is the initial value.
 var activeSort = { col: 'stars', order: 'desc' };
 // DOM handles, looked up once when the script runs. querySelector returns
 // null when an element is absent.
 var searchInput = document.querySelector('.search');
 var filterBar = document.querySelector('.filter-bar');
 var filterValue = document.querySelector('.filter-value');
 var filterClear = document.querySelector('.filter-clear');
 var noResults = document.querySelector('.no-results');
 var countEl = document.querySelector('.count');
 // Data rows only (tr.row).
 var rows = document.querySelectorAll('.table tbody tr.row');
 var tags = document.querySelectorAll('.tag');
 var tbody = document.querySelector('.table tbody');
 // Relative time formatting.
 // Turns a date string into a coarse "N units ago" label measured from now:
 // under 1 hour (including future dates) -> "just now", then hours, "yesterday",
 // days, months (30-day buckets) and years (365-day buckets).
 // A string that Date cannot parse is returned unchanged.
 function relativeTime(isoStr) {
  var date = new Date(isoStr);
  var now = new Date();
  var diffMs = now - date;
  // Invalid Date makes every comparison below false and would fall through
  // to "NaN years ago"; show the raw value instead.
  if (isNaN(diffMs)) return String(isoStr);
  var diffHours = Math.floor(diffMs / 3600000);
  var diffDays = Math.floor(diffMs / 86400000);
  if (diffHours < 1) return 'just now';
  if (diffHours < 24) return diffHours === 1 ? '1 hour ago' : diffHours + ' hours ago';
  if (diffDays === 1) return 'yesterday';
  if (diffDays < 30) return diffDays + ' days ago';
  var diffMonths = Math.floor(diffDays / 30);
  if (diffMonths < 12) return diffMonths === 1 ? '1 month ago' : diffMonths + ' months ago';
  // 360-364 days is already 12 thirty-day months but still floors to 0 years;
  // clamp to 1 so that range reads "1 year ago" rather than "0 years ago".
  var diffYears = Math.max(1, Math.floor(diffDays / 365));
  return diffYears === 1 ? '1 year ago' : diffYears + ' years ago';
 }
 // Format all commit date cells
 // Replace the text of each commit cell's <time> element with a relative label.
 Array.prototype.forEach.call(
  document.querySelectorAll('.col-commit[data-commit]'),
  function (cell) {
    var stamp = cell.querySelector('time');
    if (!stamp) return;
    stamp.textContent = relativeTime(cell.getAttribute('data-commit'));
  }
 );
 // Store original row order for sort reset
 // Tag each data row with its initial position and with the element that
 // immediately follows it, so sorting can move the pair together.
 Array.prototype.forEach.call(rows, function (tr, position) {
  tr._expandRow = tr.nextElementSibling;
  tr._origIndex = position;
 });
 function collapseAll() {
  var openRows = document.querySelectorAll('.table tbody tr.row.open');
  openRows.forEach(function (row) {
@@ -46,16 +75,18 @@ function applyFilters() {
      show = row._searchText.includes(query);
    }
-    row.hidden = !show;
+    if (row.hidden !== !show) row.hidden = !show;
    if (show) {
      visibleCount++;
-      row.querySelector('.col-num').textContent = String(visibleCount);
+      var numCell = row.cells[0];
      if (numCell.textContent !== String(visibleCount)) {
        numCell.textContent = String(visibleCount);
      }
    }
  });
  if (noResults) noResults.hidden = visibleCount > 0;
  if (countEl) countEl.textContent = visibleCount;
  // Update tag highlights
  tags.forEach(function (tag) {
@@ -74,6 +105,76 @@ function applyFilters() {
      filterBar.hidden = true;
    }
  }
  updateURL();
 }
 // Mirror the current search text, tag filter and sort into the query string
 // without adding a history entry. The default sort (stars, desc) is omitted.
 function updateURL() {
  var search = new URLSearchParams();
  var term = '';
  if (searchInput) term = searchInput.value.trim();
  if (term) search.set('q', term);
  if (activeFilter) {
    var key = 'group';
    if (activeFilter.type === 'cat') key = 'category';
    search.set(key, activeFilter.value);
  }
  var isDefaultSort = activeSort.col === 'stars' && activeSort.order === 'desc';
  if (!isDefaultSort) {
    search.set('sort', activeSort.col);
    search.set('order', activeSort.order);
  }
  var encoded = search.toString();
  var target = location.pathname;
  if (encoded) target = '?' + encoded;
  history.replaceState(null, '', target);
 }
 // Return the value used to order `row` by column `col`:
 //   'name'        -> lowercased, trimmed link text (string)
 //   'stars'       -> star count with commas removed, or -1 when not a number
 //   'commit-time' -> epoch milliseconds of data-commit, or 0 when the
 //                    attribute is missing or cannot be parsed as a date
 //   anything else -> 0
 function getSortValue(row, col) {
  if (col === 'name') {
    return row.querySelector('.col-name a').textContent.trim().toLowerCase();
  }
  if (col === 'stars') {
    var text = row.querySelector('.col-stars').textContent.trim().replace(/,/g, '');
    var num = parseInt(text, 10);
    return isNaN(num) ? -1 : num;
  }
  if (col === 'commit-time') {
    var attr = row.querySelector('.col-commit').getAttribute('data-commit');
    var time = attr ? new Date(attr).getTime() : 0;
    // An unparseable date gives NaN, which is neither <= 0 nor comparable;
    // report it as 0 (the "missing" value) so sorting stays consistent.
    return isNaN(time) ? 0 : time;
  }
  return 0;
 }
 // Reorder the table rows according to activeSort, then re-apply the filters.
 // Ties fall back to the original row order. For numeric columns, rows whose
 // value is <= 0 are placed after all others in either direction.
 function sortRows() {
  var ordered = Array.prototype.slice.call(rows);
  var byOriginalOrder = function (a, b) {
    return a._origIndex - b._origIndex;
  };
  if (!activeSort) {
    ordered.sort(byOriginalOrder);
  } else {
    ordered.sort(function (a, b) {
      var left = getSortValue(a, activeSort.col);
      var right = getSortValue(b, activeSort.col);
      var diff;
      if (activeSort.col === 'name') {
        diff = left < right ? -1 : left > right ? 1 : 0;
      } else {
        var leftMissing = left <= 0;
        var rightMissing = right <= 0;
        if (leftMissing && rightMissing) return byOriginalOrder(a, b);
        if (leftMissing) return 1;
        if (rightMissing) return -1;
        diff = left - right;
      }
      if (diff === 0) return byOriginalOrder(a, b);
      return activeSort.order === 'desc' ? -diff : diff;
    });
  }
  // Re-appending moves each row, followed by its stored companion row,
  // to the end of tbody in sorted order.
  for (var i = 0; i < ordered.length; i++) {
    tbody.appendChild(ordered[i]);
    tbody.appendChild(ordered[i]._expandRow);
  }
  applyFilters();
 }
 // Clear the sort-asc/sort-desc classes on every sortable header, then mark
 // the header of the active sort column with the current direction.
 function updateSortIndicators() {
  var headers = document.querySelectorAll('th[data-sort]');
  for (var i = 0; i < headers.length; i++) {
    var header = headers[i];
    header.classList.remove('sort-asc', 'sort-desc');
    var isActive = activeSort && header.dataset.sort === activeSort.col;
    if (isActive) header.classList.add('sort-' + activeSort.order);
  }
 }
 // Expand/collapse: event delegation on tbody
@@ -130,6 +231,23 @@ if (filterClear) {
  });
 }
 // Column sorting
 // Clicking a sortable header cycles: the column's default order, then the
 // opposite order, then back to the global default (stars, desc).
 // The default order is ascending for 'name' and descending otherwise.
 document.querySelectorAll('th[data-sort]').forEach(function (header) {
  header.addEventListener('click', function () {
    var col = header.dataset.sort;
    var firstOrder = 'desc';
    var secondOrder = 'asc';
    if (col === 'name') {
      firstOrder = 'asc';
      secondOrder = 'desc';
    }
    var sameColumn = activeSort && activeSort.col === col;
    if (!sameColumn) {
      activeSort = { col: col, order: firstOrder };
    } else if (activeSort.order === firstOrder) {
      activeSort = { col: col, order: secondOrder };
    } else {
      activeSort = { col: 'stars', order: 'desc' };
    }
    sortRows();
    updateSortIndicators();
  });
 });
 // Search input
 if (searchInput) {
  var searchTimer;
@@ -152,3 +270,23 @@ if (searchInput) {
    }
  });
 }
 // Restore state from URL
 // Read q / category / group / sort / order from the query string once at
 // load and apply them. A sort is accepted only when both the column and
 // the direction are recognised values.
 (function () {
  var params = new URLSearchParams(location.search);
  var q = params.get('q');
  var cat = params.get('category');
  var group = params.get('group');
  var sort = params.get('sort');
  var order = params.get('order');
  if (q && searchInput) searchInput.value = q;
  // A category filter takes precedence over a group filter.
  if (cat) {
    activeFilter = { type: 'cat', value: cat };
  } else if (group) {
    activeFilter = { type: 'group', value: group };
  }
  var knownColumn = ['name', 'stars', 'commit-time'].indexOf(sort) !== -1;
  var knownOrder = order === 'desc' || order === 'asc';
  if (knownColumn && knownOrder) {
    activeSort = { col: sort, order: order };
  }
  var hasState = q || cat || group || sort;
  if (hasState) {
    sortRows();
  }
  updateSortIndicators();
 })();
@@ -23,6 +23,8 @@
  --accent-light: oklch(97% 0.015 240);
  --highlight: oklch(93% 0.10 90);
  --highlight-text: oklch(35% 0.10 90);
  --tag-text: oklch(45% 0.06 240);
  --tag-hover-bg: oklch(93% 0.025 240);
 }
 html { font-size: 16px; }
@@ -65,8 +67,10 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
 .hero-main {
  display: flex;
  flex-wrap: wrap;
  justify-content: space-between;
  align-items: flex-start;
  gap: 1rem;
 }
 .hero-submit {
@@ -78,14 +82,21 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  color: var(--text);
  text-decoration: none;
  white-space: nowrap;
  transition: border-color 0.2s, background 0.2s, color 0.2s;
 }
 .hero-submit:hover {
  border-color: var(--accent);
  background: var(--accent-light);
  color: var(--accent);
  text-decoration: none;
 }
 .hero-submit:focus-visible {
  outline: 2px solid var(--accent);
  outline-offset: 2px;
 }
 .hero h1 {
  font-family: var(--font-display);
  font-size: clamp(2rem, 5vw, 3rem);
@@ -144,6 +155,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  font-family: var(--font-body);
  font-size: var(--text-sm);
  color: var(--text);
  transition: border-color 0.15s, background 0.15s;
 }
 .search::placeholder { color: var(--text-muted); }
@@ -174,11 +186,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  background: none;
  border: 1px solid var(--border);
  border-radius: 4px;
-  padding: 0.15rem 0.5rem;
+  padding: 0.35rem 0.65rem;
  font-family: inherit;
  font-size: var(--text-xs);
  color: var(--text-muted);
  cursor: pointer;
  transition: border-color 0.15s, color 0.15s;
 }
 .filter-clear:hover {
@@ -186,14 +199,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  color: var(--text);
 }
-.stats {
+.filter-clear:focus-visible {
-  font-size: var(--text-sm);
+  outline: 2px solid var(--accent);
-  color: var(--text-muted);
+  outline-offset: 2px;
  font-variant-numeric: tabular-nums;
 }
 .stats strong { color: var(--text-secondary); }
 /* === Table === */
 .table-wrap {
  width: 100%;
@@ -201,6 +211,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  overflow-x: auto;
 }
 .table-wrap:focus {
  outline: 2px solid var(--accent);
  outline-offset: -2px;
 }
 .table {
  width: 100%;
  border-collapse: separate;
@@ -236,6 +251,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  padding: 0.7rem 0.75rem;
  border-bottom: 1px solid var(--border);
  vertical-align: top;
  transition: background 0.15s;
 }
 .table tbody tr.row:not(.open):hover td {
@@ -253,9 +269,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
 .col-name {
  width: 35%;
-  overflow-wrap: break-word;
+  overflow-wrap: anywhere;
  word-wrap: break-word;
  word-break: break-word;
 }
 .col-name > a {
@@ -266,12 +280,47 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
 .col-name > a:hover { text-decoration: underline; color: var(--accent-hover); }
 /* === Sortable Headers === */
 th[data-sort] {
  cursor: pointer;
  user-select: none;
 }
 th[data-sort]:hover {
  color: var(--accent);
 }
 th[data-sort]::after {
  content: " ▼";
  opacity: 0;
  transition: opacity 0.15s;
 }
 th[data-sort="name"]::after {
  content: " ▲";
 }
 th[data-sort]:hover::after {
  opacity: 1;
 }
 th[data-sort].sort-desc::after {
  content: " ▼";
  opacity: 1;
 }
 th[data-sort].sort-asc::after {
  content: " ▲";
  opacity: 1;
 }
 /* === Stars Column === */
 .col-stars {
  width: 5rem;
  font-variant-numeric: tabular-nums;
  white-space: nowrap;
  color: var(--text-secondary);
  text-align: right;
 }
 /* === Arrow Column === */
@@ -294,6 +343,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
 /* === Row Click === */
 .row { cursor: pointer; }
 .row:focus-visible td {
  outline: none;
  background: var(--bg-hover);
  box-shadow: inset 2px 0 0 var(--accent);
 }
 /* === Expand Row === */
 .expand-row {
  display: none;
@@ -315,10 +370,36 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  border-bottom: 1px solid var(--border);
 }
@keyframes expand-in {
  from {
    opacity: 0;
    transform: translateY(-4px);
  }
  to {
    opacity: 1;
    transform: translateY(0);
  }
 }
 .expand-content {
  font-size: var(--text-sm);
  color: var(--text-secondary);
  line-height: 1.6;
  animation: expand-in 0.2s cubic-bezier(0.25, 1, 0.5, 1);
 }
 .expand-tags {
  display: flex;
  gap: 0.4rem;
  margin-bottom: 0.4rem;
 }
 .expand-tag {
  font-size: var(--text-xs);
  color: var(--tag-text);
  background: var(--bg);
  padding: 0.15rem 0.4rem;
  border-radius: 3px;
 }
 .expand-also-see {
@@ -357,35 +438,63 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  color: var(--border);
 }
-.col-cat, .col-group {
+.col-cat {
  width: 13%;
  white-space: nowrap;
 }
 /* === Last Commit Column === */
 .col-commit {
  width: 9rem;
  white-space: nowrap;
  color: var(--text-muted);
 }
 /* === Tags === */
 .tag {
  position: relative;
  background: var(--accent-light);
  border: none;
  font-family: inherit;
  font-size: var(--text-xs);
-  color: oklch(45% 0.06 240);
+  color: var(--tag-text);
  cursor: pointer;
-  padding: 0.15rem 0.35rem;
+  padding: 0.25rem 0.5rem;
  border-radius: 3px;
  white-space: nowrap;
  transition: background 0.15s, color 0.15s;
 }
 /* Expand touch target to 44x44px minimum */
 .tag::after {
  content: "";
  position: absolute;
  inset: -0.5rem -0.25rem;
 }
 .tag:hover {
-  background: var(--accent-light);
+  background: var(--tag-hover-bg);
  color: var(--accent);
 }
 .tag:focus-visible {
  outline: 2px solid var(--accent);
  outline-offset: 1px;
 }
 .tag.active {
  background: var(--highlight);
  color: var(--highlight-text);
  font-weight: 600;
 }
 /* === Noscript === */
 .noscript-msg {
  text-align: center;
  padding: 1rem;
  color: var(--text-muted);
 }
 /* === No Results === */
 .no-results {
  max-width: 1400px;
@@ -407,20 +516,18 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  background: var(--bg-input);
  display: flex;
  align-items: center;
-  justify-content: space-between;
+  justify-content: flex-end;
  gap: 0.5rem;
 }
-.footer a { color: var(--text-muted); text-decoration: none; }
+.footer a { color: var(--accent); text-decoration: none; }
-.footer a:hover { color: var(--accent); }
+.footer a:hover { color: var(--accent-hover); text-decoration: underline; }
-.footer-links {
+.footer-sep { color: var(--border-strong); }
  display: flex;
  gap: 1rem;
 }
 /* === Responsive === */
@media (max-width: 900px) {
-  .col-group { display: none; }
+  .col-commit { display: none; }
 }
@media (max-width: 640px) {
@@ -435,7 +542,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
  .col-cat { display: none; }
  .col-name { white-space: normal; }
-  .footer { padding: 1.25rem; flex-direction: column; gap: 0.5rem; }
+  .footer { padding: 1.25rem; justify-content: center; flex-wrap: wrap; }
 }
 /* === Screen Reader Only === */
@@ -454,6 +561,8 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
 /* === Reduced Motion === */
@media (prefers-reduced-motion: reduce) {
  *, *::before, *::after {
    animation-duration: 0.01ms !important;
    animation-iteration-count: 1 !important;
    transition-duration: 0.01ms !important;
  }
 }
@@ -17,10 +17,7 @@
    />
    <meta property="og:url" content="https://awesome-python.com/" />
    <meta name="twitter:card" content="summary" />
-    <link
+    <link rel="icon" href="/static/favicon.svg" type="image/svg+xml" />
      rel="icon"
      href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🐍</text></svg>"
    />
    <link rel="stylesheet" href="/static/style.css" />
    <script
      async
@@ -41,24 +38,24 @@
    <main id="content">{% block content %}{% endblock %}</main>
    <footer class="footer">
-      <div class="footer-links">
+      <span
        >Made by
        <a href="https://vinta.ws/" target="_blank" rel="noopener"
          >Vinta</a
        ></span
      >
      <span class="footer-sep">/</span>
      <a href="https://github.com/vinta" target="_blank" rel="noopener"
        >GitHub</a
      >
      <span class="footer-sep">/</span>
      <a href="https://twitter.com/vinta" target="_blank" rel="noopener"
        >Twitter</a
      >
      </div>
      <span
        >Curated by
        <a href="https://github.com/vinta" target="_blank" rel="noopener"
          >Vinta</a
        ></span
      >
    </footer>
    <noscript
-      ><p style="text-align: center; padding: 1rem; color: #666">
+      ><p class="noscript-msg">
        JavaScript is needed for search and filtering.
      </p></noscript
    >
@@ -29,6 +29,7 @@
  </div>
 </header>
 <h2 class="sr-only">Search and filter</h2>
 <div class="controls">
  <div class="search-wrap">
    <svg
@@ -60,22 +61,24 @@
  </div>
 </div>
-<div class="table-wrap">
+<h2 class="sr-only">Results</h2>
 <div class="table-wrap" tabindex="0" role="region" aria-label="Libraries table">
  <table class="table">
    <thead>
      <tr>
        <th class="col-num"><span class="sr-only">#</span></th>
-        <th class="col-name">Project Name</th>
+        <th class="col-name" data-sort="name">Project Name</th>
-        <th class="col-stars">GitHub Stars</th>
+        <th class="col-stars" data-sort="stars">GitHub Stars</th>
        <th class="col-commit" data-sort="commit-time">Last Commit</th>
        <th class="col-cat">Category</th>
-        <th class="col-group">Group</th>
+        <th class="col-arrow"><span class="sr-only">Details</span></th>
        <th class="col-arrow"></th>
      </tr>
    </thead>
    <tbody>
      {% for entry in entries %}
      <tr
        class="row"
        role="button"
        data-cat="{{ entry.category }}"
        data-group="{{ entry.group }}"
        tabindex="0"
@@ -92,25 +95,24 @@
          {% if entry.stars is not none %}{{ "{:,}".format(entry.stars) }}{%
          else %}&mdash;{% endif %}
        </td>
        <td class="col-commit"
          {% if entry.last_commit_at %}data-commit="{{ entry.last_commit_at }}"{% endif %}
        >{% if entry.last_commit_at %}<time datetime="{{ entry.last_commit_at }}">{{ entry.last_commit_at[:10] }}</time>{% else %}&mdash;{% endif %}</td>
        <td class="col-cat">
          <button class="tag" data-type="cat" data-value="{{ entry.category }}">
            {{ entry.category }}
          </button>
        </td>
        <td class="col-group">
          <button class="tag" data-type="group" data-value="{{ entry.group }}">
            {{ entry.group }}
          </button>
        </td>
        <td class="col-arrow"><span class="arrow">&rarr;</span></td>
      </tr>
      <tr class="expand-row" id="expand-{{ loop.index }}">
        <td></td>
-        <td colspan="5">
+        <td colspan="3">
          <div class="expand-content">
            {% if entry.description %}
            <div class="expand-desc">{{ entry.description | safe }}</div>
-            {% endif %} {% if entry.also_see %}
+            {% endif %}
            {% if entry.also_see %}
            <div class="expand-also-see">
              Also see: {% for see in entry.also_see %}<a
                href="{{ see.url }}"
@@ -131,11 +133,16 @@
                target="_blank"
                rel="noopener"
                >{{ entry.url | replace("https://", "") }}</a
-              >{% if entry.pushed_at %}<span class="expand-sep">&middot;</span
+              >
              >Last pushed {{ entry.pushed_at[:10] }}{% endif %}
            </div>
          </div>
        </td>
        <td class="col-cat">
          <button class="tag" data-type="group" data-value="{{ entry.group }}">
            {{ entry.group }}
          </button>
        </td>
        <td></td>
      </tr>
      {% endfor %}
    </tbody>
@@ -1,27 +1,18 @@
 """Tests for the build module."""
 import json
 import os
 import shutil
 import sys
 import textwrap
 from pathlib import Path
 import pytest
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 from build import (
    build,
    count_entries,
    extract_github_repo,
    extract_preview,
    group_categories,
    load_stars,
    parse_readme,
    render_content_html,
    slugify,
    sort_entries,
 )
 from readme_parser import slugify
 # ---------------------------------------------------------------------------
 # slugify
@@ -51,244 +42,6 @@ class TestSlugify:
        assert slugify("  Date  and  Time  ") == "date-and-time"
 # ---------------------------------------------------------------------------
 # count_entries
 # ---------------------------------------------------------------------------
 class TestCountEntries:
    """Checks ``count_entries`` against short markdown list snippets."""
    def test_counts_dash_entries(self):
        """Two dash-bulleted link entries give a count of 2."""
        markdown = "- [a](url) - Desc.\n- [b](url) - Desc."
        total = count_entries(markdown)
        assert total == 2
    def test_counts_star_entries(self):
        """One star-bulleted link entry gives a count of 1."""
        markdown = "* [a](url) - Desc."
        total = count_entries(markdown)
        assert total == 1
    def test_ignores_non_entries(self):
        """Plain text lines around a single entry do not add to the count."""
        markdown = "Some text\n- [a](url) - Desc.\nMore text"
        total = count_entries(markdown)
        assert total == 1
    def test_counts_indented_entries(self):
        """An entry indented by four spaces is still counted."""
        markdown = "    - [a](url) - Desc."
        total = count_entries(markdown)
        assert total == 1
    def test_empty_content(self):
        """An empty string contains no entries."""
        total = count_entries("")
        assert total == 0
 # ---------------------------------------------------------------------------
 # extract_preview
 # ---------------------------------------------------------------------------
 class TestExtractPreview:
    """Checks the comma-separated name preview returned by ``extract_preview``."""
    def test_basic(self):
        """Three entries are previewed as their names joined by ', '."""
        bullets = [
            "* [alpha](url) - A.",
            "* [beta](url) - B.",
            "* [gamma](url) - C.",
        ]
        preview = extract_preview("\n".join(bullets))
        assert preview == "alpha, beta, gamma"
    def test_max_four(self):
        """Only the first four of ten entry names appear in the preview."""
        bullets = [f"* [lib{idx}](url) - Desc." for idx in range(10)]
        preview = extract_preview("\n".join(bullets))
        assert preview == "lib0, lib1, lib2, lib3"
    def test_empty(self):
        """Empty content yields an empty preview string."""
        preview = extract_preview("")
        assert preview == ""
    def test_skips_subcategory_labels(self):
        """A bullet without a link ("Synchronous") is left out of the preview."""
        bullets = [
            "* Synchronous",
            "* [django](url) - Framework.",
            "* [flask](url) - Micro.",
        ]
        preview = extract_preview("\n".join(bullets))
        assert preview == "django, flask"
 # ---------------------------------------------------------------------------
 # render_content_html
 # ---------------------------------------------------------------------------
 class TestRenderContentHtml:
    """Checks the HTML fragments emitted by ``render_content_html``."""
    def test_basic_entry(self):
        """A linked entry renders its href, name, description and the entry class."""
        rendered = render_content_html("* [django](https://example.com) - A web framework.")
        expected_fragments = (
            'href="https://example.com"',
            "django",
            "A web framework.",
            'class="entry"',
        )
        for fragment in expected_fragments:
            assert fragment in rendered
    def test_subcategory_label(self):
        """A link-less bullet is rendered with the subcat class and its label text."""
        source = "* Synchronous\n* [django](https://x.com) - Framework."
        rendered = render_content_html(source)
        for fragment in ('class="subcat"', "Synchronous"):
            assert fragment in rendered
    def test_sub_entry(self):
        """A nested link under an entry is rendered with the entry-sub class."""
        source = "* [django](https://x.com) - Framework.\n    * [awesome-django](https://y.com)"
        rendered = render_content_html(source)
        for fragment in ('class="entry-sub"', "awesome-django"):
            assert fragment in rendered
    def test_link_only_entry(self):
        """An entry with no description still renders its href and name."""
        rendered = render_content_html("* [tool](https://x.com)")
        for fragment in ('href="https://x.com"', "tool"):
            assert fragment in rendered
 # ---------------------------------------------------------------------------
 # parse_readme
 # ---------------------------------------------------------------------------
 # Small README fixture shared by the parse_readme tests below: a title with
 # intro text, a `---` separator, two `##` categories (Alpha, Beta) with link
 # entries, a `# Resources` section holding Newsletters and Podcasts, and a
 # trailing `# Contributing` section.
 MINIMAL_README = textwrap.dedent("""\
    # Awesome Python
    Some intro text.
    ---
    ## Alpha
    _Libraries for alpha stuff._
    - [lib-a](https://example.com/a) - Does A.
    - [lib-b](https://example.com/b) - Does B.
    ## Beta
    _Tools for beta._
    - [lib-c](https://example.com/c) - Does C.
    # Resources
    Where to discover resources.
    ## Newsletters
    - [News One](https://example.com/n1)
    - [News Two](https://example.com/n2)
    ## Podcasts
    - [Pod One](https://example.com/p1)
    # Contributing
    Please contribute!
 """)
 class TestParseReadme:
    """Checks ``parse_readme`` against ``MINIMAL_README`` and small variants."""
    def test_category_count(self):
        """The fixture yields two categories."""
        categories, _resources = parse_readme(MINIMAL_README)
        assert len(categories) == 2
    def test_resource_count(self):
        """The fixture yields two resource sections."""
        _categories, resource_sections = parse_readme(MINIMAL_README)
        assert len(resource_sections) == 2
    def test_category_names(self):
        """Category names come from the ``##`` headings, in order."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["name"] == "Alpha"
        assert second["name"] == "Beta"
    def test_category_slugs(self):
        """Each category carries a lower-case slug of its name."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["slug"] == "alpha"
        assert second["slug"] == "beta"
    def test_category_description(self):
        """The italic line under a heading becomes the description, without underscores."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["description"] == "Libraries for alpha stuff."
        assert second["description"] == "Tools for beta."
    def test_category_content_has_entries(self):
        """The first category's content mentions both of its entries."""
        categories, _ = parse_readme(MINIMAL_README)
        alpha_content = categories[0]["content"]
        for entry_name in ("lib-a", "lib-b"):
            assert entry_name in alpha_content
    def test_resources_names(self):
        """Resource section names come from the headings under ``# Resources``."""
        _, resource_sections = parse_readme(MINIMAL_README)
        first, second = resource_sections[0], resource_sections[1]
        assert first["name"] == "Newsletters"
        assert second["name"] == "Podcasts"
    def test_resources_content(self):
        """Each resource section's content mentions one of its links."""
        _, resource_sections = parse_readme(MINIMAL_README)
        first, second = resource_sections[0], resource_sections[1]
        assert "News One" in first["content"]
        assert "Pod One" in second["content"]
    def test_contributing_skipped(self):
        """The ``# Contributing`` section appears in neither result list."""
        categories, resource_sections = parse_readme(MINIMAL_README)
        seen_names = [section["name"] for section in categories]
        seen_names += [section["name"] for section in resource_sections]
        assert "Contributing" not in seen_names
    def test_no_separator(self):
        """A README without the ``---`` separator produces two empty lists."""
        categories, resource_sections = parse_readme("# Just a heading\n\nSome text.\n")
        assert categories == []
        assert resource_sections == []
    def test_no_description(self):
        """A category with no italic line gets an empty description but keeps its content."""
        source = textwrap.dedent("""\
            # Title
            ---
            ## NullDesc
            - [item](https://x.com) - Thing.
            # Resources
            ## Tips
            - [tip](https://x.com)
            # Contributing
            Done.
        """)
        categories, _resources = parse_readme(source)
        null_desc = categories[0]
        assert null_desc["description"] == ""
        assert "item" in null_desc["content"]
 # ---------------------------------------------------------------------------
 # parse_readme on real README
 # ---------------------------------------------------------------------------
 class TestParseRealReadme:
    """Runs ``parse_readme`` on the repository's own README.md and checks key facts."""
    @pytest.fixture(autouse=True)
    def load_readme(self):
        """Read README.md from two directories above this file and parse it for every test."""
        tests_dir = os.path.dirname(__file__)
        readme_path = os.path.join(tests_dir, "..", "..", "README.md")
        with open(readme_path, encoding="utf-8") as handle:
            self.readme_text = handle.read()
        self.cats, self.resources = parse_readme(self.readme_text)
    def test_at_least_83_categories(self):
        category_total = len(self.cats)
        assert category_total >= 83
    def test_resources_has_newsletters_and_podcasts(self):
        resource_names = [section["name"] for section in self.resources]
        for expected in ("Newsletters", "Podcasts"):
            assert expected in resource_names
    def test_contributing_not_in_results(self):
        seen_names = [section["name"] for section in self.cats]
        seen_names += [section["name"] for section in self.resources]
        assert "Contributing" not in seen_names
    def test_first_category_is_admin_panels(self):
        first = self.cats[0]
        assert first["name"] == "Admin Panels"
        assert first["slug"] == "admin-panels"
    def test_last_category_is_wsgi_servers(self):
        last = self.cats[-1]
        assert last["name"] == "WSGI Servers"
        assert last["slug"] == "wsgi-servers"
    def test_restful_api_slug(self):
        all_slugs = [section["slug"] for section in self.cats]
        assert "restful-api" in all_slugs
    def test_descriptions_extracted(self):
        first = self.cats[0]
        assert first["description"] == "Libraries for administrative interfaces."
 # ---------------------------------------------------------------------------
 # group_categories
 # ---------------------------------------------------------------------------
@@ -318,26 +71,6 @@ class TestGroupCategories:
        assert "Resources" in group_names
 # ---------------------------------------------------------------------------
 # render_markdown (kept for compatibility)
 # ---------------------------------------------------------------------------
 class TestRenderMarkdown:
    """Checks ``render_markdown`` output for a link list and for plain text."""
    def test_renders_link_list(self):
        """A one-item link list renders an ``<li>`` containing the anchor."""
        from build import render_markdown
        rendered = render_markdown("- [lib](https://example.com) - Does stuff.")
        for fragment in ("<li>", '<a href="https://example.com">lib</a>'):
            assert fragment in rendered
    def test_renders_plain_text(self):
        """Plain text is wrapped in a paragraph element."""
        from build import render_markdown
        rendered = render_markdown("Hello world")
        assert "<p>Hello world</p>" in rendered
 # ---------------------------------------------------------------------------
 # build (integration)
 # ---------------------------------------------------------------------------
@@ -413,27 +146,6 @@ class TestBuild:
        # No category sub-pages
        assert not (site / "categories").exists()
    def test_build_creates_cname(self, tmp_path):
        """After a build, website/output/CNAME exists and names awesome-python.com."""
        source = textwrap.dedent("""\
            # T
            ---
            ## Only
            - [x](https://x.com) - X.
            # Contributing
            Done.
        """)
        self._make_repo(tmp_path, source)
        build(str(tmp_path))
        cname_path = tmp_path.joinpath("website", "output", "CNAME")
        assert cname_path.exists()
        cname_text = cname_path.read_text()
        assert "awesome-python.com" in cname_text
    def test_build_cleans_stale_output(self, tmp_path):
        readme = textwrap.dedent("""\
            # T
@@ -8,7 +8,6 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 from fetch_github_stars import (
    build_graphql_query,
    extract_github_repos,
    load_cache,
    parse_graphql_response,
    save_cache,
 )
@@ -65,27 +64,6 @@ class TestExtractGithubRepos:
        assert result == {"org/repo"}
 class TestLoadCache:
    """Checks ``load_cache`` with ``fetch_github_stars.CACHE_FILE`` patched to a temp path."""
    def test_returns_empty_when_missing(self, tmp_path, monkeypatch):
        """A cache path that does not exist yields an empty dict."""
        missing_path = tmp_path / "nonexistent.json"
        monkeypatch.setattr("fetch_github_stars.CACHE_FILE", missing_path)
        loaded = load_cache()
        assert loaded == {}
    def test_loads_valid_cache(self, tmp_path, monkeypatch):
        """A valid JSON cache file is returned as the equivalent dict."""
        cache_path = tmp_path / "stars.json"
        cache_path.write_text('{"a/b": {"stars": 1}}', encoding="utf-8")
        monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_path)
        loaded = load_cache()
        assert loaded == {"a/b": {"stars": 1}}
    def test_returns_empty_on_corrupt_json(self, tmp_path, monkeypatch):
        """A cache file that is not valid JSON yields an empty dict."""
        cache_path = tmp_path / "stars.json"
        cache_path.write_text("not json", encoding="utf-8")
        monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_path)
        loaded = load_cache()
        assert loaded == {}
 class TestSaveCache:
    def test_creates_directory_and_writes_json(self, tmp_path, monkeypatch):
        data_dir = tmp_path / "data"
@@ -0,0 +1,424 @@
 """Tests for the readme_parser module."""
 import os
 import textwrap
 import pytest
 from readme_parser import (
    _parse_section_entries,
    _render_section_html,
    parse_readme,
    render_inline_html,
    render_inline_text,
 )
 from markdown_it import MarkdownIt
 from markdown_it.tree import SyntaxTreeNode
 def _parse_inline(md_text: str) -> list[SyntaxTreeNode]:
    """Helper: parse ``md_text`` with the "commonmark" preset and return inline children.

    Walks the syntax tree root -> first block -> its first child -> children,
    so ``md_text`` is expected to start with a single paragraph.
    """
    tokens = MarkdownIt("commonmark").parse(md_text)
    tree = SyntaxTreeNode(tokens)
    paragraph = tree.children[0]
    inline = paragraph.children[0]
    return inline.children
 class TestRenderInlineHtml:
    """Checks ``render_inline_html`` on inline nodes built by ``_parse_inline``."""
    def test_plain_text_escapes_html(self):
        """Angle brackets and ampersands are emitted as HTML entities."""
        rendered = render_inline_html(_parse_inline("Hello <world> & friends"))
        assert rendered == "Hello &lt;world&gt; &amp; friends"
    def test_link_with_target(self):
        """A link renders its href plus target/rel attributes and the link text."""
        rendered = render_inline_html(_parse_inline("[name](https://example.com)"))
        expected_fragments = (
            'href="https://example.com"',
            'target="_blank"',
            'rel="noopener"',
            ">name</a>",
        )
        for fragment in expected_fragments:
            assert fragment in rendered
    def test_emphasis(self):
        """Single-asterisk emphasis becomes an ``<em>`` element."""
        rendered = render_inline_html(_parse_inline("*italic* text"))
        assert "<em>italic</em>" in rendered
    def test_strong(self):
        """Double-asterisk emphasis becomes a ``<strong>`` element."""
        rendered = render_inline_html(_parse_inline("**bold** text"))
        assert "<strong>bold</strong>" in rendered
    def test_code_inline(self):
        """A backtick span becomes a ``<code>`` element."""
        rendered = render_inline_html(_parse_inline("`some code`"))
        assert "<code>some code</code>" in rendered
    def test_mixed_link_and_text(self):
        """Text before and after a link is kept alongside the anchor."""
        rendered = render_inline_html(_parse_inline("See [foo](https://x.com) for details."))
        for fragment in ("See ", ">foo</a>", " for details."):
            assert fragment in rendered
 class TestRenderInlineText:
    """Checks the plain-text rendering done by ``render_inline_text``."""
    def test_plain_text(self):
        """Plain text passes through unchanged."""
        plain = render_inline_text(_parse_inline("Hello world"))
        assert plain == "Hello world"
    def test_link_becomes_text(self):
        """A link is reduced to its label; the URL is dropped."""
        plain = render_inline_text(_parse_inline("See [awesome-algos](https://github.com/x/y)."))
        assert plain == "See awesome-algos."
    def test_emphasis_stripped(self):
        """Emphasis markers are removed, leaving the emphasised words."""
        plain = render_inline_text(_parse_inline("*italic* text"))
        assert plain == "italic text"
    def test_code_inline_kept(self):
        """The content of a backtick span is kept, without the backticks."""
        plain = render_inline_text(_parse_inline("`code` here"))
        assert plain == "code here"
 # Small README fixture used by TestParseReadmeSections: a title with intro
 # text, a `---` separator, two `##` categories (Alpha, Beta) with link entries,
 # a `# Resources` section holding Newsletters and Podcasts, and a trailing
 # `# Contributing` section.
 MINIMAL_README = textwrap.dedent("""\
    # Awesome Python
    Some intro text.
    ---
    ## Alpha
    _Libraries for alpha stuff._
    - [lib-a](https://example.com/a) - Does A.
    - [lib-b](https://example.com/b) - Does B.
    ## Beta
    _Tools for beta._
    - [lib-c](https://example.com/c) - Does C.
    # Resources
    Where to discover resources.
    ## Newsletters
    - [News One](https://example.com/n1)
    - [News Two](https://example.com/n2)
    ## Podcasts
    - [Pod One](https://example.com/p1)
    # Contributing
    Please contribute!
 """)
 class TestParseReadmeSections:
    """Checks how ``parse_readme`` splits a README into categories and resources."""
    def test_category_count(self):
        """The fixture yields two categories."""
        categories, _resources = parse_readme(MINIMAL_README)
        assert len(categories) == 2
    def test_resource_count(self):
        """The fixture yields two resource sections."""
        _categories, resource_sections = parse_readme(MINIMAL_README)
        assert len(resource_sections) == 2
    def test_category_names(self):
        """Category names come from the ``##`` headings, in order."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["name"] == "Alpha"
        assert second["name"] == "Beta"
    def test_category_slugs(self):
        """Each category carries a lower-case slug of its name."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["slug"] == "alpha"
        assert second["slug"] == "beta"
    def test_category_description(self):
        """The italic line under a heading becomes the description, without underscores."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["description"] == "Libraries for alpha stuff."
        assert second["description"] == "Tools for beta."
    def test_resource_names(self):
        """Resource section names come from the headings under ``# Resources``."""
        _, resource_sections = parse_readme(MINIMAL_README)
        first, second = resource_sections[0], resource_sections[1]
        assert first["name"] == "Newsletters"
        assert second["name"] == "Podcasts"
    def test_contributing_skipped(self):
        """The ``# Contributing`` section appears in neither result list."""
        categories, resource_sections = parse_readme(MINIMAL_README)
        seen_names = [section["name"] for section in categories]
        seen_names += [section["name"] for section in resource_sections]
        assert "Contributing" not in seen_names
    def test_no_separator(self):
        """A README without the ``---`` separator produces two empty lists."""
        categories, resource_sections = parse_readme("# Just a heading\n\nSome text.\n")
        assert categories == []
        assert resource_sections == []
    def test_no_description(self):
        """A category with no italic line gets an empty description but keeps its entry."""
        source = textwrap.dedent("""\
            # Title
            ---
            ## NullDesc
            - [item](https://x.com) - Thing.
            # Resources
            ## Tips
            - [tip](https://x.com)
            # Contributing
            Done.
        """)
        categories, _resources = parse_readme(source)
        null_desc = categories[0]
        assert null_desc["description"] == ""
        assert null_desc["entries"][0]["name"] == "item"
    def test_description_with_link_stripped(self):
        """A link inside the description is reduced to its label text."""
        source = textwrap.dedent("""\
            # T
            ---
            ## Algos
            _Algorithms. Also see [awesome-algos](https://example.com)._
            - [lib](https://x.com) - Lib.
            # Contributing
            Done.
        """)
        categories, _ = parse_readme(source)
        description = categories[0]["description"]
        assert description == "Algorithms. Also see awesome-algos."
 def _content_nodes(md_text: str) -> list[SyntaxTreeNode]:
    """Helper: parse ``md_text`` with the "commonmark" preset and return the root's children."""
    tokens = MarkdownIt("commonmark").parse(md_text)
    tree = SyntaxTreeNode(tokens)
    return tree.children
 class TestParseSectionEntries:
    """Checks the entry dicts built by ``_parse_section_entries`` and related category fields."""
    def test_flat_entries(self):
        """Two top-level bullets become two entries with name, url and description."""
        markdown = (
            "- [django](https://example.com/d) - A web framework.\n"
            "- [flask](https://example.com/f) - A micro framework.\n"
        )
        entries = _parse_section_entries(_content_nodes(markdown))
        assert len(entries) == 2
        django, flask = entries
        assert django["name"] == "django"
        assert django["url"] == "https://example.com/d"
        assert "web framework" in django["description"]
        assert django["also_see"] == []
        assert flask["name"] == "flask"
    def test_link_only_entry(self):
        """A bullet holding only a link yields an entry with an empty description."""
        entries = _parse_section_entries(_content_nodes("- [tool](https://x.com)\n"))
        assert len(entries) == 1
        only = entries[0]
        assert only["name"] == "tool"
        assert only["description"] == ""
    def test_subcategorized_entries(self):
        """Links nested under plain-text labels are returned as entries; labels are not."""
        markdown = "".join(
            [
                "- Algorithms\n",
                "  - [algos](https://x.com/a) - Algo lib.\n",
                "  - [sorts](https://x.com/s) - Sort lib.\n",
                "- Design Patterns\n",
                "  - [patterns](https://x.com/p) - Pattern lib.\n",
            ]
        )
        entries = _parse_section_entries(_content_nodes(markdown))
        assert len(entries) == 3
        assert entries[0]["name"] == "algos"
        assert entries[2]["name"] == "patterns"
    def test_text_before_link_is_subcategory(self):
        """A bullet whose link is preceded by text is a label, so its link is not an entry."""
        markdown = "".join(
            [
                "- MySQL - [awesome-mysql](http://example.com/awesome-mysql/)\n",
                "  - [mysqlclient](https://example.com/mysqlclient) - MySQL connector.\n",
                "  - [pymysql](https://example.com/pymysql) - Pure Python MySQL driver.\n",
            ]
        )
        entries = _parse_section_entries(_content_nodes(markdown))
        # Only the two nested drivers count; the awesome-mysql link belongs to the label.
        assert len(entries) == 2
        entry_names = [entry["name"] for entry in entries]
        assert "awesome-mysql" not in entry_names
        assert "mysqlclient" in entry_names
        assert "pymysql" in entry_names
    def test_also_see_sub_entries(self):
        """A link nested under an entry is attached to that entry's ``also_see`` list."""
        markdown = (
            "- [asyncio](https://docs.python.org/3/library/asyncio.html) - Async I/O.\n"
            "  - [awesome-asyncio](https://github.com/timofurrer/awesome-asyncio)\n"
            "- [trio](https://github.com/python-trio/trio) - Friendly async.\n"
        )
        entries = _parse_section_entries(_content_nodes(markdown))
        assert len(entries) == 2
        asyncio_entry, trio_entry = entries
        assert asyncio_entry["name"] == "asyncio"
        assert len(asyncio_entry["also_see"]) == 1
        assert asyncio_entry["also_see"][0]["name"] == "awesome-asyncio"
        assert trio_entry["name"] == "trio"
        assert trio_entry["also_see"] == []
    def test_entry_count_includes_also_see(self):
        """``entry_count`` adds the also_see links to the main entries."""
        source = textwrap.dedent("""\
            # T
            ---
            ## Async
            - [asyncio](https://x.com) - Async I/O.
              - [awesome-asyncio](https://y.com)
            - [trio](https://z.com) - Friendly async.
            # Contributing
            Done.
        """)
        categories, _ = parse_readme(source)
        # Two main entries plus one also_see link.
        assert categories[0]["entry_count"] == 3
    def test_preview_first_four_names(self):
        """``preview`` lists the first four of five entry names, joined by ', '."""
        source = textwrap.dedent("""\
            # T
            ---
            ## Libs
            - [alpha](https://x.com) - A.
            - [beta](https://x.com) - B.
            - [gamma](https://x.com) - C.
            - [delta](https://x.com) - D.
            - [epsilon](https://x.com) - E.
            # Contributing
            Done.
        """)
        categories, _ = parse_readme(source)
        assert categories[0]["preview"] == "alpha, beta, gamma, delta"
    def test_description_html_escapes_xss(self):
        """A script tag in the description is stored escaped, never raw."""
        markdown = '- [lib](https://x.com) - A <script>alert(1)</script> lib.\n'
        entries = _parse_section_entries(_content_nodes(markdown))
        description = entries[0]["description"]
        assert "<script>" not in description
        assert "&lt;script&gt;" in description
 class TestRenderSectionHtml:
    """Checks the HTML produced by ``_render_section_html`` for parsed list nodes."""
    def test_basic_entry(self):
        """A linked entry renders the entry class, href, name and description."""
        nodes = _content_nodes("- [django](https://example.com) - A web framework.\n")
        html = _render_section_html(nodes)
        assert 'class="entry"' in html
        assert 'href="https://example.com"' in html
        assert "django" in html
        assert "A web framework." in html
    def test_subcategory_label(self):
        """A plain-text bullet renders as a subcat label; its nested link as an entry."""
        nodes = _content_nodes(
            "- Synchronous\n  - [django](https://x.com) - Framework.\n"
        )
        html = _render_section_html(nodes)
        assert 'class="subcat"' in html
        assert "Synchronous" in html
        assert 'class="entry"' in html
    def test_sub_entry(self):
        """A link nested under an entry renders with the entry-sub class."""
        nodes = _content_nodes(
            "- [django](https://x.com) - Framework.\n"
            "  - [awesome-django](https://y.com)\n"
        )
        html = _render_section_html(nodes)
        assert 'class="entry-sub"' in html
        assert "awesome-django" in html
    def test_link_only_entry(self):
        """An entry with no description still renders the entry class, href and name."""
        nodes = _content_nodes("- [tool](https://x.com)\n")
        html = _render_section_html(nodes)
        assert 'class="entry"' in html
        assert 'href="https://x.com"' in html
        assert "tool" in html
    def test_xss_escaped_in_name(self):
        """Raw HTML inside a link label must not reach the output as a live tag."""
        nodes = _content_nodes('- [<img onerror=alert(1)>](https://x.com) - Bad.\n')
        html = _render_section_html(nodes)
        # The earlier check (`"onerror" not in html or "&" in html`) passed as
        # soon as any "&" appeared anywhere in the output, so it could not
        # detect an unescaped tag. Assert directly that no raw <img> is emitted.
        assert "<img" not in html
    def test_xss_escaped_in_subcat(self):
        """A script tag used as a bullet's text must not appear raw in the output."""
        nodes = _content_nodes("- <script>alert(1)</script>\n")
        html = _render_section_html(nodes)
        assert "<script>" not in html
 class TestParseRealReadme:
    """Runs ``parse_readme`` on the repository's own README.md and checks key facts."""
    @pytest.fixture(autouse=True)
    def load_readme(self):
        """Read README.md from two directories above this file and parse it for every test."""
        tests_dir = os.path.dirname(__file__)
        readme_path = os.path.join(tests_dir, "..", "..", "README.md")
        with open(readme_path, encoding="utf-8") as handle:
            self.readme_text = handle.read()
        self.cats, self.resources = parse_readme(self.readme_text)
    def _category_named(self, name):
        """Return the first parsed category whose name equals ``name``."""
        return next(c for c in self.cats if c["name"] == name)
    def test_at_least_83_categories(self):
        category_total = len(self.cats)
        assert category_total >= 83
    def test_resources_has_newsletters_and_podcasts(self):
        resource_names = [section["name"] for section in self.resources]
        for expected in ("Newsletters", "Podcasts"):
            assert expected in resource_names
    def test_contributing_not_in_results(self):
        seen_names = [section["name"] for section in self.cats]
        seen_names += [section["name"] for section in self.resources]
        assert "Contributing" not in seen_names
    def test_first_category_is_admin_panels(self):
        first = self.cats[0]
        assert first["name"] == "Admin Panels"
        assert first["slug"] == "admin-panels"
    def test_last_category_is_wsgi_servers(self):
        last = self.cats[-1]
        assert last["name"] == "WSGI Servers"
        assert last["slug"] == "wsgi-servers"
    def test_restful_api_slug(self):
        all_slugs = [section["slug"] for section in self.cats]
        assert "restful-api" in all_slugs
    def test_descriptions_extracted(self):
        first = self.cats[0]
        assert first["description"] == "Libraries for administrative interfaces."
    def test_entry_counts_nonzero(self):
        """Every category reports at least one entry."""
        for category in self.cats:
            assert category["entry_count"] > 0, f"{category['name']} has 0 entries"
    def test_previews_nonempty(self):
        """Every category has a non-empty preview."""
        for category in self.cats:
            assert category["preview"], f"{category['name']} has empty preview"
    def test_content_html_nonempty(self):
        """Every category has non-empty rendered HTML."""
        for category in self.cats:
            assert category["content_html"], f"{category['name']} has empty content_html"
    def test_algorithms_has_subcategories(self):
        """The algorithms category renders at least one subcat label."""
        algorithms = self._category_named("Algorithms and Design Patterns")
        assert 'class="subcat"' in algorithms["content_html"]
    def test_async_has_also_see(self):
        """The asyncio entry lists awesome-asyncio first in its also_see links."""
        async_category = self._category_named("Asynchronous Programming")
        asyncio_entry = next(e for e in async_category["entries"] if e["name"] == "asyncio")
        related = asyncio_entry["also_see"]
        assert len(related) >= 1
        assert related[0]["name"] == "awesome-asyncio"
    def test_description_links_stripped_to_text(self):
        """The algorithms description keeps the link label but no URL."""
        algorithms = self._category_named("Algorithms and Design Patterns")
        description = algorithms["description"]
        assert "awesome-algorithms" in description
        assert "https://" not in description