Merge pull request #2971 from vinta/feature/markdown-it-py-parser

feat: replace regex README parser with markdown-it-py AST parser
This commit is contained in:
Vinta Chen
2026-03-18 20:35:06 +08:00
committed by GitHub
15 changed files with 2529 additions and 1443 deletions

View File

@@ -1,17 +1,24 @@
-include .env
export
site_install:
uv sync --no-dev
install:
uv sync
site_fetch_stats:
fetch_stats:
uv run python website/fetch_github_stars.py
site_build:
test:
uv run pytest website/tests/ -v
build:
uv run python website/build.py
site_preview: site_build
python -m http.server -d website/output/ 8000
site_deploy: site_build
@echo "Deploy via GitHub Actions (push to master)"
preview: build
@echo "Check the website on http://localhost:8000"
uv run watchmedo shell-command \
--patterns='*.md;*.html;*.css;*.js;*.py' \
--recursive \
--wait --drop \
--command='uv run python website/build.py' \
README.md website/templates website/static website/data & \
python -m http.server -b 127.0.0.1 -d website/output/ 8000

View File

@@ -2,22 +2,30 @@
name = "awesome-python"
version = "0.1.0"
description = "An opinionated list of awesome Python frameworks, libraries, software and resources."
authors = [{ name = "Vinta Chen", email = "vinta.chen@gmail.com" }]
readme = "README.md"
license = "MIT"
requires-python = ">=3.13"
dependencies = [
"httpx==0.28.1",
"jinja2==3.1.6",
"markdown==3.10.2",
]
dependencies = []
[project.urls]
Homepage = "https://awesome-python.com/"
Repository = "https://github.com/vinta/awesome-python"
[dependency-groups]
build = ["httpx==0.28.1", "jinja2==3.1.6", "markdown-it-py==4.0.0"]
lint = ["ruff==0.15.6"]
test = ["pytest==9.0.2"]
dev = [
"pytest==9.0.2",
"ruff==0.15.6",
{ include-group = "build" },
{ include-group = "lint" },
{ include-group = "test" },
"watchdog==6.0.0",
]
[tool.pytest.ini_options]
testpaths = ["website/tests"]
pythonpath = ["website"]
[tool.ruff]
target-version = "py313"
line-length = 100
line-length = 200

77
uv.lock generated
View File

@@ -18,30 +18,46 @@ wheels = [
name = "awesome-python"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown" },
]
[package.dev-dependencies]
build = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown-it-py" },
]
dev = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown-it-py" },
{ name = "pytest" },
{ name = "ruff" },
{ name = "watchdog" },
]
lint = [
{ name = "ruff" },
]
test = [
{ name = "pytest" },
]
[package.metadata]
requires-dist = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown", specifier = "==3.10.2" },
]
[package.metadata.requires-dev]
build = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown-it-py", specifier = "==4.0.0" },
]
dev = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown-it-py", specifier = "==4.0.0" },
{ name = "pytest", specifier = "==9.0.2" },
{ name = "ruff", specifier = "==0.15.6" },
{ name = "watchdog", specifier = "==6.0.0" },
]
lint = [{ name = "ruff", specifier = "==0.15.6" }]
test = [{ name = "pytest", specifier = "==9.0.2" }]
[[package]]
name = "certifi"
@@ -129,12 +145,15 @@ wheels = [
]
[[package]]
name = "markdown"
version = "3.10.2"
name = "markdown-it-py"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2b/f4/69fa6ed85ae003c2378ffa8f6d2e3234662abd02c10d216c0ba96081a238/markdown-3.10.2.tar.gz", hash = "sha256:994d51325d25ad8aa7ce4ebaec003febcce822c3f8c911e3b17c52f7f589f950", size = 368805, upload-time = "2026-02-09T14:57:26.942Z" }
dependencies = [
{ name = "mdurl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/de/1f/77fa3081e4f66ca3576c896ae5d31c3002ac6607f9747d2e3aa49227e464/markdown-3.10.2-py3-none-any.whl", hash = "sha256:e91464b71ae3ee7afd3017d9f358ef0baf158fd9a298db92f1d4761133824c36", size = 108180, upload-time = "2026-02-09T14:57:25.787Z" },
{ url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
]
[[package]]
@@ -189,6 +208,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
]
[[package]]
name = "mdurl"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
]
[[package]]
name = "packaging"
version = "26.0"
@@ -256,3 +284,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" },
{ url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" },
]
[[package]]
name = "watchdog"
version = "6.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload-time = "2024-11-01T14:07:13.037Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480, upload-time = "2024-11-01T14:06:42.952Z" },
{ url = "https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451, upload-time = "2024-11-01T14:06:45.084Z" },
{ url = "https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057, upload-time = "2024-11-01T14:06:47.324Z" },
{ url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" },
{ url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" },
{ url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" },
{ url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" },
{ url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" },
{ url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" },
{ url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" },
{ url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" },
{ url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" },
{ url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" },
]

View File

@@ -7,9 +7,10 @@ import shutil
from pathlib import Path
from typing import TypedDict
import markdown
from jinja2 import Environment, FileSystemLoader
from readme_parser import parse_readme, slugify
# Thematic grouping of categories. Each category name must match exactly
# as it appears in README.md (the ## heading text).
SECTION_GROUPS: list[tuple[str, list[str]]] = [
@@ -67,217 +68,6 @@ SECTION_GROUPS: list[tuple[str, list[str]]] = [
]
def slugify(name: str) -> str:
"""Convert a category name to a URL-friendly slug."""
slug = name.lower()
slug = re.sub(r"[^a-z0-9\s-]", "", slug)
slug = re.sub(r"[\s]+", "-", slug.strip())
slug = re.sub(r"-+", "-", slug)
return slug
def count_entries(content: str) -> int:
"""Count library entries (lines starting with * [ or - [) in a content block."""
return sum(1 for line in content.split("\n") if re.match(r"\s*[-*]\s+\[", line))
def extract_preview(content: str, *, max_names: int = 4) -> str:
"""Extract first N main library names from markdown content for preview text.
Only includes top-level or single-indent entries (indent <= 3 spaces),
skipping subcategory labels (items without links) and deep sub-entries.
"""
names = []
for m in re.finditer(r"^(\s*)[-*]\s+\[([^\]]+)\]", content, re.MULTILINE):
indent_len = len(m.group(1))
if indent_len > 3:
continue
names.append(m.group(2))
if len(names) >= max_names:
break
return ", ".join(names)
def render_content_html(content: str) -> str:
"""Render category markdown content to HTML with subcategory detection.
Lines that are list items without links (e.g., "- Synchronous") are
treated as subcategory headers and rendered as bold dividers.
Indent levels in the README:
- 0 spaces: top-level entry or subcategory label
- 2 spaces: entry under a subcategory (still a main entry)
- 4+ spaces: sub-entry (e.g., awesome-django under django)
"""
lines = content.split("\n")
out: list[str] = []
for line in lines:
stripped = line.strip()
indent_len = len(line) - len(line.lstrip())
# Detect subcategory labels: list items without links
m = re.match(r"^[-*]\s+(.+)$", stripped)
if m and "[" not in stripped:
label = m.group(1)
out.append(f'<div class="subcat">{label}</div>')
continue
# Entry with link and description: * [name](url) - Description.
m = re.match(
r"^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*[-\u2013\u2014]\s*(.+)$",
line,
)
if m:
name, url, desc = m.groups()
if indent_len > 3:
out.append(
f'<div class="entry-sub">'
f'<a href="{url}">{name}</a>'
f"</div>"
)
else:
out.append(
f'<div class="entry">'
f'<a href="{url}">{name}</a>'
f'<span class="sep">&mdash;</span>{desc}'
f"</div>"
)
continue
# Link-only entry (no description): * [name](url)
m = re.match(r"^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*$", line)
if m:
name, url = m.groups()
if indent_len > 3:
out.append(
f'<div class="entry-sub">'
f'<a href="{url}">{name}</a>'
f"</div>"
)
else:
out.append(
f'<div class="entry">'
f'<a href="{url}">{name}</a>'
f"</div>"
)
continue
return "\n".join(out)
def parse_readme(text: str) -> tuple[list[dict], list[dict]]:
"""Parse README.md text into categories and resources.
Returns:
(categories, resources) where each is a list of dicts with keys:
name, slug, description, content
"""
lines = text.split("\n")
separator_idx = None
for i, line in enumerate(lines):
if line.strip() == "---" and i > 0:
separator_idx = i
break
if separator_idx is None:
return [], []
resources_idx = None
contributing_idx = None
for i, line in enumerate(lines):
if line.strip() == "# Resources":
resources_idx = i
elif line.strip() == "# Contributing":
contributing_idx = i
cat_end = resources_idx if resources_idx is not None else len(lines)
category_lines = lines[separator_idx + 1 : cat_end]
resource_lines = []
if resources_idx is not None:
res_end = contributing_idx if contributing_idx is not None else len(lines)
resource_lines = lines[resources_idx:res_end]
categories = _extract_sections(category_lines, level=2)
resources = _extract_sections(resource_lines, level=2)
return categories, resources
def _extract_sections(lines: list[str], *, level: int) -> list[dict]:
"""Extract ## sections from a block of lines."""
prefix = "#" * level + " "
sections = []
current_name = None
current_lines: list[str] = []
for line in lines:
if line.startswith(prefix) and not line.startswith(prefix + "#"):
if current_name is not None:
sections.append(_build_section(current_name, current_lines))
current_name = line[len(prefix) :].strip()
current_lines = []
elif current_name is not None:
current_lines.append(line)
if current_name is not None:
sections.append(_build_section(current_name, current_lines))
return sections
def _build_section(name: str, lines: list[str]) -> dict:
"""Build a section dict from a name and its content lines."""
while lines and not lines[0].strip():
lines = lines[1:]
while lines and not lines[-1].strip():
lines = lines[:-1]
description = ""
content_lines = lines
if lines:
m = re.match(r"^_(.+)_$", lines[0].strip())
if m:
description = m.group(1)
content_lines = lines[1:]
while content_lines and not content_lines[0].strip():
content_lines = content_lines[1:]
content = "\n".join(content_lines).strip()
return {
"name": name,
"slug": slugify(name),
"description": description,
"content": content,
}
def render_markdown(text: str) -> str:
"""Render markdown text to HTML."""
md = markdown.Markdown(extensions=["extra"])
return md.convert(text)
def strip_markdown_links(text: str) -> str:
"""Replace [text](url) with just text for plain-text contexts."""
return re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
def render_inline_markdown(text: str) -> str:
"""Render inline markdown (links, bold, italic) to HTML."""
from markupsafe import Markup
html = markdown.markdown(text)
# Strip wrapping <p>...</p> since this is inline content
html = re.sub(r"^<p>(.*)</p>$", r"\1", html.strip())
# Add target/rel to links for external navigation
html = html.replace("<a ", '<a target="_blank" rel="noopener" ')
return Markup(html)
def group_categories(
categories: list[dict],
resources: list[dict],
@@ -285,10 +75,11 @@ def group_categories(
"""Organize categories and resources into thematic section groups."""
cat_by_name = {c["name"]: c for c in categories}
groups = []
grouped_names: set[str] = set()
for group_name, cat_names in SECTION_GROUPS:
grouped_names.update(cat_names)
if group_name == "Resources":
# Resources group uses parsed resources directly
group_cats = list(resources)
else:
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
@@ -301,9 +92,6 @@ def group_categories(
})
# Any categories not in a group go into "Other"
grouped_names = set()
for _, cat_names in SECTION_GROUPS:
grouped_names.update(cat_names)
ungrouped = [c for c in categories if c["name"] not in grouped_names]
if ungrouped:
groups.append({
@@ -323,13 +111,13 @@ class Entry(TypedDict):
group: str
stars: int | None
owner: str | None
pushed_at: str | None
last_commit_at: str | None
class StarData(TypedDict):
stars: int
owner: str
pushed_at: str
last_commit_at: str
fetched_at: str
@@ -367,7 +155,6 @@ def sort_entries(entries: list[dict]) -> list[dict]:
def extract_entries(
categories: list[dict],
resources: list[dict],
groups: list[dict],
) -> list[dict]:
"""Flatten categories into individual library entries for table display."""
@@ -379,38 +166,18 @@ def extract_entries(
entries: list[dict] = []
for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other")
last_entry_indent = -1
for line in cat["content"].split("\n"):
indent_len = len(line) - len(line.lstrip())
# Link-only sub-item deeper than parent → "also see"
m_sub = re.match(r"\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*$", line)
if m_sub and indent_len > last_entry_indent >= 0 and entries:
entries[-1]["also_see"].append({
"name": m_sub.group(1),
"url": m_sub.group(2),
})
continue
if indent_len > 3:
continue
m = re.match(
r"\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*(?:[-\u2013\u2014]\s*(.+))?$",
line,
)
if m:
last_entry_indent = indent_len
entries.append({
"name": m.group(1),
"url": m.group(2),
"description": render_inline_markdown(m.group(3)) if m.group(3) else "",
"category": cat["name"],
"group": group_name,
"stars": None,
"owner": None,
"pushed_at": None,
"also_see": [],
})
for entry in cat["entries"]:
entries.append({
"name": entry["name"],
"url": entry["url"],
"description": entry["description"],
"category": cat["name"],
"group": group_name,
"stars": None,
"owner": None,
"last_commit_at": None,
"also_see": entry["also_see"],
})
return entries
@@ -420,7 +187,6 @@ def build(repo_root: str) -> None:
website = repo / "website"
readme_text = (repo / "README.md").read_text(encoding="utf-8")
# Extract subtitle from the first non-empty, non-heading line
subtitle = ""
for line in readme_text.split("\n"):
stripped = line.strip()
@@ -429,47 +195,33 @@ def build(repo_root: str) -> None:
break
categories, resources = parse_readme(readme_text)
# Enrich with entry counts, rendered HTML, previews, and clean descriptions
for cat in categories + resources:
cat["entry_count"] = count_entries(cat["content"])
cat["content_html"] = render_content_html(cat["content"])
cat["preview"] = extract_preview(cat["content"])
cat["description"] = strip_markdown_links(cat["description"])
# All fields pre-computed: entry_count, content_html, preview, description
total_entries = sum(c["entry_count"] for c in categories)
# Organize into groups
groups = group_categories(categories, resources)
entries = extract_entries(categories, groups)
# Flatten entries for table view
entries = extract_entries(categories, resources, groups)
# Load and merge GitHub star data
stars_data = load_stars(website / "data" / "github_stars.json")
for entry in entries:
repo_key = extract_github_repo(entry["url"])
if repo_key and repo_key in stars_data:
entry["stars"] = stars_data[repo_key]["stars"]
entry["owner"] = stars_data[repo_key]["owner"]
entry["pushed_at"] = stars_data[repo_key].get("pushed_at", "")
sd = stars_data[repo_key]
entry["stars"] = sd["stars"]
entry["owner"] = sd["owner"]
entry["last_commit_at"] = sd.get("last_commit_at", "")
# Sort by stars descending
entries = sort_entries(entries)
# Set up Jinja2
env = Environment(
loader=FileSystemLoader(website / "templates"),
autoescape=True,
)
# Output directory
site_dir = website / "output"
if site_dir.exists():
shutil.rmtree(site_dir)
site_dir.mkdir(parents=True)
# Generate single index.html
tpl_index = env.get_template("index.html")
(site_dir / "index.html").write_text(
tpl_index.render(
@@ -484,14 +236,10 @@ def build(repo_root: str) -> None:
encoding="utf-8",
)
# Copy static assets
static_src = website / "static"
static_dst = site_dir / "static"
if static_src.exists():
shutil.copytree(static_src, static_dst)
# Write CNAME
(site_dir / "CNAME").write_text("awesome-python.com\n", encoding="utf-8")
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
print(f"Total entries: {total_entries}")

File diff suppressed because it is too large Load Diff

View File

@@ -10,14 +10,14 @@ from pathlib import Path
import httpx
from build import extract_github_repo
from build import extract_github_repo, load_stars
CACHE_MAX_AGE_DAYS = 7
DATA_DIR = Path(__file__).parent / "data"
CACHE_FILE = DATA_DIR / "github_stars.json"
README_PATH = Path(__file__).parent.parent / "README.md"
GRAPHQL_URL = "https://api.github.com/graphql"
BATCH_SIZE = 100
BATCH_SIZE = 50
def extract_github_repos(text: str) -> set[str]:
@@ -30,17 +30,6 @@ def extract_github_repos(text: str) -> set[str]:
return repos
def load_cache() -> dict:
"""Load the star cache from disk. Returns empty dict if missing or corrupt."""
if CACHE_FILE.exists():
try:
return json.loads(CACHE_FILE.read_text(encoding="utf-8"))
except json.JSONDecodeError:
print(f"Warning: corrupt cache at {CACHE_FILE}, starting fresh.", file=sys.stderr)
return {}
return {}
def save_cache(cache: dict) -> None:
"""Write the star cache to disk, creating data/ dir if needed."""
DATA_DIR.mkdir(parents=True, exist_ok=True)
@@ -61,7 +50,7 @@ def build_graphql_query(repos: list[str]) -> str:
continue
parts.append(
f'repo_{i}: repository(owner: "{owner}", name: "{name}") '
f"{{ stargazerCount pushedAt owner {{ login }} }}"
f"{{ stargazerCount owner {{ login }} defaultBranchRef {{ target {{ ... on Commit {{ committedDate }} }} }} }}"
)
if not parts:
return ""
@@ -78,10 +67,12 @@ def parse_graphql_response(
node = data.get(f"repo_{i}")
if node is None:
continue
default_branch = node.get("defaultBranchRef") or {}
target = default_branch.get("target") or {}
result[repo] = {
"stars": node.get("stargazerCount", 0),
"owner": node.get("owner", {}).get("login", ""),
"pushed_at": node.get("pushedAt", ""),
"last_commit_at": target.get("committedDate", ""),
}
return result
@@ -114,7 +105,7 @@ def main() -> None:
current_repos = extract_github_repos(readme_text)
print(f"Found {len(current_repos)} GitHub repos in README.md")
cache = load_cache()
cache = load_stars(CACHE_FILE)
now = datetime.now(timezone.utc)
# Prune entries not in current README
@@ -173,7 +164,7 @@ def main() -> None:
cache[repo] = {
"stars": results[repo]["stars"],
"owner": results[repo]["owner"],
"pushed_at": results[repo]["pushed_at"],
"last_commit_at": results[repo]["last_commit_at"],
"fetched_at": now_iso,
}
fetched_count += 1

388
website/readme_parser.py Normal file
View File

@@ -0,0 +1,388 @@
"""Parse README.md into structured section data using markdown-it-py AST."""
from __future__ import annotations
import re
from typing import TypedDict
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
from markupsafe import escape
class AlsoSee(TypedDict):
    """A link-only sub-entry nested under a main entry (an "also see" link)."""

    name: str  # link text of the nested item
    url: str  # href of the nested item


class ParsedEntry(TypedDict):
    """One library entry parsed from a section's bullet list."""

    name: str  # text of the entry's leading link
    url: str  # href of the entry's leading link
    description: str  # inline HTML, properly escaped
    also_see: list[AlsoSee]  # link-only items nested under this entry


class ParsedSection(TypedDict):
    """A parsed ## section of the README (a category or resource group)."""

    name: str  # h2 heading text
    slug: str  # URL-friendly form of name
    description: str  # plain text, links resolved to text
    entries: list[ParsedEntry]  # entries extracted from the section's lists
    entry_count: int  # len(entries) plus all of their also_see links
    preview: str  # comma-joined names of the first few entries
    content_html: str  # rendered HTML, properly escaped
# --- Slugify ----------------------------------------------------------------

_SLUG_STRIP_RE = re.compile(r"[^a-z0-9\s-]")
_SLUG_SPACE_RE = re.compile(r"\s+")
_SLUG_DASH_RE = re.compile(r"-{2,}")


def slugify(name: str) -> str:
    """Turn a category heading into a lowercase, dash-separated URL slug."""
    lowered = name.lower()
    cleaned = _SLUG_STRIP_RE.sub("", lowered)
    dashed = _SLUG_SPACE_RE.sub("-", cleaned.strip())
    return _SLUG_DASH_RE.sub("-", dashed)
# --- Inline renderers -------------------------------------------------------


def render_inline_html(children: list[SyntaxTreeNode]) -> str:
    """Render inline AST nodes to HTML, escaping all user-controlled text."""
    html_parts: list[str] = []
    for node in children:
        kind = node.type
        if kind in ("text", "html_inline"):
            # Raw HTML embedded in the README is displayed escaped, never injected.
            html_parts.append(str(escape(node.content)))
        elif kind == "softbreak":
            html_parts.append(" ")
        elif kind == "link":
            target = str(escape(node.attrGet("href") or ""))
            inner_html = render_inline_html(node.children)
            html_parts.append(
                f'<a href="{target}" target="_blank" rel="noopener">{inner_html}</a>'
            )
        elif kind == "em":
            html_parts.append(f"<em>{render_inline_html(node.children)}</em>")
        elif kind == "strong":
            html_parts.append(f"<strong>{render_inline_html(node.children)}</strong>")
        elif kind == "code_inline":
            html_parts.append(f"<code>{escape(node.content)}</code>")
    return "".join(html_parts)
def render_inline_text(children: list[SyntaxTreeNode]) -> str:
"""Render inline AST nodes to plain text (links become their text)."""
parts: list[str] = []
for child in children:
match child.type:
case "text":
parts.append(child.content)
case "softbreak":
parts.append(" ")
case "code_inline":
parts.append(child.content)
case "em" | "strong" | "link":
parts.append(render_inline_text(child.children))
return "".join(parts)
# --- AST helpers -------------------------------------------------------------
def _heading_text(node: SyntaxTreeNode) -> str:
"""Extract plain text from a heading node."""
for child in node.children:
if child.type == "inline":
return render_inline_text(child.children)
return ""
def _extract_description(nodes: list[SyntaxTreeNode]) -> str:
    """Return the section description when the first node is a paragraph whose
    entire inline content is a single <em> span.

    Pattern: _Libraries for foo._ -> "Libraries for foo."
    Anything else (no nodes, non-paragraph first node, mixed inline content)
    yields an empty string.
    """
    if not nodes or nodes[0].type != "paragraph":
        return ""
    for inline in nodes[0].children:
        if inline.type == "inline" and len(inline.children) == 1:
            only_child = inline.children[0]
            if only_child.type == "em":
                return render_inline_text(only_child.children)
    return ""
# --- Entry extraction --------------------------------------------------------
_DESC_SEP_RE = re.compile(r"^\s*[-\u2013\u2014]\s*")
def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
"""Find first direct child of a given type."""
for child in node.children:
if child.type == child_type:
return child
return None
def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
"""Find the inline node in a list_item's paragraph."""
para = _find_child(node, "paragraph")
if para is None:
return None
return _find_child(para, "inline")
def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None:
"""Find the first link node among inline children."""
for child in inline.children:
if child.type == "link":
return child
return None
def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool:
"""Check if the link is the first child of inline (a real entry, not a subcategory label)."""
return bool(inline.children) and inline.children[0] is link
def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str:
    """Render the inline content that follows ``first_link`` as description HTML.

    AST: [link("name"), text(" - Description.")] -> "Description."
    The separator (- / en-dash / em-dash) is stripped.
    """
    # Identity search: the caller passes a node taken from inline.children.
    link_pos = None
    for i, child in enumerate(inline.children):
        if child is first_link:
            link_pos = i
            break
    if link_pos is None:
        return ""
    tail = inline.children[link_pos + 1 :]
    if not tail:
        return ""
    return _DESC_SEP_RE.sub("", render_inline_html(tail))
def _parse_list_entries(bullet_list: SyntaxTreeNode) -> list[ParsedEntry]:
    """Extract entries from a bullet_list AST node.

    Handles three patterns:
    - Text-only list_item -> subcategory label -> recurse into nested list
    - Link list_item with nested link-only items -> entry with also_see
    - Link list_item without nesting -> simple entry
    """
    entries: list[ParsedEntry] = []
    for list_item in bullet_list.children:
        if list_item.type != "list_item":
            continue
        inline = _find_inline(list_item)
        if inline is None:
            # Defensive: a list_item without a paragraph/inline carries no data.
            continue
        first_link = _find_first_link(inline)
        if first_link is None or not _is_leading_link(inline, first_link):
            # Subcategory label (plain text or text-before-link) — its nested
            # list holds real entries, so recurse and flatten them in.
            nested = _find_child(list_item, "bullet_list")
            if nested:
                entries.extend(_parse_list_entries(nested))
            continue
        # Entry with a leading link: name from the link text, url from its href.
        name = render_inline_text(first_link.children)
        url = first_link.attrGet("href") or ""
        desc_html = _extract_description_html(inline, first_link)
        # Nested items under a linked entry are "also see" links, not entries.
        also_see: list[AlsoSee] = []
        nested = _find_child(list_item, "bullet_list")
        if nested:
            for sub_item in nested.children:
                if sub_item.type != "list_item":
                    continue
                sub_inline = _find_inline(sub_item)
                if sub_inline:
                    sub_link = _find_first_link(sub_inline)
                    if sub_link:
                        also_see.append(AlsoSee(
                            name=render_inline_text(sub_link.children),
                            url=sub_link.attrGet("href") or "",
                        ))
        entries.append(ParsedEntry(
            name=name,
            url=url,
            description=desc_html,
            also_see=also_see,
        ))
    return entries
def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEntry]:
    """Collect the entries of every top-level bullet_list in a section."""
    collected: list[ParsedEntry] = []
    for node in content_nodes:
        if node.type == "bullet_list":
            collected += _parse_list_entries(node)
    return collected
# --- Content HTML rendering --------------------------------------------------


def _render_bullet_list_html(
    bullet_list: SyntaxTreeNode,
    *,
    is_sub: bool = False,
) -> str:
    """Render a bullet_list node to HTML with entry/entry-sub/subcat classes.

    ``is_sub`` is True when rendering a list nested under a linked entry;
    those items become compact "entry-sub" divs without descriptions.
    All names, URLs, and labels are escaped before interpolation.
    """
    out: list[str] = []
    for list_item in bullet_list.children:
        if list_item.type != "list_item":
            continue
        inline = _find_inline(list_item)
        if inline is None:
            # Defensive: skip list items with no inline content.
            continue
        first_link = _find_first_link(inline)
        if first_link is None or not _is_leading_link(inline, first_link):
            # Subcategory label (plain text or text-before-link): render as a
            # divider, then render its nested list as top-level entries.
            label = str(escape(render_inline_text(inline.children)))
            out.append(f'<div class="subcat">{label}</div>')
            nested = _find_child(list_item, "bullet_list")
            if nested:
                out.append(_render_bullet_list_html(nested, is_sub=False))
            continue
        # Entry with a leading link.
        name = str(escape(render_inline_text(first_link.children)))
        url = str(escape(first_link.attrGet("href") or ""))
        if is_sub:
            out.append(f'<div class="entry-sub"><a href="{url}">{name}</a></div>')
        else:
            # desc is already escaped HTML from _extract_description_html.
            desc = _extract_description_html(inline, first_link)
            if desc:
                out.append(
                    f'<div class="entry"><a href="{url}">{name}</a>'
                    f'<span class="sep">&mdash;</span>{desc}</div>'
                )
            else:
                out.append(f'<div class="entry"><a href="{url}">{name}</a></div>')
        # Nested items under an entry with a link are sub-entries
        nested = _find_child(list_item, "bullet_list")
        if nested:
            out.append(_render_bullet_list_html(nested, is_sub=True))
    return "\n".join(out)
def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str:
    """Render a section's content nodes to HTML.

    Only top-level bullet lists contribute markup; other block nodes
    (such as the description paragraph) are skipped.
    """
    return "\n".join(
        _render_bullet_list_html(node)
        for node in content_nodes
        if node.type == "bullet_list"
    )
# --- Section splitting -------------------------------------------------------
def _group_by_h2(
    nodes: list[SyntaxTreeNode],
) -> list[ParsedSection]:
    """Group AST nodes into sections by h2 headings.

    Nodes appearing before the first h2 are discarded; each h2 starts a
    new section whose body runs until the next h2 (or the end of nodes).
    """
    # Pass 1: bucket nodes under their preceding h2 heading.
    grouped: list[tuple[str, list[SyntaxTreeNode]]] = []
    for node in nodes:
        if node.type == "heading" and node.tag == "h2":
            grouped.append((_heading_text(node), []))
        elif grouped:
            grouped[-1][1].append(node)

    # Pass 2: build a ParsedSection per bucket.
    sections: list[ParsedSection] = []
    for name, body in grouped:
        desc = _extract_description(body)
        # When a description exists it is always the first body node.
        content_nodes = body[1:] if desc else body
        entries = _parse_section_entries(content_nodes)
        sections.append(ParsedSection(
            name=name,
            slug=slugify(name),
            description=desc,
            entries=entries,
            entry_count=len(entries) + sum(len(e["also_see"]) for e in entries),
            preview=", ".join(e["name"] for e in entries[:4]),
            content_html=_render_section_html(content_nodes),
        ))
    return sections
def parse_readme(text: str) -> tuple[list[ParsedSection], list[ParsedSection]]:
    """Parse README.md text into categories and resources.

    Expected layout: intro, a thematic break (``---``), category sections
    (h2s), an optional ``# Resources`` part (h2s), and an optional
    ``# Contributing`` part that is always excluded.

    Returns (categories, resources) where each is a list of ParsedSection
    dicts. Both lists are empty when no thematic break is found.
    """
    md = MarkdownIt("commonmark")
    tokens = md.parse(text)
    root = SyntaxTreeNode(tokens)
    children = root.children
    # Find thematic break (---), # Resources, and # Contributing in one pass
    hr_idx: int | None = None
    resources_idx: int | None = None
    contributing_idx: int | None = None
    for i, node in enumerate(children):
        if hr_idx is None and node.type == "hr":
            hr_idx = i
        elif node.type == "heading" and node.tag == "h1":
            text_content = _heading_text(node)
            if text_content == "Resources":
                resources_idx = i
            elif text_content == "Contributing":
                contributing_idx = i
    if hr_idx is None:
        return [], []
    # Slice into category and resource ranges. Compare indices against
    # None explicitly: `a or b` would wrongly skip a valid index 0.
    if resources_idx is not None:
        cat_end = resources_idx
    elif contributing_idx is not None:
        cat_end = contributing_idx
    else:
        cat_end = len(children)
    cat_nodes = children[hr_idx + 1 : cat_end]
    res_nodes: list[SyntaxTreeNode] = []
    if resources_idx is not None:
        res_end = contributing_idx if contributing_idx is not None else len(children)
        res_nodes = children[resources_idx + 1 : res_end]
    categories = _group_by_h2(cat_nodes)
    resources = _group_by_h2(res_nodes)
    return categories, resources

View File

@@ -0,0 +1,6 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
<path d="M8 2h16a6 6 0 0 1 6 6v8H2V8a6 6 0 0 1 6-6z" fill="#1d5fa6"/>
<path d="M2 16h28v8a6 6 0 0 1-6 6H8a6 6 0 0 1-6-6z" fill="#f0c73e"/>
<circle cx="11.5" cy="9.5" r="2.2" fill="#f0c73e"/>
<circle cx="20.5" cy="22.5" r="2.2" fill="#1d5fa6"/>
</svg>

After

Width:  |  Height:  |  Size: 320 B

View File

@@ -1,15 +1,44 @@
// State
var activeFilter = null; // { type: "cat"|"group", value: "..." }
var activeSort = { col: 'stars', order: 'desc' };
var searchInput = document.querySelector('.search');
var filterBar = document.querySelector('.filter-bar');
var filterValue = document.querySelector('.filter-value');
var filterClear = document.querySelector('.filter-clear');
var noResults = document.querySelector('.no-results');
var countEl = document.querySelector('.count');
var rows = document.querySelectorAll('.table tbody tr.row');
var tags = document.querySelectorAll('.tag');
var tbody = document.querySelector('.table tbody');
// Relative time formatting: turn an ISO timestamp into a coarse,
// human-readable age ("just now", "5 hours ago", "yesterday", ...).
// Months are approximated as 30 days and years as 365 days.
function relativeTime(isoStr) {
  var elapsedMs = Date.now() - new Date(isoStr).getTime();
  var hours = Math.floor(elapsedMs / 3600000);
  if (hours < 1) return 'just now';
  if (hours < 24) return hours === 1 ? '1 hour ago' : hours + ' hours ago';
  var days = Math.floor(elapsedMs / 86400000);
  if (days === 1) return 'yesterday';
  if (days < 30) return days + ' days ago';
  var months = Math.floor(days / 30);
  if (months < 12) return months === 1 ? '1 month ago' : months + ' months ago';
  var years = Math.floor(days / 365);
  return years === 1 ? '1 year ago' : years + ' years ago';
}
// Format all commit date cells
document.querySelectorAll('.col-commit[data-commit]').forEach(function (td) {
var time = td.querySelector('time');
if (time) time.textContent = relativeTime(td.dataset.commit);
});
// Store original row order for sort reset
rows.forEach(function (row, i) {
row._origIndex = i;
row._expandRow = row.nextElementSibling;
});
function collapseAll() {
var openRows = document.querySelectorAll('.table tbody tr.row.open');
openRows.forEach(function (row) {
@@ -46,16 +75,18 @@ function applyFilters() {
show = row._searchText.includes(query);
}
row.hidden = !show;
if (row.hidden !== !show) row.hidden = !show;
if (show) {
visibleCount++;
row.querySelector('.col-num').textContent = String(visibleCount);
var numCell = row.cells[0];
if (numCell.textContent !== String(visibleCount)) {
numCell.textContent = String(visibleCount);
}
}
});
if (noResults) noResults.hidden = visibleCount > 0;
if (countEl) countEl.textContent = visibleCount;
// Update tag highlights
tags.forEach(function (tag) {
@@ -74,6 +105,76 @@ function applyFilters() {
filterBar.hidden = true;
}
}
updateURL();
}
// Mirror the current search/filter/sort state into the query string via
// replaceState, so typing does not pollute the browser history.
function updateURL() {
  var params = new URLSearchParams();
  var query = searchInput ? searchInput.value.trim() : '';
  if (query) {
    params.set('q', query);
  }
  if (activeFilter) {
    var key = activeFilter.type === 'cat' ? 'category' : 'group';
    params.set(key, activeFilter.value);
  }
  var isDefaultSort = activeSort.col === 'stars' && activeSort.order === 'desc';
  if (!isDefaultSort) {
    params.set('sort', activeSort.col);
    params.set('order', activeSort.order);
  }
  var qs = params.toString();
  history.replaceState(null, '', qs ? '?' + qs : location.pathname);
}
// Extract the comparable value for a row in the given sort column.
// Missing data maps to sentinels (-1 stars, 0 commit time) so that
// incomplete rows can be pushed to the bottom by the comparator.
function getSortValue(row, col) {
  switch (col) {
    case 'name': {
      var link = row.querySelector('.col-name a');
      return link.textContent.trim().toLowerCase();
    }
    case 'stars': {
      var raw = row.querySelector('.col-stars').textContent.trim().replace(/,/g, '');
      var parsed = parseInt(raw, 10);
      return isNaN(parsed) ? -1 : parsed;
    }
    case 'commit-time': {
      var iso = row.querySelector('.col-commit').getAttribute('data-commit');
      return iso ? new Date(iso).getTime() : 0;
    }
    default:
      return 0;
  }
}
// Re-insert table rows into the tbody in sorted order, then re-apply
// filtering. Sorting is stable: ties fall back to the row's original
// document order (row._origIndex, captured at load time).
function sortRows() {
  var arr = Array.prototype.slice.call(rows);
  if (activeSort) {
    arr.sort(function (a, b) {
      var aVal = getSortValue(a, activeSort.col);
      var bVal = getSortValue(b, activeSort.col);
      if (activeSort.col === 'name') {
        // Lexicographic compare; 'desc' flips the sign.
        var cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
        if (cmp === 0) return a._origIndex - b._origIndex;
        return activeSort.order === 'desc' ? -cmp : cmp;
      }
      // Numeric columns: rows with missing data (sentinel values <= 0
      // from getSortValue) always sink to the bottom, regardless of
      // sort direction.
      if (aVal <= 0 && bVal <= 0) return a._origIndex - b._origIndex;
      if (aVal <= 0) return 1;
      if (bVal <= 0) return -1;
      var cmp = aVal - bVal;
      if (cmp === 0) return a._origIndex - b._origIndex;
      return activeSort.order === 'desc' ? -cmp : cmp;
    });
  } else {
    // No active sort: restore the original README order.
    arr.sort(function (a, b) { return a._origIndex - b._origIndex; });
  }
  // Each row is followed by its hidden expand row; move them as a pair
  // so expansion still targets the right details.
  arr.forEach(function (row) {
    tbody.appendChild(row);
    tbody.appendChild(row._expandRow);
  });
  applyFilters();
}
// Sync the header sort indicators (sort-asc / sort-desc classes) with
// the active sort column and order; clears all others.
function updateSortIndicators() {
  var headers = document.querySelectorAll('th[data-sort]');
  headers.forEach(function (header) {
    header.classList.remove('sort-asc', 'sort-desc');
    var isActive = activeSort && header.dataset.sort === activeSort.col;
    if (isActive) {
      header.classList.add('sort-' + activeSort.order);
    }
  });
}
// Expand/collapse: event delegation on tbody
@@ -130,6 +231,23 @@ if (filterClear) {
});
}
// Column sorting: clicking a sortable header cycles
// default order -> alternate order -> back to the stars/desc default.
document.querySelectorAll('th[data-sort]').forEach(function (th) {
  th.addEventListener('click', function () {
    var col = th.dataset.sort;
    // Name sorts ascending by default; the other columns descending.
    var defaultOrder = col === 'name' ? 'asc' : 'desc';
    var altOrder = defaultOrder === 'asc' ? 'desc' : 'asc';
    if (activeSort && activeSort.col === col) {
      if (activeSort.order === defaultOrder) activeSort = { col: col, order: altOrder };
      else activeSort = { col: 'stars', order: 'desc' };
    } else {
      activeSort = { col: col, order: defaultOrder };
    }
    sortRows();
    updateSortIndicators();
  });
});
// Search input
if (searchInput) {
var searchTimer;
@@ -152,3 +270,23 @@ if (searchInput) {
}
});
}
// Restore state from URL so filtered/sorted views are shareable links.
(function () {
  var params = new URLSearchParams(location.search);
  var q = params.get('q');
  var cat = params.get('category');
  var group = params.get('group');
  var sort = params.get('sort');
  var order = params.get('order');
  if (q && searchInput) searchInput.value = q;
  // Category takes precedence over group when both are present.
  if (cat) activeFilter = { type: 'cat', value: cat };
  else if (group) activeFilter = { type: 'group', value: group };
  // Only accept known column/order values; anything else keeps the default.
  if ((sort === 'name' || sort === 'stars' || sort === 'commit-time') && (order === 'desc' || order === 'asc')) {
    activeSort = { col: sort, order: order };
  }
  // Only do the (re-)sort + filter work when the URL actually carried state.
  if (q || cat || group || sort) {
    sortRows();
  }
  updateSortIndicators();
})();

View File

@@ -23,6 +23,8 @@
--accent-light: oklch(97% 0.015 240);
--highlight: oklch(93% 0.10 90);
--highlight-text: oklch(35% 0.10 90);
--tag-text: oklch(45% 0.06 240);
--tag-hover-bg: oklch(93% 0.025 240);
}
html { font-size: 16px; }
@@ -65,8 +67,10 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.hero-main {
display: flex;
flex-wrap: wrap;
justify-content: space-between;
align-items: flex-start;
gap: 1rem;
}
.hero-submit {
@@ -78,14 +82,21 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--text);
text-decoration: none;
white-space: nowrap;
transition: border-color 0.2s, background 0.2s, color 0.2s;
}
.hero-submit:hover {
border-color: var(--accent);
background: var(--accent-light);
color: var(--accent);
text-decoration: none;
}
.hero-submit:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
.hero h1 {
font-family: var(--font-display);
font-size: clamp(2rem, 5vw, 3rem);
@@ -144,6 +155,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
font-family: var(--font-body);
font-size: var(--text-sm);
color: var(--text);
transition: border-color 0.15s, background 0.15s;
}
.search::placeholder { color: var(--text-muted); }
@@ -174,11 +186,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
background: none;
border: 1px solid var(--border);
border-radius: 4px;
padding: 0.15rem 0.5rem;
padding: 0.35rem 0.65rem;
font-family: inherit;
font-size: var(--text-xs);
color: var(--text-muted);
cursor: pointer;
transition: border-color 0.15s, color 0.15s;
}
.filter-clear:hover {
@@ -186,14 +199,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--text);
}
.stats {
font-size: var(--text-sm);
color: var(--text-muted);
font-variant-numeric: tabular-nums;
.filter-clear:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
.stats strong { color: var(--text-secondary); }
/* === Table === */
.table-wrap {
width: 100%;
@@ -201,6 +211,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
overflow-x: auto;
}
.table-wrap:focus {
outline: 2px solid var(--accent);
outline-offset: -2px;
}
.table {
width: 100%;
border-collapse: separate;
@@ -236,6 +251,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
padding: 0.7rem 0.75rem;
border-bottom: 1px solid var(--border);
vertical-align: top;
transition: background 0.15s;
}
.table tbody tr.row:not(.open):hover td {
@@ -253,9 +269,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-name {
width: 35%;
overflow-wrap: break-word;
word-wrap: break-word;
word-break: break-word;
overflow-wrap: anywhere;
}
.col-name > a {
@@ -266,12 +280,47 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-name > a:hover { text-decoration: underline; color: var(--accent-hover); }
/* === Sortable Headers === */
th[data-sort] {
cursor: pointer;
user-select: none;
}
th[data-sort]:hover {
color: var(--accent);
}
th[data-sort]::after {
content: " ▼";
opacity: 0;
transition: opacity 0.15s;
}
th[data-sort="name"]::after {
content: " ▲";
}
th[data-sort]:hover::after {
opacity: 1;
}
th[data-sort].sort-desc::after {
content: " ▼";
opacity: 1;
}
th[data-sort].sort-asc::after {
content: " ▲";
opacity: 1;
}
/* === Stars Column === */
.col-stars {
width: 5rem;
font-variant-numeric: tabular-nums;
white-space: nowrap;
color: var(--text-secondary);
text-align: right;
}
/* === Arrow Column === */
@@ -294,6 +343,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
/* === Row Click === */
.row { cursor: pointer; }
.row:focus-visible td {
outline: none;
background: var(--bg-hover);
box-shadow: inset 2px 0 0 var(--accent);
}
/* === Expand Row === */
.expand-row {
display: none;
@@ -315,10 +370,36 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
border-bottom: 1px solid var(--border);
}
@keyframes expand-in {
from {
opacity: 0;
transform: translateY(-4px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.expand-content {
font-size: var(--text-sm);
color: var(--text-secondary);
line-height: 1.6;
animation: expand-in 0.2s cubic-bezier(0.25, 1, 0.5, 1);
}
.expand-tags {
display: flex;
gap: 0.4rem;
margin-bottom: 0.4rem;
}
.expand-tag {
font-size: var(--text-xs);
color: var(--tag-text);
background: var(--bg);
padding: 0.15rem 0.4rem;
border-radius: 3px;
}
.expand-also-see {
@@ -357,35 +438,63 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--border);
}
.col-cat, .col-group {
.col-cat {
width: 13%;
white-space: nowrap;
}
/* === Last Commit Column === */
.col-commit {
width: 9rem;
white-space: nowrap;
color: var(--text-muted);
}
/* === Tags === */
.tag {
position: relative;
background: var(--accent-light);
border: none;
font-family: inherit;
font-size: var(--text-xs);
color: oklch(45% 0.06 240);
color: var(--tag-text);
cursor: pointer;
padding: 0.15rem 0.35rem;
padding: 0.25rem 0.5rem;
border-radius: 3px;
white-space: nowrap;
transition: background 0.15s, color 0.15s;
}
/* Expand touch target to 44x44px minimum */
.tag::after {
content: "";
position: absolute;
inset: -0.5rem -0.25rem;
}
.tag:hover {
background: var(--accent-light);
background: var(--tag-hover-bg);
color: var(--accent);
}
.tag:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 1px;
}
.tag.active {
background: var(--highlight);
color: var(--highlight-text);
font-weight: 600;
}
/* === Noscript === */
.noscript-msg {
text-align: center;
padding: 1rem;
color: var(--text-muted);
}
/* === No Results === */
.no-results {
max-width: 1400px;
@@ -407,20 +516,18 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
background: var(--bg-input);
display: flex;
align-items: center;
justify-content: space-between;
justify-content: flex-end;
gap: 0.5rem;
}
.footer a { color: var(--text-muted); text-decoration: none; }
.footer a:hover { color: var(--accent); }
.footer a { color: var(--accent); text-decoration: none; }
.footer a:hover { color: var(--accent-hover); text-decoration: underline; }
.footer-links {
display: flex;
gap: 1rem;
}
.footer-sep { color: var(--border-strong); }
/* === Responsive === */
@media (max-width: 900px) {
.col-group { display: none; }
.col-commit { display: none; }
}
@media (max-width: 640px) {
@@ -435,7 +542,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-cat { display: none; }
.col-name { white-space: normal; }
.footer { padding: 1.25rem; flex-direction: column; gap: 0.5rem; }
.footer { padding: 1.25rem; justify-content: center; flex-wrap: wrap; }
}
/* === Screen Reader Only === */
@@ -454,6 +561,8 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
/* === Reduced Motion === */
@media (prefers-reduced-motion: reduce) {
*, *::before, *::after {
animation-duration: 0.01ms !important;
animation-iteration-count: 1 !important;
transition-duration: 0.01ms !important;
}
}

View File

@@ -17,10 +17,7 @@
/>
<meta property="og:url" content="https://awesome-python.com/" />
<meta name="twitter:card" content="summary" />
<link
rel="icon"
href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🐍</text></svg>"
/>
<link rel="icon" href="/static/favicon.svg" type="image/svg+xml" />
<link rel="stylesheet" href="/static/style.css" />
<script
async
@@ -41,24 +38,24 @@
<main id="content">{% block content %}{% endblock %}</main>
<footer class="footer">
<div class="footer-links">
<a href="https://github.com/vinta" target="_blank" rel="noopener"
>GitHub</a
>
<a href="https://twitter.com/vinta" target="_blank" rel="noopener"
>Twitter</a
>
</div>
<span
>Curated by
<a href="https://github.com/vinta" target="_blank" rel="noopener"
>Made by
<a href="https://vinta.ws/" target="_blank" rel="noopener"
>Vinta</a
></span
>
<span class="footer-sep">/</span>
<a href="https://github.com/vinta" target="_blank" rel="noopener"
>GitHub</a
>
<span class="footer-sep">/</span>
<a href="https://twitter.com/vinta" target="_blank" rel="noopener"
>Twitter</a
>
</footer>
<noscript
><p style="text-align: center; padding: 1rem; color: #666">
><p class="noscript-msg">
JavaScript is needed for search and filtering.
</p></noscript
>

View File

@@ -29,6 +29,7 @@
</div>
</header>
<h2 class="sr-only">Search and filter</h2>
<div class="controls">
<div class="search-wrap">
<svg
@@ -60,22 +61,24 @@
</div>
</div>
<div class="table-wrap">
<h2 class="sr-only">Results</h2>
<div class="table-wrap" tabindex="0" role="region" aria-label="Libraries table">
<table class="table">
<thead>
<tr>
<th class="col-num"><span class="sr-only">#</span></th>
<th class="col-name">Project Name</th>
<th class="col-stars">GitHub Stars</th>
<th class="col-name" data-sort="name">Project Name</th>
<th class="col-stars" data-sort="stars">GitHub Stars</th>
<th class="col-commit" data-sort="commit-time">Last Commit</th>
<th class="col-cat">Category</th>
<th class="col-group">Group</th>
<th class="col-arrow"></th>
<th class="col-arrow"><span class="sr-only">Details</span></th>
</tr>
</thead>
<tbody>
{% for entry in entries %}
<tr
class="row"
role="button"
data-cat="{{ entry.category }}"
data-group="{{ entry.group }}"
tabindex="0"
@@ -92,25 +95,24 @@
{% if entry.stars is not none %}{{ "{:,}".format(entry.stars) }}{%
else %}&mdash;{% endif %}
</td>
<td class="col-commit"
{% if entry.last_commit_at %}data-commit="{{ entry.last_commit_at }}"{% endif %}
>{% if entry.last_commit_at %}<time datetime="{{ entry.last_commit_at }}">{{ entry.last_commit_at[:10] }}</time>{% else %}&mdash;{% endif %}</td>
<td class="col-cat">
<button class="tag" data-type="cat" data-value="{{ entry.category }}">
{{ entry.category }}
</button>
</td>
<td class="col-group">
<button class="tag" data-type="group" data-value="{{ entry.group }}">
{{ entry.group }}
</button>
</td>
<td class="col-arrow"><span class="arrow">&rarr;</span></td>
</tr>
<tr class="expand-row" id="expand-{{ loop.index }}">
<td></td>
<td colspan="5">
<td colspan="3">
<div class="expand-content">
{% if entry.description %}
<div class="expand-desc">{{ entry.description | safe }}</div>
{% endif %} {% if entry.also_see %}
{% endif %}
{% if entry.also_see %}
<div class="expand-also-see">
Also see: {% for see in entry.also_see %}<a
href="{{ see.url }}"
@@ -131,11 +133,16 @@
target="_blank"
rel="noopener"
>{{ entry.url | replace("https://", "") }}</a
>{% if entry.pushed_at %}<span class="expand-sep">&middot;</span
>Last pushed {{ entry.pushed_at[:10] }}{% endif %}
>
</div>
</div>
</td>
<td class="col-cat">
<button class="tag" data-type="group" data-value="{{ entry.group }}">
{{ entry.group }}
</button>
</td>
<td></td>
</tr>
{% endfor %}
</tbody>

View File

@@ -1,27 +1,18 @@
"""Tests for the build module."""
import json
import os
import shutil
import sys
import textwrap
from pathlib import Path
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from build import (
build,
count_entries,
extract_github_repo,
extract_preview,
group_categories,
load_stars,
parse_readme,
render_content_html,
slugify,
sort_entries,
)
from readme_parser import slugify
# ---------------------------------------------------------------------------
# slugify
@@ -51,244 +42,6 @@ class TestSlugify:
assert slugify(" Date and Time ") == "date-and-time"
# ---------------------------------------------------------------------------
# count_entries
# ---------------------------------------------------------------------------
class TestCountEntries:
def test_counts_dash_entries(self):
assert count_entries("- [a](url) - Desc.\n- [b](url) - Desc.") == 2
def test_counts_star_entries(self):
assert count_entries("* [a](url) - Desc.") == 1
def test_ignores_non_entries(self):
assert count_entries("Some text\n- [a](url) - Desc.\nMore text") == 1
def test_counts_indented_entries(self):
assert count_entries(" - [a](url) - Desc.") == 1
def test_empty_content(self):
assert count_entries("") == 0
# ---------------------------------------------------------------------------
# extract_preview
# ---------------------------------------------------------------------------
class TestExtractPreview:
def test_basic(self):
content = "* [alpha](url) - A.\n* [beta](url) - B.\n* [gamma](url) - C."
assert extract_preview(content) == "alpha, beta, gamma"
def test_max_four(self):
content = "\n".join(f"* [lib{i}](url) - Desc." for i in range(10))
assert extract_preview(content) == "lib0, lib1, lib2, lib3"
def test_empty(self):
assert extract_preview("") == ""
def test_skips_subcategory_labels(self):
content = "* Synchronous\n* [django](url) - Framework.\n* [flask](url) - Micro."
assert extract_preview(content) == "django, flask"
# ---------------------------------------------------------------------------
# render_content_html
# ---------------------------------------------------------------------------
class TestRenderContentHtml:
def test_basic_entry(self):
content = "* [django](https://example.com) - A web framework."
html = render_content_html(content)
assert 'href="https://example.com"' in html
assert "django" in html
assert "A web framework." in html
assert 'class="entry"' in html
def test_subcategory_label(self):
content = "* Synchronous\n* [django](https://x.com) - Framework."
html = render_content_html(content)
assert 'class="subcat"' in html
assert "Synchronous" in html
def test_sub_entry(self):
content = "* [django](https://x.com) - Framework.\n * [awesome-django](https://y.com)"
html = render_content_html(content)
assert 'class="entry-sub"' in html
assert "awesome-django" in html
def test_link_only_entry(self):
content = "* [tool](https://x.com)"
html = render_content_html(content)
assert 'href="https://x.com"' in html
assert "tool" in html
# ---------------------------------------------------------------------------
# parse_readme
# ---------------------------------------------------------------------------
MINIMAL_README = textwrap.dedent("""\
# Awesome Python
Some intro text.
---
## Alpha
_Libraries for alpha stuff._
- [lib-a](https://example.com/a) - Does A.
- [lib-b](https://example.com/b) - Does B.
## Beta
_Tools for beta._
- [lib-c](https://example.com/c) - Does C.
# Resources
Where to discover resources.
## Newsletters
- [News One](https://example.com/n1)
- [News Two](https://example.com/n2)
## Podcasts
- [Pod One](https://example.com/p1)
# Contributing
Please contribute!
""")
class TestParseReadme:
def test_category_count(self):
cats, resources = parse_readme(MINIMAL_README)
assert len(cats) == 2
def test_resource_count(self):
cats, resources = parse_readme(MINIMAL_README)
assert len(resources) == 2
def test_category_names(self):
cats, _ = parse_readme(MINIMAL_README)
assert cats[0]["name"] == "Alpha"
assert cats[1]["name"] == "Beta"
def test_category_slugs(self):
cats, _ = parse_readme(MINIMAL_README)
assert cats[0]["slug"] == "alpha"
assert cats[1]["slug"] == "beta"
def test_category_description(self):
cats, _ = parse_readme(MINIMAL_README)
assert cats[0]["description"] == "Libraries for alpha stuff."
assert cats[1]["description"] == "Tools for beta."
def test_category_content_has_entries(self):
cats, _ = parse_readme(MINIMAL_README)
assert "lib-a" in cats[0]["content"]
assert "lib-b" in cats[0]["content"]
def test_resources_names(self):
_, resources = parse_readme(MINIMAL_README)
assert resources[0]["name"] == "Newsletters"
assert resources[1]["name"] == "Podcasts"
def test_resources_content(self):
_, resources = parse_readme(MINIMAL_README)
assert "News One" in resources[0]["content"]
assert "Pod One" in resources[1]["content"]
def test_contributing_skipped(self):
cats, resources = parse_readme(MINIMAL_README)
all_names = [c["name"] for c in cats] + [r["name"] for r in resources]
assert "Contributing" not in all_names
def test_no_separator(self):
cats, resources = parse_readme("# Just a heading\n\nSome text.\n")
assert cats == []
assert resources == []
def test_no_description(self):
readme = textwrap.dedent("""\
# Title
---
## NullDesc
- [item](https://x.com) - Thing.
# Resources
## Tips
- [tip](https://x.com)
# Contributing
Done.
""")
cats, resources = parse_readme(readme)
assert cats[0]["description"] == ""
assert "item" in cats[0]["content"]
# ---------------------------------------------------------------------------
# parse_readme on real README
# ---------------------------------------------------------------------------
class TestParseRealReadme:
@pytest.fixture(autouse=True)
def load_readme(self):
readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
with open(readme_path, encoding="utf-8") as f:
self.readme_text = f.read()
self.cats, self.resources = parse_readme(self.readme_text)
def test_at_least_83_categories(self):
assert len(self.cats) >= 83
def test_resources_has_newsletters_and_podcasts(self):
names = [r["name"] for r in self.resources]
assert "Newsletters" in names
assert "Podcasts" in names
def test_contributing_not_in_results(self):
all_names = [c["name"] for c in self.cats] + [
r["name"] for r in self.resources
]
assert "Contributing" not in all_names
def test_first_category_is_admin_panels(self):
assert self.cats[0]["name"] == "Admin Panels"
assert self.cats[0]["slug"] == "admin-panels"
def test_last_category_is_wsgi_servers(self):
assert self.cats[-1]["name"] == "WSGI Servers"
assert self.cats[-1]["slug"] == "wsgi-servers"
def test_restful_api_slug(self):
slugs = [c["slug"] for c in self.cats]
assert "restful-api" in slugs
def test_descriptions_extracted(self):
admin = self.cats[0]
assert admin["description"] == "Libraries for administrative interfaces."
# ---------------------------------------------------------------------------
# group_categories
# ---------------------------------------------------------------------------
@@ -318,26 +71,6 @@ class TestGroupCategories:
assert "Resources" in group_names
# ---------------------------------------------------------------------------
# render_markdown (kept for compatibility)
# ---------------------------------------------------------------------------
class TestRenderMarkdown:
def test_renders_link_list(self):
from build import render_markdown
html = render_markdown("- [lib](https://example.com) - Does stuff.")
assert "<li>" in html
assert '<a href="https://example.com">lib</a>' in html
def test_renders_plain_text(self):
from build import render_markdown
html = render_markdown("Hello world")
assert "<p>Hello world</p>" in html
# ---------------------------------------------------------------------------
# build (integration)
# ---------------------------------------------------------------------------
@@ -413,27 +146,6 @@ class TestBuild:
# No category sub-pages
assert not (site / "categories").exists()
def test_build_creates_cname(self, tmp_path):
readme = textwrap.dedent("""\
# T
---
## Only
- [x](https://x.com) - X.
# Contributing
Done.
""")
self._make_repo(tmp_path, readme)
build(str(tmp_path))
cname = tmp_path / "website" / "output" / "CNAME"
assert cname.exists()
assert "awesome-python.com" in cname.read_text()
def test_build_cleans_stale_output(self, tmp_path):
readme = textwrap.dedent("""\
# T

View File

@@ -8,7 +8,6 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from fetch_github_stars import (
build_graphql_query,
extract_github_repos,
load_cache,
parse_graphql_response,
save_cache,
)
@@ -65,27 +64,6 @@ class TestExtractGithubRepos:
assert result == {"org/repo"}
class TestLoadCache:
def test_returns_empty_when_missing(self, tmp_path, monkeypatch):
monkeypatch.setattr("fetch_github_stars.CACHE_FILE", tmp_path / "nonexistent.json")
result = load_cache()
assert result == {}
def test_loads_valid_cache(self, tmp_path, monkeypatch):
cache_file = tmp_path / "stars.json"
cache_file.write_text('{"a/b": {"stars": 1}}', encoding="utf-8")
monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_file)
result = load_cache()
assert result == {"a/b": {"stars": 1}}
def test_returns_empty_on_corrupt_json(self, tmp_path, monkeypatch):
cache_file = tmp_path / "stars.json"
cache_file.write_text("not json", encoding="utf-8")
monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_file)
result = load_cache()
assert result == {}
class TestSaveCache:
def test_creates_directory_and_writes_json(self, tmp_path, monkeypatch):
data_dir = tmp_path / "data"

View File

@@ -0,0 +1,424 @@
"""Tests for the readme_parser module."""
import os
import textwrap
import pytest
from readme_parser import (
_parse_section_entries,
_render_section_html,
parse_readme,
render_inline_html,
render_inline_text,
)
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
def _parse_inline(md_text: str) -> list[SyntaxTreeNode]:
    """Parse ``md_text`` as a single paragraph and return its inline nodes.

    The AST shape is root > paragraph > inline; the inline node's children
    are the text/link/emphasis nodes that the renderers operate on.
    """
    tree = SyntaxTreeNode(MarkdownIt("commonmark").parse(md_text))
    paragraph = tree.children[0]
    inline = paragraph.children[0]
    return inline.children
class TestRenderInlineHtml:
    """render_inline_html: inline AST nodes -> escaped HTML fragment."""

    def test_plain_text_escapes_html(self):
        # Raw <, >, & in text must be entity-escaped.
        children = _parse_inline("Hello <world> & friends")
        assert render_inline_html(children) == "Hello &lt;world&gt; &amp; friends"

    def test_link_with_target(self):
        # Links should open in a new tab and carry rel="noopener".
        children = _parse_inline("[name](https://example.com)")
        html = render_inline_html(children)
        assert 'href="https://example.com"' in html
        assert 'target="_blank"' in html
        assert 'rel="noopener"' in html
        assert ">name</a>" in html

    def test_emphasis(self):
        children = _parse_inline("*italic* text")
        assert "<em>italic</em>" in render_inline_html(children)

    def test_strong(self):
        children = _parse_inline("**bold** text")
        assert "<strong>bold</strong>" in render_inline_html(children)

    def test_code_inline(self):
        children = _parse_inline("`some code`")
        assert "<code>some code</code>" in render_inline_html(children)

    def test_mixed_link_and_text(self):
        # Text around a link is preserved in order.
        children = _parse_inline("See [foo](https://x.com) for details.")
        html = render_inline_html(children)
        assert "See " in html
        assert ">foo</a>" in html
        assert " for details." in html
class TestRenderInlineText:
    """render_inline_text: inline AST nodes -> plain text (markup stripped)."""

    def test_plain_text(self):
        children = _parse_inline("Hello world")
        assert render_inline_text(children) == "Hello world"

    def test_link_becomes_text(self):
        # Links collapse to their label; the URL is dropped.
        children = _parse_inline("See [awesome-algos](https://github.com/x/y).")
        assert render_inline_text(children) == "See awesome-algos."

    def test_emphasis_stripped(self):
        children = _parse_inline("*italic* text")
        assert render_inline_text(children) == "italic text"

    def test_code_inline_kept(self):
        # Inline code keeps its content, just without backticks.
        children = _parse_inline("`code` here")
        assert render_inline_text(children) == "code here"
MINIMAL_README = textwrap.dedent("""\
# Awesome Python
Some intro text.
---
## Alpha
_Libraries for alpha stuff._
- [lib-a](https://example.com/a) - Does A.
- [lib-b](https://example.com/b) - Does B.
## Beta
_Tools for beta._
- [lib-c](https://example.com/c) - Does C.
# Resources
Where to discover resources.
## Newsletters
- [News One](https://example.com/n1)
- [News Two](https://example.com/n2)
## Podcasts
- [Pod One](https://example.com/p1)
# Contributing
Please contribute!
""")
class TestParseReadmeSections:
    """parse_readme() splits a README into category and resource sections."""

    def test_category_count(self):
        categories, _ = parse_readme(MINIMAL_README)
        assert len(categories) == 2

    def test_resource_count(self):
        _, resources = parse_readme(MINIMAL_README)
        assert len(resources) == 2

    def test_category_names(self):
        categories, _ = parse_readme(MINIMAL_README)
        assert categories[0]["name"] == "Alpha"
        assert categories[1]["name"] == "Beta"

    def test_category_slugs(self):
        categories, _ = parse_readme(MINIMAL_README)
        assert categories[0]["slug"] == "alpha"
        assert categories[1]["slug"] == "beta"

    def test_category_description(self):
        categories, _ = parse_readme(MINIMAL_README)
        assert categories[0]["description"] == "Libraries for alpha stuff."
        assert categories[1]["description"] == "Tools for beta."

    def test_resource_names(self):
        _, resources = parse_readme(MINIMAL_README)
        assert resources[0]["name"] == "Newsletters"
        assert resources[1]["name"] == "Podcasts"

    def test_contributing_skipped(self):
        categories, resources = parse_readme(MINIMAL_README)
        names = [section["name"] for section in categories + resources]
        assert "Contributing" not in names

    def test_no_separator(self):
        # Without the `---` separator there is nothing to parse.
        categories, resources = parse_readme("# Just a heading\n\nSome text.\n")
        assert categories == []
        assert resources == []

    def test_no_description(self):
        # A section with no italic intro gets an empty description string.
        readme = textwrap.dedent("""\
        # Title

        ---

        ## NullDesc

        - [item](https://x.com) - Thing.

        # Resources

        ## Tips

        - [tip](https://x.com)

        # Contributing

        Done.
        """)
        categories, resources = parse_readme(readme)
        assert categories[0]["description"] == ""
        assert categories[0]["entries"][0]["name"] == "item"

    def test_description_with_link_stripped(self):
        # Links inside the description collapse to their label text.
        readme = textwrap.dedent("""\
        # T

        ---

        ## Algos

        _Algorithms. Also see [awesome-algos](https://example.com)._

        - [lib](https://x.com) - Lib.

        # Contributing

        Done.
        """)
        categories, _ = parse_readme(readme)
        assert categories[0]["description"] == "Algorithms. Also see awesome-algos."
def _content_nodes(md_text: str) -> list[SyntaxTreeNode]:
    """Parse *md_text* with CommonMark rules and return the top-level block nodes."""
    tree = SyntaxTreeNode(MarkdownIt("commonmark").parse(md_text))
    return tree.children
class TestParseSectionEntries:
    """_parse_section_entries() turns list-item blocks into entry dicts."""

    def test_flat_entries(self):
        blocks = _content_nodes(
            "- [django](https://example.com/d) - A web framework.\n"
            "- [flask](https://example.com/f) - A micro framework.\n"
        )
        parsed = _parse_section_entries(blocks)
        assert len(parsed) == 2
        first, second = parsed
        assert first["name"] == "django"
        assert first["url"] == "https://example.com/d"
        assert "web framework" in first["description"]
        assert first["also_see"] == []
        assert second["name"] == "flask"

    def test_link_only_entry(self):
        # An item with no trailing " - text" still becomes an entry.
        parsed = _parse_section_entries(_content_nodes("- [tool](https://x.com)\n"))
        assert len(parsed) == 1
        assert parsed[0]["name"] == "tool"
        assert parsed[0]["description"] == ""

    def test_subcategorized_entries(self):
        # Plain-text items act as subcategory labels; their children are flattened.
        blocks = _content_nodes(
            "- Algorithms\n"
            "  - [algos](https://x.com/a) - Algo lib.\n"
            "  - [sorts](https://x.com/s) - Sort lib.\n"
            "- Design Patterns\n"
            "  - [patterns](https://x.com/p) - Pattern lib.\n"
        )
        parsed = _parse_section_entries(blocks)
        assert len(parsed) == 3
        assert parsed[0]["name"] == "algos"
        assert parsed[2]["name"] == "patterns"

    def test_text_before_link_is_subcategory(self):
        blocks = _content_nodes(
            "- MySQL - [awesome-mysql](http://example.com/awesome-mysql/)\n"
            "  - [mysqlclient](https://example.com/mysqlclient) - MySQL connector.\n"
            "  - [pymysql](https://example.com/pymysql) - Pure Python MySQL driver.\n"
        )
        parsed = _parse_section_entries(blocks)
        # awesome-mysql is a subcategory label, not an entry
        assert len(parsed) == 2
        names = [entry["name"] for entry in parsed]
        assert "awesome-mysql" not in names
        assert "mysqlclient" in names
        assert "pymysql" in names

    def test_also_see_sub_entries(self):
        # A nested list under a link item becomes that entry's "also_see" list.
        blocks = _content_nodes(
            "- [asyncio](https://docs.python.org/3/library/asyncio.html) - Async I/O.\n"
            "  - [awesome-asyncio](https://github.com/timofurrer/awesome-asyncio)\n"
            "- [trio](https://github.com/python-trio/trio) - Friendly async.\n"
        )
        parsed = _parse_section_entries(blocks)
        assert len(parsed) == 2
        assert parsed[0]["name"] == "asyncio"
        assert len(parsed[0]["also_see"]) == 1
        assert parsed[0]["also_see"][0]["name"] == "awesome-asyncio"
        assert parsed[1]["name"] == "trio"
        assert parsed[1]["also_see"] == []

    def test_entry_count_includes_also_see(self):
        readme = textwrap.dedent("""\
        # T

        ---

        ## Async

        - [asyncio](https://x.com) - Async I/O.
          - [awesome-asyncio](https://y.com)
        - [trio](https://z.com) - Friendly async.

        # Contributing

        Done.
        """)
        categories, _ = parse_readme(readme)
        # 2 main entries + 1 also_see = 3
        assert categories[0]["entry_count"] == 3

    def test_preview_first_four_names(self):
        readme = textwrap.dedent("""\
        # T

        ---

        ## Libs

        - [alpha](https://x.com) - A.
        - [beta](https://x.com) - B.
        - [gamma](https://x.com) - C.
        - [delta](https://x.com) - D.
        - [epsilon](https://x.com) - E.

        # Contributing

        Done.
        """)
        categories, _ = parse_readme(readme)
        assert categories[0]["preview"] == "alpha, beta, gamma, delta"

    def test_description_html_escapes_xss(self):
        # Raw HTML in a description must be entity-escaped, never passed through.
        blocks = _content_nodes('- [lib](https://x.com) - A <script>alert(1)</script> lib.\n')
        parsed = _parse_section_entries(blocks)
        assert "<script>" not in parsed[0]["description"]
        assert "&lt;script&gt;" in parsed[0]["description"]
class TestRenderSectionHtml:
    """_render_section_html() produces the per-category HTML fragment."""

    def test_basic_entry(self):
        nodes = _content_nodes("- [django](https://example.com) - A web framework.\n")
        html = _render_section_html(nodes)
        assert 'class="entry"' in html
        assert 'href="https://example.com"' in html
        assert "django" in html
        assert "A web framework." in html

    def test_subcategory_label(self):
        # A plain-text item renders as a subcategory heading above its entries.
        nodes = _content_nodes(
            "- Synchronous\n  - [django](https://x.com) - Framework.\n"
        )
        html = _render_section_html(nodes)
        assert 'class="subcat"' in html
        assert "Synchronous" in html
        assert 'class="entry"' in html

    def test_sub_entry(self):
        # Nested "also see" links get their own entry-sub styling.
        nodes = _content_nodes(
            "- [django](https://x.com) - Framework.\n"
            "  - [awesome-django](https://y.com)\n"
        )
        html = _render_section_html(nodes)
        assert 'class="entry-sub"' in html
        assert "awesome-django" in html

    def test_link_only_entry(self):
        nodes = _content_nodes("- [tool](https://x.com)\n")
        html = _render_section_html(nodes)
        assert 'class="entry"' in html
        assert 'href="https://x.com"' in html
        assert "tool" in html

    def test_xss_escaped_in_name(self):
        # A raw "<img ...>" tag smuggled into a link label must never reach the
        # output unescaped. NOTE: the previous assertion here,
        # `"onerror" not in html or "&" in html`, was vacuous — any escaped
        # entity anywhere in the fragment contains "&", so an unescaped
        # injected tag would still have passed.
        nodes = _content_nodes('- [<img onerror=alert(1)>](https://x.com) - Bad.\n')
        html = _render_section_html(nodes)
        assert "<img" not in html

    def test_xss_escaped_in_subcat(self):
        # Subcategory labels are also attacker-controlled README text.
        nodes = _content_nodes("- <script>alert(1)</script>\n")
        html = _render_section_html(nodes)
        assert "<script>" not in html
class TestParseRealReadme:
    """End-to-end checks of parse_readme() against the repository's README.md."""

    @pytest.fixture(autouse=True)
    def load_readme(self):
        # Tests live two directories below the repository root.
        repo_root = os.path.join(os.path.dirname(__file__), "..", "..")
        with open(os.path.join(repo_root, "README.md"), encoding="utf-8") as fh:
            self.readme_text = fh.read()
        self.cats, self.resources = parse_readme(self.readme_text)

    def test_at_least_83_categories(self):
        assert len(self.cats) >= 83

    def test_resources_has_newsletters_and_podcasts(self):
        resource_names = [section["name"] for section in self.resources]
        assert "Newsletters" in resource_names
        assert "Podcasts" in resource_names

    def test_contributing_not_in_results(self):
        names = [s["name"] for s in self.cats] + [s["name"] for s in self.resources]
        assert "Contributing" not in names

    def test_first_category_is_admin_panels(self):
        first = self.cats[0]
        assert first["name"] == "Admin Panels"
        assert first["slug"] == "admin-panels"

    def test_last_category_is_wsgi_servers(self):
        last = self.cats[-1]
        assert last["name"] == "WSGI Servers"
        assert last["slug"] == "wsgi-servers"

    def test_restful_api_slug(self):
        assert "restful-api" in {c["slug"] for c in self.cats}

    def test_descriptions_extracted(self):
        assert self.cats[0]["description"] == "Libraries for administrative interfaces."

    def test_entry_counts_nonzero(self):
        for category in self.cats:
            assert category["entry_count"] > 0, f"{category['name']} has 0 entries"

    def test_previews_nonempty(self):
        for category in self.cats:
            assert category["preview"], f"{category['name']} has empty preview"

    def test_content_html_nonempty(self):
        for category in self.cats:
            assert category["content_html"], f"{category['name']} has empty content_html"

    def test_algorithms_has_subcategories(self):
        algos = next(c for c in self.cats if c["name"] == "Algorithms and Design Patterns")
        assert 'class="subcat"' in algos["content_html"]

    def test_async_has_also_see(self):
        async_cat = next(c for c in self.cats if c["name"] == "Asynchronous Programming")
        entry = next(e for e in async_cat["entries"] if e["name"] == "asyncio")
        assert len(entry["also_see"]) >= 1
        assert entry["also_see"][0]["name"] == "awesome-asyncio"

    def test_description_links_stripped_to_text(self):
        algos = next(c for c in self.cats if c["name"] == "Algorithms and Design Patterns")
        description = algos["description"]
        assert "awesome-algorithms" in description
        assert "https://" not in description