feat: migrate README parser to markdown-it-py and refresh website

Switch readme_parser.py from regex-based parsing to markdown-it-py for more robust and maintainable Markdown AST traversal. Update build pipeline, templates, styles, and JS to support the new parser output. Refresh GitHub stars data and update tests to match new parser behavior.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-05-09 22:53:49 +08:00 · 2026-03-18 20:33:36 +08:00
parent 95b6b3cc69
commit 280f250ce0
12 changed files with 1599 additions and 883 deletions
@@ -1,13 +1,10 @@
 """Tests for the build module."""

 import json
-import os
 import shutil
-import sys
 import textwrap
 from pathlib import Path

-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 from build import (
    build,
    extract_github_repo,
@@ -149,27 +146,6 @@ class TestBuild:
        # No category sub-pages
        assert not (site / "categories").exists()

-    def test_build_creates_cname(self, tmp_path):
-        readme = textwrap.dedent("""\
-            # T
-
-            ---
-
-            ## Only
-
-            - [x](https://x.com) - X.
-
-            # Contributing
-
-            Done.
-        """)
-        self._make_repo(tmp_path, readme)
-        build(str(tmp_path))
-
-        cname = tmp_path / "website" / "output" / "CNAME"
-        assert cname.exists()
-        assert "awesome-python.com" in cname.read_text()
-
    def test_build_cleans_stale_output(self, tmp_path):
        readme = textwrap.dedent("""\
            # T
@@ -1,12 +1,10 @@
 """Tests for the readme_parser module."""

 import os
-import sys
 import textwrap

 import pytest

-sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 from readme_parser import (
    _parse_section_entries,
    _render_section_html,
@@ -141,21 +139,11 @@ class TestParseReadmeSections:
        assert cats[0]["description"] == "Libraries for alpha stuff."
        assert cats[1]["description"] == "Tools for beta."

-    def test_category_content_has_entries(self):
-        cats, _ = parse_readme(MINIMAL_README)
-        assert "lib-a" in cats[0]["content"]
-        assert "lib-b" in cats[0]["content"]
-
    def test_resource_names(self):
        _, resources = parse_readme(MINIMAL_README)
        assert resources[0]["name"] == "Newsletters"
        assert resources[1]["name"] == "Podcasts"

-    def test_resource_content(self):
-        _, resources = parse_readme(MINIMAL_README)
-        assert "News One" in resources[0]["content"]
-        assert "Pod One" in resources[1]["content"]
-
    def test_contributing_skipped(self):
        cats, resources = parse_readme(MINIMAL_README)
        all_names = [c["name"] for c in cats] + [r["name"] for r in resources]
@@ -188,7 +176,7 @@ class TestParseReadmeSections:
        """)
        cats, resources = parse_readme(readme)
        assert cats[0]["description"] == ""
-        assert "item" in cats[0]["content"]
+        assert cats[0]["entries"][0]["name"] == "item"

    def test_description_with_link_stripped(self):
        readme = textwrap.dedent("""\
@@ -251,6 +239,20 @@ class TestParseSectionEntries:
        assert entries[0]["name"] == "algos"
        assert entries[2]["name"] == "patterns"

+    def test_text_before_link_is_subcategory(self):
+        nodes = _content_nodes(
+            "- MySQL - [awesome-mysql](http://example.com/awesome-mysql/)\n"
+            "  - [mysqlclient](https://example.com/mysqlclient) - MySQL connector.\n"
+            "  - [pymysql](https://example.com/pymysql) - Pure Python MySQL driver.\n"
+        )
+        entries = _parse_section_entries(nodes)
+        # awesome-mysql is a subcategory label, not an entry
+        assert len(entries) == 2
+        names = [e["name"] for e in entries]
+        assert "awesome-mysql" not in names
+        assert "mysqlclient" in names
+        assert "pymysql" in names
+
    def test_also_see_sub_entries(self):
        nodes = _content_nodes(
            "- [asyncio](https://docs.python.org/3/library/asyncio.html) - Async I/O.\n"