feat: migrate README parser to markdown-it-py and refresh website

Switch readme_parser.py from regex-based parsing to markdown-it-py for
more robust and maintainable Markdown AST traversal. Update build pipeline,
templates, styles, and JS to support the new parser output. Refresh GitHub
stars data and update tests to match new parser behavior.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Vinta Chen
2026-03-18 20:33:36 +08:00
parent 95b6b3cc69
commit 280f250ce0
12 changed files with 1599 additions and 883 deletions

View File

@@ -1,13 +1,10 @@
"""Tests for the build module."""
import json
import os
import shutil
import sys
import textwrap
from pathlib import Path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from build import (
build,
extract_github_repo,
@@ -149,27 +146,6 @@ class TestBuild:
# No category sub-pages
assert not (site / "categories").exists()
def test_build_creates_cname(self, tmp_path):
    """build() writes a CNAME file mentioning awesome-python.com under website/output."""
    markdown = textwrap.dedent("""\
# T
---
## Only
- [x](https://x.com) - X.
# Contributing
Done.
""")
    self._make_repo(tmp_path, markdown)
    build(str(tmp_path))

    cname_path = tmp_path / "website" / "output" / "CNAME"
    assert cname_path.exists()
    assert "awesome-python.com" in cname_path.read_text()
def test_build_cleans_stale_output(self, tmp_path):
readme = textwrap.dedent("""\
# T

View File

@@ -1,12 +1,10 @@
"""Tests for the readme_parser module."""
import os
import sys
import textwrap
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from readme_parser import (
_parse_section_entries,
_render_section_html,
@@ -141,21 +139,11 @@ class TestParseReadmeSections:
assert cats[0]["description"] == "Libraries for alpha stuff."
assert cats[1]["description"] == "Tools for beta."
def test_category_content_has_entries(self):
    """The first category's "content" value contains both lib-a and lib-b."""
    categories, _resources = parse_readme(MINIMAL_README)
    for library in ("lib-a", "lib-b"):
        assert library in categories[0]["content"]
def test_resource_names(self):
    """The first two parsed resources are named Newsletters and Podcasts, in that order."""
    _categories, parsed_resources = parse_readme(MINIMAL_README)
    for position, expected_name in enumerate(("Newsletters", "Podcasts")):
        assert parsed_resources[position]["name"] == expected_name
def test_resource_content(self):
    """Each of the first two resources carries its own entry text in "content"."""
    _categories, parsed_resources = parse_readme(MINIMAL_README)
    for position, needle in enumerate(("News One", "Pod One")):
        assert needle in parsed_resources[position]["content"]
def test_contributing_skipped(self):
cats, resources = parse_readme(MINIMAL_README)
all_names = [c["name"] for c in cats] + [r["name"] for r in resources]
@@ -188,7 +176,7 @@ class TestParseReadmeSections:
""")
cats, resources = parse_readme(readme)
assert cats[0]["description"] == ""
assert "item" in cats[0]["content"]
assert cats[0]["entries"][0]["name"] == "item"
def test_description_with_link_stripped(self):
readme = textwrap.dedent("""\
@@ -251,6 +239,20 @@ class TestParseSectionEntries:
assert entries[0]["name"] == "algos"
assert entries[2]["name"] == "patterns"
def test_text_before_link_is_subcategory(self):
    """A bullet with plain text before its link is a subcategory label, not an entry."""
    markdown = (
        "- MySQL - [awesome-mysql](http://example.com/awesome-mysql/)\n"
        "  - [mysqlclient](https://example.com/mysqlclient) - MySQL connector.\n"
        "  - [pymysql](https://example.com/pymysql) - Pure Python MySQL driver.\n"
    )
    parsed = _parse_section_entries(_content_nodes(markdown))

    # Only the two nested bullets count as entries; the parent
    # "MySQL - [awesome-mysql]" bullet is expected to be dropped.
    assert len(parsed) == 2
    entry_names = [item["name"] for item in parsed]
    assert "awesome-mysql" not in entry_names
    assert "mysqlclient" in entry_names
    assert "pymysql" in entry_names
def test_also_see_sub_entries(self):
nodes = _content_nodes(
"- [asyncio](https://docs.python.org/3/library/asyncio.html) - Async I/O.\n"