refactor(parser): remove resources parsing, preview, and content_html fields

parse_readme now returns list[ParsedGroup] instead of a
(groups, resources) tuple. The resources section (Newsletters,
Podcasts), preview string, and content_html rendering are no longer
produced by the parser or consumed by the build. Removes
_render_section_html, _group_by_h2, and the associated dead code and
tests.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Vinta Chen
2026-03-23 01:43:19 +08:00
parent c5dd3060ef
commit 25a3f4d903
4 changed files with 37 additions and 253 deletions
+5 -11
View File
@@ -59,19 +59,13 @@ class TestBuild:
)
(tpl_dir / "index.html").write_text(
'{% extends "base.html" %}{% block content %}'
"{% for group in groups %}"
'<section class="group">'
"<h2>{{ group.name }}</h2>"
"{% for cat in group.categories %}"
'<div class="row" id="{{ cat.slug }}">'
"<span>{{ cat.name }}</span>"
"<span>{{ cat.preview }}</span>"
"<span>{{ cat.entry_count }}</span>"
'<div class="row-content" hidden>{{ cat.content_html | safe }}</div>'
"{% for entry in entries %}"
'<div class="row">'
"<span>{{ entry.name }}</span>"
"<span>{{ entry.categories | join(', ') }}</span>"
"<span>{{ entry.groups | join(', ') }}</span>"
"</div>"
"{% endfor %}"
"</section>"
"{% endfor %}"
"{% endblock %}",
encoding="utf-8",
)
+19 -120
View File
@@ -7,7 +7,6 @@ import pytest
from readme_parser import (
_parse_section_entries,
_render_section_html,
parse_readme,
render_inline_html,
render_inline_text,
@@ -159,50 +158,39 @@ GROUPED_README = textwrap.dedent("""\
class TestParseReadmeSections:
def test_ungrouped_categories_go_to_other(self):
groups, resources = parse_readme(MINIMAL_README)
groups = parse_readme(MINIMAL_README)
assert len(groups) == 1
assert groups[0]["name"] == "Other"
assert len(groups[0]["categories"]) == 2
def test_ungrouped_category_names(self):
groups, _ = parse_readme(MINIMAL_README)
groups = parse_readme(MINIMAL_README)
cats = groups[0]["categories"]
assert cats[0]["name"] == "Alpha"
assert cats[1]["name"] == "Beta"
def test_resource_count(self):
_, resources = parse_readme(MINIMAL_README)
assert len(resources) == 2
def test_category_slugs(self):
groups, _ = parse_readme(MINIMAL_README)
groups = parse_readme(MINIMAL_README)
cats = groups[0]["categories"]
assert cats[0]["slug"] == "alpha"
assert cats[1]["slug"] == "beta"
def test_category_description(self):
groups, _ = parse_readme(MINIMAL_README)
groups = parse_readme(MINIMAL_README)
cats = groups[0]["categories"]
assert cats[0]["description"] == "Libraries for alpha stuff."
assert cats[1]["description"] == "Tools for beta."
def test_resource_names(self):
_, resources = parse_readme(MINIMAL_README)
assert resources[0]["name"] == "Newsletters"
assert resources[1]["name"] == "Podcasts"
def test_contributing_skipped(self):
groups, resources = parse_readme(MINIMAL_README)
groups = parse_readme(MINIMAL_README)
all_names = []
for g in groups:
all_names.extend(c["name"] for c in g["categories"])
all_names.extend(r["name"] for r in resources)
assert "Contributing" not in all_names
def test_no_separator(self):
groups, resources = parse_readme("# Just a heading\n\nSome text.\n")
groups = parse_readme("# Just a heading\n\nSome text.\n")
assert groups == []
assert resources == []
def test_no_description(self):
readme = textwrap.dedent("""\
@@ -224,7 +212,7 @@ class TestParseReadmeSections:
Done.
""")
groups, resources = parse_readme(readme)
groups = parse_readme(readme)
cats = groups[0]["categories"]
assert cats[0]["description"] == ""
assert cats[0]["entries"][0]["name"] == "item"
@@ -245,42 +233,37 @@ class TestParseReadmeSections:
Done.
""")
groups, _ = parse_readme(readme)
groups = parse_readme(readme)
cats = groups[0]["categories"]
assert cats[0]["description"] == "Algorithms. Also see awesome-algos."
class TestParseGroupedReadme:
def test_group_count(self):
groups, _ = parse_readme(GROUPED_README)
groups = parse_readme(GROUPED_README)
assert len(groups) == 2
def test_group_names(self):
groups, _ = parse_readme(GROUPED_README)
groups = parse_readme(GROUPED_README)
assert groups[0]["name"] == "Group One"
assert groups[1]["name"] == "Group Two"
def test_group_slugs(self):
groups, _ = parse_readme(GROUPED_README)
groups = parse_readme(GROUPED_README)
assert groups[0]["slug"] == "group-one"
assert groups[1]["slug"] == "group-two"
def test_group_one_has_one_category(self):
groups, _ = parse_readme(GROUPED_README)
groups = parse_readme(GROUPED_README)
assert len(groups[0]["categories"]) == 1
assert groups[0]["categories"][0]["name"] == "Alpha"
def test_group_two_has_two_categories(self):
groups, _ = parse_readme(GROUPED_README)
groups = parse_readme(GROUPED_README)
assert len(groups[1]["categories"]) == 2
assert groups[1]["categories"][0]["name"] == "Beta"
assert groups[1]["categories"][1]["name"] == "Gamma"
def test_resources_still_parsed(self):
_, resources = parse_readme(GROUPED_README)
assert len(resources) == 1
assert resources[0]["name"] == "Newsletters"
def test_empty_group_skipped(self):
readme = textwrap.dedent("""\
# T
@@ -299,7 +282,7 @@ class TestParseGroupedReadme:
Done.
""")
groups, _ = parse_readme(readme)
groups = parse_readme(readme)
assert len(groups) == 1
assert groups[0]["name"] == "HasCats"
@@ -319,7 +302,7 @@ class TestParseGroupedReadme:
Done.
""")
groups, _ = parse_readme(readme)
groups = parse_readme(readme)
# "Note:" has text after the strong node, so it's not a group marker
# Category goes into "Other"
assert len(groups) == 1
@@ -345,7 +328,7 @@ class TestParseGroupedReadme:
Done.
""")
groups, _ = parse_readme(readme)
groups = parse_readme(readme)
assert len(groups) == 2
assert groups[0]["name"] == "Other"
assert groups[0]["categories"][0]["name"] == "Orphan"
@@ -438,33 +421,11 @@ class TestParseSectionEntries:
Done.
""")
groups, _ = parse_readme(readme)
groups = parse_readme(readme)
cats = groups[0]["categories"]
# 2 main entries + 1 also_see = 3
assert cats[0]["entry_count"] == 3
def test_preview_first_four_names(self):
readme = textwrap.dedent("""\
# T
---
## Libs
- [alpha](https://x.com) - A.
- [beta](https://x.com) - B.
- [gamma](https://x.com) - C.
- [delta](https://x.com) - D.
- [epsilon](https://x.com) - E.
# Contributing
Done.
""")
groups, _ = parse_readme(readme)
cats = groups[0]["categories"]
assert cats[0]["preview"] == "alpha, beta, gamma, delta"
def test_description_html_escapes_xss(self):
nodes = _content_nodes('- [lib](https://x.com) - A <script>alert(1)</script> lib.\n')
entries = _parse_section_entries(nodes)
@@ -472,58 +433,13 @@ class TestParseSectionEntries:
assert "&lt;script&gt;" in entries[0]["description"]
class TestRenderSectionHtml:
def test_basic_entry(self):
nodes = _content_nodes("- [django](https://example.com) - A web framework.\n")
html = _render_section_html(nodes)
assert 'class="entry"' in html
assert 'href="https://example.com"' in html
assert "django" in html
assert "A web framework." in html
def test_subcategory_label(self):
nodes = _content_nodes(
"- Synchronous\n - [django](https://x.com) - Framework.\n"
)
html = _render_section_html(nodes)
assert 'class="subcat"' in html
assert "Synchronous" in html
assert 'class="entry"' in html
def test_sub_entry(self):
nodes = _content_nodes(
"- [django](https://x.com) - Framework.\n"
" - [awesome-django](https://y.com)\n"
)
html = _render_section_html(nodes)
assert 'class="entry-sub"' in html
assert "awesome-django" in html
def test_link_only_entry(self):
nodes = _content_nodes("- [tool](https://x.com)\n")
html = _render_section_html(nodes)
assert 'class="entry"' in html
assert 'href="https://x.com"' in html
assert "tool" in html
def test_xss_escaped_in_name(self):
nodes = _content_nodes('- [<img onerror=alert(1)>](https://x.com) - Bad.\n')
html = _render_section_html(nodes)
assert "onerror" not in html or "&" in html
def test_xss_escaped_in_subcat(self):
nodes = _content_nodes("- <script>alert(1)</script>\n")
html = _render_section_html(nodes)
assert "<script>" not in html
class TestParseRealReadme:
@pytest.fixture(autouse=True)
def load_readme(self):
readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
with open(readme_path, encoding="utf-8") as f:
self.readme_text = f.read()
self.groups, self.resources = parse_readme(self.readme_text)
self.groups = parse_readme(self.readme_text)
self.cats = [c for g in self.groups for c in g["categories"]]
def test_at_least_11_groups(self):
@@ -535,13 +451,8 @@ class TestParseRealReadme:
def test_at_least_69_categories(self):
assert len(self.cats) >= 69
def test_resources_has_newsletters_and_podcasts(self):
names = [r["name"] for r in self.resources]
assert "Newsletters" in names
assert "Podcasts" in names
def test_contributing_not_in_results(self):
all_names = [c["name"] for c in self.cats] + [r["name"] for r in self.resources]
all_names = [c["name"] for c in self.cats]
assert "Contributing" not in all_names
def test_first_category_is_ai_and_agents(self):
@@ -560,18 +471,6 @@ class TestParseRealReadme:
for cat in self.cats:
assert cat["entry_count"] > 0, f"{cat['name']} has 0 entries"
def test_previews_nonempty(self):
for cat in self.cats:
assert cat["preview"], f"{cat['name']} has empty preview"
def test_content_html_nonempty(self):
for cat in self.cats:
assert cat["content_html"], f"{cat['name']} has empty content_html"
def test_algorithms_has_subcategories(self):
algos = next(c for c in self.cats if c["name"] == "Algorithms and Design Patterns")
assert 'class="subcat"' in algos["content_html"]
def test_async_has_also_see(self):
async_cat = next(c for c in self.cats if c["name"] == "Asynchronous Programming")
asyncio_entry = next(e for e in async_cat["entries"] if e["name"] == "asyncio")