refactor: extract parsing logic from build.py into readme_parser module

slugify, parse_readme, count_entries, extract_preview, render_content_html,
and related helpers are moved to a dedicated readme_parser module.
build.py now imports from readme_parser rather than defining these inline.
Tests for the moved functions are dropped from test_build.py since they
now live alongside the module they test.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Vinta Chen
2026-03-18 17:27:14 +08:00
parent 03ac212880
commit 0f374970dd
2 changed files with 20 additions and 532 deletions

View File

@@ -7,21 +7,15 @@ import sys
import textwrap
from pathlib import Path
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from build import (
build,
count_entries,
extract_github_repo,
extract_preview,
group_categories,
load_stars,
parse_readme,
render_content_html,
slugify,
sort_entries,
)
from readme_parser import slugify
# ---------------------------------------------------------------------------
# slugify
@@ -51,244 +45,6 @@ class TestSlugify:
assert slugify(" Date and Time ") == "date-and-time"
# ---------------------------------------------------------------------------
# count_entries
# ---------------------------------------------------------------------------
class TestCountEntries:
    """Exercise ``count_entries`` with small markdown snippets."""

    def test_counts_dash_entries(self):
        """Two ``-`` bullets with links are expected to count as two entries."""
        markdown = "- [a](url) - Desc.\n- [b](url) - Desc."
        total = count_entries(markdown)
        assert total == 2

    def test_counts_star_entries(self):
        """A ``*`` bullet with a link is expected to count as one entry."""
        markdown = "* [a](url) - Desc."
        total = count_entries(markdown)
        assert total == 1

    def test_ignores_non_entries(self):
        """Plain text lines around a bullet must not add to the count."""
        markdown = "Some text\n- [a](url) - Desc.\nMore text"
        total = count_entries(markdown)
        assert total == 1

    def test_counts_indented_entries(self):
        """A bullet preceded by leading whitespace is still expected to count."""
        markdown = " - [a](url) - Desc."
        total = count_entries(markdown)
        assert total == 1

    def test_empty_content(self):
        """An empty string is expected to yield zero entries."""
        total = count_entries("")
        assert total == 0
# ---------------------------------------------------------------------------
# extract_preview
# ---------------------------------------------------------------------------
class TestExtractPreview:
    """Exercise ``extract_preview``, which is expected to return entry names joined by ", "."""

    def test_basic(self):
        """Three linked entries give a preview naming all three in order."""
        bullets = [
            "* [alpha](url) - A.",
            "* [beta](url) - B.",
            "* [gamma](url) - C.",
        ]
        preview = extract_preview("\n".join(bullets))
        assert preview == "alpha, beta, gamma"

    def test_max_four(self):
        """Given ten entries, only the first four names are expected in the preview."""
        bullets = [f"* [lib{i}](url) - Desc." for i in range(10)]
        preview = extract_preview("\n".join(bullets))
        assert preview == "lib0, lib1, lib2, lib3"

    def test_empty(self):
        """Empty content is expected to give an empty preview."""
        preview = extract_preview("")
        assert preview == ""

    def test_skips_subcategory_labels(self):
        """A bullet without a link (a subcategory label) must not appear in the preview."""
        bullets = [
            "* Synchronous",
            "* [django](url) - Framework.",
            "* [flask](url) - Micro.",
        ]
        preview = extract_preview("\n".join(bullets))
        assert preview == "django, flask"
# ---------------------------------------------------------------------------
# render_content_html
# ---------------------------------------------------------------------------
class TestRenderContentHtml:
    """Exercise ``render_content_html`` by checking for expected fragments in its output."""

    def test_basic_entry(self):
        """A linked entry with a description renders its link, name, text and entry class."""
        rendered = render_content_html(
            "* [django](https://example.com) - A web framework."
        )
        expected_fragments = (
            'href="https://example.com"',
            "django",
            "A web framework.",
            'class="entry"',
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_subcategory_label(self):
        """A bullet without a link is expected to render with the ``subcat`` class."""
        markdown = "\n".join(
            ["* Synchronous", "* [django](https://x.com) - Framework."]
        )
        rendered = render_content_html(markdown)
        for fragment in ('class="subcat"', "Synchronous"):
            assert fragment in rendered

    def test_sub_entry(self):
        """An indented bullet under an entry is expected to render with the ``entry-sub`` class."""
        parent = "* [django](https://x.com) - Framework."
        child = " * [awesome-django](https://y.com)"
        rendered = render_content_html("\n".join([parent, child]))
        for fragment in ('class="entry-sub"', "awesome-django"):
            assert fragment in rendered

    def test_link_only_entry(self):
        """An entry with a link but no description still renders its link and name."""
        rendered = render_content_html("* [tool](https://x.com)")
        for fragment in ('href="https://x.com"', "tool"):
            assert fragment in rendered
# ---------------------------------------------------------------------------
# parse_readme
# ---------------------------------------------------------------------------
# Small README fixture shared by the parse_readme tests: a title and intro,
# a `---` separator, two categories (Alpha with two entries, Beta with one),
# a "Resources" section containing "Newsletters" and "Podcasts", and a
# trailing "Contributing" section.
MINIMAL_README = textwrap.dedent("""\
# Awesome Python
Some intro text.
---
## Alpha
_Libraries for alpha stuff._
- [lib-a](https://example.com/a) - Does A.
- [lib-b](https://example.com/b) - Does B.
## Beta
_Tools for beta._
- [lib-c](https://example.com/c) - Does C.
# Resources
Where to discover resources.
## Newsletters
- [News One](https://example.com/n1)
- [News Two](https://example.com/n2)
## Podcasts
- [Pod One](https://example.com/p1)
# Contributing
Please contribute!
""")
class TestParseReadme:
    """Exercise ``parse_readme`` against the ``MINIMAL_README`` fixture.

    ``parse_readme`` is unpacked as a ``(categories, resources)`` pair; each
    element is indexed by ``"name"``, ``"slug"``, ``"description"`` and
    ``"content"`` in the assertions below. The half of the pair a test does
    not inspect is bound to ``_`` so the intent of each test is explicit.
    """

    def test_category_count(self):
        """Both ``##`` sections before ``# Resources`` are reported as categories."""
        cats, _ = parse_readme(MINIMAL_README)
        assert len(cats) == 2

    def test_resource_count(self):
        """Both ``##`` sections under ``# Resources`` are reported as resources."""
        _, resources = parse_readme(MINIMAL_README)
        assert len(resources) == 2

    def test_category_names(self):
        """Category names come from the headings, in document order."""
        cats, _ = parse_readme(MINIMAL_README)
        assert cats[0]["name"] == "Alpha"
        assert cats[1]["name"] == "Beta"

    def test_category_slugs(self):
        """Each category carries a lowercase slug of its name."""
        cats, _ = parse_readme(MINIMAL_README)
        assert cats[0]["slug"] == "alpha"
        assert cats[1]["slug"] == "beta"

    def test_category_description(self):
        """The italic line under a heading becomes the description, without underscores."""
        cats, _ = parse_readme(MINIMAL_README)
        assert cats[0]["description"] == "Libraries for alpha stuff."
        assert cats[1]["description"] == "Tools for beta."

    def test_category_content_has_entries(self):
        """The category content keeps the entry names from its bullet list."""
        cats, _ = parse_readme(MINIMAL_README)
        assert "lib-a" in cats[0]["content"]
        assert "lib-b" in cats[0]["content"]

    def test_resources_names(self):
        """Resource names come from the headings under ``# Resources``, in order."""
        _, resources = parse_readme(MINIMAL_README)
        assert resources[0]["name"] == "Newsletters"
        assert resources[1]["name"] == "Podcasts"

    def test_resources_content(self):
        """Each resource keeps the link text of its entries."""
        _, resources = parse_readme(MINIMAL_README)
        assert "News One" in resources[0]["content"]
        assert "Pod One" in resources[1]["content"]

    def test_contributing_skipped(self):
        """The ``# Contributing`` section appears in neither result list."""
        cats, resources = parse_readme(MINIMAL_README)
        all_names = [c["name"] for c in cats] + [r["name"] for r in resources]
        assert "Contributing" not in all_names

    def test_no_separator(self):
        """Without a ``---`` separator, both result lists are empty."""
        cats, resources = parse_readme("# Just a heading\n\nSome text.\n")
        assert cats == []
        assert resources == []

    def test_no_description(self):
        """A category with no description line gets an empty description string."""
        readme = textwrap.dedent("""\
            # Title
            ---
            ## NullDesc
            - [item](https://x.com) - Thing.
            # Resources
            ## Tips
            - [tip](https://x.com)
            # Contributing
            Done.
        """)
        cats, _ = parse_readme(readme)
        assert cats[0]["description"] == ""
        assert "item" in cats[0]["content"]
# ---------------------------------------------------------------------------
# parse_readme on real README
# ---------------------------------------------------------------------------
class TestParseRealReadme:
    """Run ``parse_readme`` on the README.md found two directories above this file."""

    @pytest.fixture(autouse=True)
    def load_readme(self):
        """Read README.md and store its text and parse results on the test instance."""
        here = os.path.dirname(__file__)
        readme_path = os.path.join(here, "..", "..", "README.md")
        with open(readme_path, encoding="utf-8") as handle:
            text = handle.read()
        self.readme_text = text
        self.cats, self.resources = parse_readme(text)

    def test_at_least_83_categories(self):
        """The parsed README is expected to contain at least 83 categories."""
        category_total = len(self.cats)
        assert category_total >= 83

    def test_resources_has_newsletters_and_podcasts(self):
        """Both "Newsletters" and "Podcasts" must be among the resources."""
        resource_names = [resource["name"] for resource in self.resources]
        for expected in ("Newsletters", "Podcasts"):
            assert expected in resource_names

    def test_contributing_not_in_results(self):
        """"Contributing" must not be reported as a category or a resource."""
        category_names = [category["name"] for category in self.cats]
        resource_names = [resource["name"] for resource in self.resources]
        assert "Contributing" not in category_names + resource_names

    def test_first_category_is_admin_panels(self):
        """The first parsed category is "Admin Panels" with slug "admin-panels"."""
        first = self.cats[0]
        assert first["name"] == "Admin Panels"
        assert first["slug"] == "admin-panels"

    def test_last_category_is_wsgi_servers(self):
        """The last parsed category is "WSGI Servers" with slug "wsgi-servers"."""
        last = self.cats[-1]
        assert last["name"] == "WSGI Servers"
        assert last["slug"] == "wsgi-servers"

    def test_restful_api_slug(self):
        """Some category is expected to have the slug "restful-api"."""
        known_slugs = [category["slug"] for category in self.cats]
        assert "restful-api" in known_slugs

    def test_descriptions_extracted(self):
        """The first category carries its description text from the README."""
        description = self.cats[0]["description"]
        assert description == "Libraries for administrative interfaces."
# ---------------------------------------------------------------------------
# group_categories
# ---------------------------------------------------------------------------
@@ -318,26 +74,6 @@ class TestGroupCategories:
assert "Resources" in group_names
# ---------------------------------------------------------------------------
# render_markdown (kept for compatibility)
# ---------------------------------------------------------------------------
class TestRenderMarkdown:
    """Exercise ``build.render_markdown``, imported inside each test."""

    def test_renders_link_list(self):
        """A bullet with a link is expected to render as a list item containing an anchor."""
        from build import render_markdown

        rendered = render_markdown("- [lib](https://example.com) - Does stuff.")
        for fragment in ("<li>", '<a href="https://example.com">lib</a>'):
            assert fragment in rendered

    def test_renders_plain_text(self):
        """Plain text is expected to be wrapped in a paragraph element."""
        from build import render_markdown

        rendered = render_markdown("Hello world")
        assert "<p>Hello world</p>" in rendered
# ---------------------------------------------------------------------------
# build (integration)
# ---------------------------------------------------------------------------