refactor: extract parsing logic from build.py into readme_parser module

slugify, parse_readme, count_entries, extract_preview, render_content_html, and related helpers are moved to a dedicated readme_parser module. build.py now imports from readme_parser rather than defining these inline. Tests for the removed functions are dropped from test_build.py since they now live with the module they test.

Co-Authored-By: Claude <noreply@anthropic.com>
2026-05-09 22:53:49 +08:00 · 2026-03-18 17:27:14 +08:00
parent 03ac212880
commit 0f374970dd
2 changed files with 20 additions and 532 deletions
@@ -7,21 +7,15 @@ import sys
 import textwrap
 from pathlib import Path

-import pytest
-
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 from build import (
    build,
-    count_entries,
    extract_github_repo,
-    extract_preview,
    group_categories,
    load_stars,
-    parse_readme,
-    render_content_html,
-    slugify,
    sort_entries,
 )
+from readme_parser import slugify

 # ---------------------------------------------------------------------------
 # slugify
@@ -51,244 +45,6 @@ class TestSlugify:
        assert slugify("  Date  and  Time  ") == "date-and-time"


-# ---------------------------------------------------------------------------
-# count_entries
-# ---------------------------------------------------------------------------
-
-
-class TestCountEntries:
-    def test_counts_dash_entries(self):
-        assert count_entries("- [a](url) - Desc.\n- [b](url) - Desc.") == 2
-
-    def test_counts_star_entries(self):
-        assert count_entries("* [a](url) - Desc.") == 1
-
-    def test_ignores_non_entries(self):
-        assert count_entries("Some text\n- [a](url) - Desc.\nMore text") == 1
-
-    def test_counts_indented_entries(self):
-        assert count_entries("    - [a](url) - Desc.") == 1
-
-    def test_empty_content(self):
-        assert count_entries("") == 0
-
-
-# ---------------------------------------------------------------------------
-# extract_preview
-# ---------------------------------------------------------------------------
-
-
-class TestExtractPreview:
-    def test_basic(self):
-        content = "* [alpha](url) - A.\n* [beta](url) - B.\n* [gamma](url) - C."
-        assert extract_preview(content) == "alpha, beta, gamma"
-
-    def test_max_four(self):
-        content = "\n".join(f"* [lib{i}](url) - Desc." for i in range(10))
-        assert extract_preview(content) == "lib0, lib1, lib2, lib3"
-
-    def test_empty(self):
-        assert extract_preview("") == ""
-
-    def test_skips_subcategory_labels(self):
-        content = "* Synchronous\n* [django](url) - Framework.\n* [flask](url) - Micro."
-        assert extract_preview(content) == "django, flask"
-
-
-# ---------------------------------------------------------------------------
-# render_content_html
-# ---------------------------------------------------------------------------
-
-
-class TestRenderContentHtml:
-    def test_basic_entry(self):
-        content = "* [django](https://example.com) - A web framework."
-        html = render_content_html(content)
-        assert 'href="https://example.com"' in html
-        assert "django" in html
-        assert "A web framework." in html
-        assert 'class="entry"' in html
-
-    def test_subcategory_label(self):
-        content = "* Synchronous\n* [django](https://x.com) - Framework."
-        html = render_content_html(content)
-        assert 'class="subcat"' in html
-        assert "Synchronous" in html
-
-    def test_sub_entry(self):
-        content = "* [django](https://x.com) - Framework.\n    * [awesome-django](https://y.com)"
-        html = render_content_html(content)
-        assert 'class="entry-sub"' in html
-        assert "awesome-django" in html
-
-    def test_link_only_entry(self):
-        content = "* [tool](https://x.com)"
-        html = render_content_html(content)
-        assert 'href="https://x.com"' in html
-        assert "tool" in html
-
-
-# ---------------------------------------------------------------------------
-# parse_readme
-# ---------------------------------------------------------------------------
-
-MINIMAL_README = textwrap.dedent("""\
-    # Awesome Python
-
-    Some intro text.
-
-    ---
-
-    ## Alpha
-
-    _Libraries for alpha stuff._
-
-    - [lib-a](https://example.com/a) - Does A.
-    - [lib-b](https://example.com/b) - Does B.
-
-    ## Beta
-
-    _Tools for beta._
-
-    - [lib-c](https://example.com/c) - Does C.
-
-    # Resources
-
-    Where to discover resources.
-
-    ## Newsletters
-
-    - [News One](https://example.com/n1)
-    - [News Two](https://example.com/n2)
-
-    ## Podcasts
-
-    - [Pod One](https://example.com/p1)
-
-    # Contributing
-
-    Please contribute!
-""")
-
-
-class TestParseReadme:
-    def test_category_count(self):
-        cats, resources = parse_readme(MINIMAL_README)
-        assert len(cats) == 2
-
-    def test_resource_count(self):
-        cats, resources = parse_readme(MINIMAL_README)
-        assert len(resources) == 2
-
-    def test_category_names(self):
-        cats, _ = parse_readme(MINIMAL_README)
-        assert cats[0]["name"] == "Alpha"
-        assert cats[1]["name"] == "Beta"
-
-    def test_category_slugs(self):
-        cats, _ = parse_readme(MINIMAL_README)
-        assert cats[0]["slug"] == "alpha"
-        assert cats[1]["slug"] == "beta"
-
-    def test_category_description(self):
-        cats, _ = parse_readme(MINIMAL_README)
-        assert cats[0]["description"] == "Libraries for alpha stuff."
-        assert cats[1]["description"] == "Tools for beta."
-
-    def test_category_content_has_entries(self):
-        cats, _ = parse_readme(MINIMAL_README)
-        assert "lib-a" in cats[0]["content"]
-        assert "lib-b" in cats[0]["content"]
-
-    def test_resources_names(self):
-        _, resources = parse_readme(MINIMAL_README)
-        assert resources[0]["name"] == "Newsletters"
-        assert resources[1]["name"] == "Podcasts"
-
-    def test_resources_content(self):
-        _, resources = parse_readme(MINIMAL_README)
-        assert "News One" in resources[0]["content"]
-        assert "Pod One" in resources[1]["content"]
-
-    def test_contributing_skipped(self):
-        cats, resources = parse_readme(MINIMAL_README)
-        all_names = [c["name"] for c in cats] + [r["name"] for r in resources]
-        assert "Contributing" not in all_names
-
-    def test_no_separator(self):
-        cats, resources = parse_readme("# Just a heading\n\nSome text.\n")
-        assert cats == []
-        assert resources == []
-
-    def test_no_description(self):
-        readme = textwrap.dedent("""\
-            # Title
-
-            ---
-
-            ## NullDesc
-
-            - [item](https://x.com) - Thing.
-
-            # Resources
-
-            ## Tips
-
-            - [tip](https://x.com)
-
-            # Contributing
-
-            Done.
-        """)
-        cats, resources = parse_readme(readme)
-        assert cats[0]["description"] == ""
-        assert "item" in cats[0]["content"]
-
-
-# ---------------------------------------------------------------------------
-# parse_readme on real README
-# ---------------------------------------------------------------------------
-
-
-class TestParseRealReadme:
-    @pytest.fixture(autouse=True)
-    def load_readme(self):
-        readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
-        with open(readme_path, encoding="utf-8") as f:
-            self.readme_text = f.read()
-        self.cats, self.resources = parse_readme(self.readme_text)
-
-    def test_at_least_83_categories(self):
-        assert len(self.cats) >= 83
-
-    def test_resources_has_newsletters_and_podcasts(self):
-        names = [r["name"] for r in self.resources]
-        assert "Newsletters" in names
-        assert "Podcasts" in names
-
-    def test_contributing_not_in_results(self):
-        all_names = [c["name"] for c in self.cats] + [
-            r["name"] for r in self.resources
-        ]
-        assert "Contributing" not in all_names
-
-    def test_first_category_is_admin_panels(self):
-        assert self.cats[0]["name"] == "Admin Panels"
-        assert self.cats[0]["slug"] == "admin-panels"
-
-    def test_last_category_is_wsgi_servers(self):
-        assert self.cats[-1]["name"] == "WSGI Servers"
-        assert self.cats[-1]["slug"] == "wsgi-servers"
-
-    def test_restful_api_slug(self):
-        slugs = [c["slug"] for c in self.cats]
-        assert "restful-api" in slugs
-
-    def test_descriptions_extracted(self):
-        admin = self.cats[0]
-        assert admin["description"] == "Libraries for administrative interfaces."
-
-
 # ---------------------------------------------------------------------------
 # group_categories
 # ---------------------------------------------------------------------------
@@ -318,26 +74,6 @@ class TestGroupCategories:
        assert "Resources" in group_names


-# ---------------------------------------------------------------------------
-# render_markdown (kept for compatibility)
-# ---------------------------------------------------------------------------
-
-
-class TestRenderMarkdown:
-    def test_renders_link_list(self):
-        from build import render_markdown
-
-        html = render_markdown("- [lib](https://example.com) - Does stuff.")
-        assert "<li>" in html
-        assert '<a href="https://example.com">lib</a>' in html
-
-    def test_renders_plain_text(self):
-        from build import render_markdown
-
-        html = render_markdown("Hello world")
-        assert "<p>Hello world</p>" in html
-
-
 # ---------------------------------------------------------------------------
 # build (integration)
 # ---------------------------------------------------------------------------