refactor: parse thematic groups from README bold markers instead of hardcoding them

The website builder previously relied on a hardcoded SECTION_GROUPS list in
build.py to organize categories into thematic groups. This was fragile: any
rename or addition to README.md required a matching code change.

Replace this with a parser-driven approach:
- readme_parser.py now detects bold-only paragraphs (**Group Name**) as
  group boundary markers and groups H2 categories beneath them into
  ParsedGroup structs.
- build.py drops SECTION_GROUPS entirely; group_categories() now just
  passes parsed groups through and appends the Resources group.
- sort.py is removed as it relied on the old flat section model.
- Tests updated throughout to reflect the new (groups, resources) return
  shape and to cover the new grouping logic.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Vinta Chen
2026-03-20 18:43:09 +08:00
parent fd9b2665ed
commit 4322026817
5 changed files with 346 additions and 324 deletions

83
sort.py
View File

@@ -1,83 +0,0 @@
#!/usr/bin/env python
# coding: utf-8
"""
The approach taken is explained below. I decided to do it simply.
Initially I was considering parsing the data into some sort of
structure and then generating an appropriate README. I am still
considering doing it - but for now this should work. The only issue
I see is that it only sorts the entries at the lowest level, and that
the order of the top-level contents does not match the order of the actual
entries.
This could be extended by having nested blocks, sorting them recursively
and flattening the end structure into a list of lines. Revision 2 maybe ^.^.
"""
def sort_blocks():
    """Sort the second-level (##) sections of README.md in place.

    The file is split at the '- - -' separator: everything before it is the
    table of contents, everything after is the content. Top-level '# '
    blocks keep their order, but the leading block is re-sorted by its
    '##' sub-sections.
    """
    # Load the current README into memory.
    with open('README.md', 'r') as read_me_file:
        read_me = read_me_file.read()
    # Split once instead of twice; the original also wrapped each segment
    # in a redundant ''.join(...), which is a no-op on a string.
    # NOTE: only the segment between the first and second '- - -' is kept,
    # matching the original behavior.
    parts = read_me.split('- - -')
    table_of_contents = parts[0]
    blocks = parts[1].split('\n# ')
    for i in range(len(blocks)):
        # Re-attach the '# ' prefix consumed by split (the first block has
        # no heading) and terminate every block with a newline.
        if i == 0:
            blocks[i] = blocks[i] + '\n'
        else:
            blocks[i] = '# ' + blocks[i] + '\n'
    # Sort the '##' sub-sections of the leading block.
    inner_blocks = sorted(blocks[0].split('##'))
    for i in range(1, len(inner_blocks)):
        # Restore the '##' prefix consumed by split.
        if inner_blocks[i][0] != '#':
            inner_blocks[i] = '##' + inner_blocks[i]
    # Replace the unsorted leading block with the sorted one and write the
    # re-assembled README back to disk.
    blocks[0] = ''.join(inner_blocks)
    final_readme = table_of_contents + '- - -' + ''.join(blocks)
    with open('README.md', 'w+') as sorted_file:
        sorted_file.write(final_readme)
def main():
    """Sort sortable line blocks of README.md, then sort its sections.

    Lines are clustered into blocks by indentation: consecutive link lines
    ('* [' / '- [') at the same indent form one sortable block; every other
    line is its own single-line block. Each block is sorted
    case-insensitively and written back, after which sort_blocks()
    re-orders the '##' sections.
    """
    # Load the current README into memory as a list of lines.
    with open('README.md', 'r') as read_me_file:
        read_me = read_me_file.readlines()
    # Cluster the lines into blocks. Only link lines ([...](...)) are meant
    # to be sorted; clustering is done by indentation depth.
    blocks = []
    last_indent = None
    for line in read_me:
        s_line = line.lstrip()
        indent = len(line) - len(s_line)
        # str.startswith accepts a tuple of prefixes — clearer and cheaper
        # than any([...]) over a list comprehension.
        if s_line.startswith(('* [', '- [')):
            if indent == last_indent:
                # Same-indent link line: extend the current sortable block.
                blocks[-1].append(line)
            else:
                blocks.append([line])
            last_indent = indent
        else:
            # Non-link lines form unsortable single-line blocks.
            blocks.append([line])
            last_indent = None
    with open('README.md', 'w+') as sorted_file:
        # Sort each block individually (case-insensitive)...
        blocks = [
            ''.join(sorted(block, key=str.lower)) for block in blocks
        ]
        # ...and write the result back to README.md.
        sorted_file.write(''.join(blocks))
    # Finally, re-sort the '##' sections.
    sort_blocks()


if __name__ == "__main__":
    main()

View File

@@ -10,179 +10,20 @@ from typing import TypedDict
from jinja2 import Environment, FileSystemLoader
from readme_parser import parse_readme, slugify
# Thematic grouping of categories. Each category name must match exactly
# as it appears in README.md (the ## heading text); a rename or addition
# in README.md therefore requires a matching update here. The trailing
# "Resources" group is a placeholder populated dynamically at build time.
SECTION_GROUPS: list[tuple[str, list[str]]] = [
    (
        "Web & API",
        [
            "Admin Panels",
            "CMS",
            "Email",
            "Static Site Generator",
            "URL Manipulation",
            "Web Frameworks",
            "RESTful API",
            "GraphQL",
            "WebSocket",
            "ASGI Servers",
            "WSGI Servers",
            "HTTP Clients",
            "Template Engine",
            "Web Asset Management",
            "Web Content Extracting",
            "Web Crawling",
        ],
    ),
    (
        "AI & ML",
        [
            "AI and Agents",
            "Machine Learning",
            "Deep Learning",
            "Computer Vision",
            "Natural Language Processing",
            "Recommender Systems",
            "Robotics",
        ],
    ),
    (
        "Data & Science",
        [
            "Data Analysis",
            "Data Validation",
            "Data Visualization",
            "Geolocation",
            "Science",
            "Quantum Computing",
        ],
    ),
    (
        "DevOps & Infrastructure",
        [
            "DevOps Tools",
            "Distributed Computing",
            "Task Queues",
            "Job Scheduler",
            "Serverless Frameworks",
            "Logging",
            "Processes",
            "Shell",
            "Network Virtualization",
            "RPC Servers",
        ],
    ),
    (
        "Database & Storage",
        [
            "Database",
            "Database Drivers",
            "ORM",
            "Caching",
            "Search",
            "Serialization",
        ],
    ),
    (
        "Development Tools",
        [
            "Testing",
            "Debugging Tools",
            "Code Analysis",
            "Build Tools",
            "Algorithms and Design Patterns",
            "Refactoring",
            "Documentation",
            "Editor Plugins and IDEs",
            "Interactive Interpreter",
        ],
    ),
    (
        "CLI & GUI",
        [
            "Command-line Interface Development",
            "Command-line Tools",
            "GUI Development",
        ],
    ),
    (
        "Content & Media",
        [
            "Audio",
            "Video",
            "Game Development",
            "Image Processing",
            "Internationalization",
            "HTML Manipulation",
            "Text Processing",
            "Specific Formats Processing",
            "File Manipulation",
            "Downloader",
        ],
    ),
    (
        "System & Runtime",
        [
            "Asynchronous Programming",
            "Environment Management",
            "Package Management",
            "Package Repositories",
            "Date and Time",
            "Distribution",
            "Hardware",
            "Implementations",
            "Microsoft Windows",
            "Built-in Classes Enhancement",
            "Functional Programming",
            "Configuration Files",
        ],
    ),
    (
        "Security & Auth",
        [
            "Authentication",
            "Cryptography",
            "Penetration Testing",
            "Permissions",
        ],
    ),
    ("Resources", []),  # Filled dynamically from parsed resources
]
def group_categories(
categories: list[dict],
parsed_groups: list[dict],
resources: list[dict],
) -> list[dict]:
"""Organize categories and resources into thematic section groups."""
cat_by_name = {c["name"]: c for c in categories}
groups = []
grouped_names: set[str] = set()
"""Combine parsed groups with resources for template rendering."""
groups = list(parsed_groups)
for group_name, cat_names in SECTION_GROUPS:
grouped_names.update(cat_names)
if group_name == "Resources":
group_cats = list(resources)
else:
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
if group_cats:
groups.append(
{
"name": group_name,
"slug": slugify(group_name),
"categories": group_cats,
}
)
# Any categories not in a group go into "Other"
ungrouped = [c for c in categories if c["name"] not in grouped_names]
if ungrouped:
if resources:
groups.append(
{
"name": "Other",
"slug": "other",
"categories": ungrouped,
"name": "Resources",
"slug": slugify("Resources"),
"categories": list(resources),
}
)
@@ -295,11 +136,11 @@ def build(repo_root: str) -> None:
subtitle = stripped
break
categories, resources = parse_readme(readme_text)
# All fields pre-computed: entry_count, content_html, preview, description
parsed_groups, resources = parse_readme(readme_text)
categories = [cat for g in parsed_groups for cat in g["categories"]]
total_entries = sum(c["entry_count"] for c in categories)
groups = group_categories(categories, resources)
groups = group_categories(parsed_groups, resources)
entries = extract_entries(categories, groups)
stars_data = load_stars(website / "data" / "github_stars.json")
@@ -344,7 +185,7 @@ def build(repo_root: str) -> None:
shutil.copy(repo / "README.md", site_dir / "llms.txt")
print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories + {len(resources)} resources")
print(f"Total entries: {total_entries}")
print(f"Output: {site_dir}")

View File

@@ -32,6 +32,12 @@ class ParsedSection(TypedDict):
content_html: str # rendered HTML, properly escaped
class ParsedGroup(TypedDict):
name: str
slug: str
categories: list[ParsedSection]
# --- Slugify ----------------------------------------------------------------
_SLUG_NON_ALNUM_RE = re.compile(r"[^a-z0-9\s-]")
@@ -305,6 +311,25 @@ def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str:
# --- Section splitting -------------------------------------------------------
def _build_section(name: str, body: list[SyntaxTreeNode]) -> ParsedSection:
    """Build a ParsedSection from a heading name and its body nodes."""
    description = _extract_description(body)
    # When a description paragraph was found it is the first body node;
    # everything after it is the section content.
    nodes = body[1:] if description else body
    parsed_entries = _parse_section_entries(nodes)
    # Count main entries plus every "also see" cross-reference.
    total = len(parsed_entries)
    for entry in parsed_entries:
        total += len(entry["also_see"])
    return ParsedSection(
        name=name,
        slug=slugify(name),
        description=description,
        entries=parsed_entries,
        entry_count=total,
        preview=", ".join(entry["name"] for entry in parsed_entries[:4]),
        content_html=_render_section_html(nodes),
    )
def _group_by_h2(
nodes: list[SyntaxTreeNode],
) -> list[ParsedSection]:
@@ -317,22 +342,7 @@ def _group_by_h2(
nonlocal current_name
if current_name is None:
return
desc = _extract_description(current_body)
content_nodes = current_body[1:] if desc else current_body
entries = _parse_section_entries(content_nodes)
entry_count = len(entries) + sum(len(e["also_see"]) for e in entries)
preview = ", ".join(e["name"] for e in entries[:4])
content_html = _render_section_html(content_nodes)
sections.append(ParsedSection(
name=current_name,
slug=slugify(current_name),
description=desc,
entries=entries,
entry_count=entry_count,
preview=preview,
content_html=content_html,
))
sections.append(_build_section(current_name, current_body))
current_name = None
for node in nodes:
@@ -347,10 +357,86 @@ def _group_by_h2(
return sections
def parse_readme(text: str) -> tuple[list[ParsedSection], list[ParsedSection]]:
"""Parse README.md text into categories and resources.
def _is_bold_marker(node: SyntaxTreeNode) -> str | None:
    """Detect a bold-only paragraph used as a group marker.

    Pattern: a paragraph whose only content is **Group Name** (possibly
    surrounded by empty text nodes in the AST).

    Returns the group name text, or None if not a group marker.
    """
    if node.type != "paragraph":
        return None
    for child in node.children:
        if child.type != "inline":
            continue
        # markdown-it inserts empty text nodes around <strong>; drop them.
        content = [
            c for c in child.children
            if c.type != "text" or c.content != ""
        ]
        if len(content) == 1 and content[0].type == "strong":
            return render_inline_text(content[0].children)
    return None
def _parse_grouped_sections(
    nodes: list[SyntaxTreeNode],
) -> list[ParsedGroup]:
    """Parse nodes into groups of categories using bold markers as group boundaries.

    Bold-only paragraphs (**Group Name**) delimit groups. H2 headings under
    each bold marker become categories within that group. Categories that
    appear before any bold marker go into an "Other" group.
    """
    result: list[ParsedGroup] = []
    group_name: str | None = None
    group_cats: list[ParsedSection] = []
    cat_name: str | None = None
    cat_body: list[SyntaxTreeNode] = []

    def close_category() -> None:
        # Finalize the category currently being accumulated, if any.
        nonlocal cat_name
        if cat_name is not None:
            group_cats.append(_build_section(cat_name, cat_body))
            cat_name = None

    def close_group() -> None:
        # Finalize the current group; groups without categories are dropped.
        nonlocal group_name, group_cats
        if group_cats:
            label = group_name if group_name else "Other"
            result.append(ParsedGroup(
                name=label,
                slug=slugify(label),
                categories=list(group_cats),
            ))
        group_name = None
        group_cats = []

    for node in nodes:
        marker = _is_bold_marker(node)
        if marker is not None:
            # A new group starts: close out whatever came before it.
            close_category()
            close_group()
            group_name = marker
            cat_body = []
        elif node.type == "heading" and node.tag == "h2":
            # A new category starts within the current group.
            close_category()
            cat_name = _heading_text(node)
            cat_body = []
        elif cat_name is not None:
            # Body node belonging to the category currently being read.
            cat_body.append(node)
    close_category()
    close_group()
    return result
def parse_readme(text: str) -> tuple[list[ParsedGroup], list[ParsedSection]]:
"""Parse README.md text into grouped categories and resources.
Returns (groups, resources) where groups is a list of ParsedGroup dicts
containing nested categories, and resources is a flat list of ParsedSection.
"""
md = MarkdownIt("commonmark")
tokens = md.parse(text)
@@ -382,7 +468,7 @@ def parse_readme(text: str) -> tuple[list[ParsedSection], list[ParsedSection]]:
res_end = contributing_idx or len(children)
res_nodes = children[resources_idx + 1 : res_end]
categories = _group_by_h2(cat_nodes)
groups = _parse_grouped_sections(cat_nodes)
resources = _group_by_h2(res_nodes)
return categories, resources
return groups, resources

View File

@@ -48,28 +48,33 @@ class TestSlugify:
class TestGroupCategories:
def test_groups_known_categories(self):
cats = [
{"name": "Web Frameworks", "slug": "web-frameworks"},
{"name": "Testing", "slug": "testing"},
def test_appends_resources(self):
parsed_groups = [
{"name": "G1", "slug": "g1", "categories": [{"name": "Cat1"}]},
]
groups = group_categories(cats, [])
group_names = [g["name"] for g in groups]
assert "Web & API" in group_names
assert "Development Tools" in group_names
def test_ungrouped_go_to_other(self):
cats = [{"name": "Unknown Category", "slug": "unknown-category"}]
groups = group_categories(cats, [])
group_names = [g["name"] for g in groups]
assert "Other" in group_names
def test_resources_grouped(self):
resources = [{"name": "Newsletters", "slug": "newsletters"}]
groups = group_categories([], resources)
groups = group_categories(parsed_groups, resources)
group_names = [g["name"] for g in groups]
assert "G1" in group_names
assert "Resources" in group_names
def test_no_resources_no_extra_group(self):
parsed_groups = [
{"name": "G1", "slug": "g1", "categories": [{"name": "Cat1"}]},
]
groups = group_categories(parsed_groups, [])
assert len(groups) == 1
assert groups[0]["name"] == "G1"
def test_preserves_group_order(self):
parsed_groups = [
{"name": "Second", "slug": "second", "categories": [{"name": "C2"}]},
{"name": "First", "slug": "first", "categories": [{"name": "C1"}]},
]
groups = group_categories(parsed_groups, [])
assert groups[0]["name"] == "Second"
assert groups[1]["name"] == "First"
# ---------------------------------------------------------------------------
# build (integration)
@@ -114,6 +119,8 @@ class TestBuild:
---
**Tools**
## Widgets
_Widget libraries._
@@ -176,10 +183,14 @@ class TestBuild:
---
**Group A**
## Alpha
- [a](https://x.com) - A.
**Group B**
## Beta
- [b](https://x.com) - B.
@@ -194,6 +205,8 @@ class TestBuild:
index_html = (tmp_path / "website" / "output" / "index.html").read_text()
assert "Alpha" in index_html
assert "Beta" in index_html
assert "Group A" in index_html
assert "Group B" in index_html
def test_index_contains_preview_text(self, tmp_path):
readme = textwrap.dedent("""\

View File

@@ -115,27 +115,74 @@ MINIMAL_README = textwrap.dedent("""\
""")
GROUPED_README = textwrap.dedent("""\
# Awesome Python
Some intro text.
---
**Group One**
## Alpha
_Libraries for alpha stuff._
- [lib-a](https://example.com/a) - Does A.
- [lib-b](https://example.com/b) - Does B.
**Group Two**
## Beta
_Tools for beta._
- [lib-c](https://example.com/c) - Does C.
## Gamma
- [lib-d](https://example.com/d) - Does D.
# Resources
Where to discover resources.
## Newsletters
- [News One](https://example.com/n1)
# Contributing
Please contribute!
""")
class TestParseReadmeSections:
def test_category_count(self):
cats, resources = parse_readme(MINIMAL_README)
assert len(cats) == 2
def test_ungrouped_categories_go_to_other(self):
groups, resources = parse_readme(MINIMAL_README)
assert len(groups) == 1
assert groups[0]["name"] == "Other"
assert len(groups[0]["categories"]) == 2
def test_resource_count(self):
cats, resources = parse_readme(MINIMAL_README)
assert len(resources) == 2
def test_category_names(self):
cats, _ = parse_readme(MINIMAL_README)
def test_ungrouped_category_names(self):
groups, _ = parse_readme(MINIMAL_README)
cats = groups[0]["categories"]
assert cats[0]["name"] == "Alpha"
assert cats[1]["name"] == "Beta"
def test_resource_count(self):
_, resources = parse_readme(MINIMAL_README)
assert len(resources) == 2
def test_category_slugs(self):
cats, _ = parse_readme(MINIMAL_README)
groups, _ = parse_readme(MINIMAL_README)
cats = groups[0]["categories"]
assert cats[0]["slug"] == "alpha"
assert cats[1]["slug"] == "beta"
def test_category_description(self):
cats, _ = parse_readme(MINIMAL_README)
groups, _ = parse_readme(MINIMAL_README)
cats = groups[0]["categories"]
assert cats[0]["description"] == "Libraries for alpha stuff."
assert cats[1]["description"] == "Tools for beta."
@@ -145,13 +192,16 @@ class TestParseReadmeSections:
assert resources[1]["name"] == "Podcasts"
def test_contributing_skipped(self):
cats, resources = parse_readme(MINIMAL_README)
all_names = [c["name"] for c in cats] + [r["name"] for r in resources]
groups, resources = parse_readme(MINIMAL_README)
all_names = []
for g in groups:
all_names.extend(c["name"] for c in g["categories"])
all_names.extend(r["name"] for r in resources)
assert "Contributing" not in all_names
def test_no_separator(self):
cats, resources = parse_readme("# Just a heading\n\nSome text.\n")
assert cats == []
groups, resources = parse_readme("# Just a heading\n\nSome text.\n")
assert groups == []
assert resources == []
def test_no_description(self):
@@ -174,7 +224,8 @@ class TestParseReadmeSections:
Done.
""")
cats, resources = parse_readme(readme)
groups, resources = parse_readme(readme)
cats = groups[0]["categories"]
assert cats[0]["description"] == ""
assert cats[0]["entries"][0]["name"] == "item"
@@ -194,10 +245,114 @@ class TestParseReadmeSections:
Done.
""")
cats, _ = parse_readme(readme)
groups, _ = parse_readme(readme)
cats = groups[0]["categories"]
assert cats[0]["description"] == "Algorithms. Also see awesome-algos."
class TestParseGroupedReadme:
def test_group_count(self):
groups, _ = parse_readme(GROUPED_README)
assert len(groups) == 2
def test_group_names(self):
groups, _ = parse_readme(GROUPED_README)
assert groups[0]["name"] == "Group One"
assert groups[1]["name"] == "Group Two"
def test_group_slugs(self):
groups, _ = parse_readme(GROUPED_README)
assert groups[0]["slug"] == "group-one"
assert groups[1]["slug"] == "group-two"
def test_group_one_has_one_category(self):
groups, _ = parse_readme(GROUPED_README)
assert len(groups[0]["categories"]) == 1
assert groups[0]["categories"][0]["name"] == "Alpha"
def test_group_two_has_two_categories(self):
groups, _ = parse_readme(GROUPED_README)
assert len(groups[1]["categories"]) == 2
assert groups[1]["categories"][0]["name"] == "Beta"
assert groups[1]["categories"][1]["name"] == "Gamma"
def test_resources_still_parsed(self):
_, resources = parse_readme(GROUPED_README)
assert len(resources) == 1
assert resources[0]["name"] == "Newsletters"
def test_empty_group_skipped(self):
readme = textwrap.dedent("""\
# T
---
**Empty**
**HasCats**
## Cat
- [x](https://x.com) - X.
# Contributing
Done.
""")
groups, _ = parse_readme(readme)
assert len(groups) == 1
assert groups[0]["name"] == "HasCats"
def test_bold_with_extra_text_not_group_marker(self):
readme = textwrap.dedent("""\
# T
---
**Note:** This is not a group marker.
## Cat
- [x](https://x.com) - X.
# Contributing
Done.
""")
groups, _ = parse_readme(readme)
# "Note:" has text after the strong node, so it's not a group marker
# Category goes into "Other"
assert len(groups) == 1
assert groups[0]["name"] == "Other"
def test_categories_before_any_group_marker(self):
readme = textwrap.dedent("""\
# T
---
## Orphan
- [x](https://x.com) - X.
**A Group**
## Grouped
- [y](https://x.com) - Y.
# Contributing
Done.
""")
groups, _ = parse_readme(readme)
assert len(groups) == 2
assert groups[0]["name"] == "Other"
assert groups[0]["categories"][0]["name"] == "Orphan"
assert groups[1]["name"] == "A Group"
assert groups[1]["categories"][0]["name"] == "Grouped"
def _content_nodes(md_text: str) -> list[SyntaxTreeNode]:
"""Helper: parse markdown and return all block nodes."""
md = MarkdownIt("commonmark")
@@ -283,7 +438,8 @@ class TestParseSectionEntries:
Done.
""")
cats, _ = parse_readme(readme)
groups, _ = parse_readme(readme)
cats = groups[0]["categories"]
# 2 main entries + 1 also_see = 3
assert cats[0]["entry_count"] == 3
@@ -305,7 +461,8 @@ class TestParseSectionEntries:
Done.
""")
cats, _ = parse_readme(readme)
groups, _ = parse_readme(readme)
cats = groups[0]["categories"]
assert cats[0]["preview"] == "alpha, beta, gamma, delta"
def test_description_html_escapes_xss(self):
@@ -366,10 +523,17 @@ class TestParseRealReadme:
readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
with open(readme_path, encoding="utf-8") as f:
self.readme_text = f.read()
self.cats, self.resources = parse_readme(self.readme_text)
self.groups, self.resources = parse_readme(self.readme_text)
self.cats = [c for g in self.groups for c in g["categories"]]
def test_at_least_83_categories(self):
assert len(self.cats) >= 83
def test_at_least_11_groups(self):
assert len(self.groups) >= 11
def test_first_group_is_ai_ml(self):
assert self.groups[0]["name"] == "AI & ML"
def test_at_least_76_categories(self):
assert len(self.cats) >= 76
def test_resources_has_newsletters_and_podcasts(self):
names = [r["name"] for r in self.resources]
@@ -380,21 +544,17 @@ class TestParseRealReadme:
all_names = [c["name"] for c in self.cats] + [r["name"] for r in self.resources]
assert "Contributing" not in all_names
def test_first_category_is_admin_panels(self):
assert self.cats[0]["name"] == "Admin Panels"
assert self.cats[0]["slug"] == "admin-panels"
def test_first_category_is_ai_and_agents(self):
assert self.cats[0]["name"] == "AI and Agents"
assert self.cats[0]["slug"] == "ai-and-agents"
def test_last_category_is_wsgi_servers(self):
assert self.cats[-1]["name"] == "WSGI Servers"
assert self.cats[-1]["slug"] == "wsgi-servers"
def test_restful_api_slug(self):
def test_web_apis_slug(self):
slugs = [c["slug"] for c in self.cats]
assert "restful-api" in slugs
assert "web-apis" in slugs
def test_descriptions_extracted(self):
admin = self.cats[0]
assert admin["description"] == "Libraries for administrative interfaces."
ai = next(c for c in self.cats if c["name"] == "AI and Agents")
assert "AI applications" in ai["description"]
def test_entry_counts_nonzero(self):
for cat in self.cats:
@@ -422,3 +582,8 @@ class TestParseRealReadme:
algos = next(c for c in self.cats if c["name"] == "Algorithms and Design Patterns")
assert "awesome-algorithms" in algos["description"]
assert "https://" not in algos["description"]
def test_miscellaneous_in_own_group(self):
misc_group = next((g for g in self.groups if g["name"] == "Miscellaneous"), None)
assert misc_group is not None
assert any(c["name"] == "Miscellaneous" for c in misc_group["categories"])