refactor(parser): remove resources parsing, preview, and content_html fields

parse_readme now returns list[ParsedGroup] instead of a tuple. The resources section (Newsletters, Podcasts), preview string, and content_html rendering are no longer produced by the parser or consumed by the build. Removes _render_section_html, _group_by_h2, and the associated dead code and tests. Co-Authored-By: Claude <noreply@anthropic.com>
2026-05-21 13:28:31 +08:00 · 2026-03-23 01:43:19 +08:00
parent c5dd3060ef
commit 25a3f4d903
4 changed files with 37 additions and 253 deletions
@@ -29,8 +29,6 @@ class ParsedSection(TypedDict):
    description: str  # plain text, links resolved to text
    entries: list[ParsedEntry]
    entry_count: int
-    preview: str
-    content_html: str  # rendered HTML, properly escaped


 class ParsedGroup(TypedDict):
@@ -258,69 +256,6 @@ def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEn
    return entries


-# --- Content HTML rendering --------------------------------------------------
-
-
-def _render_bullet_list_html(
-    bullet_list: SyntaxTreeNode,
-    *,
-    is_sub: bool = False,
-) -> str:
-    """Render a bullet_list node to HTML with entry/entry-sub/subcat classes."""
-    out: list[str] = []
-
-    for list_item in bullet_list.children:
-        if list_item.type != "list_item":
-            continue
-
-        inline = _find_inline(list_item)
-        if inline is None:
-            continue
-
-        first_link = _find_first_link(inline)
-
-        if first_link is None or not _is_leading_link(inline, first_link):
-            # Subcategory label (plain text or text-before-link)
-            label = str(escape(render_inline_text(inline.children)))
-            out.append(f'<div class="subcat">{label}</div>')
-            nested = _find_child(list_item, "bullet_list")
-            if nested:
-                out.append(_render_bullet_list_html(nested, is_sub=False))
-            continue
-
-        # Entry with a link
-        name = str(escape(render_inline_text(first_link.children)))
-        url = str(escape(first_link.attrGet("href") or ""))
-
-        if is_sub:
-            out.append(f'<div class="entry-sub"><a href="{url}">{name}</a></div>')
-        else:
-            desc = _extract_description_html(inline, first_link)
-            if desc:
-                out.append(
-                    f'<div class="entry"><a href="{url}">{name}</a>'
-                    f'<span class="sep">&mdash;</span>{desc}</div>'
-                )
-            else:
-                out.append(f'<div class="entry"><a href="{url}">{name}</a></div>')
-
-        # Nested items under an entry with a link are sub-entries
-        nested = _find_child(list_item, "bullet_list")
-        if nested:
-            out.append(_render_bullet_list_html(nested, is_sub=True))
-
-    return "\n".join(out)
-
-
-def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str:
-    """Render a section's content nodes to HTML."""
-    parts: list[str] = []
-    for node in content_nodes:
-        if node.type == "bullet_list":
-            parts.append(_render_bullet_list_html(node))
-    return "\n".join(parts)
-
-
 # --- Section splitting -------------------------------------------------------


@@ -330,45 +265,15 @@ def _build_section(name: str, body: list[SyntaxTreeNode]) -> ParsedSection:
    content_nodes = body[1:] if desc else body
    entries = _parse_section_entries(content_nodes)
    entry_count = len(entries) + sum(len(e["also_see"]) for e in entries)
-    preview = ", ".join(e["name"] for e in entries[:4])
-    content_html = _render_section_html(content_nodes)
    return ParsedSection(
        name=name,
        slug=slugify(name),
        description=desc,
        entries=entries,
        entry_count=entry_count,
-        preview=preview,
-        content_html=content_html,
    )


-def _group_by_h2(
-    nodes: list[SyntaxTreeNode],
-) -> list[ParsedSection]:
-    """Group AST nodes into sections by h2 headings."""
-    sections: list[ParsedSection] = []
-    current_name: str | None = None
-    current_body: list[SyntaxTreeNode] = []
-
-    def flush() -> None:
-        nonlocal current_name
-        if current_name is None:
-            return
-        sections.append(_build_section(current_name, current_body))
-        current_name = None
-
-    for node in nodes:
-        if node.type == "heading" and node.tag == "h2":
-            flush()
-            current_name = _heading_text(node)
-            current_body = []
-        elif current_name is not None:
-            current_body.append(node)
-
-    flush()
-    return sections
-

 def _is_bold_marker(node: SyntaxTreeNode) -> str | None:
    """Detect a bold-only paragraph used as a group marker.
@@ -445,43 +350,30 @@ def _parse_grouped_sections(
    return groups


-def parse_readme(text: str) -> tuple[list[ParsedGroup], list[ParsedSection]]:
-    """Parse README.md text into grouped categories and resources.
+def parse_readme(text: str) -> list[ParsedGroup]:
+    """Parse README.md text into grouped categories.

-    Returns (groups, resources) where groups is a list of ParsedGroup dicts
-    containing nested categories, and resources is a flat list of ParsedSection.
+    Returns a list of ParsedGroup dicts containing nested categories.
+    Content between the thematic break (---) and # Resources or # Contributing
+    is parsed as categories grouped by bold markers (**Group Name**).
    """
    md = MarkdownIt("commonmark")
    tokens = md.parse(text)
    root = SyntaxTreeNode(tokens)
    children = root.children

-    # Find thematic break (---), # Resources, and # Contributing in one pass
+    # Find thematic break (---) and section boundaries in one pass
    hr_idx = None
-    resources_idx = None
-    contributing_idx = None
+    cat_end_idx = None
    for i, node in enumerate(children):
        if hr_idx is None and node.type == "hr":
            hr_idx = i
        elif node.type == "heading" and node.tag == "h1":
            text_content = _heading_text(node)
-            if text_content == "Resources":
-                resources_idx = i
-            elif text_content == "Contributing":
-                contributing_idx = i
+            if cat_end_idx is None and text_content in ("Resources", "Contributing"):
+                cat_end_idx = i
    if hr_idx is None:
-        return [], []
+        return []

-    # Slice into category and resource ranges
-    cat_end = resources_idx or contributing_idx or len(children)
-    cat_nodes = children[hr_idx + 1 : cat_end]
-
-    res_nodes: list[SyntaxTreeNode] = []
-    if resources_idx is not None:
-        res_end = contributing_idx or len(children)
-        res_nodes = children[resources_idx + 1 : res_end]
-
-    groups = _parse_grouped_sections(cat_nodes)
-    resources = _group_by_h2(res_nodes)
-
-    return groups, resources
+    cat_nodes = children[hr_idx + 1 : cat_end_idx or len(children)]
+    return _parse_grouped_sections(cat_nodes)