mirror of
https://github.com/vinta/awesome-python.git
synced 2026-03-24 01:24:43 +08:00
refactor(parser): remove resources parsing, preview, and content_html fields
parse_readme now returns list[ParsedGroup] instead of a tuple. The resources section (Newsletters, Podcasts), preview string, and content_html rendering are no longer produced by the parser or consumed by the build. Removes _render_section_html, _group_by_h2, and the associated dead code and tests. Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -29,8 +29,6 @@ class ParsedSection(TypedDict):
|
||||
description: str # plain text, links resolved to text
|
||||
entries: list[ParsedEntry]
|
||||
entry_count: int
|
||||
preview: str
|
||||
content_html: str # rendered HTML, properly escaped
|
||||
|
||||
|
||||
class ParsedGroup(TypedDict):
|
||||
@@ -258,69 +256,6 @@ def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEn
|
||||
return entries
|
||||
|
||||
|
||||
# --- Content HTML rendering --------------------------------------------------
|
||||
|
||||
|
||||
def _render_bullet_list_html(
|
||||
bullet_list: SyntaxTreeNode,
|
||||
*,
|
||||
is_sub: bool = False,
|
||||
) -> str:
|
||||
"""Render a bullet_list node to HTML with entry/entry-sub/subcat classes."""
|
||||
out: list[str] = []
|
||||
|
||||
for list_item in bullet_list.children:
|
||||
if list_item.type != "list_item":
|
||||
continue
|
||||
|
||||
inline = _find_inline(list_item)
|
||||
if inline is None:
|
||||
continue
|
||||
|
||||
first_link = _find_first_link(inline)
|
||||
|
||||
if first_link is None or not _is_leading_link(inline, first_link):
|
||||
# Subcategory label (plain text or text-before-link)
|
||||
label = str(escape(render_inline_text(inline.children)))
|
||||
out.append(f'<div class="subcat">{label}</div>')
|
||||
nested = _find_child(list_item, "bullet_list")
|
||||
if nested:
|
||||
out.append(_render_bullet_list_html(nested, is_sub=False))
|
||||
continue
|
||||
|
||||
# Entry with a link
|
||||
name = str(escape(render_inline_text(first_link.children)))
|
||||
url = str(escape(first_link.attrGet("href") or ""))
|
||||
|
||||
if is_sub:
|
||||
out.append(f'<div class="entry-sub"><a href="{url}">{name}</a></div>')
|
||||
else:
|
||||
desc = _extract_description_html(inline, first_link)
|
||||
if desc:
|
||||
out.append(
|
||||
f'<div class="entry"><a href="{url}">{name}</a>'
|
||||
f'<span class="sep">—</span>{desc}</div>'
|
||||
)
|
||||
else:
|
||||
out.append(f'<div class="entry"><a href="{url}">{name}</a></div>')
|
||||
|
||||
# Nested items under an entry with a link are sub-entries
|
||||
nested = _find_child(list_item, "bullet_list")
|
||||
if nested:
|
||||
out.append(_render_bullet_list_html(nested, is_sub=True))
|
||||
|
||||
return "\n".join(out)
|
||||
|
||||
|
||||
def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str:
|
||||
"""Render a section's content nodes to HTML."""
|
||||
parts: list[str] = []
|
||||
for node in content_nodes:
|
||||
if node.type == "bullet_list":
|
||||
parts.append(_render_bullet_list_html(node))
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
# --- Section splitting -------------------------------------------------------
|
||||
|
||||
|
||||
@@ -330,45 +265,15 @@ def _build_section(name: str, body: list[SyntaxTreeNode]) -> ParsedSection:
|
||||
content_nodes = body[1:] if desc else body
|
||||
entries = _parse_section_entries(content_nodes)
|
||||
entry_count = len(entries) + sum(len(e["also_see"]) for e in entries)
|
||||
preview = ", ".join(e["name"] for e in entries[:4])
|
||||
content_html = _render_section_html(content_nodes)
|
||||
return ParsedSection(
|
||||
name=name,
|
||||
slug=slugify(name),
|
||||
description=desc,
|
||||
entries=entries,
|
||||
entry_count=entry_count,
|
||||
preview=preview,
|
||||
content_html=content_html,
|
||||
)
|
||||
|
||||
|
||||
def _group_by_h2(
|
||||
nodes: list[SyntaxTreeNode],
|
||||
) -> list[ParsedSection]:
|
||||
"""Group AST nodes into sections by h2 headings."""
|
||||
sections: list[ParsedSection] = []
|
||||
current_name: str | None = None
|
||||
current_body: list[SyntaxTreeNode] = []
|
||||
|
||||
def flush() -> None:
|
||||
nonlocal current_name
|
||||
if current_name is None:
|
||||
return
|
||||
sections.append(_build_section(current_name, current_body))
|
||||
current_name = None
|
||||
|
||||
for node in nodes:
|
||||
if node.type == "heading" and node.tag == "h2":
|
||||
flush()
|
||||
current_name = _heading_text(node)
|
||||
current_body = []
|
||||
elif current_name is not None:
|
||||
current_body.append(node)
|
||||
|
||||
flush()
|
||||
return sections
|
||||
|
||||
|
||||
def _is_bold_marker(node: SyntaxTreeNode) -> str | None:
|
||||
"""Detect a bold-only paragraph used as a group marker.
|
||||
@@ -445,43 +350,30 @@ def _parse_grouped_sections(
|
||||
return groups
|
||||
|
||||
|
||||
def parse_readme(text: str) -> tuple[list[ParsedGroup], list[ParsedSection]]:
|
||||
"""Parse README.md text into grouped categories and resources.
|
||||
def parse_readme(text: str) -> list[ParsedGroup]:
|
||||
"""Parse README.md text into grouped categories.
|
||||
|
||||
Returns (groups, resources) where groups is a list of ParsedGroup dicts
|
||||
containing nested categories, and resources is a flat list of ParsedSection.
|
||||
Returns a list of ParsedGroup dicts containing nested categories.
|
||||
Content between the thematic break (---) and # Resources or # Contributing
|
||||
is parsed as categories grouped by bold markers (**Group Name**).
|
||||
"""
|
||||
md = MarkdownIt("commonmark")
|
||||
tokens = md.parse(text)
|
||||
root = SyntaxTreeNode(tokens)
|
||||
children = root.children
|
||||
|
||||
# Find thematic break (---), # Resources, and # Contributing in one pass
|
||||
# Find thematic break (---) and section boundaries in one pass
|
||||
hr_idx = None
|
||||
resources_idx = None
|
||||
contributing_idx = None
|
||||
cat_end_idx = None
|
||||
for i, node in enumerate(children):
|
||||
if hr_idx is None and node.type == "hr":
|
||||
hr_idx = i
|
||||
elif node.type == "heading" and node.tag == "h1":
|
||||
text_content = _heading_text(node)
|
||||
if text_content == "Resources":
|
||||
resources_idx = i
|
||||
elif text_content == "Contributing":
|
||||
contributing_idx = i
|
||||
if cat_end_idx is None and text_content in ("Resources", "Contributing"):
|
||||
cat_end_idx = i
|
||||
if hr_idx is None:
|
||||
return [], []
|
||||
return []
|
||||
|
||||
# Slice into category and resource ranges
|
||||
cat_end = resources_idx or contributing_idx or len(children)
|
||||
cat_nodes = children[hr_idx + 1 : cat_end]
|
||||
|
||||
res_nodes: list[SyntaxTreeNode] = []
|
||||
if resources_idx is not None:
|
||||
res_end = contributing_idx or len(children)
|
||||
res_nodes = children[resources_idx + 1 : res_end]
|
||||
|
||||
groups = _parse_grouped_sections(cat_nodes)
|
||||
resources = _group_by_h2(res_nodes)
|
||||
|
||||
return groups, resources
|
||||
cat_nodes = children[hr_idx + 1 : cat_end_idx or len(children)]
|
||||
return _parse_grouped_sections(cat_nodes)
|
||||
|
||||
Reference in New Issue
Block a user