diff --git a/website/build.py b/website/build.py index 6ff46df0..f2057472 100644 --- a/website/build.py +++ b/website/build.py @@ -139,7 +139,7 @@ def build(repo_root: str) -> None: subtitle = stripped break - parsed_groups, _ = parse_readme(readme_text) + parsed_groups = parse_readme(readme_text) categories = [cat for g in parsed_groups for cat in g["categories"]] total_entries = sum(c["entry_count"] for c in categories) @@ -172,7 +172,6 @@ def build(repo_root: str) -> None: (site_dir / "index.html").write_text( tpl_index.render( categories=categories, - groups=parsed_groups, subtitle=subtitle, entries=entries, total_entries=total_entries, diff --git a/website/readme_parser.py b/website/readme_parser.py index 91b0faf2..4f36ed77 100644 --- a/website/readme_parser.py +++ b/website/readme_parser.py @@ -29,8 +29,6 @@ class ParsedSection(TypedDict): description: str # plain text, links resolved to text entries: list[ParsedEntry] entry_count: int - preview: str - content_html: str # rendered HTML, properly escaped class ParsedGroup(TypedDict): @@ -258,69 +256,6 @@ def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEn return entries -# --- Content HTML rendering -------------------------------------------------- - - -def _render_bullet_list_html( - bullet_list: SyntaxTreeNode, - *, - is_sub: bool = False, -) -> str: - """Render a bullet_list node to HTML with entry/entry-sub/subcat classes.""" - out: list[str] = [] - - for list_item in bullet_list.children: - if list_item.type != "list_item": - continue - - inline = _find_inline(list_item) - if inline is None: - continue - - first_link = _find_first_link(inline) - - if first_link is None or not _is_leading_link(inline, first_link): - # Subcategory label (plain text or text-before-link) - label = str(escape(render_inline_text(inline.children))) - out.append(f'
{label}
') - nested = _find_child(list_item, "bullet_list") - if nested: - out.append(_render_bullet_list_html(nested, is_sub=False)) - continue - - # Entry with a link - name = str(escape(render_inline_text(first_link.children))) - url = str(escape(first_link.attrGet("href") or "")) - - if is_sub: - out.append(f'
{name}
') - else: - desc = _extract_description_html(inline, first_link) - if desc: - out.append( - f'
{name}' - f'{desc}
' - ) - else: - out.append(f'
{name}
') - - # Nested items under an entry with a link are sub-entries - nested = _find_child(list_item, "bullet_list") - if nested: - out.append(_render_bullet_list_html(nested, is_sub=True)) - - return "\n".join(out) - - -def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str: - """Render a section's content nodes to HTML.""" - parts: list[str] = [] - for node in content_nodes: - if node.type == "bullet_list": - parts.append(_render_bullet_list_html(node)) - return "\n".join(parts) - - # --- Section splitting ------------------------------------------------------- @@ -330,45 +265,15 @@ def _build_section(name: str, body: list[SyntaxTreeNode]) -> ParsedSection: content_nodes = body[1:] if desc else body entries = _parse_section_entries(content_nodes) entry_count = len(entries) + sum(len(e["also_see"]) for e in entries) - preview = ", ".join(e["name"] for e in entries[:4]) - content_html = _render_section_html(content_nodes) return ParsedSection( name=name, slug=slugify(name), description=desc, entries=entries, entry_count=entry_count, - preview=preview, - content_html=content_html, ) -def _group_by_h2( - nodes: list[SyntaxTreeNode], -) -> list[ParsedSection]: - """Group AST nodes into sections by h2 headings.""" - sections: list[ParsedSection] = [] - current_name: str | None = None - current_body: list[SyntaxTreeNode] = [] - - def flush() -> None: - nonlocal current_name - if current_name is None: - return - sections.append(_build_section(current_name, current_body)) - current_name = None - - for node in nodes: - if node.type == "heading" and node.tag == "h2": - flush() - current_name = _heading_text(node) - current_body = [] - elif current_name is not None: - current_body.append(node) - - flush() - return sections - def _is_bold_marker(node: SyntaxTreeNode) -> str | None: """Detect a bold-only paragraph used as a group marker. @@ -445,43 +350,30 @@ def _parse_grouped_sections( return groups -def parse_readme(text: str) -> tuple[list[ParsedGroup], list[ParsedSection]]: - """Parse README.md text into grouped categories and resources. +def parse_readme(text: str) -> list[ParsedGroup]: + """Parse README.md text into grouped categories. - Returns (groups, resources) where groups is a list of ParsedGroup dicts - containing nested categories, and resources is a flat list of ParsedSection. + Returns a list of ParsedGroup dicts containing nested categories. + Content between the thematic break (---) and # Resources or # Contributing + is parsed as categories grouped by bold markers (**Group Name**). """ md = MarkdownIt("commonmark") tokens = md.parse(text) root = SyntaxTreeNode(tokens) children = root.children - # Find thematic break (---), # Resources, and # Contributing in one pass + # Find thematic break (---) and section boundaries in one pass hr_idx = None - resources_idx = None - contributing_idx = None + cat_end_idx = None for i, node in enumerate(children): if hr_idx is None and node.type == "hr": hr_idx = i elif node.type == "heading" and node.tag == "h1": text_content = _heading_text(node) - if text_content == "Resources": - resources_idx = i - elif text_content == "Contributing": - contributing_idx = i + if cat_end_idx is None and text_content in ("Resources", "Contributing"): + cat_end_idx = i if hr_idx is None: - return [], [] + return [] - # Slice into category and resource ranges - cat_end = resources_idx or contributing_idx or len(children) - cat_nodes = children[hr_idx + 1 : cat_end] - - res_nodes: list[SyntaxTreeNode] = [] - if resources_idx is not None: - res_end = contributing_idx or len(children) - res_nodes = children[resources_idx + 1 : res_end] - - groups = _parse_grouped_sections(cat_nodes) - resources = _group_by_h2(res_nodes) - - return groups, resources + cat_nodes = children[hr_idx + 1 : cat_end_idx or len(children)] + return _parse_grouped_sections(cat_nodes) diff --git a/website/tests/test_build.py b/website/tests/test_build.py index 0e7eb487..c9d29f45 100644 --- a/website/tests/test_build.py +++ b/website/tests/test_build.py @@ -59,19 +59,13 @@ class TestBuild: ) (tpl_dir / "index.html").write_text( '{% extends "base.html" %}{% block content %}' - "{% for group in groups %}" - '
' - "

{{ group.name }}

" - "{% for cat in group.categories %}" - '
' - "{{ cat.name }}" - "{{ cat.preview }}" - "{{ cat.entry_count }}" - '' + "{% for entry in entries %}" + '
' + "{{ entry.name }}" + "{{ entry.categories | join(', ') }}" + "{{ entry.groups | join(', ') }}" "
" "{% endfor %}" - "
" - "{% endfor %}" "{% endblock %}", encoding="utf-8", ) diff --git a/website/tests/test_readme_parser.py b/website/tests/test_readme_parser.py index d365c45c..cea5cbbf 100644 --- a/website/tests/test_readme_parser.py +++ b/website/tests/test_readme_parser.py @@ -7,7 +7,6 @@ import pytest from readme_parser import ( _parse_section_entries, - _render_section_html, parse_readme, render_inline_html, render_inline_text, @@ -159,50 +158,39 @@ GROUPED_README = textwrap.dedent("""\ class TestParseReadmeSections: def test_ungrouped_categories_go_to_other(self): - groups, resources = parse_readme(MINIMAL_README) + groups = parse_readme(MINIMAL_README) assert len(groups) == 1 assert groups[0]["name"] == "Other" assert len(groups[0]["categories"]) == 2 def test_ungrouped_category_names(self): - groups, _ = parse_readme(MINIMAL_README) + groups = parse_readme(MINIMAL_README) cats = groups[0]["categories"] assert cats[0]["name"] == "Alpha" assert cats[1]["name"] == "Beta" - def test_resource_count(self): - _, resources = parse_readme(MINIMAL_README) - assert len(resources) == 2 - def test_category_slugs(self): - groups, _ = parse_readme(MINIMAL_README) + groups = parse_readme(MINIMAL_README) cats = groups[0]["categories"] assert cats[0]["slug"] == "alpha" assert cats[1]["slug"] == "beta" def test_category_description(self): - groups, _ = parse_readme(MINIMAL_README) + groups = parse_readme(MINIMAL_README) cats = groups[0]["categories"] assert cats[0]["description"] == "Libraries for alpha stuff." assert cats[1]["description"] == "Tools for beta." - def test_resource_names(self): - _, resources = parse_readme(MINIMAL_README) - assert resources[0]["name"] == "Newsletters" - assert resources[1]["name"] == "Podcasts" - def test_contributing_skipped(self): - groups, resources = parse_readme(MINIMAL_README) + groups = parse_readme(MINIMAL_README) all_names = [] for g in groups: all_names.extend(c["name"] for c in g["categories"]) - all_names.extend(r["name"] for r in resources) assert "Contributing" not in all_names def test_no_separator(self): - groups, resources = parse_readme("# Just a heading\n\nSome text.\n") + groups = parse_readme("# Just a heading\n\nSome text.\n") assert groups == [] - assert resources == [] def test_no_description(self): readme = textwrap.dedent("""\ @@ -224,7 +212,7 @@ class TestParseReadmeSections: Done. """) - groups, resources = parse_readme(readme) + groups = parse_readme(readme) cats = groups[0]["categories"] assert cats[0]["description"] == "" assert cats[0]["entries"][0]["name"] == "item" @@ -245,42 +233,37 @@ class TestParseReadmeSections: Done. """) - groups, _ = parse_readme(readme) + groups = parse_readme(readme) cats = groups[0]["categories"] assert cats[0]["description"] == "Algorithms. Also see awesome-algos." class TestParseGroupedReadme: def test_group_count(self): - groups, _ = parse_readme(GROUPED_README) + groups = parse_readme(GROUPED_README) assert len(groups) == 2 def test_group_names(self): - groups, _ = parse_readme(GROUPED_README) + groups = parse_readme(GROUPED_README) assert groups[0]["name"] == "Group One" assert groups[1]["name"] == "Group Two" def test_group_slugs(self): - groups, _ = parse_readme(GROUPED_README) + groups = parse_readme(GROUPED_README) assert groups[0]["slug"] == "group-one" assert groups[1]["slug"] == "group-two" def test_group_one_has_one_category(self): - groups, _ = parse_readme(GROUPED_README) + groups = parse_readme(GROUPED_README) assert len(groups[0]["categories"]) == 1 assert groups[0]["categories"][0]["name"] == "Alpha" def test_group_two_has_two_categories(self): - groups, _ = parse_readme(GROUPED_README) + groups = parse_readme(GROUPED_README) assert len(groups[1]["categories"]) == 2 assert groups[1]["categories"][0]["name"] == "Beta" assert groups[1]["categories"][1]["name"] == "Gamma" - def test_resources_still_parsed(self): - _, resources = parse_readme(GROUPED_README) - assert len(resources) == 1 - assert resources[0]["name"] == "Newsletters" - def test_empty_group_skipped(self): readme = textwrap.dedent("""\ # T @@ -299,7 +282,7 @@ class TestParseGroupedReadme: Done. """) - groups, _ = parse_readme(readme) + groups = parse_readme(readme) assert len(groups) == 1 assert groups[0]["name"] == "HasCats" @@ -319,7 +302,7 @@ class TestParseGroupedReadme: Done. """) - groups, _ = parse_readme(readme) + groups = parse_readme(readme) # "Note:" has text after the strong node, so it's not a group marker # Category goes into "Other" assert len(groups) == 1 @@ -345,7 +328,7 @@ class TestParseGroupedReadme: Done. """) - groups, _ = parse_readme(readme) + groups = parse_readme(readme) assert len(groups) == 2 assert groups[0]["name"] == "Other" assert groups[0]["categories"][0]["name"] == "Orphan" @@ -438,33 +421,11 @@ class TestParseSectionEntries: Done. """) - groups, _ = parse_readme(readme) + groups = parse_readme(readme) cats = groups[0]["categories"] # 2 main entries + 1 also_see = 3 assert cats[0]["entry_count"] == 3 - def test_preview_first_four_names(self): - readme = textwrap.dedent("""\ - # T - - --- - - ## Libs - - - [alpha](https://x.com) - A. - - [beta](https://x.com) - B. - - [gamma](https://x.com) - C. - - [delta](https://x.com) - D. - - [epsilon](https://x.com) - E. - - # Contributing - - Done. - """) - groups, _ = parse_readme(readme) - cats = groups[0]["categories"] - assert cats[0]["preview"] == "alpha, beta, gamma, delta" - def test_description_html_escapes_xss(self): nodes = _content_nodes('- [lib](https://x.com) - A lib.\n') entries = _parse_section_entries(nodes) @@ -472,58 +433,13 @@ class TestParseSectionEntries: assert "<script>" in entries[0]["description"] -class TestRenderSectionHtml: - def test_basic_entry(self): - nodes = _content_nodes("- [django](https://example.com) - A web framework.\n") - html = _render_section_html(nodes) - assert 'class="entry"' in html - assert 'href="https://example.com"' in html - assert "django" in html - assert "A web framework." in html - - def test_subcategory_label(self): - nodes = _content_nodes( - "- Synchronous\n - [django](https://x.com) - Framework.\n" - ) - html = _render_section_html(nodes) - assert 'class="subcat"' in html - assert "Synchronous" in html - assert 'class="entry"' in html - - def test_sub_entry(self): - nodes = _content_nodes( - "- [django](https://x.com) - Framework.\n" - " - [awesome-django](https://y.com)\n" - ) - html = _render_section_html(nodes) - assert 'class="entry-sub"' in html - assert "awesome-django" in html - - def test_link_only_entry(self): - nodes = _content_nodes("- [tool](https://x.com)\n") - html = _render_section_html(nodes) - assert 'class="entry"' in html - assert 'href="https://x.com"' in html - assert "tool" in html - - def test_xss_escaped_in_name(self): - nodes = _content_nodes('- [](https://x.com) - Bad.\n') - html = _render_section_html(nodes) - assert "onerror" not in html or "&" in html - - def test_xss_escaped_in_subcat(self): - nodes = _content_nodes("- \n") - html = _render_section_html(nodes) - assert "