"""Tests for the build module."""

import json
import shutil
import textwrap
import xml.etree.ElementTree as ET
from datetime import UTC, date, datetime
from html.parser import HTMLParser
from pathlib import Path

from build import (
    annotate_entries_with_stars,
    build,
    detect_source_type,
    extract_entries,
    extract_github_repo,
    load_stars,
    sort_entries,
)
from readme_parser import parse_readme, slugify


class HeadMetadataParser(HTMLParser):
    """Collect title, ``<meta>`` and ``<link rel>`` data from fed HTML.

    After ``feed()`` the instance exposes:

    * ``title_count`` - number of ``<title>`` start tags encountered.
    * ``title`` - all text seen while inside a ``<title>`` element.
    * ``meta_by_name`` / ``meta_by_property`` - ``content`` values of
      ``<meta>`` tags keyed by their ``name`` / ``property`` attribute.
    * ``links_by_rel`` - ``href`` values of ``<link>`` tags keyed by each
      whitespace-separated token of their ``rel`` attribute.
    """

    def __init__(self):
        super().__init__()
        self.title_count = 0
        self.title = ""
        self.meta_by_name = {}
        self.meta_by_property = {}
        self.links_by_rel = {}
        self._in_title = False

    def handle_starttag(self, tag, attrs):
        """Record the interesting attributes of title, meta and link tags."""
        attr_map = dict(attrs)

        if tag == "title":
            self.title_count += 1
            self._in_title = True
            return

        if tag == "meta":
            content = attr_map.get("content", "")
            # A single <meta> may carry both "name" and "property".
            if "name" in attr_map:
                self.meta_by_name[attr_map["name"]] = content
            if "property" in attr_map:
                self.meta_by_property[attr_map["property"]] = content
            return

        if tag == "link":
            rel_value = attr_map.get("rel")
            if rel_value:
                href = attr_map.get("href", "")
                # rel may list several tokens; index the href under each one.
                for rel_token in rel_value.split():
                    self.links_by_rel[rel_token] = href

    def handle_endtag(self, tag):
        """Stop capturing title text once ``</title>`` is reached."""
        if tag == "title":
            self._in_title = False

    def handle_data(self, data):
        """Append text to ``title`` while inside a ``<title>`` element."""
        if self._in_title:
            self.title += data


# ---------------------------------------------------------------------------
# slugify
# ---------------------------------------------------------------------------


class TestSlugify:
    """Expected slugs produced by ``slugify`` for representative headings."""

    def test_simple(self):
        slug = slugify("Admin Panels")
        assert slug == "admin-panels"

    def test_uppercase_acronym(self):
        slug = slugify("RESTful API")
        assert slug == "restful-api"

    def test_all_caps(self):
        slug = slugify("CMS")
        assert slug == "cms"

    def test_hyphenated_input(self):
        slug = slugify("Command-line Tools")
        assert slug == "command-line-tools"

    def test_special_chars(self):
        slug = slugify("Editor Plugins and IDEs")
        assert slug == "editor-plugins-and-ides"

    def test_single_word(self):
        slug = slugify("Audio")
        assert slug == "audio"

    def test_extra_spaces(self):
        # Surrounding whitespace must not leak into the slug.
        slug = slugify(" Date and Time ")
        assert slug == "date-and-time"


# ---------------------------------------------------------------------------
# 
build (integration) # --------------------------------------------------------------------------- class TestBuild: def _make_repo(self, tmp_path, readme): (tmp_path / "README.md").write_text(readme, encoding="utf-8") tpl_dir = tmp_path / "website" / "templates" tpl_dir.mkdir(parents=True) (tpl_dir / "base.html").write_text( "{% block title %}{% endblock %}" "" "{% block content %}{% endblock %}", encoding="utf-8", ) (tpl_dir / "index.html").write_text( '{% extends "base.html" %}{% block content %}' "{% for entry in entries %}" '
' "{{ entry.name }}" "{{ entry.categories | join(', ') }}" "{{ entry.groups | join(', ') }}" "
" "{% endfor %}" "{% endblock %}", encoding="utf-8", ) (tpl_dir / "category.html").write_text( '{% extends "base.html" %}{% block content %}' "

{{ category.name }}

" "{% for entry in entries %}" '{{ entry.name }}' "{% endfor %}" "{% endblock %}", encoding="utf-8", ) (tpl_dir / "llms.txt").write_text( "# Awesome Python\n" "\n" "Use this list to find Python tools.\n" "\n" "# Categories\n" "\n" "{{ categories_md }}\n", encoding="utf-8", ) def _copy_real_templates(self, tmp_path): real_tpl = Path(__file__).parent / ".." / "templates" tpl_dir = tmp_path / "website" / "templates" shutil.copytree(real_tpl, tpl_dir) def test_build_creates_homepage_and_category_pages(self, tmp_path): readme = textwrap.dedent("""\ # Awesome Python Intro. --- **Tools** ## Widgets _Widget libraries._ - [w1](https://example.com) - A widget. ## Gadgets _Gadget tools._ - [g1](https://example.com) - A gadget. # Resources Info. ## Newsletters - [NL](https://example.com) # Contributing Help! """) self._make_repo(tmp_path, readme) build(tmp_path) site = tmp_path / "website" / "output" assert (site / "index.html").exists() assert (site / "categories" / "widgets" / "index.html").exists() assert (site / "categories" / "gadgets" / "index.html").exists() def test_build_creates_root_discovery_files(self, tmp_path): readme = textwrap.dedent("""\ # Awesome Python Intro. --- ## Widgets - [w1](https://example.com) - A widget. # Contributing Help! 
""") self._make_repo(tmp_path, readme) start_date = datetime.now(UTC).date() build(tmp_path) end_date = datetime.now(UTC).date() site = tmp_path / "website" / "output" robots = (site / "robots.txt").read_text(encoding="utf-8") assert robots == ( "User-agent: *\n" "Content-Signal: search=yes, ai-input=yes, ai-train=yes\n" "Allow: /\n" "\n" "Sitemap: https://awesome-python.com/sitemap.xml\n" ) sitemap = ET.parse(site / "sitemap.xml") root = sitemap.getroot() ns = {"sitemap": "http://www.sitemaps.org/schemas/sitemap/0.9"} locs = [loc.text for loc in root.findall("sitemap:url/sitemap:loc", ns)] lastmods = [lastmod.text for lastmod in root.findall("sitemap:url/sitemap:lastmod", ns)] assert root.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset" assert locs == [ "https://awesome-python.com/", "https://awesome-python.com/categories/widgets/", ] assert len(lastmods) == 2 assert all(start_date <= date.fromisoformat(lastmod) <= end_date for lastmod in lastmods) assert all(loc.startswith("https://awesome-python.com/") for loc in locs) assert all("?" not in loc for loc in locs) def test_build_creates_category_pages_with_metadata_and_links(self, tmp_path): readme = textwrap.dedent("""\ # Awesome Python Intro. --- **Tools** ## Widgets _Widget libraries._ - [w1](https://example.com/w1) - A widget. - [w2](https://github.com/owner/w2) - A starred widget. ## Gadgets _Gadget tools._ - [g1](https://example.com/g1) - A gadget. # Contributing Help! 
""") (tmp_path / "README.md").write_text(readme, encoding="utf-8") self._copy_real_templates(tmp_path) data_dir = tmp_path / "website" / "data" data_dir.mkdir(parents=True) stars = { "owner/w2": { "stars": 42, "owner": "owner", "last_commit_at": "2026-01-01T00:00:00+00:00", "fetched_at": "2026-01-01T00:00:00+00:00", }, } (data_dir / "github_stars.json").write_text(json.dumps(stars), encoding="utf-8") build(tmp_path) site = tmp_path / "website" / "output" index_html = (site / "index.html").read_text(encoding="utf-8") category_html = (site / "categories" / "widgets" / "index.html").read_text(encoding="utf-8") parser = HeadMetadataParser() parser.feed(category_html) assert 'href="/categories/widgets/"' in index_html assert 'data-value="Widgets"' in index_html assert parser.title.strip() == "Widgets Python Libraries | Awesome Python" assert parser.meta_by_name["description"] == "Explore 2 curated Python projects in Widgets. Widget libraries." assert parser.links_by_rel["canonical"] == "https://awesome-python.com/categories/widgets/" assert parser.meta_by_property["og:url"] == "https://awesome-python.com/categories/widgets/" assert '' not in category_html assert "

Widgets

" in category_html assert "Widget libraries." in category_html assert 'href="https://example.com/w1"' in category_html assert "A widget." in category_html assert 'href="https://github.com/owner/w2"' in category_html assert '' in category_html assert "42" in category_html assert "2026-01-01T00:00:00+00:00" in category_html def test_build_creates_markdown_alternate_without_sponsors(self, tmp_path): readme = textwrap.dedent("""\ # Awesome Python Intro. # **Sponsors** - **[Sponsor](https://sponsor.example.com)**: Sponsored tool. > Become a sponsor: [Sponsor us](SPONSORSHIP.md). # Categories **Tools** - [Widgets](#widgets) --- ## Widgets - [w1](https://example.com) - A widget. - [w2](https://github.com/owner/w2) - A starred widget. # Contributing Help! """) (tmp_path / "README.md").write_text(readme, encoding="utf-8") self._copy_real_templates(tmp_path) data_dir = tmp_path / "website" / "data" data_dir.mkdir(parents=True) stars = { "owner/w2": {"stars": 42, "owner": "owner", "fetched_at": "2026-01-01T00:00:00+00:00"}, } (data_dir / "github_stars.json").write_text(json.dumps(stars), encoding="utf-8") build(tmp_path) site = tmp_path / "website" / "output" index_html = (site / "index.html").read_text(encoding="utf-8") index_md = (site / "index.md").read_text(encoding="utf-8") llms_txt = (site / "llms.txt").read_text(encoding="utf-8") assert '' in index_html assert index_md.startswith("# Awesome Python\n\nIntro.\n\n# Categories") assert "# **Sponsors**" not in index_md assert "Sponsor" not in index_md assert "SPONSORSHIP.md" not in index_md assert "## Widgets" in index_md assert "- [w1](https://example.com) - A widget." in index_md assert "- [w2](https://github.com/owner/w2) - A starred widget. (42 GitHub stars)" in index_md assert llms_txt.startswith("# Awesome Python\n") assert "# Categories" in llms_txt assert "Use this curated list" in llms_txt assert "## Widgets" in llms_txt assert "- [w1](https://example.com) - A widget." 
in llms_txt assert "- [w2](https://github.com/owner/w2) - A starred widget. (42)" in llms_txt assert "{{ categories_md }}" not in llms_txt assert "# Contributing" not in llms_txt assert "Help!" not in llms_txt def test_build_cleans_stale_output(self, tmp_path): readme = textwrap.dedent("""\ # T --- ## Only - [x](https://x.com) - X. # Contributing Done. """) self._make_repo(tmp_path, readme) stale = tmp_path / "website" / "output" / "categories" / "stale" stale.mkdir(parents=True) (stale / "index.html").write_text("old", encoding="utf-8") build(tmp_path) assert not (tmp_path / "website" / "output" / "categories" / "stale").exists() def test_index_contains_category_names(self, tmp_path): readme = textwrap.dedent("""\ # T --- **Group A** ## Alpha - [a](https://x.com) - A. **Group B** ## Beta - [b](https://x.com) - B. # Contributing Done. """) self._make_repo(tmp_path, readme) build(tmp_path) index_html = (tmp_path / "website" / "output" / "index.html").read_text() assert "Alpha" in index_html assert "Beta" in index_html assert "Group A" in index_html assert "Group B" in index_html def test_index_contains_preview_text(self, tmp_path): readme = textwrap.dedent("""\ # T --- ## Stuff - [django](https://x.com) - A framework. - [flask](https://x.com) - A micro. # Contributing Done. """) self._make_repo(tmp_path, readme) build(tmp_path) index_html = (tmp_path / "website" / "output" / "index.html").read_text() assert "django" in index_html assert "flask" in index_html def test_build_with_stars_sorts_by_stars(self, tmp_path): readme = textwrap.dedent("""\ # T --- ## Stuff - [low-stars](https://github.com/org/low) - Low. - [high-stars](https://github.com/org/high) - High. - [no-stars](https://example.com/none) - None. # Contributing Done. """) (tmp_path / "README.md").write_text(readme, encoding="utf-8") # Copy real templates real_tpl = Path(__file__).parent / ".." 
/ "templates" tpl_dir = tmp_path / "website" / "templates" shutil.copytree(real_tpl, tpl_dir) # Create mock star data data_dir = tmp_path / "website" / "data" data_dir.mkdir(parents=True) stars = { "org/high": {"stars": 5000, "owner": "org", "fetched_at": "2026-01-01T00:00:00+00:00"}, "org/low": {"stars": 100, "owner": "org", "fetched_at": "2026-01-01T00:00:00+00:00"}, } (data_dir / "github_stars.json").write_text(json.dumps(stars), encoding="utf-8") build(tmp_path) html = (tmp_path / "website" / "output" / "index.html").read_text(encoding="utf-8") # Star-sorted: high-stars (5000) before low-stars (100) before no-stars (None) assert html.index("high-stars") < html.index("low-stars") assert html.index("low-stars") < html.index("no-stars") # Formatted star counts assert "5,000" in html assert "100" in html # Expand content present assert "expand-content" in html def test_index_contains_aligned_homepage_metadata(self, tmp_path): readme = (Path(__file__).parents[2] / "README.md").read_text(encoding="utf-8") (tmp_path / "README.md").write_text(readme, encoding="utf-8") self._copy_real_templates(tmp_path) build(tmp_path) parsed_groups = parse_readme(readme) categories = [cat for group in parsed_groups for cat in group["categories"]] entries = extract_entries(categories, parsed_groups) html = (tmp_path / "website" / "output" / "index.html").read_text(encoding="utf-8") parser = HeadMetadataParser() parser.feed(html) expected_title = "Awesome Python" expected_description = f"An opinionated guide to the best Python frameworks, libraries, and tools. Explore {len(entries)} curated projects across {len(categories)} categories, from AI and agents to data science and web development." 
expected_url = "https://awesome-python.com/" expected_image = "https://awesome-python.com/static/og-image.png" assert parser.title_count == 1 assert parser.title.strip() == expected_title assert parser.meta_by_name["description"] == expected_description assert parser.links_by_rel["canonical"] == expected_url assert parser.meta_by_property["og:type"] == "website" assert parser.meta_by_property["og:title"] == expected_title assert parser.meta_by_property["og:description"] == expected_description assert parser.meta_by_property["og:image"] == expected_image assert parser.meta_by_property["og:url"] == expected_url assert parser.meta_by_name["twitter:card"] == "summary_large_image" assert parser.meta_by_name["twitter:title"] == expected_title assert parser.meta_by_name["twitter:description"] == expected_description assert parser.meta_by_name["twitter:image"] == expected_image assert "\n