Merge pull request #2971 from vinta/feature/markdown-it-py-parser

feat: replace regex README parser with markdown-it-py AST parser
This commit is contained in:
Vinta Chen
2026-03-18 20:35:06 +08:00
committed by GitHub
15 changed files with 2529 additions and 1443 deletions

View File

@@ -1,17 +1,24 @@
-include .env
export
site_install:
uv sync --no-dev
install:
uv sync
site_fetch_stats:
fetch_stats:
uv run python website/fetch_github_stars.py
site_build:
test:
uv run pytest website/tests/ -v
build:
uv run python website/build.py
site_preview: site_build
python -m http.server -d website/output/ 8000
site_deploy: site_build
@echo "Deploy via GitHub Actions (push to master)"
preview: build
@echo "Check the website on http://localhost:8000"
uv run watchmedo shell-command \
--patterns='*.md;*.html;*.css;*.js;*.py' \
--recursive \
--wait --drop \
--command='uv run python website/build.py' \
README.md website/templates website/static website/data & \
python -m http.server -b 127.0.0.1 -d website/output/ 8000

View File

@@ -2,22 +2,30 @@
name = "awesome-python"
version = "0.1.0"
description = "An opinionated list of awesome Python frameworks, libraries, software and resources."
authors = [{ name = "Vinta Chen", email = "vinta.chen@gmail.com" }]
readme = "README.md"
license = "MIT"
requires-python = ">=3.13"
dependencies = [
"httpx==0.28.1",
"jinja2==3.1.6",
"markdown==3.10.2",
]
dependencies = []
[project.urls]
Homepage = "https://awesome-python.com/"
Repository = "https://github.com/vinta/awesome-python"
[dependency-groups]
build = ["httpx==0.28.1", "jinja2==3.1.6", "markdown-it-py==4.0.0"]
lint = ["ruff==0.15.6"]
test = ["pytest==9.0.2"]
dev = [
"pytest==9.0.2",
"ruff==0.15.6",
{ include-group = "build" },
{ include-group = "lint" },
{ include-group = "test" },
"watchdog==6.0.0",
]
[tool.pytest.ini_options]
testpaths = ["website/tests"]
pythonpath = ["website"]
[tool.ruff]
target-version = "py313"
line-length = 100
line-length = 200

77
uv.lock generated
View File

@@ -18,30 +18,46 @@ wheels = [
name = "awesome-python"
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown" },
]
[package.dev-dependencies]
build = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown-it-py" },
]
dev = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown-it-py" },
{ name = "pytest" },
{ name = "ruff" },
{ name = "watchdog" },
]
lint = [
{ name = "ruff" },
]
test = [
{ name = "pytest" },
]
[package.metadata]
requires-dist = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown", specifier = "==3.10.2" },
]
[package.metadata.requires-dev]
build = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown-it-py", specifier = "==4.0.0" },
]
dev = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown-it-py", specifier = "==4.0.0" },
{ name = "pytest", specifier = "==9.0.2" },
{ name = "ruff", specifier = "==0.15.6" },
{ name = "watchdog", specifier = "==6.0.0" },
]
lint = [{ name = "ruff", specifier = "==0.15.6" }]
test = [{ name = "pytest", specifier = "==9.0.2" }]
[[package]]
name = "certifi"
@@ -129,12 +145,15 @@ wheels = [
]
[[package]]
name = "markdown"
version = "3.10.2"
name = "markdown-it-py"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2b/f4/69fa6ed85ae003c2378ffa8f6d2e3234662abd02c10d216c0ba96081a238/markdown-3.10.2.tar.gz", hash = "sha256:994d51325d25ad8aa7ce4ebaec003febcce822c3f8c911e3b17c52f7f589f950", size = 368805, upload-time = "2026-02-09T14:57:26.942Z" }
dependencies = [
{ name = "mdurl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/de/1f/77fa3081e4f66ca3576c896ae5d31c3002ac6607f9747d2e3aa49227e464/markdown-3.10.2-py3-none-any.whl", hash = "sha256:e91464b71ae3ee7afd3017d9f358ef0baf158fd9a298db92f1d4761133824c36", size = 108180, upload-time = "2026-02-09T14:57:25.787Z" },
{ url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
]
[[package]]
@@ -189,6 +208,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
]
[[package]]
name = "mdurl"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
]
[[package]]
name = "packaging"
version = "26.0"
@@ -256,3 +284,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" },
{ url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" },
]
[[package]]
name = "watchdog"
version = "6.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload-time = "2024-11-01T14:07:13.037Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480, upload-time = "2024-11-01T14:06:42.952Z" },
{ url = "https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451, upload-time = "2024-11-01T14:06:45.084Z" },
{ url = "https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057, upload-time = "2024-11-01T14:06:47.324Z" },
{ url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" },
{ url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" },
{ url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" },
{ url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" },
{ url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" },
{ url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" },
{ url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" },
{ url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" },
{ url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" },
{ url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" },
]

View File

@@ -7,9 +7,10 @@ import shutil
from pathlib import Path
from typing import TypedDict
import markdown
from jinja2 import Environment, FileSystemLoader
from readme_parser import parse_readme, slugify
# Thematic grouping of categories. Each category name must match exactly
# as it appears in README.md (the ## heading text).
SECTION_GROUPS: list[tuple[str, list[str]]] = [
@@ -67,217 +68,6 @@ SECTION_GROUPS: list[tuple[str, list[str]]] = [
]
def slugify(name: str) -> str:
"""Convert a category name to a URL-friendly slug."""
slug = name.lower()
slug = re.sub(r"[^a-z0-9\s-]", "", slug)
slug = re.sub(r"[\s]+", "-", slug.strip())
slug = re.sub(r"-+", "-", slug)
return slug
def count_entries(content: str) -> int:
"""Count library entries (lines starting with * [ or - [) in a content block."""
return sum(1 for line in content.split("\n") if re.match(r"\s*[-*]\s+\[", line))
def extract_preview(content: str, *, max_names: int = 4) -> str:
"""Extract first N main library names from markdown content for preview text.
Only includes top-level or single-indent entries (indent <= 3 spaces),
skipping subcategory labels (items without links) and deep sub-entries.
"""
names = []
for m in re.finditer(r"^(\s*)[-*]\s+\[([^\]]+)\]", content, re.MULTILINE):
indent_len = len(m.group(1))
if indent_len > 3:
continue
names.append(m.group(2))
if len(names) >= max_names:
break
return ", ".join(names)
def render_content_html(content: str) -> str:
"""Render category markdown content to HTML with subcategory detection.
Lines that are list items without links (e.g., "- Synchronous") are
treated as subcategory headers and rendered as bold dividers.
Indent levels in the README:
- 0 spaces: top-level entry or subcategory label
- 2 spaces: entry under a subcategory (still a main entry)
- 4+ spaces: sub-entry (e.g., awesome-django under django)
"""
lines = content.split("\n")
out: list[str] = []
for line in lines:
stripped = line.strip()
indent_len = len(line) - len(line.lstrip())
# Detect subcategory labels: list items without links
m = re.match(r"^[-*]\s+(.+)$", stripped)
if m and "[" not in stripped:
label = m.group(1)
out.append(f'<div class="subcat">{label}</div>')
continue
# Entry with link and description: * [name](url) - Description.
m = re.match(
r"^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*[-\u2013\u2014]\s*(.+)$",
line,
)
if m:
name, url, desc = m.groups()
if indent_len > 3:
out.append(
f'<div class="entry-sub">'
f'<a href="{url}">{name}</a>'
f"</div>"
)
else:
out.append(
f'<div class="entry">'
f'<a href="{url}">{name}</a>'
f'<span class="sep">&mdash;</span>{desc}'
f"</div>"
)
continue
# Link-only entry (no description): * [name](url)
m = re.match(r"^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*$", line)
if m:
name, url = m.groups()
if indent_len > 3:
out.append(
f'<div class="entry-sub">'
f'<a href="{url}">{name}</a>'
f"</div>"
)
else:
out.append(
f'<div class="entry">'
f'<a href="{url}">{name}</a>'
f"</div>"
)
continue
return "\n".join(out)
def parse_readme(text: str) -> tuple[list[dict], list[dict]]:
"""Parse README.md text into categories and resources.
Returns:
(categories, resources) where each is a list of dicts with keys:
name, slug, description, content
"""
lines = text.split("\n")
separator_idx = None
for i, line in enumerate(lines):
if line.strip() == "---" and i > 0:
separator_idx = i
break
if separator_idx is None:
return [], []
resources_idx = None
contributing_idx = None
for i, line in enumerate(lines):
if line.strip() == "# Resources":
resources_idx = i
elif line.strip() == "# Contributing":
contributing_idx = i
cat_end = resources_idx if resources_idx is not None else len(lines)
category_lines = lines[separator_idx + 1 : cat_end]
resource_lines = []
if resources_idx is not None:
res_end = contributing_idx if contributing_idx is not None else len(lines)
resource_lines = lines[resources_idx:res_end]
categories = _extract_sections(category_lines, level=2)
resources = _extract_sections(resource_lines, level=2)
return categories, resources
def _extract_sections(lines: list[str], *, level: int) -> list[dict]:
"""Extract ## sections from a block of lines."""
prefix = "#" * level + " "
sections = []
current_name = None
current_lines: list[str] = []
for line in lines:
if line.startswith(prefix) and not line.startswith(prefix + "#"):
if current_name is not None:
sections.append(_build_section(current_name, current_lines))
current_name = line[len(prefix) :].strip()
current_lines = []
elif current_name is not None:
current_lines.append(line)
if current_name is not None:
sections.append(_build_section(current_name, current_lines))
return sections
def _build_section(name: str, lines: list[str]) -> dict:
"""Build a section dict from a name and its content lines."""
while lines and not lines[0].strip():
lines = lines[1:]
while lines and not lines[-1].strip():
lines = lines[:-1]
description = ""
content_lines = lines
if lines:
m = re.match(r"^_(.+)_$", lines[0].strip())
if m:
description = m.group(1)
content_lines = lines[1:]
while content_lines and not content_lines[0].strip():
content_lines = content_lines[1:]
content = "\n".join(content_lines).strip()
return {
"name": name,
"slug": slugify(name),
"description": description,
"content": content,
}
def render_markdown(text: str) -> str:
"""Render markdown text to HTML."""
md = markdown.Markdown(extensions=["extra"])
return md.convert(text)
def strip_markdown_links(text: str) -> str:
"""Replace [text](url) with just text for plain-text contexts."""
return re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
def render_inline_markdown(text: str) -> str:
"""Render inline markdown (links, bold, italic) to HTML."""
from markupsafe import Markup
html = markdown.markdown(text)
# Strip wrapping <p>...</p> since this is inline content
html = re.sub(r"^<p>(.*)</p>$", r"\1", html.strip())
# Add target/rel to links for external navigation
html = html.replace("<a ", '<a target="_blank" rel="noopener" ')
return Markup(html)
def group_categories(
categories: list[dict],
resources: list[dict],
@@ -285,10 +75,11 @@ def group_categories(
"""Organize categories and resources into thematic section groups."""
cat_by_name = {c["name"]: c for c in categories}
groups = []
grouped_names: set[str] = set()
for group_name, cat_names in SECTION_GROUPS:
grouped_names.update(cat_names)
if group_name == "Resources":
# Resources group uses parsed resources directly
group_cats = list(resources)
else:
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
@@ -301,9 +92,6 @@ def group_categories(
})
# Any categories not in a group go into "Other"
grouped_names = set()
for _, cat_names in SECTION_GROUPS:
grouped_names.update(cat_names)
ungrouped = [c for c in categories if c["name"] not in grouped_names]
if ungrouped:
groups.append({
@@ -323,13 +111,13 @@ class Entry(TypedDict):
group: str
stars: int | None
owner: str | None
pushed_at: str | None
last_commit_at: str | None
class StarData(TypedDict):
stars: int
owner: str
pushed_at: str
last_commit_at: str
fetched_at: str
@@ -367,7 +155,6 @@ def sort_entries(entries: list[dict]) -> list[dict]:
def extract_entries(
categories: list[dict],
resources: list[dict],
groups: list[dict],
) -> list[dict]:
"""Flatten categories into individual library entries for table display."""
@@ -379,38 +166,18 @@ def extract_entries(
entries: list[dict] = []
for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other")
last_entry_indent = -1
for line in cat["content"].split("\n"):
indent_len = len(line) - len(line.lstrip())
# Link-only sub-item deeper than parent → "also see"
m_sub = re.match(r"\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*$", line)
if m_sub and indent_len > last_entry_indent >= 0 and entries:
entries[-1]["also_see"].append({
"name": m_sub.group(1),
"url": m_sub.group(2),
})
continue
if indent_len > 3:
continue
m = re.match(
r"\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*(?:[-\u2013\u2014]\s*(.+))?$",
line,
)
if m:
last_entry_indent = indent_len
entries.append({
"name": m.group(1),
"url": m.group(2),
"description": render_inline_markdown(m.group(3)) if m.group(3) else "",
"category": cat["name"],
"group": group_name,
"stars": None,
"owner": None,
"pushed_at": None,
"also_see": [],
})
for entry in cat["entries"]:
entries.append({
"name": entry["name"],
"url": entry["url"],
"description": entry["description"],
"category": cat["name"],
"group": group_name,
"stars": None,
"owner": None,
"last_commit_at": None,
"also_see": entry["also_see"],
})
return entries
@@ -420,7 +187,6 @@ def build(repo_root: str) -> None:
website = repo / "website"
readme_text = (repo / "README.md").read_text(encoding="utf-8")
# Extract subtitle from the first non-empty, non-heading line
subtitle = ""
for line in readme_text.split("\n"):
stripped = line.strip()
@@ -429,47 +195,33 @@ def build(repo_root: str) -> None:
break
categories, resources = parse_readme(readme_text)
# Enrich with entry counts, rendered HTML, previews, and clean descriptions
for cat in categories + resources:
cat["entry_count"] = count_entries(cat["content"])
cat["content_html"] = render_content_html(cat["content"])
cat["preview"] = extract_preview(cat["content"])
cat["description"] = strip_markdown_links(cat["description"])
# All fields pre-computed: entry_count, content_html, preview, description
total_entries = sum(c["entry_count"] for c in categories)
# Organize into groups
groups = group_categories(categories, resources)
entries = extract_entries(categories, groups)
# Flatten entries for table view
entries = extract_entries(categories, resources, groups)
# Load and merge GitHub star data
stars_data = load_stars(website / "data" / "github_stars.json")
for entry in entries:
repo_key = extract_github_repo(entry["url"])
if repo_key and repo_key in stars_data:
entry["stars"] = stars_data[repo_key]["stars"]
entry["owner"] = stars_data[repo_key]["owner"]
entry["pushed_at"] = stars_data[repo_key].get("pushed_at", "")
sd = stars_data[repo_key]
entry["stars"] = sd["stars"]
entry["owner"] = sd["owner"]
entry["last_commit_at"] = sd.get("last_commit_at", "")
# Sort by stars descending
entries = sort_entries(entries)
# Set up Jinja2
env = Environment(
loader=FileSystemLoader(website / "templates"),
autoescape=True,
)
# Output directory
site_dir = website / "output"
if site_dir.exists():
shutil.rmtree(site_dir)
site_dir.mkdir(parents=True)
# Generate single index.html
tpl_index = env.get_template("index.html")
(site_dir / "index.html").write_text(
tpl_index.render(
@@ -484,14 +236,10 @@ def build(repo_root: str) -> None:
encoding="utf-8",
)
# Copy static assets
static_src = website / "static"
static_dst = site_dir / "static"
if static_src.exists():
shutil.copytree(static_src, static_dst)
# Write CNAME
(site_dir / "CNAME").write_text("awesome-python.com\n", encoding="utf-8")
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
print(f"Total entries: {total_entries}")

File diff suppressed because it is too large Load Diff

View File

@@ -10,14 +10,14 @@ from pathlib import Path
import httpx
from build import extract_github_repo
from build import extract_github_repo, load_stars
CACHE_MAX_AGE_DAYS = 7
DATA_DIR = Path(__file__).parent / "data"
CACHE_FILE = DATA_DIR / "github_stars.json"
README_PATH = Path(__file__).parent.parent / "README.md"
GRAPHQL_URL = "https://api.github.com/graphql"
BATCH_SIZE = 100
BATCH_SIZE = 50
def extract_github_repos(text: str) -> set[str]:
@@ -30,17 +30,6 @@ def extract_github_repos(text: str) -> set[str]:
return repos
def load_cache() -> dict:
"""Load the star cache from disk. Returns empty dict if missing or corrupt."""
if CACHE_FILE.exists():
try:
return json.loads(CACHE_FILE.read_text(encoding="utf-8"))
except json.JSONDecodeError:
print(f"Warning: corrupt cache at {CACHE_FILE}, starting fresh.", file=sys.stderr)
return {}
return {}
def save_cache(cache: dict) -> None:
"""Write the star cache to disk, creating data/ dir if needed."""
DATA_DIR.mkdir(parents=True, exist_ok=True)
@@ -61,7 +50,7 @@ def build_graphql_query(repos: list[str]) -> str:
continue
parts.append(
f'repo_{i}: repository(owner: "{owner}", name: "{name}") '
f"{{ stargazerCount pushedAt owner {{ login }} }}"
f"{{ stargazerCount owner {{ login }} defaultBranchRef {{ target {{ ... on Commit {{ committedDate }} }} }} }}"
)
if not parts:
return ""
@@ -78,10 +67,12 @@ def parse_graphql_response(
node = data.get(f"repo_{i}")
if node is None:
continue
default_branch = node.get("defaultBranchRef") or {}
target = default_branch.get("target") or {}
result[repo] = {
"stars": node.get("stargazerCount", 0),
"owner": node.get("owner", {}).get("login", ""),
"pushed_at": node.get("pushedAt", ""),
"last_commit_at": target.get("committedDate", ""),
}
return result
@@ -114,7 +105,7 @@ def main() -> None:
current_repos = extract_github_repos(readme_text)
print(f"Found {len(current_repos)} GitHub repos in README.md")
cache = load_cache()
cache = load_stars(CACHE_FILE)
now = datetime.now(timezone.utc)
# Prune entries not in current README
@@ -173,7 +164,7 @@ def main() -> None:
cache[repo] = {
"stars": results[repo]["stars"],
"owner": results[repo]["owner"],
"pushed_at": results[repo]["pushed_at"],
"last_commit_at": results[repo]["last_commit_at"],
"fetched_at": now_iso,
}
fetched_count += 1

388
website/readme_parser.py Normal file
View File

@@ -0,0 +1,388 @@
"""Parse README.md into structured section data using markdown-it-py AST."""
from __future__ import annotations
import re
from typing import TypedDict
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
from markupsafe import escape
class AlsoSee(TypedDict):
    """A link-only sub-entry nested under a main entry (an "also see" link)."""

    name: str  # link text of the nested item
    url: str  # href of the nested item


class ParsedEntry(TypedDict):
    """One library entry parsed from a section's bullet list."""

    name: str  # text of the entry's leading link
    url: str  # href of the entry's leading link
    description: str  # inline HTML, properly escaped
    also_see: list[AlsoSee]  # link-only items nested under this entry


class ParsedSection(TypedDict):
    """A parsed ## section of the README (a category or resource group)."""

    name: str  # h2 heading text
    slug: str  # URL-friendly form of name
    description: str  # plain text, links resolved to text
    entries: list[ParsedEntry]  # entries extracted from the section's lists
    entry_count: int  # len(entries) plus all of their also_see links
    preview: str  # comma-joined names of the first few entries
    content_html: str  # rendered HTML, properly escaped
# --- Slugify ----------------------------------------------------------------

_SLUG_STRIP_RE = re.compile(r"[^a-z0-9\s-]")
_SLUG_SPACE_RE = re.compile(r"\s+")
_SLUG_DASH_RE = re.compile(r"-{2,}")


def slugify(name: str) -> str:
    """Turn a category heading into a lowercase, dash-separated URL slug."""
    lowered = name.lower()
    cleaned = _SLUG_STRIP_RE.sub("", lowered)
    dashed = _SLUG_SPACE_RE.sub("-", cleaned.strip())
    return _SLUG_DASH_RE.sub("-", dashed)
# --- Inline renderers -------------------------------------------------------


def render_inline_html(children: list[SyntaxTreeNode]) -> str:
    """Render inline AST nodes to HTML, escaping all user-controlled text."""
    html_parts: list[str] = []
    for node in children:
        kind = node.type
        if kind in ("text", "html_inline"):
            # Raw HTML embedded in the README is displayed escaped, never injected.
            html_parts.append(str(escape(node.content)))
        elif kind == "softbreak":
            html_parts.append(" ")
        elif kind == "link":
            target = str(escape(node.attrGet("href") or ""))
            inner_html = render_inline_html(node.children)
            html_parts.append(
                f'<a href="{target}" target="_blank" rel="noopener">{inner_html}</a>'
            )
        elif kind == "em":
            html_parts.append(f"<em>{render_inline_html(node.children)}</em>")
        elif kind == "strong":
            html_parts.append(f"<strong>{render_inline_html(node.children)}</strong>")
        elif kind == "code_inline":
            html_parts.append(f"<code>{escape(node.content)}</code>")
    return "".join(html_parts)
def render_inline_text(children: list[SyntaxTreeNode]) -> str:
"""Render inline AST nodes to plain text (links become their text)."""
parts: list[str] = []
for child in children:
match child.type:
case "text":
parts.append(child.content)
case "softbreak":
parts.append(" ")
case "code_inline":
parts.append(child.content)
case "em" | "strong" | "link":
parts.append(render_inline_text(child.children))
return "".join(parts)
# --- AST helpers -------------------------------------------------------------
def _heading_text(node: SyntaxTreeNode) -> str:
"""Extract plain text from a heading node."""
for child in node.children:
if child.type == "inline":
return render_inline_text(child.children)
return ""
def _extract_description(nodes: list[SyntaxTreeNode]) -> str:
    """Return the section description when the first node is a paragraph whose
    entire inline content is a single <em> span.

    Pattern: _Libraries for foo._ -> "Libraries for foo."
    Anything else (no nodes, non-paragraph first node, mixed inline content)
    yields an empty string.
    """
    if not nodes or nodes[0].type != "paragraph":
        return ""
    for inline in nodes[0].children:
        if inline.type == "inline" and len(inline.children) == 1:
            only_child = inline.children[0]
            if only_child.type == "em":
                return render_inline_text(only_child.children)
    return ""
# --- Entry extraction --------------------------------------------------------
_DESC_SEP_RE = re.compile(r"^\s*[-\u2013\u2014]\s*")
def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
"""Find first direct child of a given type."""
for child in node.children:
if child.type == child_type:
return child
return None
def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
"""Find the inline node in a list_item's paragraph."""
para = _find_child(node, "paragraph")
if para is None:
return None
return _find_child(para, "inline")
def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None:
"""Find the first link node among inline children."""
for child in inline.children:
if child.type == "link":
return child
return None
def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool:
"""Check if the link is the first child of inline (a real entry, not a subcategory label)."""
return bool(inline.children) and inline.children[0] is link
def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str:
    """Render the inline content that follows ``first_link`` as description HTML.

    AST: [link("name"), text(" - Description.")] -> "Description."
    The separator (- / en-dash / em-dash) is stripped.
    """
    # Identity search: the caller passes a node taken from inline.children.
    link_pos = None
    for i, child in enumerate(inline.children):
        if child is first_link:
            link_pos = i
            break
    if link_pos is None:
        return ""
    tail = inline.children[link_pos + 1 :]
    if not tail:
        return ""
    return _DESC_SEP_RE.sub("", render_inline_html(tail))
def _parse_list_entries(bullet_list: SyntaxTreeNode) -> list[ParsedEntry]:
    """Extract entries from a bullet_list AST node.

    Handles three patterns:
    - Text-only list_item -> subcategory label -> recurse into nested list
    - Link list_item with nested link-only items -> entry with also_see
    - Link list_item without nesting -> simple entry
    """
    entries: list[ParsedEntry] = []
    for list_item in bullet_list.children:
        if list_item.type != "list_item":
            continue
        inline = _find_inline(list_item)
        if inline is None:
            # Defensive: a list_item without a paragraph/inline carries no data.
            continue
        first_link = _find_first_link(inline)
        if first_link is None or not _is_leading_link(inline, first_link):
            # Subcategory label (plain text or text-before-link) — its nested
            # list holds real entries, so recurse and flatten them in.
            nested = _find_child(list_item, "bullet_list")
            if nested:
                entries.extend(_parse_list_entries(nested))
            continue
        # Entry with a leading link: name from the link text, url from its href.
        name = render_inline_text(first_link.children)
        url = first_link.attrGet("href") or ""
        desc_html = _extract_description_html(inline, first_link)
        # Nested items under a linked entry are "also see" links, not entries.
        also_see: list[AlsoSee] = []
        nested = _find_child(list_item, "bullet_list")
        if nested:
            for sub_item in nested.children:
                if sub_item.type != "list_item":
                    continue
                sub_inline = _find_inline(sub_item)
                if sub_inline:
                    sub_link = _find_first_link(sub_inline)
                    if sub_link:
                        also_see.append(AlsoSee(
                            name=render_inline_text(sub_link.children),
                            url=sub_link.attrGet("href") or "",
                        ))
        entries.append(ParsedEntry(
            name=name,
            url=url,
            description=desc_html,
            also_see=also_see,
        ))
    return entries
def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEntry]:
    """Collect the entries of every top-level bullet_list in a section."""
    collected: list[ParsedEntry] = []
    for node in content_nodes:
        if node.type == "bullet_list":
            collected += _parse_list_entries(node)
    return collected
# --- Content HTML rendering --------------------------------------------------


def _render_bullet_list_html(
    bullet_list: SyntaxTreeNode,
    *,
    is_sub: bool = False,
) -> str:
    """Render a bullet_list node to HTML with entry/entry-sub/subcat classes.

    ``is_sub`` is True when rendering a list nested under a linked entry;
    those items become compact "entry-sub" divs without descriptions.
    All names, URLs, and labels are escaped before interpolation.
    """
    out: list[str] = []
    for list_item in bullet_list.children:
        if list_item.type != "list_item":
            continue
        inline = _find_inline(list_item)
        if inline is None:
            # Defensive: skip list items with no inline content.
            continue
        first_link = _find_first_link(inline)
        if first_link is None or not _is_leading_link(inline, first_link):
            # Subcategory label (plain text or text-before-link): render as a
            # divider, then render its nested list as top-level entries.
            label = str(escape(render_inline_text(inline.children)))
            out.append(f'<div class="subcat">{label}</div>')
            nested = _find_child(list_item, "bullet_list")
            if nested:
                out.append(_render_bullet_list_html(nested, is_sub=False))
            continue
        # Entry with a leading link.
        name = str(escape(render_inline_text(first_link.children)))
        url = str(escape(first_link.attrGet("href") or ""))
        if is_sub:
            out.append(f'<div class="entry-sub"><a href="{url}">{name}</a></div>')
        else:
            # desc is already escaped HTML from _extract_description_html.
            desc = _extract_description_html(inline, first_link)
            if desc:
                out.append(
                    f'<div class="entry"><a href="{url}">{name}</a>'
                    f'<span class="sep">&mdash;</span>{desc}</div>'
                )
            else:
                out.append(f'<div class="entry"><a href="{url}">{name}</a></div>')
        # Nested items under an entry with a link are sub-entries
        nested = _find_child(list_item, "bullet_list")
        if nested:
            out.append(_render_bullet_list_html(nested, is_sub=True))
    return "\n".join(out)
def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str:
    """Render a section's content nodes to HTML.

    Only top-level bullet lists contribute markup; other block nodes
    (such as the description paragraph) are skipped.
    """
    return "\n".join(
        _render_bullet_list_html(node)
        for node in content_nodes
        if node.type == "bullet_list"
    )
# --- Section splitting -------------------------------------------------------
def _group_by_h2(
    nodes: list[SyntaxTreeNode],
) -> list[ParsedSection]:
    """Group AST nodes into sections by h2 headings.

    Nodes appearing before the first h2 are discarded; each h2 starts a
    new section whose body runs until the next h2 (or the end of nodes).
    """
    # Pass 1: bucket nodes under their preceding h2 heading.
    grouped: list[tuple[str, list[SyntaxTreeNode]]] = []
    for node in nodes:
        if node.type == "heading" and node.tag == "h2":
            grouped.append((_heading_text(node), []))
        elif grouped:
            grouped[-1][1].append(node)

    # Pass 2: build a ParsedSection per bucket.
    sections: list[ParsedSection] = []
    for name, body in grouped:
        desc = _extract_description(body)
        # When a description exists it is always the first body node.
        content_nodes = body[1:] if desc else body
        entries = _parse_section_entries(content_nodes)
        sections.append(ParsedSection(
            name=name,
            slug=slugify(name),
            description=desc,
            entries=entries,
            entry_count=len(entries) + sum(len(e["also_see"]) for e in entries),
            preview=", ".join(e["name"] for e in entries[:4]),
            content_html=_render_section_html(content_nodes),
        ))
    return sections
def parse_readme(text: str) -> tuple[list[ParsedSection], list[ParsedSection]]:
    """Parse README.md text into categories and resources.

    Expected layout: intro, a thematic break (``---``), category sections
    (h2s), an optional ``# Resources`` part (h2s), and an optional
    ``# Contributing`` part that is always excluded.

    Returns (categories, resources) where each is a list of ParsedSection
    dicts. Both lists are empty when no thematic break is found.
    """
    md = MarkdownIt("commonmark")
    tokens = md.parse(text)
    root = SyntaxTreeNode(tokens)
    children = root.children
    # Find thematic break (---), # Resources, and # Contributing in one pass
    hr_idx: int | None = None
    resources_idx: int | None = None
    contributing_idx: int | None = None
    for i, node in enumerate(children):
        if hr_idx is None and node.type == "hr":
            hr_idx = i
        elif node.type == "heading" and node.tag == "h1":
            text_content = _heading_text(node)
            if text_content == "Resources":
                resources_idx = i
            elif text_content == "Contributing":
                contributing_idx = i
    if hr_idx is None:
        return [], []
    # Slice into category and resource ranges. Compare indices against
    # None explicitly: `a or b` would wrongly skip a valid index 0.
    if resources_idx is not None:
        cat_end = resources_idx
    elif contributing_idx is not None:
        cat_end = contributing_idx
    else:
        cat_end = len(children)
    cat_nodes = children[hr_idx + 1 : cat_end]
    res_nodes: list[SyntaxTreeNode] = []
    if resources_idx is not None:
        res_end = contributing_idx if contributing_idx is not None else len(children)
        res_nodes = children[resources_idx + 1 : res_end]
    categories = _group_by_h2(cat_nodes)
    resources = _group_by_h2(res_nodes)
    return categories, resources

View File

@@ -0,0 +1,6 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
<path d="M8 2h16a6 6 0 0 1 6 6v8H2V8a6 6 0 0 1 6-6z" fill="#1d5fa6"/>
<path d="M2 16h28v8a6 6 0 0 1-6 6H8a6 6 0 0 1-6-6z" fill="#f0c73e"/>
<circle cx="11.5" cy="9.5" r="2.2" fill="#f0c73e"/>
<circle cx="20.5" cy="22.5" r="2.2" fill="#1d5fa6"/>
</svg>

After

Width:  |  Height:  |  Size: 320 B

View File

@@ -1,15 +1,44 @@
// State
var activeFilter = null; // { type: "cat"|"group", value: "..." }
var activeSort = { col: 'stars', order: 'desc' };
var searchInput = document.querySelector('.search');
var filterBar = document.querySelector('.filter-bar');
var filterValue = document.querySelector('.filter-value');
var filterClear = document.querySelector('.filter-clear');
var noResults = document.querySelector('.no-results');
var countEl = document.querySelector('.count');
var rows = document.querySelectorAll('.table tbody tr.row');
var tags = document.querySelectorAll('.tag');
var tbody = document.querySelector('.table tbody');
// Relative time formatting: turn an ISO timestamp into a coarse,
// human-readable age ("just now", "5 hours ago", "yesterday", ...).
// Months are approximated as 30 days and years as 365 days.
function relativeTime(isoStr) {
  var elapsedMs = Date.now() - new Date(isoStr).getTime();
  var hours = Math.floor(elapsedMs / 3600000);
  if (hours < 1) return 'just now';
  if (hours < 24) return hours === 1 ? '1 hour ago' : hours + ' hours ago';
  var days = Math.floor(elapsedMs / 86400000);
  if (days === 1) return 'yesterday';
  if (days < 30) return days + ' days ago';
  var months = Math.floor(days / 30);
  if (months < 12) return months === 1 ? '1 month ago' : months + ' months ago';
  var years = Math.floor(days / 365);
  return years === 1 ? '1 year ago' : years + ' years ago';
}
// Format all commit date cells
document.querySelectorAll('.col-commit[data-commit]').forEach(function (td) {
var time = td.querySelector('time');
if (time) time.textContent = relativeTime(td.dataset.commit);
});
// Store original row order for sort reset
rows.forEach(function (row, i) {
row._origIndex = i;
row._expandRow = row.nextElementSibling;
});
function collapseAll() {
var openRows = document.querySelectorAll('.table tbody tr.row.open');
openRows.forEach(function (row) {
@@ -46,16 +75,18 @@ function applyFilters() {
show = row._searchText.includes(query);
}
row.hidden = !show;
if (row.hidden !== !show) row.hidden = !show;
if (show) {
visibleCount++;
row.querySelector('.col-num').textContent = String(visibleCount);
var numCell = row.cells[0];
if (numCell.textContent !== String(visibleCount)) {
numCell.textContent = String(visibleCount);
}
}
});
if (noResults) noResults.hidden = visibleCount > 0;
if (countEl) countEl.textContent = visibleCount;
// Update tag highlights
tags.forEach(function (tag) {
@@ -74,6 +105,76 @@ function applyFilters() {
filterBar.hidden = true;
}
}
updateURL();
}
// Mirror the current search/filter/sort state into the query string via
// replaceState, so typing does not pollute the browser history.
function updateURL() {
  var params = new URLSearchParams();
  var query = searchInput ? searchInput.value.trim() : '';
  if (query) {
    params.set('q', query);
  }
  if (activeFilter) {
    var key = activeFilter.type === 'cat' ? 'category' : 'group';
    params.set(key, activeFilter.value);
  }
  var isDefaultSort = activeSort.col === 'stars' && activeSort.order === 'desc';
  if (!isDefaultSort) {
    params.set('sort', activeSort.col);
    params.set('order', activeSort.order);
  }
  var qs = params.toString();
  history.replaceState(null, '', qs ? '?' + qs : location.pathname);
}
// Extract the comparable value for a row in the given sort column.
// Missing data maps to sentinels (-1 stars, 0 commit time) so that
// incomplete rows can be pushed to the bottom by the comparator.
function getSortValue(row, col) {
  switch (col) {
    case 'name': {
      var link = row.querySelector('.col-name a');
      return link.textContent.trim().toLowerCase();
    }
    case 'stars': {
      var raw = row.querySelector('.col-stars').textContent.trim().replace(/,/g, '');
      var parsed = parseInt(raw, 10);
      return isNaN(parsed) ? -1 : parsed;
    }
    case 'commit-time': {
      var iso = row.querySelector('.col-commit').getAttribute('data-commit');
      return iso ? new Date(iso).getTime() : 0;
    }
    default:
      return 0;
  }
}
// Re-insert table rows into the tbody in sorted order, then re-apply
// filtering. Sorting is stable: ties fall back to the row's original
// document order (row._origIndex, captured at load time).
function sortRows() {
  var arr = Array.prototype.slice.call(rows);
  if (activeSort) {
    arr.sort(function (a, b) {
      var aVal = getSortValue(a, activeSort.col);
      var bVal = getSortValue(b, activeSort.col);
      if (activeSort.col === 'name') {
        // Lexicographic compare; 'desc' flips the sign.
        var cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
        if (cmp === 0) return a._origIndex - b._origIndex;
        return activeSort.order === 'desc' ? -cmp : cmp;
      }
      // Numeric columns: rows with missing data (sentinel values <= 0
      // from getSortValue) always sink to the bottom, regardless of
      // sort direction.
      if (aVal <= 0 && bVal <= 0) return a._origIndex - b._origIndex;
      if (aVal <= 0) return 1;
      if (bVal <= 0) return -1;
      var cmp = aVal - bVal;
      if (cmp === 0) return a._origIndex - b._origIndex;
      return activeSort.order === 'desc' ? -cmp : cmp;
    });
  } else {
    // No active sort: restore the original README order.
    arr.sort(function (a, b) { return a._origIndex - b._origIndex; });
  }
  // Each row is followed by its hidden expand row; move them as a pair
  // so expansion still targets the right details.
  arr.forEach(function (row) {
    tbody.appendChild(row);
    tbody.appendChild(row._expandRow);
  });
  applyFilters();
}
// Sync the header sort indicators (sort-asc / sort-desc classes) with
// the active sort column and order; clears all others.
function updateSortIndicators() {
  var headers = document.querySelectorAll('th[data-sort]');
  headers.forEach(function (header) {
    header.classList.remove('sort-asc', 'sort-desc');
    var isActive = activeSort && header.dataset.sort === activeSort.col;
    if (isActive) {
      header.classList.add('sort-' + activeSort.order);
    }
  });
}
// Expand/collapse: event delegation on tbody
@@ -130,6 +231,23 @@ if (filterClear) {
});
}
// Column sorting: clicking a sortable header cycles
// default order -> alternate order -> back to the stars/desc default.
document.querySelectorAll('th[data-sort]').forEach(function (th) {
  th.addEventListener('click', function () {
    var col = th.dataset.sort;
    // Name sorts ascending by default; the other columns descending.
    var defaultOrder = col === 'name' ? 'asc' : 'desc';
    var altOrder = defaultOrder === 'asc' ? 'desc' : 'asc';
    if (activeSort && activeSort.col === col) {
      if (activeSort.order === defaultOrder) activeSort = { col: col, order: altOrder };
      else activeSort = { col: 'stars', order: 'desc' };
    } else {
      activeSort = { col: col, order: defaultOrder };
    }
    sortRows();
    updateSortIndicators();
  });
});
// Search input
if (searchInput) {
var searchTimer;
@@ -152,3 +270,23 @@ if (searchInput) {
}
});
}
// Restore state from URL so filtered/sorted views are shareable links.
(function () {
  var params = new URLSearchParams(location.search);
  var q = params.get('q');
  var cat = params.get('category');
  var group = params.get('group');
  var sort = params.get('sort');
  var order = params.get('order');
  if (q && searchInput) searchInput.value = q;
  // Category takes precedence over group when both are present.
  if (cat) activeFilter = { type: 'cat', value: cat };
  else if (group) activeFilter = { type: 'group', value: group };
  // Only accept known column/order values; anything else keeps the default.
  if ((sort === 'name' || sort === 'stars' || sort === 'commit-time') && (order === 'desc' || order === 'asc')) {
    activeSort = { col: sort, order: order };
  }
  // Only do the (re-)sort + filter work when the URL actually carried state.
  if (q || cat || group || sort) {
    sortRows();
  }
  updateSortIndicators();
})();

View File

@@ -23,6 +23,8 @@
--accent-light: oklch(97% 0.015 240);
--highlight: oklch(93% 0.10 90);
--highlight-text: oklch(35% 0.10 90);
--tag-text: oklch(45% 0.06 240);
--tag-hover-bg: oklch(93% 0.025 240);
}
html { font-size: 16px; }
@@ -65,8 +67,10 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.hero-main {
display: flex;
flex-wrap: wrap;
justify-content: space-between;
align-items: flex-start;
gap: 1rem;
}
.hero-submit {
@@ -78,14 +82,21 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--text);
text-decoration: none;
white-space: nowrap;
transition: border-color 0.2s, background 0.2s, color 0.2s;
}
.hero-submit:hover {
border-color: var(--accent);
background: var(--accent-light);
color: var(--accent);
text-decoration: none;
}
.hero-submit:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
.hero h1 {
font-family: var(--font-display);
font-size: clamp(2rem, 5vw, 3rem);
@@ -144,6 +155,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
font-family: var(--font-body);
font-size: var(--text-sm);
color: var(--text);
transition: border-color 0.15s, background 0.15s;
}
.search::placeholder { color: var(--text-muted); }
@@ -174,11 +186,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
background: none;
border: 1px solid var(--border);
border-radius: 4px;
padding: 0.15rem 0.5rem;
padding: 0.35rem 0.65rem;
font-family: inherit;
font-size: var(--text-xs);
color: var(--text-muted);
cursor: pointer;
transition: border-color 0.15s, color 0.15s;
}
.filter-clear:hover {
@@ -186,14 +199,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--text);
}
.stats {
font-size: var(--text-sm);
color: var(--text-muted);
font-variant-numeric: tabular-nums;
.filter-clear:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
.stats strong { color: var(--text-secondary); }
/* === Table === */
.table-wrap {
width: 100%;
@@ -201,6 +211,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
overflow-x: auto;
}
.table-wrap:focus {
outline: 2px solid var(--accent);
outline-offset: -2px;
}
.table {
width: 100%;
border-collapse: separate;
@@ -236,6 +251,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
padding: 0.7rem 0.75rem;
border-bottom: 1px solid var(--border);
vertical-align: top;
transition: background 0.15s;
}
.table tbody tr.row:not(.open):hover td {
@@ -253,9 +269,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-name {
width: 35%;
overflow-wrap: break-word;
word-wrap: break-word;
word-break: break-word;
overflow-wrap: anywhere;
}
.col-name > a {
@@ -266,12 +280,47 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-name > a:hover { text-decoration: underline; color: var(--accent-hover); }
/* === Sortable Headers === */
th[data-sort] {
cursor: pointer;
user-select: none;
}
th[data-sort]:hover {
color: var(--accent);
}
th[data-sort]::after {
content: " ▼";
opacity: 0;
transition: opacity 0.15s;
}
th[data-sort="name"]::after {
content: " ▲";
}
th[data-sort]:hover::after {
opacity: 1;
}
th[data-sort].sort-desc::after {
content: " ▼";
opacity: 1;
}
th[data-sort].sort-asc::after {
content: " ▲";
opacity: 1;
}
/* === Stars Column === */
.col-stars {
width: 5rem;
font-variant-numeric: tabular-nums;
white-space: nowrap;
color: var(--text-secondary);
text-align: right;
}
/* === Arrow Column === */
@@ -294,6 +343,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
/* === Row Click === */
.row { cursor: pointer; }
.row:focus-visible td {
outline: none;
background: var(--bg-hover);
box-shadow: inset 2px 0 0 var(--accent);
}
/* === Expand Row === */
.expand-row {
display: none;
@@ -315,10 +370,36 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
border-bottom: 1px solid var(--border);
}
@keyframes expand-in {
from {
opacity: 0;
transform: translateY(-4px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.expand-content {
font-size: var(--text-sm);
color: var(--text-secondary);
line-height: 1.6;
animation: expand-in 0.2s cubic-bezier(0.25, 1, 0.5, 1);
}
.expand-tags {
display: flex;
gap: 0.4rem;
margin-bottom: 0.4rem;
}
.expand-tag {
font-size: var(--text-xs);
color: var(--tag-text);
background: var(--bg);
padding: 0.15rem 0.4rem;
border-radius: 3px;
}
.expand-also-see {
@@ -357,35 +438,63 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--border);
}
.col-cat, .col-group {
.col-cat {
width: 13%;
white-space: nowrap;
}
/* === Last Commit Column === */
.col-commit {
width: 9rem;
white-space: nowrap;
color: var(--text-muted);
}
/* === Tags === */
.tag {
position: relative;
background: var(--accent-light);
border: none;
font-family: inherit;
font-size: var(--text-xs);
color: oklch(45% 0.06 240);
color: var(--tag-text);
cursor: pointer;
padding: 0.15rem 0.35rem;
padding: 0.25rem 0.5rem;
border-radius: 3px;
white-space: nowrap;
transition: background 0.15s, color 0.15s;
}
/* Expand touch target to 44x44px minimum */
.tag::after {
content: "";
position: absolute;
inset: -0.5rem -0.25rem;
}
.tag:hover {
background: var(--accent-light);
background: var(--tag-hover-bg);
color: var(--accent);
}
.tag:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 1px;
}
.tag.active {
background: var(--highlight);
color: var(--highlight-text);
font-weight: 600;
}
/* === Noscript === */
.noscript-msg {
text-align: center;
padding: 1rem;
color: var(--text-muted);
}
/* === No Results === */
.no-results {
max-width: 1400px;
@@ -407,20 +516,18 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
background: var(--bg-input);
display: flex;
align-items: center;
justify-content: space-between;
justify-content: flex-end;
gap: 0.5rem;
}
.footer a { color: var(--text-muted); text-decoration: none; }
.footer a:hover { color: var(--accent); }
.footer a { color: var(--accent); text-decoration: none; }
.footer a:hover { color: var(--accent-hover); text-decoration: underline; }
.footer-links {
display: flex;
gap: 1rem;
}
.footer-sep { color: var(--border-strong); }
/* === Responsive === */
@media (max-width: 900px) {
.col-group { display: none; }
.col-commit { display: none; }
}
@media (max-width: 640px) {
@@ -435,7 +542,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-cat { display: none; }
.col-name { white-space: normal; }
.footer { padding: 1.25rem; flex-direction: column; gap: 0.5rem; }
.footer { padding: 1.25rem; justify-content: center; flex-wrap: wrap; }
}
/* === Screen Reader Only === */
@@ -454,6 +561,8 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
/* === Reduced Motion === */
@media (prefers-reduced-motion: reduce) {
*, *::before, *::after {
animation-duration: 0.01ms !important;
animation-iteration-count: 1 !important;
transition-duration: 0.01ms !important;
}
}

View File

@@ -17,10 +17,7 @@
/>
<meta property="og:url" content="https://awesome-python.com/" />
<meta name="twitter:card" content="summary" />
<link
rel="icon"
href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🐍</text></svg>"
/>
<link rel="icon" href="/static/favicon.svg" type="image/svg+xml" />
<link rel="stylesheet" href="/static/style.css" />
<script
async
@@ -41,24 +38,24 @@
<main id="content">{% block content %}{% endblock %}</main>
<footer class="footer">
<div class="footer-links">
<a href="https://github.com/vinta" target="_blank" rel="noopener"
>GitHub</a
>
<a href="https://twitter.com/vinta" target="_blank" rel="noopener"
>Twitter</a
>
</div>
<span
>Curated by
<a href="https://github.com/vinta" target="_blank" rel="noopener"
>Made by
<a href="https://vinta.ws/" target="_blank" rel="noopener"
>Vinta</a
></span
>
<span class="footer-sep">/</span>
<a href="https://github.com/vinta" target="_blank" rel="noopener"
>GitHub</a
>
<span class="footer-sep">/</span>
<a href="https://twitter.com/vinta" target="_blank" rel="noopener"
>Twitter</a
>
</footer>
<noscript
><p style="text-align: center; padding: 1rem; color: #666">
><p class="noscript-msg">
JavaScript is needed for search and filtering.
</p></noscript
>

View File

@@ -29,6 +29,7 @@
</div>
</header>
<h2 class="sr-only">Search and filter</h2>
<div class="controls">
<div class="search-wrap">
<svg
@@ -60,22 +61,24 @@
</div>
</div>
<div class="table-wrap">
<h2 class="sr-only">Results</h2>
<div class="table-wrap" tabindex="0" role="region" aria-label="Libraries table">
<table class="table">
<thead>
<tr>
<th class="col-num"><span class="sr-only">#</span></th>
<th class="col-name">Project Name</th>
<th class="col-stars">GitHub Stars</th>
<th class="col-name" data-sort="name">Project Name</th>
<th class="col-stars" data-sort="stars">GitHub Stars</th>
<th class="col-commit" data-sort="commit-time">Last Commit</th>
<th class="col-cat">Category</th>
<th class="col-group">Group</th>
<th class="col-arrow"></th>
<th class="col-arrow"><span class="sr-only">Details</span></th>
</tr>
</thead>
<tbody>
{% for entry in entries %}
<tr
class="row"
role="button"
data-cat="{{ entry.category }}"
data-group="{{ entry.group }}"
tabindex="0"
@@ -92,25 +95,24 @@
{% if entry.stars is not none %}{{ "{:,}".format(entry.stars) }}{%
else %}&mdash;{% endif %}
</td>
<td class="col-commit"
{% if entry.last_commit_at %}data-commit="{{ entry.last_commit_at }}"{% endif %}
>{% if entry.last_commit_at %}<time datetime="{{ entry.last_commit_at }}">{{ entry.last_commit_at[:10] }}</time>{% else %}&mdash;{% endif %}</td>
<td class="col-cat">
<button class="tag" data-type="cat" data-value="{{ entry.category }}">
{{ entry.category }}
</button>
</td>
<td class="col-group">
<button class="tag" data-type="group" data-value="{{ entry.group }}">
{{ entry.group }}
</button>
</td>
<td class="col-arrow"><span class="arrow">&rarr;</span></td>
</tr>
<tr class="expand-row" id="expand-{{ loop.index }}">
<td></td>
<td colspan="5">
<td colspan="3">
<div class="expand-content">
{% if entry.description %}
<div class="expand-desc">{{ entry.description | safe }}</div>
{% endif %} {% if entry.also_see %}
{% endif %}
{% if entry.also_see %}
<div class="expand-also-see">
Also see: {% for see in entry.also_see %}<a
href="{{ see.url }}"
@@ -131,11 +133,16 @@
target="_blank"
rel="noopener"
>{{ entry.url | replace("https://", "") }}</a
>{% if entry.pushed_at %}<span class="expand-sep">&middot;</span
>Last pushed {{ entry.pushed_at[:10] }}{% endif %}
>
</div>
</div>
</td>
<td class="col-cat">
<button class="tag" data-type="group" data-value="{{ entry.group }}">
{{ entry.group }}
</button>
</td>
<td></td>
</tr>
{% endfor %}
</tbody>

View File

@@ -1,27 +1,18 @@
"""Tests for the build module."""
import json
import os
import shutil
import sys
import textwrap
from pathlib import Path
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from build import (
build,
count_entries,
extract_github_repo,
extract_preview,
group_categories,
load_stars,
parse_readme,
render_content_html,
slugify,
sort_entries,
)
from readme_parser import slugify
# ---------------------------------------------------------------------------
# slugify
@@ -51,244 +42,6 @@ class TestSlugify:
assert slugify(" Date and Time ") == "date-and-time"
# ---------------------------------------------------------------------------
# count_entries
# ---------------------------------------------------------------------------
class TestCountEntries:
def test_counts_dash_entries(self):
assert count_entries("- [a](url) - Desc.\n- [b](url) - Desc.") == 2
def test_counts_star_entries(self):
assert count_entries("* [a](url) - Desc.") == 1
def test_ignores_non_entries(self):
assert count_entries("Some text\n- [a](url) - Desc.\nMore text") == 1
def test_counts_indented_entries(self):
assert count_entries(" - [a](url) - Desc.") == 1
def test_empty_content(self):
assert count_entries("") == 0
# ---------------------------------------------------------------------------
# extract_preview
# ---------------------------------------------------------------------------
class TestExtractPreview:
def test_basic(self):
content = "* [alpha](url) - A.\n* [beta](url) - B.\n* [gamma](url) - C."
assert extract_preview(content) == "alpha, beta, gamma"
def test_max_four(self):
content = "\n".join(f"* [lib{i}](url) - Desc." for i in range(10))
assert extract_preview(content) == "lib0, lib1, lib2, lib3"
def test_empty(self):
assert extract_preview("") == ""
def test_skips_subcategory_labels(self):
content = "* Synchronous\n* [django](url) - Framework.\n* [flask](url) - Micro."
assert extract_preview(content) == "django, flask"
# ---------------------------------------------------------------------------
# render_content_html
# ---------------------------------------------------------------------------
class TestRenderContentHtml:
def test_basic_entry(self):
content = "* [django](https://example.com) - A web framework."
html = render_content_html(content)
assert 'href="https://example.com"' in html
assert "django" in html
assert "A web framework." in html
assert 'class="entry"' in html
def test_subcategory_label(self):
content = "* Synchronous\n* [django](https://x.com) - Framework."
html = render_content_html(content)
assert 'class="subcat"' in html
assert "Synchronous" in html
def test_sub_entry(self):
content = "* [django](https://x.com) - Framework.\n * [awesome-django](https://y.com)"
html = render_content_html(content)
assert 'class="entry-sub"' in html
assert "awesome-django" in html
def test_link_only_entry(self):
content = "* [tool](https://x.com)"
html = render_content_html(content)
assert 'href="https://x.com"' in html
assert "tool" in html
# ---------------------------------------------------------------------------
# parse_readme
# ---------------------------------------------------------------------------
MINIMAL_README = textwrap.dedent("""\
# Awesome Python
Some intro text.
---
## Alpha
_Libraries for alpha stuff._
- [lib-a](https://example.com/a) - Does A.
- [lib-b](https://example.com/b) - Does B.
## Beta
_Tools for beta._
- [lib-c](https://example.com/c) - Does C.
# Resources
Where to discover resources.
## Newsletters
- [News One](https://example.com/n1)
- [News Two](https://example.com/n2)
## Podcasts
- [Pod One](https://example.com/p1)
# Contributing
Please contribute!
""")
class TestParseReadme:
def test_category_count(self):
cats, resources = parse_readme(MINIMAL_README)
assert len(cats) == 2
def test_resource_count(self):
cats, resources = parse_readme(MINIMAL_README)
assert len(resources) == 2
def test_category_names(self):
cats, _ = parse_readme(MINIMAL_README)
assert cats[0]["name"] == "Alpha"
assert cats[1]["name"] == "Beta"
def test_category_slugs(self):
cats, _ = parse_readme(MINIMAL_README)
assert cats[0]["slug"] == "alpha"
assert cats[1]["slug"] == "beta"
def test_category_description(self):
cats, _ = parse_readme(MINIMAL_README)
assert cats[0]["description"] == "Libraries for alpha stuff."
assert cats[1]["description"] == "Tools for beta."
def test_category_content_has_entries(self):
cats, _ = parse_readme(MINIMAL_README)
assert "lib-a" in cats[0]["content"]
assert "lib-b" in cats[0]["content"]
def test_resources_names(self):
_, resources = parse_readme(MINIMAL_README)
assert resources[0]["name"] == "Newsletters"
assert resources[1]["name"] == "Podcasts"
def test_resources_content(self):
_, resources = parse_readme(MINIMAL_README)
assert "News One" in resources[0]["content"]
assert "Pod One" in resources[1]["content"]
def test_contributing_skipped(self):
cats, resources = parse_readme(MINIMAL_README)
all_names = [c["name"] for c in cats] + [r["name"] for r in resources]
assert "Contributing" not in all_names
def test_no_separator(self):
cats, resources = parse_readme("# Just a heading\n\nSome text.\n")
assert cats == []
assert resources == []
def test_no_description(self):
readme = textwrap.dedent("""\
# Title
---
## NullDesc
- [item](https://x.com) - Thing.
# Resources
## Tips
- [tip](https://x.com)
# Contributing
Done.
""")
cats, resources = parse_readme(readme)
assert cats[0]["description"] == ""
assert "item" in cats[0]["content"]
# ---------------------------------------------------------------------------
# parse_readme on real README
# ---------------------------------------------------------------------------
class TestParseRealReadme:
@pytest.fixture(autouse=True)
def load_readme(self):
readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
with open(readme_path, encoding="utf-8") as f:
self.readme_text = f.read()
self.cats, self.resources = parse_readme(self.readme_text)
def test_at_least_83_categories(self):
assert len(self.cats) >= 83
def test_resources_has_newsletters_and_podcasts(self):
names = [r["name"] for r in self.resources]
assert "Newsletters" in names
assert "Podcasts" in names
def test_contributing_not_in_results(self):
all_names = [c["name"] for c in self.cats] + [
r["name"] for r in self.resources
]
assert "Contributing" not in all_names
def test_first_category_is_admin_panels(self):
assert self.cats[0]["name"] == "Admin Panels"
assert self.cats[0]["slug"] == "admin-panels"
def test_last_category_is_wsgi_servers(self):
assert self.cats[-1]["name"] == "WSGI Servers"
assert self.cats[-1]["slug"] == "wsgi-servers"
def test_restful_api_slug(self):
slugs = [c["slug"] for c in self.cats]
assert "restful-api" in slugs
def test_descriptions_extracted(self):
admin = self.cats[0]
assert admin["description"] == "Libraries for administrative interfaces."
# ---------------------------------------------------------------------------
# group_categories
# ---------------------------------------------------------------------------
@@ -318,26 +71,6 @@ class TestGroupCategories:
assert "Resources" in group_names
# ---------------------------------------------------------------------------
# render_markdown (kept for compatibility)
# ---------------------------------------------------------------------------
class TestRenderMarkdown:
def test_renders_link_list(self):
from build import render_markdown
html = render_markdown("- [lib](https://example.com) - Does stuff.")
assert "<li>" in html
assert '<a href="https://example.com">lib</a>' in html
def test_renders_plain_text(self):
from build import render_markdown
html = render_markdown("Hello world")
assert "<p>Hello world</p>" in html
# ---------------------------------------------------------------------------
# build (integration)
# ---------------------------------------------------------------------------
@@ -413,27 +146,6 @@ class TestBuild:
# No category sub-pages
assert not (site / "categories").exists()
def test_build_creates_cname(self, tmp_path):
readme = textwrap.dedent("""\
# T
---
## Only
- [x](https://x.com) - X.
# Contributing
Done.
""")
self._make_repo(tmp_path, readme)
build(str(tmp_path))
cname = tmp_path / "website" / "output" / "CNAME"
assert cname.exists()
assert "awesome-python.com" in cname.read_text()
def test_build_cleans_stale_output(self, tmp_path):
readme = textwrap.dedent("""\
# T

View File

@@ -8,7 +8,6 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from fetch_github_stars import (
build_graphql_query,
extract_github_repos,
load_cache,
parse_graphql_response,
save_cache,
)
@@ -65,27 +64,6 @@ class TestExtractGithubRepos:
assert result == {"org/repo"}
class TestLoadCache:
def test_returns_empty_when_missing(self, tmp_path, monkeypatch):
monkeypatch.setattr("fetch_github_stars.CACHE_FILE", tmp_path / "nonexistent.json")
result = load_cache()
assert result == {}
def test_loads_valid_cache(self, tmp_path, monkeypatch):
cache_file = tmp_path / "stars.json"
cache_file.write_text('{"a/b": {"stars": 1}}', encoding="utf-8")
monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_file)
result = load_cache()
assert result == {"a/b": {"stars": 1}}
def test_returns_empty_on_corrupt_json(self, tmp_path, monkeypatch):
cache_file = tmp_path / "stars.json"
cache_file.write_text("not json", encoding="utf-8")
monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_file)
result = load_cache()
assert result == {}
class TestSaveCache:
def test_creates_directory_and_writes_json(self, tmp_path, monkeypatch):
data_dir = tmp_path / "data"

View File

@@ -0,0 +1,424 @@
"""Tests for the readme_parser module."""
import os
import textwrap
import pytest
from readme_parser import (
_parse_section_entries,
_render_section_html,
parse_readme,
render_inline_html,
render_inline_text,
)
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
def _parse_inline(md_text: str) -> list[SyntaxTreeNode]:
    """Parse ``md_text`` as a single paragraph and return its inline nodes.

    The AST shape is root > paragraph > inline; the inline node's children
    are the text/link/emphasis nodes that the renderers operate on.
    """
    tree = SyntaxTreeNode(MarkdownIt("commonmark").parse(md_text))
    paragraph = tree.children[0]
    inline = paragraph.children[0]
    return inline.children
class TestRenderInlineHtml:
    """render_inline_html: inline AST nodes -> escaped HTML fragment."""

    def test_plain_text_escapes_html(self):
        # Raw <, >, & in text must be entity-escaped.
        children = _parse_inline("Hello <world> & friends")
        assert render_inline_html(children) == "Hello &lt;world&gt; &amp; friends"

    def test_link_with_target(self):
        # Links should open in a new tab and carry rel="noopener".
        children = _parse_inline("[name](https://example.com)")
        html = render_inline_html(children)
        assert 'href="https://example.com"' in html
        assert 'target="_blank"' in html
        assert 'rel="noopener"' in html
        assert ">name</a>" in html

    def test_emphasis(self):
        children = _parse_inline("*italic* text")
        assert "<em>italic</em>" in render_inline_html(children)

    def test_strong(self):
        children = _parse_inline("**bold** text")
        assert "<strong>bold</strong>" in render_inline_html(children)

    def test_code_inline(self):
        children = _parse_inline("`some code`")
        assert "<code>some code</code>" in render_inline_html(children)

    def test_mixed_link_and_text(self):
        # Text around a link is preserved in order.
        children = _parse_inline("See [foo](https://x.com) for details.")
        html = render_inline_html(children)
        assert "See " in html
        assert ">foo</a>" in html
        assert " for details." in html
class TestRenderInlineText:
    """render_inline_text: inline AST nodes -> plain text (markup stripped)."""

    def test_plain_text(self):
        children = _parse_inline("Hello world")
        assert render_inline_text(children) == "Hello world"

    def test_link_becomes_text(self):
        # Links collapse to their label; the URL is dropped.
        children = _parse_inline("See [awesome-algos](https://github.com/x/y).")
        assert render_inline_text(children) == "See awesome-algos."

    def test_emphasis_stripped(self):
        children = _parse_inline("*italic* text")
        assert render_inline_text(children) == "italic text"

    def test_code_inline_kept(self):
        # Inline code keeps its content, just without backticks.
        children = _parse_inline("`code` here")
        assert render_inline_text(children) == "code here"
MINIMAL_README = textwrap.dedent("""\
# Awesome Python
Some intro text.
---
## Alpha
_Libraries for alpha stuff._
- [lib-a](https://example.com/a) - Does A.
- [lib-b](https://example.com/b) - Does B.
## Beta
_Tools for beta._
- [lib-c](https://example.com/c) - Does C.
# Resources
Where to discover resources.
## Newsletters
- [News One](https://example.com/n1)
- [News Two](https://example.com/n2)
## Podcasts
- [Pod One](https://example.com/p1)
# Contributing
Please contribute!
""")
class TestParseReadmeSections:
    """parse_readme() splits a README into category and resource sections."""

    def test_category_count(self):
        categories, _ = parse_readme(MINIMAL_README)
        assert len(categories) == 2

    def test_resource_count(self):
        _, resources = parse_readme(MINIMAL_README)
        assert len(resources) == 2

    def test_category_names(self):
        categories, _ = parse_readme(MINIMAL_README)
        assert categories[0]["name"] == "Alpha"
        assert categories[1]["name"] == "Beta"

    def test_category_slugs(self):
        categories, _ = parse_readme(MINIMAL_README)
        assert categories[0]["slug"] == "alpha"
        assert categories[1]["slug"] == "beta"

    def test_category_description(self):
        categories, _ = parse_readme(MINIMAL_README)
        assert categories[0]["description"] == "Libraries for alpha stuff."
        assert categories[1]["description"] == "Tools for beta."

    def test_resource_names(self):
        _, resources = parse_readme(MINIMAL_README)
        assert resources[0]["name"] == "Newsletters"
        assert resources[1]["name"] == "Podcasts"

    def test_contributing_skipped(self):
        categories, resources = parse_readme(MINIMAL_README)
        names = [section["name"] for section in categories + resources]
        assert "Contributing" not in names

    def test_no_separator(self):
        # Without the `---` separator there is nothing to parse.
        categories, resources = parse_readme("# Just a heading\n\nSome text.\n")
        assert categories == []
        assert resources == []

    def test_no_description(self):
        # A section with no italic intro gets an empty description string.
        readme = textwrap.dedent("""\
        # Title

        ---

        ## NullDesc

        - [item](https://x.com) - Thing.

        # Resources

        ## Tips

        - [tip](https://x.com)

        # Contributing

        Done.
        """)
        categories, resources = parse_readme(readme)
        assert categories[0]["description"] == ""
        assert categories[0]["entries"][0]["name"] == "item"

    def test_description_with_link_stripped(self):
        # Links inside the description collapse to their label text.
        readme = textwrap.dedent("""\
        # T

        ---

        ## Algos

        _Algorithms. Also see [awesome-algos](https://example.com)._

        - [lib](https://x.com) - Lib.

        # Contributing

        Done.
        """)
        categories, _ = parse_readme(readme)
        assert categories[0]["description"] == "Algorithms. Also see awesome-algos."
def _content_nodes(md_text: str) -> list[SyntaxTreeNode]:
    """Parse *md_text* with CommonMark rules and return the top-level block nodes."""
    tree = SyntaxTreeNode(MarkdownIt("commonmark").parse(md_text))
    return tree.children
class TestParseSectionEntries:
    """_parse_section_entries() turns list-item blocks into entry dicts."""

    def test_flat_entries(self):
        blocks = _content_nodes(
            "- [django](https://example.com/d) - A web framework.\n"
            "- [flask](https://example.com/f) - A micro framework.\n"
        )
        parsed = _parse_section_entries(blocks)
        assert len(parsed) == 2
        first, second = parsed
        assert first["name"] == "django"
        assert first["url"] == "https://example.com/d"
        assert "web framework" in first["description"]
        assert first["also_see"] == []
        assert second["name"] == "flask"

    def test_link_only_entry(self):
        # An item with no trailing " - text" still becomes an entry.
        parsed = _parse_section_entries(_content_nodes("- [tool](https://x.com)\n"))
        assert len(parsed) == 1
        assert parsed[0]["name"] == "tool"
        assert parsed[0]["description"] == ""

    def test_subcategorized_entries(self):
        # Plain-text items act as subcategory labels; their children are flattened.
        blocks = _content_nodes(
            "- Algorithms\n"
            "  - [algos](https://x.com/a) - Algo lib.\n"
            "  - [sorts](https://x.com/s) - Sort lib.\n"
            "- Design Patterns\n"
            "  - [patterns](https://x.com/p) - Pattern lib.\n"
        )
        parsed = _parse_section_entries(blocks)
        assert len(parsed) == 3
        assert parsed[0]["name"] == "algos"
        assert parsed[2]["name"] == "patterns"

    def test_text_before_link_is_subcategory(self):
        blocks = _content_nodes(
            "- MySQL - [awesome-mysql](http://example.com/awesome-mysql/)\n"
            "  - [mysqlclient](https://example.com/mysqlclient) - MySQL connector.\n"
            "  - [pymysql](https://example.com/pymysql) - Pure Python MySQL driver.\n"
        )
        parsed = _parse_section_entries(blocks)
        # awesome-mysql is a subcategory label, not an entry
        assert len(parsed) == 2
        names = [entry["name"] for entry in parsed]
        assert "awesome-mysql" not in names
        assert "mysqlclient" in names
        assert "pymysql" in names

    def test_also_see_sub_entries(self):
        # A nested list under a link item becomes that entry's "also_see" list.
        blocks = _content_nodes(
            "- [asyncio](https://docs.python.org/3/library/asyncio.html) - Async I/O.\n"
            "  - [awesome-asyncio](https://github.com/timofurrer/awesome-asyncio)\n"
            "- [trio](https://github.com/python-trio/trio) - Friendly async.\n"
        )
        parsed = _parse_section_entries(blocks)
        assert len(parsed) == 2
        assert parsed[0]["name"] == "asyncio"
        assert len(parsed[0]["also_see"]) == 1
        assert parsed[0]["also_see"][0]["name"] == "awesome-asyncio"
        assert parsed[1]["name"] == "trio"
        assert parsed[1]["also_see"] == []

    def test_entry_count_includes_also_see(self):
        readme = textwrap.dedent("""\
        # T

        ---

        ## Async

        - [asyncio](https://x.com) - Async I/O.
          - [awesome-asyncio](https://y.com)
        - [trio](https://z.com) - Friendly async.

        # Contributing

        Done.
        """)
        categories, _ = parse_readme(readme)
        # 2 main entries + 1 also_see = 3
        assert categories[0]["entry_count"] == 3

    def test_preview_first_four_names(self):
        readme = textwrap.dedent("""\
        # T

        ---

        ## Libs

        - [alpha](https://x.com) - A.
        - [beta](https://x.com) - B.
        - [gamma](https://x.com) - C.
        - [delta](https://x.com) - D.
        - [epsilon](https://x.com) - E.

        # Contributing

        Done.
        """)
        categories, _ = parse_readme(readme)
        assert categories[0]["preview"] == "alpha, beta, gamma, delta"

    def test_description_html_escapes_xss(self):
        # Raw HTML in a description must be entity-escaped, never passed through.
        blocks = _content_nodes('- [lib](https://x.com) - A <script>alert(1)</script> lib.\n')
        parsed = _parse_section_entries(blocks)
        assert "<script>" not in parsed[0]["description"]
        assert "&lt;script&gt;" in parsed[0]["description"]
class TestRenderSectionHtml:
    """_render_section_html() produces the per-category HTML fragment."""

    def test_basic_entry(self):
        nodes = _content_nodes("- [django](https://example.com) - A web framework.\n")
        html = _render_section_html(nodes)
        assert 'class="entry"' in html
        assert 'href="https://example.com"' in html
        assert "django" in html
        assert "A web framework." in html

    def test_subcategory_label(self):
        # A plain-text item renders as a subcategory heading above its entries.
        nodes = _content_nodes(
            "- Synchronous\n  - [django](https://x.com) - Framework.\n"
        )
        html = _render_section_html(nodes)
        assert 'class="subcat"' in html
        assert "Synchronous" in html
        assert 'class="entry"' in html

    def test_sub_entry(self):
        # Nested "also see" links get their own entry-sub styling.
        nodes = _content_nodes(
            "- [django](https://x.com) - Framework.\n"
            "  - [awesome-django](https://y.com)\n"
        )
        html = _render_section_html(nodes)
        assert 'class="entry-sub"' in html
        assert "awesome-django" in html

    def test_link_only_entry(self):
        nodes = _content_nodes("- [tool](https://x.com)\n")
        html = _render_section_html(nodes)
        assert 'class="entry"' in html
        assert 'href="https://x.com"' in html
        assert "tool" in html

    def test_xss_escaped_in_name(self):
        # A raw "<img ...>" tag smuggled into a link label must never reach the
        # output unescaped. NOTE: the previous assertion here,
        # `"onerror" not in html or "&" in html`, was vacuous — any escaped
        # entity anywhere in the fragment contains "&", so an unescaped
        # injected tag would still have passed.
        nodes = _content_nodes('- [<img onerror=alert(1)>](https://x.com) - Bad.\n')
        html = _render_section_html(nodes)
        assert "<img" not in html

    def test_xss_escaped_in_subcat(self):
        # Subcategory labels are also attacker-controlled README text.
        nodes = _content_nodes("- <script>alert(1)</script>\n")
        html = _render_section_html(nodes)
        assert "<script>" not in html
class TestParseRealReadme:
    """End-to-end checks of parse_readme() against the repository's README.md."""

    @pytest.fixture(autouse=True)
    def load_readme(self):
        # Tests live two directories below the repository root.
        repo_root = os.path.join(os.path.dirname(__file__), "..", "..")
        with open(os.path.join(repo_root, "README.md"), encoding="utf-8") as fh:
            self.readme_text = fh.read()
        self.cats, self.resources = parse_readme(self.readme_text)

    def test_at_least_83_categories(self):
        assert len(self.cats) >= 83

    def test_resources_has_newsletters_and_podcasts(self):
        resource_names = [section["name"] for section in self.resources]
        assert "Newsletters" in resource_names
        assert "Podcasts" in resource_names

    def test_contributing_not_in_results(self):
        names = [s["name"] for s in self.cats] + [s["name"] for s in self.resources]
        assert "Contributing" not in names

    def test_first_category_is_admin_panels(self):
        first = self.cats[0]
        assert first["name"] == "Admin Panels"
        assert first["slug"] == "admin-panels"

    def test_last_category_is_wsgi_servers(self):
        last = self.cats[-1]
        assert last["name"] == "WSGI Servers"
        assert last["slug"] == "wsgi-servers"

    def test_restful_api_slug(self):
        assert "restful-api" in {c["slug"] for c in self.cats}

    def test_descriptions_extracted(self):
        assert self.cats[0]["description"] == "Libraries for administrative interfaces."

    def test_entry_counts_nonzero(self):
        for category in self.cats:
            assert category["entry_count"] > 0, f"{category['name']} has 0 entries"

    def test_previews_nonempty(self):
        for category in self.cats:
            assert category["preview"], f"{category['name']} has empty preview"

    def test_content_html_nonempty(self):
        for category in self.cats:
            assert category["content_html"], f"{category['name']} has empty content_html"

    def test_algorithms_has_subcategories(self):
        algos = next(c for c in self.cats if c["name"] == "Algorithms and Design Patterns")
        assert 'class="subcat"' in algos["content_html"]

    def test_async_has_also_see(self):
        async_cat = next(c for c in self.cats if c["name"] == "Asynchronous Programming")
        entry = next(e for e in async_cat["entries"] if e["name"] == "asyncio")
        assert len(entry["also_see"]) >= 1
        assert entry["also_see"][0]["name"] == "awesome-asyncio"

    def test_description_links_stripped_to_text(self):
        algos = next(c for c in self.cats if c["name"] == "Algorithms and Design Patterns")
        description = algos["description"]
        assert "awesome-algorithms" in description
        assert "https://" not in description