Merge pull request #2971 from vinta/feature/markdown-it-py-parser

feat: replace regex README parser with markdown-it-py AST parser
This commit is contained in:
Vinta Chen
2026-03-18 20:35:06 +08:00
committed by GitHub
15 changed files with 2529 additions and 1443 deletions
+16 -9
View File
@@ -1,17 +1,24 @@
-include .env -include .env
export export
site_install: install:
uv sync --no-dev uv sync
site_fetch_stats: fetch_stats:
uv run python website/fetch_github_stars.py uv run python website/fetch_github_stars.py
site_build: test:
uv run pytest website/tests/ -v
build:
uv run python website/build.py uv run python website/build.py
site_preview: site_build preview: build
python -m http.server -d website/output/ 8000 @echo "Check the website on http://localhost:8000"
uv run watchmedo shell-command \
site_deploy: site_build --patterns='*.md;*.html;*.css;*.js;*.py' \
@echo "Deploy via GitHub Actions (push to master)" --recursive \
--wait --drop \
--command='uv run python website/build.py' \
README.md website/templates website/static website/data & \
python -m http.server -b 127.0.0.1 -d website/output/ 8000
+17 -9
View File
@@ -2,22 +2,30 @@
name = "awesome-python" name = "awesome-python"
version = "0.1.0" version = "0.1.0"
description = "An opinionated list of awesome Python frameworks, libraries, software and resources." description = "An opinionated list of awesome Python frameworks, libraries, software and resources."
authors = [{ name = "Vinta Chen", email = "vinta.chen@gmail.com" }]
readme = "README.md"
license = "MIT"
requires-python = ">=3.13" requires-python = ">=3.13"
dependencies = [ dependencies = []
"httpx==0.28.1",
"jinja2==3.1.6", [project.urls]
"markdown==3.10.2", Homepage = "https://awesome-python.com/"
] Repository = "https://github.com/vinta/awesome-python"
[dependency-groups] [dependency-groups]
build = ["httpx==0.28.1", "jinja2==3.1.6", "markdown-it-py==4.0.0"]
lint = ["ruff==0.15.6"]
test = ["pytest==9.0.2"]
dev = [ dev = [
"pytest==9.0.2", { include-group = "build" },
"ruff==0.15.6", { include-group = "lint" },
{ include-group = "test" },
"watchdog==6.0.0",
] ]
[tool.pytest.ini_options] [tool.pytest.ini_options]
testpaths = ["website/tests"] testpaths = ["website/tests"]
pythonpath = ["website"]
[tool.ruff] [tool.ruff]
target-version = "py313" line-length = 200
line-length = 100
Generated
+63 -14
View File
@@ -18,30 +18,46 @@ wheels = [
name = "awesome-python" name = "awesome-python"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown" },
]
[package.dev-dependencies] [package.dev-dependencies]
build = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown-it-py" },
]
dev = [ dev = [
{ name = "httpx" },
{ name = "jinja2" },
{ name = "markdown-it-py" },
{ name = "pytest" }, { name = "pytest" },
{ name = "ruff" }, { name = "ruff" },
{ name = "watchdog" },
]
lint = [
{ name = "ruff" },
]
test = [
{ name = "pytest" },
] ]
[package.metadata] [package.metadata]
requires-dist = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown", specifier = "==3.10.2" },
]
[package.metadata.requires-dev] [package.metadata.requires-dev]
build = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown-it-py", specifier = "==4.0.0" },
]
dev = [ dev = [
{ name = "httpx", specifier = "==0.28.1" },
{ name = "jinja2", specifier = "==3.1.6" },
{ name = "markdown-it-py", specifier = "==4.0.0" },
{ name = "pytest", specifier = "==9.0.2" }, { name = "pytest", specifier = "==9.0.2" },
{ name = "ruff", specifier = "==0.15.6" }, { name = "ruff", specifier = "==0.15.6" },
{ name = "watchdog", specifier = "==6.0.0" },
] ]
lint = [{ name = "ruff", specifier = "==0.15.6" }]
test = [{ name = "pytest", specifier = "==9.0.2" }]
[[package]] [[package]]
name = "certifi" name = "certifi"
@@ -129,12 +145,15 @@ wheels = [
] ]
[[package]] [[package]]
name = "markdown" name = "markdown-it-py"
version = "3.10.2" version = "4.0.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/2b/f4/69fa6ed85ae003c2378ffa8f6d2e3234662abd02c10d216c0ba96081a238/markdown-3.10.2.tar.gz", hash = "sha256:994d51325d25ad8aa7ce4ebaec003febcce822c3f8c911e3b17c52f7f589f950", size = 368805, upload-time = "2026-02-09T14:57:26.942Z" } dependencies = [
{ name = "mdurl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/de/1f/77fa3081e4f66ca3576c896ae5d31c3002ac6607f9747d2e3aa49227e464/markdown-3.10.2-py3-none-any.whl", hash = "sha256:e91464b71ae3ee7afd3017d9f358ef0baf158fd9a298db92f1d4761133824c36", size = 108180, upload-time = "2026-02-09T14:57:25.787Z" }, { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
] ]
[[package]] [[package]]
@@ -189,6 +208,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
] ]
[[package]]
name = "mdurl"
version = "0.1.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
]
[[package]] [[package]]
name = "packaging" name = "packaging"
version = "26.0" version = "26.0"
@@ -256,3 +284,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" }, { url = "https://files.pythonhosted.org/packages/89/7a/09ece68445ceac348df06e08bf75db72d0e8427765b96c9c0ffabc1be1d9/ruff-0.15.6-py3-none-win_amd64.whl", hash = "sha256:aee25bc84c2f1007ecb5037dff75cef00414fdf17c23f07dc13e577883dca406", size = 11787271, upload-time = "2026-03-12T23:05:20.168Z" },
{ url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" }, { url = "https://files.pythonhosted.org/packages/7f/d0/578c47dd68152ddddddf31cd7fc67dc30b7cdf639a86275fda821b0d9d98/ruff-0.15.6-py3-none-win_arm64.whl", hash = "sha256:c34de3dd0b0ba203be50ae70f5910b17188556630e2178fd7d79fc030eb0d837", size = 11060497, upload-time = "2026-03-12T23:05:25.968Z" },
] ]
[[package]]
name = "watchdog"
version = "6.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload-time = "2024-11-01T14:07:13.037Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480, upload-time = "2024-11-01T14:06:42.952Z" },
{ url = "https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451, upload-time = "2024-11-01T14:06:45.084Z" },
{ url = "https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057, upload-time = "2024-11-01T14:06:47.324Z" },
{ url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" },
{ url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" },
{ url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" },
{ url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" },
{ url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" },
{ url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" },
{ url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" },
{ url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" },
{ url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" },
{ url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" },
]
+19 -271
View File
@@ -7,9 +7,10 @@ import shutil
from pathlib import Path from pathlib import Path
from typing import TypedDict from typing import TypedDict
import markdown
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
from readme_parser import parse_readme, slugify
# Thematic grouping of categories. Each category name must match exactly # Thematic grouping of categories. Each category name must match exactly
# as it appears in README.md (the ## heading text). # as it appears in README.md (the ## heading text).
SECTION_GROUPS: list[tuple[str, list[str]]] = [ SECTION_GROUPS: list[tuple[str, list[str]]] = [
@@ -67,217 +68,6 @@ SECTION_GROUPS: list[tuple[str, list[str]]] = [
] ]
def slugify(name: str) -> str:
    """Turn a category name into a lowercase, dash-separated URL slug.

    Characters other than ASCII lowercase letters, digits, whitespace and
    dashes are removed; each whitespace run becomes one dash and repeated
    dashes are collapsed.
    """
    kept = re.sub(r"[^a-z0-9\s-]", "", name.lower()).strip()
    dashed = re.sub(r"[\s]+", "-", kept)
    return re.sub(r"-+", "-", dashed)
def count_entries(content: str) -> int:
    """Return how many lines of ``content`` are list items that open with a link.

    A line counts when, after optional leading whitespace, it has a ``-`` or
    ``*`` bullet, at least one whitespace character, and then ``[``.
    """
    entry_line = re.compile(r"\s*[-*]\s+\[")
    total = 0
    for row in content.split("\n"):
        if entry_line.match(row):
            total += 1
    return total
def extract_preview(content: str, *, max_names: int = 4) -> str:
    """Build a comma-separated preview of the first library names in ``content``.

    Only linked list items whose leading whitespace is at most 3 characters
    are used; deeper items are skipped. Collection stops once ``max_names``
    names have been gathered.
    """
    bullet_link = re.compile(r"^(\s*)[-*]\s+\[([^\]]+)\]", re.MULTILINE)
    picked: list[str] = []
    for hit in bullet_link.finditer(content):
        leading, label = hit.groups()
        if len(leading) <= 3:
            picked.append(label)
            # The limit is checked after appending, exactly as before.
            if len(picked) >= max_names:
                break
    return ", ".join(picked)
def render_content_html(content: str) -> str:
    """Render category markdown content to HTML with subcategory detection.

    Lines that are list items without links (e.g., "- Synchronous") are
    treated as subcategory headers and rendered as ``subcat`` dividers.

    Indent levels in the README:
    - 0 spaces: top-level entry or subcategory label
    - 2 spaces: entry under a subcategory (still a main entry)
    - 4+ spaces: sub-entry (e.g., awesome-django under django)

    All text taken from the README (labels, link names, URLs, descriptions)
    is HTML-escaped before being placed in the markup, so characters such as
    ``<``, ``&`` or ``"`` cannot break the generated tags or attributes.

    Args:
        content: Raw markdown body of one README section.

    Returns:
        One ``<div>`` per recognised line, joined with newlines. Lines that
        match none of the patterns are omitted.
    """
    from html import escape  # local import so the block needs no new file-level import

    with_desc = re.compile(
        r"^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*[-\u2013\u2014]\s*(.+)$"
    )
    link_only = re.compile(r"^\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*$")

    out: list[str] = []
    for line in content.split("\n"):
        stripped = line.strip()
        indent_len = len(line) - len(line.lstrip())

        # Detect subcategory labels: list items without links
        m = re.match(r"^[-*]\s+(.+)$", stripped)
        if m and "[" not in stripped:
            out.append(f'<div class="subcat">{escape(m.group(1))}</div>')
            continue

        # Entry with link and description: * [name](url) - Description.
        m = with_desc.match(line)
        desc = m.group(3) if m else None
        if m is None:
            # Link-only entry (no description): * [name](url)
            m = link_only.match(line)
        if m is None:
            continue

        anchor = f'<a href="{escape(m.group(2))}">{escape(m.group(1))}</a>'
        if indent_len > 3:
            # Sub-entries never show a description.
            out.append(f'<div class="entry-sub">{anchor}</div>')
        elif desc is not None:
            out.append(
                f'<div class="entry">{anchor}'
                f'<span class="sep">&mdash;</span>{escape(desc)}'
                f"</div>"
            )
        else:
            out.append(f'<div class="entry">{anchor}</div>')

    return "\n".join(out)
def parse_readme(text: str) -> tuple[list[dict], list[dict]]:
    """Split README.md text into category sections and resource sections.

    Categories are taken from the lines after the first ``---`` line (ignoring
    line 0) up to the ``# Resources`` heading; resources run from that heading
    up to ``# Contributing``. A missing heading extends the range to the end
    of the text.

    Returns:
        (categories, resources), each a list of dicts with keys:
        name, slug, description, content. Both are empty when no ``---``
        separator is found.
    """
    rows = text.split("\n")
    trimmed = [row.strip() for row in rows]

    rule_at = next(
        (pos for pos, row in enumerate(trimmed) if pos > 0 and row == "---"),
        None,
    )
    if rule_at is None:
        return [], []

    # When a heading occurs more than once, the last occurrence is used.
    resources_at = None
    contributing_at = None
    for pos, row in enumerate(trimmed):
        if row == "# Resources":
            resources_at = pos
        elif row == "# Contributing":
            contributing_at = pos

    if resources_at is None:
        category_rows = rows[rule_at + 1 :]
        resource_rows: list[str] = []
    else:
        category_rows = rows[rule_at + 1 : resources_at]
        stop = len(rows) if contributing_at is None else contributing_at
        resource_rows = rows[resources_at:stop]

    categories = _extract_sections(category_rows, level=2)
    resources = _extract_sections(resource_rows, level=2)
    return categories, resources
def _extract_sections(lines: list[str], *, level: int) -> list[dict]:
    """Group ``lines`` into sections keyed by headings of exactly ``level`` hashes.

    Lines before the first matching heading are ignored; deeper headings
    (one more ``#``) are kept as part of the current section's body.
    """
    marker = "#" * level + " "
    pending: list[tuple[str, list[str]]] = []

    for row in lines:
        opens_section = row.startswith(marker) and not row.startswith(marker + "#")
        if opens_section:
            pending.append((row[len(marker) :].strip(), []))
        elif pending:
            pending[-1][1].append(row)

    return [_build_section(title, body) for title, body in pending]
def _build_section(name: str, lines: list[str]) -> dict:
    """Assemble the section dict for ``name`` from its body lines.

    Blank lines at both ends are trimmed. If the first remaining line is
    wrapped in underscores (``_..._``) its inner text becomes the description
    and is removed from the content.
    """
    start, stop = 0, len(lines)
    while start < stop and not lines[start].strip():
        start += 1
    while stop > start and not lines[stop - 1].strip():
        stop -= 1
    body = lines[start:stop]

    summary = ""
    if body:
        italic = re.match(r"^_(.+)_$", body[0].strip())
        if italic is not None:
            summary = italic.group(1)
            skip = 1
            # Drop blank lines that follow the description line.
            while skip < len(body) and not body[skip].strip():
                skip += 1
            body = body[skip:]

    return {
        "name": name,
        "slug": slugify(name),
        "description": summary,
        "content": "\n".join(body).strip(),
    }
def render_markdown(text: str) -> str:
    """Convert markdown ``text`` to HTML with the ``extra`` extension enabled."""
    return markdown.Markdown(extensions=["extra"]).convert(text)
def strip_markdown_links(text: str) -> str:
    """Reduce every ``[text](url)`` link to its text, for plain-text contexts."""
    inline_link = re.compile(r"\[([^\]]+)\]\([^)]+\)")
    return inline_link.sub(r"\1", text)
def render_inline_markdown(text: str) -> str:
    """Render inline markdown (links, bold, italic) to an HTML ``Markup`` value.

    The wrapping ``<p>...</p>`` is removed when the rendered output is a
    single-line paragraph, and every ``<a `` tag gets ``target="_blank"`` and
    ``rel="noopener"`` so links open externally.
    """
    from markupsafe import Markup

    rendered = markdown.markdown(text).strip()
    # Strip wrapping <p>...</p> since this is inline content
    unwrapped = re.sub(r"^<p>(.*)</p>$", r"\1", rendered)
    # Add target/rel to links for external navigation
    external = unwrapped.replace("<a ", '<a target="_blank" rel="noopener" ')
    return Markup(external)
def group_categories( def group_categories(
categories: list[dict], categories: list[dict],
resources: list[dict], resources: list[dict],
@@ -285,10 +75,11 @@ def group_categories(
"""Organize categories and resources into thematic section groups.""" """Organize categories and resources into thematic section groups."""
cat_by_name = {c["name"]: c for c in categories} cat_by_name = {c["name"]: c for c in categories}
groups = [] groups = []
grouped_names: set[str] = set()
for group_name, cat_names in SECTION_GROUPS: for group_name, cat_names in SECTION_GROUPS:
grouped_names.update(cat_names)
if group_name == "Resources": if group_name == "Resources":
# Resources group uses parsed resources directly
group_cats = list(resources) group_cats = list(resources)
else: else:
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name] group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
@@ -301,9 +92,6 @@ def group_categories(
}) })
# Any categories not in a group go into "Other" # Any categories not in a group go into "Other"
grouped_names = set()
for _, cat_names in SECTION_GROUPS:
grouped_names.update(cat_names)
ungrouped = [c for c in categories if c["name"] not in grouped_names] ungrouped = [c for c in categories if c["name"] not in grouped_names]
if ungrouped: if ungrouped:
groups.append({ groups.append({
@@ -323,13 +111,13 @@ class Entry(TypedDict):
group: str group: str
stars: int | None stars: int | None
owner: str | None owner: str | None
pushed_at: str | None last_commit_at: str | None
class StarData(TypedDict): class StarData(TypedDict):
stars: int stars: int
owner: str owner: str
pushed_at: str last_commit_at: str
fetched_at: str fetched_at: str
@@ -367,7 +155,6 @@ def sort_entries(entries: list[dict]) -> list[dict]:
def extract_entries( def extract_entries(
categories: list[dict], categories: list[dict],
resources: list[dict],
groups: list[dict], groups: list[dict],
) -> list[dict]: ) -> list[dict]:
"""Flatten categories into individual library entries for table display.""" """Flatten categories into individual library entries for table display."""
@@ -379,37 +166,17 @@ def extract_entries(
entries: list[dict] = [] entries: list[dict] = []
for cat in categories: for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other") group_name = cat_to_group.get(cat["name"], "Other")
last_entry_indent = -1 for entry in cat["entries"]:
for line in cat["content"].split("\n"):
indent_len = len(line) - len(line.lstrip())
# Link-only sub-item deeper than parent → "also see"
m_sub = re.match(r"\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*$", line)
if m_sub and indent_len > last_entry_indent >= 0 and entries:
entries[-1]["also_see"].append({
"name": m_sub.group(1),
"url": m_sub.group(2),
})
continue
if indent_len > 3:
continue
m = re.match(
r"\s*[-*]\s+\[([^\]]+)\]\(([^)]+)\)\s*(?:[-\u2013\u2014]\s*(.+))?$",
line,
)
if m:
last_entry_indent = indent_len
entries.append({ entries.append({
"name": m.group(1), "name": entry["name"],
"url": m.group(2), "url": entry["url"],
"description": render_inline_markdown(m.group(3)) if m.group(3) else "", "description": entry["description"],
"category": cat["name"], "category": cat["name"],
"group": group_name, "group": group_name,
"stars": None, "stars": None,
"owner": None, "owner": None,
"pushed_at": None, "last_commit_at": None,
"also_see": [], "also_see": entry["also_see"],
}) })
return entries return entries
@@ -420,7 +187,6 @@ def build(repo_root: str) -> None:
website = repo / "website" website = repo / "website"
readme_text = (repo / "README.md").read_text(encoding="utf-8") readme_text = (repo / "README.md").read_text(encoding="utf-8")
# Extract subtitle from the first non-empty, non-heading line
subtitle = "" subtitle = ""
for line in readme_text.split("\n"): for line in readme_text.split("\n"):
stripped = line.strip() stripped = line.strip()
@@ -429,47 +195,33 @@ def build(repo_root: str) -> None:
break break
categories, resources = parse_readme(readme_text) categories, resources = parse_readme(readme_text)
# All fields pre-computed: entry_count, content_html, preview, description
# Enrich with entry counts, rendered HTML, previews, and clean descriptions
for cat in categories + resources:
cat["entry_count"] = count_entries(cat["content"])
cat["content_html"] = render_content_html(cat["content"])
cat["preview"] = extract_preview(cat["content"])
cat["description"] = strip_markdown_links(cat["description"])
total_entries = sum(c["entry_count"] for c in categories) total_entries = sum(c["entry_count"] for c in categories)
# Organize into groups
groups = group_categories(categories, resources) groups = group_categories(categories, resources)
entries = extract_entries(categories, groups)
# Flatten entries for table view
entries = extract_entries(categories, resources, groups)
# Load and merge GitHub star data
stars_data = load_stars(website / "data" / "github_stars.json") stars_data = load_stars(website / "data" / "github_stars.json")
for entry in entries: for entry in entries:
repo_key = extract_github_repo(entry["url"]) repo_key = extract_github_repo(entry["url"])
if repo_key and repo_key in stars_data: if repo_key and repo_key in stars_data:
entry["stars"] = stars_data[repo_key]["stars"] sd = stars_data[repo_key]
entry["owner"] = stars_data[repo_key]["owner"] entry["stars"] = sd["stars"]
entry["pushed_at"] = stars_data[repo_key].get("pushed_at", "") entry["owner"] = sd["owner"]
entry["last_commit_at"] = sd.get("last_commit_at", "")
# Sort by stars descending
entries = sort_entries(entries) entries = sort_entries(entries)
# Set up Jinja2
env = Environment( env = Environment(
loader=FileSystemLoader(website / "templates"), loader=FileSystemLoader(website / "templates"),
autoescape=True, autoescape=True,
) )
# Output directory
site_dir = website / "output" site_dir = website / "output"
if site_dir.exists(): if site_dir.exists():
shutil.rmtree(site_dir) shutil.rmtree(site_dir)
site_dir.mkdir(parents=True) site_dir.mkdir(parents=True)
# Generate single index.html
tpl_index = env.get_template("index.html") tpl_index = env.get_template("index.html")
(site_dir / "index.html").write_text( (site_dir / "index.html").write_text(
tpl_index.render( tpl_index.render(
@@ -484,14 +236,10 @@ def build(repo_root: str) -> None:
encoding="utf-8", encoding="utf-8",
) )
# Copy static assets
static_src = website / "static" static_src = website / "static"
static_dst = site_dir / "static" static_dst = site_dir / "static"
if static_src.exists(): if static_src.exists():
shutil.copytree(static_src, static_dst) shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
# Write CNAME
(site_dir / "CNAME").write_text("awesome-python.com\n", encoding="utf-8")
print(f"Built single page with {len(categories)} categories + {len(resources)} resources") print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
print(f"Total entries: {total_entries}") print(f"Total entries: {total_entries}")
File diff suppressed because it is too large Load Diff
+8 -17
View File
@@ -10,14 +10,14 @@ from pathlib import Path
import httpx import httpx
from build import extract_github_repo from build import extract_github_repo, load_stars
CACHE_MAX_AGE_DAYS = 7 CACHE_MAX_AGE_DAYS = 7
DATA_DIR = Path(__file__).parent / "data" DATA_DIR = Path(__file__).parent / "data"
CACHE_FILE = DATA_DIR / "github_stars.json" CACHE_FILE = DATA_DIR / "github_stars.json"
README_PATH = Path(__file__).parent.parent / "README.md" README_PATH = Path(__file__).parent.parent / "README.md"
GRAPHQL_URL = "https://api.github.com/graphql" GRAPHQL_URL = "https://api.github.com/graphql"
BATCH_SIZE = 100 BATCH_SIZE = 50
def extract_github_repos(text: str) -> set[str]: def extract_github_repos(text: str) -> set[str]:
@@ -30,17 +30,6 @@ def extract_github_repos(text: str) -> set[str]:
return repos return repos
def load_cache() -> dict:
"""Load the star cache from disk. Returns empty dict if missing or corrupt."""
if CACHE_FILE.exists():
try:
return json.loads(CACHE_FILE.read_text(encoding="utf-8"))
except json.JSONDecodeError:
print(f"Warning: corrupt cache at {CACHE_FILE}, starting fresh.", file=sys.stderr)
return {}
return {}
def save_cache(cache: dict) -> None: def save_cache(cache: dict) -> None:
"""Write the star cache to disk, creating data/ dir if needed.""" """Write the star cache to disk, creating data/ dir if needed."""
DATA_DIR.mkdir(parents=True, exist_ok=True) DATA_DIR.mkdir(parents=True, exist_ok=True)
@@ -61,7 +50,7 @@ def build_graphql_query(repos: list[str]) -> str:
continue continue
parts.append( parts.append(
f'repo_{i}: repository(owner: "{owner}", name: "{name}") ' f'repo_{i}: repository(owner: "{owner}", name: "{name}") '
f"{{ stargazerCount pushedAt owner {{ login }} }}" f"{{ stargazerCount owner {{ login }} defaultBranchRef {{ target {{ ... on Commit {{ committedDate }} }} }} }}"
) )
if not parts: if not parts:
return "" return ""
@@ -78,10 +67,12 @@ def parse_graphql_response(
node = data.get(f"repo_{i}") node = data.get(f"repo_{i}")
if node is None: if node is None:
continue continue
default_branch = node.get("defaultBranchRef") or {}
target = default_branch.get("target") or {}
result[repo] = { result[repo] = {
"stars": node.get("stargazerCount", 0), "stars": node.get("stargazerCount", 0),
"owner": node.get("owner", {}).get("login", ""), "owner": node.get("owner", {}).get("login", ""),
"pushed_at": node.get("pushedAt", ""), "last_commit_at": target.get("committedDate", ""),
} }
return result return result
@@ -114,7 +105,7 @@ def main() -> None:
current_repos = extract_github_repos(readme_text) current_repos = extract_github_repos(readme_text)
print(f"Found {len(current_repos)} GitHub repos in README.md") print(f"Found {len(current_repos)} GitHub repos in README.md")
cache = load_cache() cache = load_stars(CACHE_FILE)
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
# Prune entries not in current README # Prune entries not in current README
@@ -173,7 +164,7 @@ def main() -> None:
cache[repo] = { cache[repo] = {
"stars": results[repo]["stars"], "stars": results[repo]["stars"],
"owner": results[repo]["owner"], "owner": results[repo]["owner"],
"pushed_at": results[repo]["pushed_at"], "last_commit_at": results[repo]["last_commit_at"],
"fetched_at": now_iso, "fetched_at": now_iso,
} }
fetched_count += 1 fetched_count += 1
+388
View File
@@ -0,0 +1,388 @@
"""Parse README.md into structured section data using markdown-it-py AST."""
from __future__ import annotations
import re
from typing import TypedDict
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
from markupsafe import escape
class AlsoSee(TypedDict):
    """A related link attached to an entry: its display name and URL."""

    name: str
    url: str
class ParsedEntry(TypedDict):
    """One linked list item parsed from the README."""

    name: str
    url: str
    description: str  # inline HTML, properly escaped
    also_see: list[AlsoSee]
class ParsedSection(TypedDict):
    """One README section together with its pre-computed display fields."""

    name: str
    slug: str
    description: str  # plain text, links resolved to text
    entries: list[ParsedEntry]
    entry_count: int
    preview: str
    content_html: str  # rendered HTML, properly escaped
# --- Slugify ----------------------------------------------------------------
# Pre-compiled patterns used by slugify().
_SLUG_NON_ALNUM_RE = re.compile(r"[^a-z0-9\s-]")
_SLUG_WHITESPACE_RE = re.compile(r"[\s]+")
_SLUG_MULTI_DASH_RE = re.compile(r"-+")


def slugify(name: str) -> str:
    """Turn a category name into a lowercase, dash-separated URL slug.

    Characters other than ASCII lowercase letters, digits, whitespace and
    dashes are removed; each whitespace run becomes one dash and repeated
    dashes are collapsed.
    """
    cleaned = _SLUG_NON_ALNUM_RE.sub("", name.lower()).strip()
    dashed = _SLUG_WHITESPACE_RE.sub("-", cleaned)
    return _SLUG_MULTI_DASH_RE.sub("-", dashed)
# --- Inline renderers -------------------------------------------------------
def render_inline_html(children: list[SyntaxTreeNode]) -> str:
    """Render inline AST nodes to an HTML string, escaping all text content.

    Handles text, soft breaks, links, emphasis, strong, inline code and inline
    HTML (which is escaped, not passed through). Nodes of any other type
    contribute nothing to the output.
    """
    fragments: list[str] = []
    for node in children:
        kind = node.type
        if kind == "text" or kind == "html_inline":
            fragments.append(str(escape(node.content)))
        elif kind == "softbreak":
            fragments.append(" ")
        elif kind == "link":
            href = str(escape(node.attrGet("href") or ""))
            inner = render_inline_html(node.children)
            fragments.append(
                f'<a href="{href}" target="_blank" rel="noopener">{inner}</a>'
            )
        elif kind == "em":
            fragments.append(f"<em>{render_inline_html(node.children)}</em>")
        elif kind == "strong":
            fragments.append(f"<strong>{render_inline_html(node.children)}</strong>")
        elif kind == "code_inline":
            fragments.append(f"<code>{escape(node.content)}</code>")
    return "".join(fragments)
def render_inline_text(children: list[SyntaxTreeNode]) -> str:
    """Flatten inline AST nodes to plain text.

    Text and inline-code content is kept verbatim, soft breaks become a
    space, and emphasis / strong / link nodes are replaced by the text of
    their children. Nodes of any other type are omitted.
    """
    pieces: list[str] = []
    for node in children:
        kind = node.type
        if kind == "text" or kind == "code_inline":
            pieces.append(node.content)
        elif kind == "softbreak":
            pieces.append(" ")
        elif kind in ("em", "strong", "link"):
            pieces.append(render_inline_text(node.children))
    return "".join(pieces)
# --- AST helpers -------------------------------------------------------------
def _heading_text(node: SyntaxTreeNode) -> str:
    """Return the plain text of a heading's first inline child, or "" if it has none."""
    inline = next((kid for kid in node.children if kid.type == "inline"), None)
    if inline is None:
        return ""
    return render_inline_text(inline.children)
def _extract_description(nodes: list[SyntaxTreeNode]) -> str:
    """Return the section description when the first node is an all-italic paragraph.

    Pattern: _Libraries for foo._ -> "Libraries for foo."

    The first node must be a paragraph with an inline child whose only child
    is an ``em`` node; in every other case the result is "".
    """
    if not nodes or nodes[0].type != "paragraph":
        return ""
    for part in nodes[0].children:
        if part.type != "inline" or len(part.children) != 1:
            continue
        only = part.children[0]
        if only.type == "em":
            return render_inline_text(only.children)
    return ""
# --- Entry extraction --------------------------------------------------------
# Matches a description separator at the very start of a string: optional
# whitespace, one hyphen, en dash (U+2013) or em dash (U+2014), then optional whitespace.
_DESC_SEP_RE = re.compile(r"^\s*[-\u2013\u2014]\s*")
def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
    """Return the first direct child of ``node`` whose type is ``child_type``, or None."""
    return next((c for c in node.children if c.type == child_type), None)
def _find_inline(node: SyntaxTreeNode) -> SyntaxTreeNode | None:
    """Return the ``inline`` node inside ``node``'s first ``paragraph`` child.

    Returns None when ``node`` has no paragraph child, or when that paragraph
    has no inline child.
    """
    paragraph = _find_child(node, "paragraph")
    return None if paragraph is None else _find_child(paragraph, "inline")
def _find_first_link(inline: SyntaxTreeNode) -> SyntaxTreeNode | None:
    """Return the first ``link`` node among the direct children of ``inline``, or None."""
    return _find_child(inline, "link")
def _is_leading_link(inline: SyntaxTreeNode, link: SyntaxTreeNode) -> bool:
    """Tell whether ``link`` is the very first child of ``inline``.

    A leading link marks a real entry; a link preceded by other content is
    treated by callers as part of a subcategory label.
    """
    kids = inline.children
    if not kids:
        return False
    # Identity, not equality: it must be the same node object.
    return kids[0] is link
def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode) -> str:
    """Render everything after ``first_link`` inside ``inline`` as description HTML.

    AST: ``[link("name"), text(" - Description.")]`` -> ``"Description."``

    The leading separator (hyphen / en dash / em dash, with surrounding
    whitespace) is stripped from the rendered HTML. Returns "" when
    ``first_link`` is not a child of ``inline`` or nothing follows it.
    """
    siblings = inline.children
    for position, node in enumerate(siblings):
        if node is first_link:
            break
    else:
        # first_link is not one of inline's direct children.
        return ""
    trailing = siblings[position + 1 :]
    if not trailing:
        return ""
    return _DESC_SEP_RE.sub("", render_inline_html(trailing))
def _parse_list_entries(bullet_list: SyntaxTreeNode) -> list[ParsedEntry]:
    """Collect entries from a ``bullet_list`` AST node.

    Three list-item shapes are recognised:
    - No leading link (plain text, or text before a link): a subcategory
      label. The item itself yields no entry; its nested list is parsed
      recursively and those entries are added to the result.
    - Leading link with a nested list: an entry whose ``also_see`` holds the
      first link of each nested item.
    - Leading link without nesting: a simple entry with empty ``also_see``.

    Children that are not ``list_item`` nodes, and items without an inline
    paragraph, are skipped.
    """
    collected: list[ParsedEntry] = []
    for item in bullet_list.children:
        if item.type != "list_item":
            continue
        inline = _find_inline(item)
        if inline is None:
            continue

        lead = _find_first_link(inline)
        sublist = _find_child(item, "bullet_list")

        if lead is None or not _is_leading_link(inline, lead):
            # Subcategory label: only its nested entries count.
            if sublist:
                collected.extend(_parse_list_entries(sublist))
            continue

        # Nested items under a real entry become "also see" references.
        related: list[AlsoSee] = []
        if sublist:
            for sub_item in sublist.children:
                if sub_item.type != "list_item":
                    continue
                sub_inline = _find_inline(sub_item)
                sub_link = _find_first_link(sub_inline) if sub_inline else None
                if sub_link:
                    related.append(AlsoSee(
                        name=render_inline_text(sub_link.children),
                        url=sub_link.attrGet("href") or "",
                    ))

        collected.append(ParsedEntry(
            name=render_inline_text(lead.children),
            url=lead.attrGet("href") or "",
            description=_extract_description_html(inline, lead),
            also_see=related,
        ))
    return collected
def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEntry]:
    """Gather the entries of every top-level ``bullet_list`` in a section, in order."""
    found: list[ParsedEntry] = []
    for bullet_list in (n for n in content_nodes if n.type == "bullet_list"):
        found.extend(_parse_list_entries(bullet_list))
    return found
# --- Content HTML rendering --------------------------------------------------
def _render_bullet_list_html(
    bullet_list: SyntaxTreeNode,
    *,
    is_sub: bool = False,
) -> str:
    """Render a ``bullet_list`` node as newline-joined ``<div>`` blocks.

    Items without a leading link become ``<div class="subcat">`` labels and
    their nested list is rendered as regular entries. Items with a leading
    link become ``<div class="entry">`` (with the description after an
    em-dash separator when one exists), or ``<div class="entry-sub">`` when
    ``is_sub`` is true; their nested list is rendered as sub-entries.
    """
    chunks: list[str] = []
    for item in bullet_list.children:
        if item.type != "list_item":
            continue
        inline = _find_inline(item)
        if inline is None:
            continue

        lead = _find_first_link(inline)
        sublist = _find_child(item, "bullet_list")
        is_entry = lead is not None and _is_leading_link(inline, lead)

        if not is_entry:
            # Subcategory label (plain text or text-before-link).
            label = str(escape(render_inline_text(inline.children)))
            chunks.append(f'<div class="subcat">{label}</div>')
            if sublist:
                chunks.append(_render_bullet_list_html(sublist, is_sub=False))
            continue

        name = str(escape(render_inline_text(lead.children)))
        url = str(escape(lead.attrGet("href") or ""))

        if is_sub:
            chunks.append(f'<div class="entry-sub"><a href="{url}">{name}</a></div>')
        else:
            # The description is already HTML; it must not be escaped again.
            desc = _extract_description_html(inline, lead)
            if desc:
                chunks.append(
                    f'<div class="entry"><a href="{url}">{name}</a>'
                    f'<span class="sep">&mdash;</span>{desc}</div>'
                )
            else:
                chunks.append(f'<div class="entry"><a href="{url}">{name}</a></div>')

        # Anything nested under a linked entry is rendered as sub-entries.
        if sublist:
            chunks.append(_render_bullet_list_html(sublist, is_sub=True))
    return "\n".join(chunks)
def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str:
    """Render every top-level ``bullet_list`` of a section, joined by newlines.

    Nodes of any other type are ignored.
    """
    return "\n".join(
        _render_bullet_list_html(node)
        for node in content_nodes
        if node.type == "bullet_list"
    )
# --- Section splitting -------------------------------------------------------
def _group_by_h2(
    nodes: list[SyntaxTreeNode],
) -> list[ParsedSection]:
    """Split AST nodes into one ``ParsedSection`` per ``h2`` heading.

    Each section owns the nodes between its heading and the next ``h2``.
    Nodes that appear before the first ``h2`` are dropped.
    """

    def build(name: str, body: list[SyntaxTreeNode]) -> ParsedSection:
        description = _extract_description(body)
        # When a description was found it came from body[0]; exclude that
        # paragraph from the section content.
        content_nodes = body[1:] if description else body
        entries = _parse_section_entries(content_nodes)
        also_see_total = sum(len(entry["also_see"]) for entry in entries)
        return ParsedSection(
            name=name,
            slug=slugify(name),
            description=description,
            entries=entries,
            entry_count=len(entries) + also_see_total,
            preview=", ".join(entry["name"] for entry in entries[:4]),
            content_html=_render_section_html(content_nodes),
        )

    groups: list[tuple[str, list[SyntaxTreeNode]]] = []
    for node in nodes:
        if node.type == "heading" and node.tag == "h2":
            groups.append((_heading_text(node), []))
        elif groups:
            groups[-1][1].append(node)

    return [build(name, body) for name, body in groups]
def parse_readme(text: str) -> tuple[list[ParsedSection], list[ParsedSection]]:
    """Parse README.md text into categories and resources.

    The README is parsed with the CommonMark preset. Categories are the ``h2``
    sections between the first thematic break (``---``) and the ``# Resources``
    heading (or ``# Contributing``, or the end of the document when those are
    absent). Resources are the ``h2`` sections between ``# Resources`` and
    ``# Contributing`` (or the end of the document).

    Returns:
        ``(categories, resources)``, each a list of ``ParsedSection`` dicts.
        Both lists are empty when the document has no thematic break.
    """
    md = MarkdownIt("commonmark")
    root = SyntaxTreeNode(md.parse(text))
    children = root.children

    # Locate the first thematic break and the "Resources" / "Contributing"
    # h1 headings in a single pass (the last matching heading wins).
    hr_idx: int | None = None
    resources_idx: int | None = None
    contributing_idx: int | None = None
    for i, node in enumerate(children):
        if hr_idx is None and node.type == "hr":
            hr_idx = i
        elif node.type == "heading" and node.tag == "h1":
            title = _heading_text(node)
            if title == "Resources":
                resources_idx = i
            elif title == "Contributing":
                contributing_idx = i

    if hr_idx is None:
        return [], []

    # Compare against None explicitly: index 0 is a valid position but is
    # falsy, so an `a or b` chain would treat it as "not found".
    end_of_doc = len(children)
    res_end = contributing_idx if contributing_idx is not None else end_of_doc
    cat_end = resources_idx if resources_idx is not None else res_end

    cat_nodes = children[hr_idx + 1 : cat_end]
    res_nodes: list[SyntaxTreeNode] = []
    if resources_idx is not None:
        res_nodes = children[resources_idx + 1 : res_end]

    return _group_by_h2(cat_nodes), _group_by_h2(res_nodes)
+6
View File
@@ -0,0 +1,6 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
<path d="M8 2h16a6 6 0 0 1 6 6v8H2V8a6 6 0 0 1 6-6z" fill="#1d5fa6"/>
<path d="M2 16h28v8a6 6 0 0 1-6 6H8a6 6 0 0 1-6-6z" fill="#f0c73e"/>
<circle cx="11.5" cy="9.5" r="2.2" fill="#f0c73e"/>
<circle cx="20.5" cy="22.5" r="2.2" fill="#1d5fa6"/>
</svg>

After

Width:  |  Height:  |  Size: 320 B

+142 -4
View File
@@ -1,15 +1,44 @@
// State // State
var activeFilter = null; // { type: "cat"|"group", value: "..." } var activeFilter = null; // { type: "cat"|"group", value: "..." }
var activeSort = { col: 'stars', order: 'desc' };
var searchInput = document.querySelector('.search'); var searchInput = document.querySelector('.search');
var filterBar = document.querySelector('.filter-bar'); var filterBar = document.querySelector('.filter-bar');
var filterValue = document.querySelector('.filter-value'); var filterValue = document.querySelector('.filter-value');
var filterClear = document.querySelector('.filter-clear'); var filterClear = document.querySelector('.filter-clear');
var noResults = document.querySelector('.no-results'); var noResults = document.querySelector('.no-results');
var countEl = document.querySelector('.count');
var rows = document.querySelectorAll('.table tbody tr.row'); var rows = document.querySelectorAll('.table tbody tr.row');
var tags = document.querySelectorAll('.tag'); var tags = document.querySelectorAll('.tag');
var tbody = document.querySelector('.table tbody'); var tbody = document.querySelector('.table tbody');
// Relative time formatting
/**
 * Format a timestamp as a coarse English "time ago" label.
 *
 * @param {string} isoStr - Date string accepted by the Date constructor.
 * @returns {string} One of: "just now", "N hour(s) ago", "yesterday",
 *   "N days ago", "N month(s) ago", "N year(s) ago". If isoStr cannot be
 *   parsed it is returned unchanged (as a string).
 */
function relativeTime(isoStr) {
  var then = new Date(isoStr).getTime();
  // An unparsable date would otherwise fall through every comparison and
  // render as "NaN years ago".
  if (isNaN(then)) return String(isoStr);

  var diffMs = Date.now() - then;
  var diffHours = Math.floor(diffMs / 3600000);
  var diffDays = Math.floor(diffMs / 86400000);

  // Timestamps in the future also land here (negative diff).
  if (diffHours < 1) return 'just now';
  if (diffHours < 24) return diffHours === 1 ? '1 hour ago' : diffHours + ' hours ago';
  if (diffDays === 1) return 'yesterday';
  if (diffDays < 30) return diffDays + ' days ago';

  if (diffDays < 365) {
    // Months are approximated as 30 days, so days 360-364 would compute to
    // 12 months; cap at 11 so they do not fall through to "0 years ago".
    var diffMonths = Math.min(Math.floor(diffDays / 30), 11);
    return diffMonths === 1 ? '1 month ago' : diffMonths + ' months ago';
  }

  var diffYears = Math.floor(diffDays / 365);
  return diffYears === 1 ? '1 year ago' : diffYears + ' years ago';
}
// Format all commit date cells: for every cell carrying a data-commit
// attribute, replace the text of its <time> element with the relative label
// computed from that attribute.
document.querySelectorAll('.col-commit[data-commit]').forEach(function (td) {
var time = td.querySelector('time');
if (time) time.textContent = relativeTime(td.dataset.commit);
});
// Store original row order for sort reset, and remember each row's next
// sibling so sortRows() can re-append the two rows together.
rows.forEach(function (row, i) {
row._origIndex = i;
row._expandRow = row.nextElementSibling;
});
function collapseAll() { function collapseAll() {
var openRows = document.querySelectorAll('.table tbody tr.row.open'); var openRows = document.querySelectorAll('.table tbody tr.row.open');
openRows.forEach(function (row) { openRows.forEach(function (row) {
@@ -46,16 +75,18 @@ function applyFilters() {
show = row._searchText.includes(query); show = row._searchText.includes(query);
} }
row.hidden = !show; if (row.hidden !== !show) row.hidden = !show;
if (show) { if (show) {
visibleCount++; visibleCount++;
row.querySelector('.col-num').textContent = String(visibleCount); var numCell = row.cells[0];
if (numCell.textContent !== String(visibleCount)) {
numCell.textContent = String(visibleCount);
}
} }
}); });
if (noResults) noResults.hidden = visibleCount > 0; if (noResults) noResults.hidden = visibleCount > 0;
if (countEl) countEl.textContent = visibleCount;
// Update tag highlights // Update tag highlights
tags.forEach(function (tag) { tags.forEach(function (tag) {
@@ -74,6 +105,76 @@ function applyFilters() {
filterBar.hidden = true; filterBar.hidden = true;
} }
} }
updateURL();
}
/**
 * Mirror the current search text, active filter and sort into the query
 * string via history.replaceState (no new history entry is added).
 * The sort is written only when it differs from the default (stars, desc).
 */
function updateURL() {
  var params = new URLSearchParams();

  var query = '';
  if (searchInput) query = searchInput.value.trim();
  if (query) params.set('q', query);

  if (activeFilter) {
    var filterKey = activeFilter.type === 'cat' ? 'category' : 'group';
    params.set(filterKey, activeFilter.value);
  }

  var isDefaultSort = activeSort.col === 'stars' && activeSort.order === 'desc';
  if (!isDefaultSort) {
    params.set('sort', activeSort.col);
    params.set('order', activeSort.order);
  }

  var qs = params.toString();
  // With no parameters, fall back to the bare path to clear the query string.
  history.replaceState(null, '', qs ? '?' + qs : location.pathname);
}
/**
 * Extract the comparable value of a table row for the given sort column.
 *
 * @param {Element} row - A table row exposing querySelector().
 * @param {string} col - 'name', 'stars' or 'commit-time'.
 * @returns {string|number} Lower-cased link text for 'name' ('' when the
 *   link is missing); the star count for 'stars' (-1 when missing or not a
 *   number); epoch milliseconds for 'commit-time' (0 when missing or
 *   unparsable); 0 for any other column.
 */
function getSortValue(row, col) {
  if (col === 'name') {
    var link = row.querySelector('.col-name a');
    return link ? link.textContent.trim().toLowerCase() : '';
  }
  if (col === 'stars') {
    var starsCell = row.querySelector('.col-stars');
    if (!starsCell) return -1;
    // Star counts are rendered with thousands separators, e.g. "12,345".
    var stars = parseInt(starsCell.textContent.trim().replace(/,/g, ''), 10);
    return isNaN(stars) ? -1 : stars;
  }
  if (col === 'commit-time') {
    var commitCell = row.querySelector('.col-commit');
    var iso = commitCell ? commitCell.getAttribute('data-commit') : null;
    if (!iso) return 0;
    var ms = new Date(iso).getTime();
    // NaN would slip past the sort comparator's "<= 0 means missing" checks
    // and make the comparison inconsistent, so map it to the missing value.
    return isNaN(ms) ? 0 : ms;
  }
  return 0;
}
/**
 * Reorder the table rows according to activeSort (or back to their original
 * order when activeSort is unset), re-attach each row followed by its expand
 * row, then re-apply the filters.
 */
function sortRows() {
  function byOriginalOrder(a, b) {
    return a._origIndex - b._origIndex;
  }

  function byActiveSort(a, b) {
    var col = activeSort.col;
    var sign = activeSort.order === 'desc' ? -1 : 1;
    var left = getSortValue(a, col);
    var right = getSortValue(b, col);
    var cmp;

    if (col === 'name') {
      cmp = left < right ? -1 : left > right ? 1 : 0;
    } else {
      // Non-positive values mean "no data": such rows always go last,
      // whatever the sort direction.
      var leftMissing = left <= 0;
      var rightMissing = right <= 0;
      if (leftMissing && rightMissing) return byOriginalOrder(a, b);
      if (leftMissing) return 1;
      if (rightMissing) return -1;
      cmp = left - right;
    }

    // Ties keep the original document order.
    return cmp === 0 ? byOriginalOrder(a, b) : sign * cmp;
  }

  var ordered = Array.from(rows);
  ordered.sort(activeSort ? byActiveSort : byOriginalOrder);

  // appendChild moves an existing node, so this re-sequences the tbody.
  ordered.forEach(function (row) {
    tbody.appendChild(row);
    tbody.appendChild(row._expandRow);
  });

  applyFilters();
}
function updateSortIndicators() {
document.querySelectorAll('th[data-sort]').forEach(function (th) {
th.classList.remove('sort-asc', 'sort-desc');
if (activeSort && th.dataset.sort === activeSort.col) {
th.classList.add('sort-' + activeSort.order);
}
});
} }
// Expand/collapse: event delegation on tbody // Expand/collapse: event delegation on tbody
@@ -130,6 +231,23 @@ if (filterClear) {
}); });
} }
// Column sorting
// Clicking a sortable header cycles: the column's default order -> the
// opposite order -> back to the global default (stars, descending).
document.querySelectorAll('th[data-sort]').forEach(function (th) {
  th.addEventListener('click', function () {
    var col = th.dataset.sort;
    // Names start ascending; every other column starts descending.
    var defaultOrder = col === 'name' ? 'asc' : 'desc';
    var altOrder = defaultOrder === 'asc' ? 'desc' : 'asc';

    var next;
    if (!activeSort || activeSort.col !== col) {
      next = { col: col, order: defaultOrder };
    } else if (activeSort.order === defaultOrder) {
      next = { col: col, order: altOrder };
    } else {
      next = { col: 'stars', order: 'desc' };
    }
    activeSort = next;

    sortRows();
    updateSortIndicators();
  });
});
// Search input // Search input
if (searchInput) { if (searchInput) {
var searchTimer; var searchTimer;
@@ -152,3 +270,23 @@ if (searchInput) {
} }
}); });
} }
// Restore state from URL
// On load, read q / category / group / sort / order from the query string and
// apply them to the search box, the active filter and the active sort.
(function () {
  var params = new URLSearchParams(location.search);
  var q = params.get('q');
  var cat = params.get('category');
  var group = params.get('group');
  var sort = params.get('sort');
  var order = params.get('order');

  if (q && searchInput) searchInput.value = q;

  // A category filter takes precedence over a group filter.
  if (cat) {
    activeFilter = { type: 'cat', value: cat };
  } else if (group) {
    activeFilter = { type: 'group', value: group };
  }

  // Only accept known column/order combinations from the URL.
  var sortOk = ['name', 'stars', 'commit-time'].indexOf(sort) !== -1;
  var orderOk = order === 'desc' || order === 'asc';
  if (sortOk && orderOk) {
    activeSort = { col: sort, order: order };
  }

  if (q || cat || group || sort) {
    sortRows();
  }
  updateSortIndicators();
})();
+132 -23
View File
@@ -23,6 +23,8 @@
--accent-light: oklch(97% 0.015 240); --accent-light: oklch(97% 0.015 240);
--highlight: oklch(93% 0.10 90); --highlight: oklch(93% 0.10 90);
--highlight-text: oklch(35% 0.10 90); --highlight-text: oklch(35% 0.10 90);
--tag-text: oklch(45% 0.06 240);
--tag-hover-bg: oklch(93% 0.025 240);
} }
html { font-size: 16px; } html { font-size: 16px; }
@@ -65,8 +67,10 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.hero-main { .hero-main {
display: flex; display: flex;
flex-wrap: wrap;
justify-content: space-between; justify-content: space-between;
align-items: flex-start; align-items: flex-start;
gap: 1rem;
} }
.hero-submit { .hero-submit {
@@ -78,14 +82,21 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--text); color: var(--text);
text-decoration: none; text-decoration: none;
white-space: nowrap; white-space: nowrap;
transition: border-color 0.2s, background 0.2s, color 0.2s;
} }
.hero-submit:hover { .hero-submit:hover {
border-color: var(--accent); border-color: var(--accent);
background: var(--accent-light);
color: var(--accent); color: var(--accent);
text-decoration: none; text-decoration: none;
} }
.hero-submit:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 2px;
}
.hero h1 { .hero h1 {
font-family: var(--font-display); font-family: var(--font-display);
font-size: clamp(2rem, 5vw, 3rem); font-size: clamp(2rem, 5vw, 3rem);
@@ -144,6 +155,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
font-family: var(--font-body); font-family: var(--font-body);
font-size: var(--text-sm); font-size: var(--text-sm);
color: var(--text); color: var(--text);
transition: border-color 0.15s, background 0.15s;
} }
.search::placeholder { color: var(--text-muted); } .search::placeholder { color: var(--text-muted); }
@@ -174,11 +186,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
background: none; background: none;
border: 1px solid var(--border); border: 1px solid var(--border);
border-radius: 4px; border-radius: 4px;
padding: 0.15rem 0.5rem; padding: 0.35rem 0.65rem;
font-family: inherit; font-family: inherit;
font-size: var(--text-xs); font-size: var(--text-xs);
color: var(--text-muted); color: var(--text-muted);
cursor: pointer; cursor: pointer;
transition: border-color 0.15s, color 0.15s;
} }
.filter-clear:hover { .filter-clear:hover {
@@ -186,14 +199,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--text); color: var(--text);
} }
.stats { .filter-clear:focus-visible {
font-size: var(--text-sm); outline: 2px solid var(--accent);
color: var(--text-muted); outline-offset: 2px;
font-variant-numeric: tabular-nums;
} }
.stats strong { color: var(--text-secondary); }
/* === Table === */ /* === Table === */
.table-wrap { .table-wrap {
width: 100%; width: 100%;
@@ -201,6 +211,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
overflow-x: auto; overflow-x: auto;
} }
.table-wrap:focus {
outline: 2px solid var(--accent);
outline-offset: -2px;
}
.table { .table {
width: 100%; width: 100%;
border-collapse: separate; border-collapse: separate;
@@ -236,6 +251,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
padding: 0.7rem 0.75rem; padding: 0.7rem 0.75rem;
border-bottom: 1px solid var(--border); border-bottom: 1px solid var(--border);
vertical-align: top; vertical-align: top;
transition: background 0.15s;
} }
.table tbody tr.row:not(.open):hover td { .table tbody tr.row:not(.open):hover td {
@@ -253,9 +269,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-name { .col-name {
width: 35%; width: 35%;
overflow-wrap: break-word; overflow-wrap: anywhere;
word-wrap: break-word;
word-break: break-word;
} }
.col-name > a { .col-name > a {
@@ -266,12 +280,47 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-name > a:hover { text-decoration: underline; color: var(--accent-hover); } .col-name > a:hover { text-decoration: underline; color: var(--accent-hover); }
/* === Sortable Headers === */
th[data-sort] {
cursor: pointer;
user-select: none;
}
th[data-sort]:hover {
color: var(--accent);
}
th[data-sort]::after {
content: " ▼";
opacity: 0;
transition: opacity 0.15s;
}
th[data-sort="name"]::after {
content: " ▲";
}
th[data-sort]:hover::after {
opacity: 1;
}
th[data-sort].sort-desc::after {
content: " ▼";
opacity: 1;
}
th[data-sort].sort-asc::after {
content: " ▲";
opacity: 1;
}
/* === Stars Column === */ /* === Stars Column === */
.col-stars { .col-stars {
width: 5rem; width: 5rem;
font-variant-numeric: tabular-nums; font-variant-numeric: tabular-nums;
white-space: nowrap; white-space: nowrap;
color: var(--text-secondary); color: var(--text-secondary);
text-align: right;
} }
/* === Arrow Column === */ /* === Arrow Column === */
@@ -294,6 +343,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
/* === Row Click === */ /* === Row Click === */
.row { cursor: pointer; } .row { cursor: pointer; }
.row:focus-visible td {
outline: none;
background: var(--bg-hover);
box-shadow: inset 2px 0 0 var(--accent);
}
/* === Expand Row === */ /* === Expand Row === */
.expand-row { .expand-row {
display: none; display: none;
@@ -315,10 +370,36 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
border-bottom: 1px solid var(--border); border-bottom: 1px solid var(--border);
} }
@keyframes expand-in {
from {
opacity: 0;
transform: translateY(-4px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
.expand-content { .expand-content {
font-size: var(--text-sm); font-size: var(--text-sm);
color: var(--text-secondary); color: var(--text-secondary);
line-height: 1.6; line-height: 1.6;
animation: expand-in 0.2s cubic-bezier(0.25, 1, 0.5, 1);
}
.expand-tags {
display: flex;
gap: 0.4rem;
margin-bottom: 0.4rem;
}
.expand-tag {
font-size: var(--text-xs);
color: var(--tag-text);
background: var(--bg);
padding: 0.15rem 0.4rem;
border-radius: 3px;
} }
.expand-also-see { .expand-also-see {
@@ -357,35 +438,63 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--border); color: var(--border);
} }
.col-cat, .col-group { .col-cat {
width: 13%; width: 13%;
white-space: nowrap; white-space: nowrap;
} }
/* === Last Commit Column === */
.col-commit {
width: 9rem;
white-space: nowrap;
color: var(--text-muted);
}
/* === Tags === */ /* === Tags === */
.tag { .tag {
position: relative;
background: var(--accent-light); background: var(--accent-light);
border: none; border: none;
font-family: inherit; font-family: inherit;
font-size: var(--text-xs); font-size: var(--text-xs);
color: oklch(45% 0.06 240); color: var(--tag-text);
cursor: pointer; cursor: pointer;
padding: 0.15rem 0.35rem; padding: 0.25rem 0.5rem;
border-radius: 3px; border-radius: 3px;
white-space: nowrap; white-space: nowrap;
transition: background 0.15s, color 0.15s;
}
/* Expand touch target to 44x44px minimum */
.tag::after {
content: "";
position: absolute;
inset: -0.5rem -0.25rem;
} }
.tag:hover { .tag:hover {
background: var(--accent-light); background: var(--tag-hover-bg);
color: var(--accent); color: var(--accent);
} }
.tag:focus-visible {
outline: 2px solid var(--accent);
outline-offset: 1px;
}
.tag.active { .tag.active {
background: var(--highlight); background: var(--highlight);
color: var(--highlight-text); color: var(--highlight-text);
font-weight: 600; font-weight: 600;
} }
/* === Noscript === */
.noscript-msg {
text-align: center;
padding: 1rem;
color: var(--text-muted);
}
/* === No Results === */ /* === No Results === */
.no-results { .no-results {
max-width: 1400px; max-width: 1400px;
@@ -407,20 +516,18 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
background: var(--bg-input); background: var(--bg-input);
display: flex; display: flex;
align-items: center; align-items: center;
justify-content: space-between; justify-content: flex-end;
gap: 0.5rem;
} }
.footer a { color: var(--text-muted); text-decoration: none; } .footer a { color: var(--accent); text-decoration: none; }
.footer a:hover { color: var(--accent); } .footer a:hover { color: var(--accent-hover); text-decoration: underline; }
.footer-links { .footer-sep { color: var(--border-strong); }
display: flex;
gap: 1rem;
}
/* === Responsive === */ /* === Responsive === */
@media (max-width: 900px) { @media (max-width: 900px) {
.col-group { display: none; } .col-commit { display: none; }
} }
@media (max-width: 640px) { @media (max-width: 640px) {
@@ -435,7 +542,7 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
.col-cat { display: none; } .col-cat { display: none; }
.col-name { white-space: normal; } .col-name { white-space: normal; }
.footer { padding: 1.25rem; flex-direction: column; gap: 0.5rem; } .footer { padding: 1.25rem; justify-content: center; flex-wrap: wrap; }
} }
/* === Screen Reader Only === */ /* === Screen Reader Only === */
@@ -454,6 +561,8 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
/* === Reduced Motion === */ /* === Reduced Motion === */
@media (prefers-reduced-motion: reduce) { @media (prefers-reduced-motion: reduce) {
*, *::before, *::after { *, *::before, *::after {
animation-duration: 0.01ms !important;
animation-iteration-count: 1 !important;
transition-duration: 0.01ms !important; transition-duration: 0.01ms !important;
} }
} }
+10 -13
View File
@@ -17,10 +17,7 @@
/> />
<meta property="og:url" content="https://awesome-python.com/" /> <meta property="og:url" content="https://awesome-python.com/" />
<meta name="twitter:card" content="summary" /> <meta name="twitter:card" content="summary" />
<link <link rel="icon" href="/static/favicon.svg" type="image/svg+xml" />
rel="icon"
href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🐍</text></svg>"
/>
<link rel="stylesheet" href="/static/style.css" /> <link rel="stylesheet" href="/static/style.css" />
<script <script
async async
@@ -41,24 +38,24 @@
<main id="content">{% block content %}{% endblock %}</main> <main id="content">{% block content %}{% endblock %}</main>
<footer class="footer"> <footer class="footer">
<div class="footer-links"> <span
>Made by
<a href="https://vinta.ws/" target="_blank" rel="noopener"
>Vinta</a
></span
>
<span class="footer-sep">/</span>
<a href="https://github.com/vinta" target="_blank" rel="noopener" <a href="https://github.com/vinta" target="_blank" rel="noopener"
>GitHub</a >GitHub</a
> >
<span class="footer-sep">/</span>
<a href="https://twitter.com/vinta" target="_blank" rel="noopener" <a href="https://twitter.com/vinta" target="_blank" rel="noopener"
>Twitter</a >Twitter</a
> >
</div>
<span
>Curated by
<a href="https://github.com/vinta" target="_blank" rel="noopener"
>Vinta</a
></span
>
</footer> </footer>
<noscript <noscript
><p style="text-align: center; padding: 1rem; color: #666"> ><p class="noscript-msg">
JavaScript is needed for search and filtering. JavaScript is needed for search and filtering.
</p></noscript </p></noscript
> >
+21 -14
View File
@@ -29,6 +29,7 @@
</div> </div>
</header> </header>
<h2 class="sr-only">Search and filter</h2>
<div class="controls"> <div class="controls">
<div class="search-wrap"> <div class="search-wrap">
<svg <svg
@@ -60,22 +61,24 @@
</div> </div>
</div> </div>
<div class="table-wrap"> <h2 class="sr-only">Results</h2>
<div class="table-wrap" tabindex="0" role="region" aria-label="Libraries table">
<table class="table"> <table class="table">
<thead> <thead>
<tr> <tr>
<th class="col-num"><span class="sr-only">#</span></th> <th class="col-num"><span class="sr-only">#</span></th>
<th class="col-name">Project Name</th> <th class="col-name" data-sort="name">Project Name</th>
<th class="col-stars">GitHub Stars</th> <th class="col-stars" data-sort="stars">GitHub Stars</th>
<th class="col-commit" data-sort="commit-time">Last Commit</th>
<th class="col-cat">Category</th> <th class="col-cat">Category</th>
<th class="col-group">Group</th> <th class="col-arrow"><span class="sr-only">Details</span></th>
<th class="col-arrow"></th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% for entry in entries %} {% for entry in entries %}
<tr <tr
class="row" class="row"
role="button"
data-cat="{{ entry.category }}" data-cat="{{ entry.category }}"
data-group="{{ entry.group }}" data-group="{{ entry.group }}"
tabindex="0" tabindex="0"
@@ -92,25 +95,24 @@
{% if entry.stars is not none %}{{ "{:,}".format(entry.stars) }}{% {% if entry.stars is not none %}{{ "{:,}".format(entry.stars) }}{%
else %}&mdash;{% endif %} else %}&mdash;{% endif %}
</td> </td>
<td class="col-commit"
{% if entry.last_commit_at %}data-commit="{{ entry.last_commit_at }}"{% endif %}
>{% if entry.last_commit_at %}<time datetime="{{ entry.last_commit_at }}">{{ entry.last_commit_at[:10] }}</time>{% else %}&mdash;{% endif %}</td>
<td class="col-cat"> <td class="col-cat">
<button class="tag" data-type="cat" data-value="{{ entry.category }}"> <button class="tag" data-type="cat" data-value="{{ entry.category }}">
{{ entry.category }} {{ entry.category }}
</button> </button>
</td> </td>
<td class="col-group">
<button class="tag" data-type="group" data-value="{{ entry.group }}">
{{ entry.group }}
</button>
</td>
<td class="col-arrow"><span class="arrow">&rarr;</span></td> <td class="col-arrow"><span class="arrow">&rarr;</span></td>
</tr> </tr>
<tr class="expand-row" id="expand-{{ loop.index }}"> <tr class="expand-row" id="expand-{{ loop.index }}">
<td></td> <td></td>
<td colspan="5"> <td colspan="3">
<div class="expand-content"> <div class="expand-content">
{% if entry.description %} {% if entry.description %}
<div class="expand-desc">{{ entry.description | safe }}</div> <div class="expand-desc">{{ entry.description | safe }}</div>
{% endif %} {% if entry.also_see %} {% endif %}
{% if entry.also_see %}
<div class="expand-also-see"> <div class="expand-also-see">
Also see: {% for see in entry.also_see %}<a Also see: {% for see in entry.also_see %}<a
href="{{ see.url }}" href="{{ see.url }}"
@@ -131,11 +133,16 @@
target="_blank" target="_blank"
rel="noopener" rel="noopener"
>{{ entry.url | replace("https://", "") }}</a >{{ entry.url | replace("https://", "") }}</a
>{% if entry.pushed_at %}<span class="expand-sep">&middot;</span >
>Last pushed {{ entry.pushed_at[:10] }}{% endif %}
</div> </div>
</div> </div>
</td> </td>
<td class="col-cat">
<button class="tag" data-type="group" data-value="{{ entry.group }}">
{{ entry.group }}
</button>
</td>
<td></td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
+1 -289
View File
@@ -1,27 +1,18 @@
"""Tests for the build module.""" """Tests for the build module."""
import json import json
import os
import shutil import shutil
import sys
import textwrap import textwrap
from pathlib import Path from pathlib import Path
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from build import ( from build import (
build, build,
count_entries,
extract_github_repo, extract_github_repo,
extract_preview,
group_categories, group_categories,
load_stars, load_stars,
parse_readme,
render_content_html,
slugify,
sort_entries, sort_entries,
) )
from readme_parser import slugify
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# slugify # slugify
@@ -51,244 +42,6 @@ class TestSlugify:
assert slugify(" Date and Time ") == "date-and-time" assert slugify(" Date and Time ") == "date-and-time"
# ---------------------------------------------------------------------------
# count_entries
# ---------------------------------------------------------------------------
class TestCountEntries:
def test_counts_dash_entries(self):
assert count_entries("- [a](url) - Desc.\n- [b](url) - Desc.") == 2
def test_counts_star_entries(self):
assert count_entries("* [a](url) - Desc.") == 1
def test_ignores_non_entries(self):
assert count_entries("Some text\n- [a](url) - Desc.\nMore text") == 1
def test_counts_indented_entries(self):
assert count_entries(" - [a](url) - Desc.") == 1
def test_empty_content(self):
assert count_entries("") == 0
# ---------------------------------------------------------------------------
# extract_preview
# ---------------------------------------------------------------------------
class TestExtractPreview:
def test_basic(self):
content = "* [alpha](url) - A.\n* [beta](url) - B.\n* [gamma](url) - C."
assert extract_preview(content) == "alpha, beta, gamma"
def test_max_four(self):
content = "\n".join(f"* [lib{i}](url) - Desc." for i in range(10))
assert extract_preview(content) == "lib0, lib1, lib2, lib3"
def test_empty(self):
assert extract_preview("") == ""
def test_skips_subcategory_labels(self):
content = "* Synchronous\n* [django](url) - Framework.\n* [flask](url) - Micro."
assert extract_preview(content) == "django, flask"
# ---------------------------------------------------------------------------
# render_content_html
# ---------------------------------------------------------------------------
class TestRenderContentHtml:
    """Checks the HTML fragments emitted by ``render_content_html``."""

    def test_basic_entry(self):
        """A link entry renders its URL, name, description and entry class."""
        rendered = render_content_html(
            "* [django](https://example.com) - A web framework."
        )
        expected_fragments = (
            'href="https://example.com"',
            "django",
            "A web framework.",
            'class="entry"',
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_subcategory_label(self):
        """A link-less bullet renders as a subcategory label."""
        rendered = render_content_html(
            "* Synchronous\n* [django](https://x.com) - Framework."
        )
        assert 'class="subcat"' in rendered
        assert "Synchronous" in rendered

    def test_sub_entry(self):
        """A bullet following an entry with leading whitespace renders as a sub-entry."""
        rendered = render_content_html(
            "* [django](https://x.com) - Framework.\n * [awesome-django](https://y.com)"
        )
        assert 'class="entry-sub"' in rendered
        assert "awesome-django" in rendered

    def test_link_only_entry(self):
        """An entry without a description still renders its link."""
        rendered = render_content_html("* [tool](https://x.com)")
        assert 'href="https://x.com"' in rendered
        assert "tool" in rendered
# ---------------------------------------------------------------------------
# parse_readme
# ---------------------------------------------------------------------------
# Small README fixture: an intro, two category sections (Alpha, Beta), a
# Resources section holding Newsletters and Podcasts, and a trailing
# Contributing section.
MINIMAL_README = textwrap.dedent("""\
# Awesome Python
Some intro text.
---
## Alpha
_Libraries for alpha stuff._
- [lib-a](https://example.com/a) - Does A.
- [lib-b](https://example.com/b) - Does B.
## Beta
_Tools for beta._
- [lib-c](https://example.com/c) - Does C.
# Resources
Where to discover resources.
## Newsletters
- [News One](https://example.com/n1)
- [News Two](https://example.com/n2)
## Podcasts
- [Pod One](https://example.com/p1)
# Contributing
Please contribute!
""")
class TestParseReadme:
    """Checks ``parse_readme`` against the ``MINIMAL_README`` fixture and small variants."""

    def test_category_count(self):
        """The fixture yields two categories."""
        categories, _ = parse_readme(MINIMAL_README)
        assert len(categories) == 2

    def test_resource_count(self):
        """The fixture yields two resource sections."""
        _, resource_sections = parse_readme(MINIMAL_README)
        assert len(resource_sections) == 2

    def test_category_names(self):
        """Categories keep their heading text, in document order."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["name"] == "Alpha"
        assert second["name"] == "Beta"

    def test_category_slugs(self):
        """Slugs are the lower-cased category names for these headings."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["slug"] == "alpha"
        assert second["slug"] == "beta"

    def test_category_description(self):
        """The emphasised line under a heading becomes the description, without markers."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["description"] == "Libraries for alpha stuff."
        assert second["description"] == "Tools for beta."

    def test_category_content_has_entries(self):
        """Category content contains the names of its entries."""
        categories, _ = parse_readme(MINIMAL_README)
        alpha_content = categories[0]["content"]
        assert "lib-a" in alpha_content
        assert "lib-b" in alpha_content

    def test_resources_names(self):
        """Resource sections keep their heading text, in document order."""
        _, resource_sections = parse_readme(MINIMAL_README)
        assert resource_sections[0]["name"] == "Newsletters"
        assert resource_sections[1]["name"] == "Podcasts"

    def test_resources_content(self):
        """Resource content contains the names of its entries."""
        _, resource_sections = parse_readme(MINIMAL_README)
        assert "News One" in resource_sections[0]["content"]
        assert "Pod One" in resource_sections[1]["content"]

    def test_contributing_skipped(self):
        """The Contributing section is reported neither as category nor resource."""
        categories, resource_sections = parse_readme(MINIMAL_README)
        seen_names = [section["name"] for section in categories]
        seen_names += [section["name"] for section in resource_sections]
        assert "Contributing" not in seen_names

    def test_no_separator(self):
        """A document with only a heading and text yields two empty lists."""
        categories, resource_sections = parse_readme(
            "# Just a heading\n\nSome text.\n"
        )
        assert categories == []
        assert resource_sections == []

    def test_no_description(self):
        """A category without a description line reports an empty description."""
        source = textwrap.dedent("""\
            # Title
            ---
            ## NullDesc
            - [item](https://x.com) - Thing.
            # Resources
            ## Tips
            - [tip](https://x.com)
            # Contributing
            Done.
        """)
        categories, _ = parse_readme(source)
        only_category = categories[0]
        assert only_category["description"] == ""
        assert "item" in only_category["content"]
# ---------------------------------------------------------------------------
# parse_readme on real README
# ---------------------------------------------------------------------------
class TestParseRealReadme:
    """Runs ``parse_readme`` over the repository's README.md two directories above this file."""

    @pytest.fixture(autouse=True)
    def load_readme(self):
        """Read README.md and store its text and parse results on the test instance."""
        tests_dir = os.path.dirname(__file__)
        readme_path = os.path.join(tests_dir, "..", "..", "README.md")
        with open(readme_path, encoding="utf-8") as handle:
            self.readme_text = handle.read()
        parsed = parse_readme(self.readme_text)
        self.cats, self.resources = parsed

    def test_at_least_83_categories(self):
        """The README contains at least 83 categories."""
        category_total = len(self.cats)
        assert category_total >= 83

    def test_resources_has_newsletters_and_podcasts(self):
        """Both Newsletters and Podcasts appear among the resource sections."""
        resource_names = [section["name"] for section in self.resources]
        for expected in ("Newsletters", "Podcasts"):
            assert expected in resource_names

    def test_contributing_not_in_results(self):
        """Contributing is reported neither as category nor resource."""
        seen_names = [section["name"] for section in self.cats]
        seen_names += [section["name"] for section in self.resources]
        assert "Contributing" not in seen_names

    def test_first_category_is_admin_panels(self):
        """The first parsed category is Admin Panels."""
        first = self.cats[0]
        assert first["name"] == "Admin Panels"
        assert first["slug"] == "admin-panels"

    def test_last_category_is_wsgi_servers(self):
        """The last parsed category is WSGI Servers."""
        last = self.cats[-1]
        assert last["name"] == "WSGI Servers"
        assert last["slug"] == "wsgi-servers"

    def test_restful_api_slug(self):
        """Some category carries the slug ``restful-api``."""
        all_slugs = [section["slug"] for section in self.cats]
        assert "restful-api" in all_slugs

    def test_descriptions_extracted(self):
        """The first category's description is extracted as plain text."""
        first = self.cats[0]
        assert first["description"] == "Libraries for administrative interfaces."
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# group_categories # group_categories
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -318,26 +71,6 @@ class TestGroupCategories:
assert "Resources" in group_names assert "Resources" in group_names
# ---------------------------------------------------------------------------
# render_markdown (kept for compatibility)
# ---------------------------------------------------------------------------
class TestRenderMarkdown:
    """Checks the HTML produced by ``build.render_markdown``."""

    def test_renders_link_list(self):
        """A bullet with a link renders as a list item containing an anchor."""
        from build import render_markdown

        rendered = render_markdown("- [lib](https://example.com) - Does stuff.")
        expected_fragments = ("<li>", '<a href="https://example.com">lib</a>')
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_renders_plain_text(self):
        """Plain text is wrapped in a paragraph element."""
        from build import render_markdown

        rendered = render_markdown("Hello world")
        assert "<p>Hello world</p>" in rendered
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# build (integration) # build (integration)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -413,27 +146,6 @@ class TestBuild:
# No category sub-pages # No category sub-pages
assert not (site / "categories").exists() assert not (site / "categories").exists()
def test_build_creates_cname(self, tmp_path):
    """After ``build`` runs, website/output/CNAME exists and mentions awesome-python.com."""
    readme_source = textwrap.dedent("""\
        # T
        ---
        ## Only
        - [x](https://x.com) - X.
        # Contributing
        Done.
    """)
    self._make_repo(tmp_path, readme_source)
    build(str(tmp_path))

    cname_path = tmp_path / "website" / "output" / "CNAME"
    assert cname_path.exists()
    cname_text = cname_path.read_text()
    assert "awesome-python.com" in cname_text
def test_build_cleans_stale_output(self, tmp_path): def test_build_cleans_stale_output(self, tmp_path):
readme = textwrap.dedent("""\ readme = textwrap.dedent("""\
# T # T
-22
View File
@@ -8,7 +8,6 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from fetch_github_stars import ( from fetch_github_stars import (
build_graphql_query, build_graphql_query,
extract_github_repos, extract_github_repos,
load_cache,
parse_graphql_response, parse_graphql_response,
save_cache, save_cache,
) )
@@ -65,27 +64,6 @@ class TestExtractGithubRepos:
assert result == {"org/repo"} assert result == {"org/repo"}
class TestLoadCache:
    """Checks ``load_cache`` with ``fetch_github_stars.CACHE_FILE`` patched to a temp path."""

    def test_returns_empty_when_missing(self, tmp_path, monkeypatch):
        """A cache path that does not exist yields an empty dict."""
        missing_path = tmp_path / "nonexistent.json"
        monkeypatch.setattr("fetch_github_stars.CACHE_FILE", missing_path)
        assert load_cache() == {}

    def test_loads_valid_cache(self, tmp_path, monkeypatch):
        """Valid JSON in the cache file is returned as parsed data."""
        cache_path = tmp_path / "stars.json"
        cache_path.write_text('{"a/b": {"stars": 1}}', encoding="utf-8")
        monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_path)
        loaded = load_cache()
        assert loaded == {"a/b": {"stars": 1}}

    def test_returns_empty_on_corrupt_json(self, tmp_path, monkeypatch):
        """Unparseable cache contents yield an empty dict."""
        cache_path = tmp_path / "stars.json"
        cache_path.write_text("not json", encoding="utf-8")
        monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_path)
        loaded = load_cache()
        assert loaded == {}
class TestSaveCache: class TestSaveCache:
def test_creates_directory_and_writes_json(self, tmp_path, monkeypatch): def test_creates_directory_and_writes_json(self, tmp_path, monkeypatch):
data_dir = tmp_path / "data" data_dir = tmp_path / "data"
+424
View File
@@ -0,0 +1,424 @@
"""Tests for the readme_parser module."""
import os
import textwrap
import pytest
from readme_parser import (
_parse_section_entries,
_render_section_html,
parse_readme,
render_inline_html,
render_inline_text,
)
from markdown_it import MarkdownIt
from markdown_it.tree import SyntaxTreeNode
def _parse_inline(md_text: str) -> list[SyntaxTreeNode]:
    """Helper: parse a single paragraph and return the children of its inline node."""
    tokens = MarkdownIt("commonmark").parse(md_text)
    tree = SyntaxTreeNode(tokens)
    # Walk root -> paragraph -> inline; the inline node's children are returned.
    paragraph = tree.children[0]
    inline = paragraph.children[0]
    return inline.children
class TestRenderInlineHtml:
    """Checks the HTML produced by ``render_inline_html`` for inline nodes."""

    def test_plain_text_escapes_html(self):
        """Angle brackets and ampersands in text come out as entities."""
        nodes = _parse_inline("Hello <world> & friends")
        rendered = render_inline_html(nodes)
        assert rendered == "Hello &lt;world&gt; &amp; friends"

    def test_link_with_target(self):
        """Links carry href, target and rel attributes and wrap the link text."""
        nodes = _parse_inline("[name](https://example.com)")
        rendered = render_inline_html(nodes)
        expected_fragments = (
            'href="https://example.com"',
            'target="_blank"',
            'rel="noopener"',
            ">name</a>",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_emphasis(self):
        """Single-asterisk emphasis renders as ``<em>``."""
        rendered = render_inline_html(_parse_inline("*italic* text"))
        assert "<em>italic</em>" in rendered

    def test_strong(self):
        """Double-asterisk emphasis renders as ``<strong>``."""
        rendered = render_inline_html(_parse_inline("**bold** text"))
        assert "<strong>bold</strong>" in rendered

    def test_code_inline(self):
        """Backtick spans render as ``<code>``."""
        rendered = render_inline_html(_parse_inline("`some code`"))
        assert "<code>some code</code>" in rendered

    def test_mixed_link_and_text(self):
        """Text on both sides of a link is kept alongside the anchor."""
        nodes = _parse_inline("See [foo](https://x.com) for details.")
        rendered = render_inline_html(nodes)
        for fragment in ("See ", ">foo</a>", " for details."):
            assert fragment in rendered
class TestRenderInlineText:
    """Checks the plain text produced by ``render_inline_text`` for inline nodes."""

    def test_plain_text(self):
        """Plain text passes through unchanged."""
        nodes = _parse_inline("Hello world")
        flattened = render_inline_text(nodes)
        assert flattened == "Hello world"

    def test_link_becomes_text(self):
        """A link is reduced to its link text; the URL is dropped."""
        nodes = _parse_inline("See [awesome-algos](https://github.com/x/y).")
        flattened = render_inline_text(nodes)
        assert flattened == "See awesome-algos."

    def test_emphasis_stripped(self):
        """Emphasis markers are removed, the emphasised text stays."""
        nodes = _parse_inline("*italic* text")
        flattened = render_inline_text(nodes)
        assert flattened == "italic text"

    def test_code_inline_kept(self):
        """Inline code keeps its content without the backticks."""
        nodes = _parse_inline("`code` here")
        flattened = render_inline_text(nodes)
        assert flattened == "code here"
# Small README fixture: an intro, two category sections (Alpha, Beta), a
# Resources section holding Newsletters and Podcasts, and a trailing
# Contributing section.
MINIMAL_README = textwrap.dedent("""\
# Awesome Python
Some intro text.
---
## Alpha
_Libraries for alpha stuff._
- [lib-a](https://example.com/a) - Does A.
- [lib-b](https://example.com/b) - Does B.
## Beta
_Tools for beta._
- [lib-c](https://example.com/c) - Does C.
# Resources
Where to discover resources.
## Newsletters
- [News One](https://example.com/n1)
- [News Two](https://example.com/n2)
## Podcasts
- [Pod One](https://example.com/p1)
# Contributing
Please contribute!
""")
class TestParseReadmeSections:
    """Checks how ``parse_readme`` splits a README into categories and resources."""

    def test_category_count(self):
        """The fixture yields two categories."""
        categories, _ = parse_readme(MINIMAL_README)
        assert len(categories) == 2

    def test_resource_count(self):
        """The fixture yields two resource sections."""
        _, resource_sections = parse_readme(MINIMAL_README)
        assert len(resource_sections) == 2

    def test_category_names(self):
        """Categories keep their heading text, in document order."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["name"] == "Alpha"
        assert second["name"] == "Beta"

    def test_category_slugs(self):
        """Slugs are the lower-cased category names for these headings."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["slug"] == "alpha"
        assert second["slug"] == "beta"

    def test_category_description(self):
        """The emphasised line under a heading becomes the description, without markers."""
        categories, _ = parse_readme(MINIMAL_README)
        first, second = categories[0], categories[1]
        assert first["description"] == "Libraries for alpha stuff."
        assert second["description"] == "Tools for beta."

    def test_resource_names(self):
        """Resource sections keep their heading text, in document order."""
        _, resource_sections = parse_readme(MINIMAL_README)
        assert resource_sections[0]["name"] == "Newsletters"
        assert resource_sections[1]["name"] == "Podcasts"

    def test_contributing_skipped(self):
        """The Contributing section is reported neither as category nor resource."""
        categories, resource_sections = parse_readme(MINIMAL_README)
        seen_names = [section["name"] for section in categories]
        seen_names += [section["name"] for section in resource_sections]
        assert "Contributing" not in seen_names

    def test_no_separator(self):
        """A document with only a heading and text yields two empty lists."""
        categories, resource_sections = parse_readme(
            "# Just a heading\n\nSome text.\n"
        )
        assert categories == []
        assert resource_sections == []

    def test_no_description(self):
        """A category without a description line reports an empty description."""
        source = textwrap.dedent("""\
            # Title
            ---
            ## NullDesc
            - [item](https://x.com) - Thing.
            # Resources
            ## Tips
            - [tip](https://x.com)
            # Contributing
            Done.
        """)
        categories, _ = parse_readme(source)
        only_category = categories[0]
        assert only_category["description"] == ""
        assert only_category["entries"][0]["name"] == "item"

    def test_description_with_link_stripped(self):
        """A link inside the description is reduced to its link text."""
        source = textwrap.dedent("""\
            # T
            ---
            ## Algos
            _Algorithms. Also see [awesome-algos](https://example.com)._
            - [lib](https://x.com) - Lib.
            # Contributing
            Done.
        """)
        categories, _ = parse_readme(source)
        description = categories[0]["description"]
        assert description == "Algorithms. Also see awesome-algos."
def _content_nodes(md_text: str) -> list[SyntaxTreeNode]:
    """Helper: parse markdown with the CommonMark preset and return the root's child nodes."""
    parser = MarkdownIt("commonmark")
    tree = SyntaxTreeNode(parser.parse(md_text))
    return tree.children
class TestParseSectionEntries:
    """Checks the entry dicts built by ``_parse_section_entries`` and the derived category fields."""

    def test_flat_entries(self):
        """Each flat bullet becomes an entry with name, url, description and empty also_see."""
        parsed = _parse_section_entries(
            _content_nodes(
                "- [django](https://example.com/d) - A web framework.\n"
                "- [flask](https://example.com/f) - A micro framework.\n"
            )
        )
        assert len(parsed) == 2
        django, flask = parsed[0], parsed[1]
        assert django["name"] == "django"
        assert django["url"] == "https://example.com/d"
        assert "web framework" in django["description"]
        assert django["also_see"] == []
        assert flask["name"] == "flask"

    def test_link_only_entry(self):
        """A bullet holding only a link yields an entry with an empty description."""
        parsed = _parse_section_entries(_content_nodes("- [tool](https://x.com)\n"))
        assert len(parsed) == 1
        only = parsed[0]
        assert only["name"] == "tool"
        assert only["description"] == ""

    def test_subcategorized_entries(self):
        """Link-less label bullets are not entries; the three link bullets are."""
        parsed = _parse_section_entries(
            _content_nodes(
                "- Algorithms\n"
                " - [algos](https://x.com/a) - Algo lib.\n"
                " - [sorts](https://x.com/s) - Sort lib.\n"
                "- Design Patterns\n"
                " - [patterns](https://x.com/p) - Pattern lib.\n"
            )
        )
        assert len(parsed) == 3
        assert parsed[0]["name"] == "algos"
        assert parsed[2]["name"] == "patterns"

    def test_text_before_link_is_subcategory(self):
        """A bullet whose link is preceded by text is a label, not an entry."""
        parsed = _parse_section_entries(
            _content_nodes(
                "- MySQL - [awesome-mysql](http://example.com/awesome-mysql/)\n"
                " - [mysqlclient](https://example.com/mysqlclient) - MySQL connector.\n"
                " - [pymysql](https://example.com/pymysql) - Pure Python MySQL driver.\n"
            )
        )
        # awesome-mysql is a subcategory label, not an entry
        assert len(parsed) == 2
        entry_names = [entry["name"] for entry in parsed]
        assert "awesome-mysql" not in entry_names
        assert "mysqlclient" in entry_names
        assert "pymysql" in entry_names

    def test_also_see_sub_entries(self):
        """The awesome-asyncio bullet lands in asyncio's also_see list, not among the entries."""
        parsed = _parse_section_entries(
            _content_nodes(
                "- [asyncio](https://docs.python.org/3/library/asyncio.html) - Async I/O.\n"
                " - [awesome-asyncio](https://github.com/timofurrer/awesome-asyncio)\n"
                "- [trio](https://github.com/python-trio/trio) - Friendly async.\n"
            )
        )
        assert len(parsed) == 2
        asyncio_entry, trio_entry = parsed[0], parsed[1]
        assert asyncio_entry["name"] == "asyncio"
        assert len(asyncio_entry["also_see"]) == 1
        assert asyncio_entry["also_see"][0]["name"] == "awesome-asyncio"
        assert trio_entry["name"] == "trio"
        assert trio_entry["also_see"] == []

    def test_entry_count_includes_also_see(self):
        """A category's entry_count covers main entries and also_see links."""
        source = textwrap.dedent("""\
            # T
            ---
            ## Async
            - [asyncio](https://x.com) - Async I/O.
            - [awesome-asyncio](https://y.com)
            - [trio](https://z.com) - Friendly async.
            # Contributing
            Done.
        """)
        categories, _ = parse_readme(source)
        # 2 main entries + 1 also_see = 3
        assert categories[0]["entry_count"] == 3

    def test_preview_first_four_names(self):
        """The preview lists only the first four of five entry names."""
        source = textwrap.dedent("""\
            # T
            ---
            ## Libs
            - [alpha](https://x.com) - A.
            - [beta](https://x.com) - B.
            - [gamma](https://x.com) - C.
            - [delta](https://x.com) - D.
            - [epsilon](https://x.com) - E.
            # Contributing
            Done.
        """)
        categories, _ = parse_readme(source)
        assert categories[0]["preview"] == "alpha, beta, gamma, delta"

    def test_description_html_escapes_xss(self):
        """A script tag in a description is stored escaped, never raw."""
        parsed = _parse_section_entries(
            _content_nodes('- [lib](https://x.com) - A <script>alert(1)</script> lib.\n')
        )
        description = parsed[0]["description"]
        assert "<script>" not in description
        assert "&lt;script&gt;" in description
class TestRenderSectionHtml:
    """Checks the HTML produced by ``_render_section_html`` for a section's block nodes."""

    def test_basic_entry(self):
        """A link entry renders with the entry class, its URL, name and description."""
        nodes = _content_nodes("- [django](https://example.com) - A web framework.\n")
        html = _render_section_html(nodes)
        assert 'class="entry"' in html
        assert 'href="https://example.com"' in html
        assert "django" in html
        assert "A web framework." in html

    def test_subcategory_label(self):
        """A link-less bullet renders as a subcategory label next to a regular entry."""
        nodes = _content_nodes(
            "- Synchronous\n - [django](https://x.com) - Framework.\n"
        )
        html = _render_section_html(nodes)
        assert 'class="subcat"' in html
        assert "Synchronous" in html
        assert 'class="entry"' in html

    def test_sub_entry(self):
        """The awesome-django bullet renders with the sub-entry class."""
        nodes = _content_nodes(
            "- [django](https://x.com) - Framework.\n"
            " - [awesome-django](https://y.com)\n"
        )
        html = _render_section_html(nodes)
        assert 'class="entry-sub"' in html
        assert "awesome-django" in html

    def test_link_only_entry(self):
        """An entry without a description still renders as an entry with its link."""
        nodes = _content_nodes("- [tool](https://x.com)\n")
        html = _render_section_html(nodes)
        assert 'class="entry"' in html
        assert 'href="https://x.com"' in html
        assert "tool" in html

    def test_xss_escaped_in_name(self):
        """Raw HTML placed in a link's text must never surface as a live tag."""
        nodes = _content_nodes('- [<img onerror=alert(1)>](https://x.com) - Bad.\n')
        html = _render_section_html(nodes)
        # Check for the literal tag opener: it is absent whether the renderer
        # escapes the raw HTML or drops it, and the check cannot be satisfied
        # by an unrelated ampersand elsewhere in the output.
        assert "<img" not in html

    def test_xss_escaped_in_subcat(self):
        """A script tag used as a subcategory label is not emitted raw."""
        nodes = _content_nodes("- <script>alert(1)</script>\n")
        html = _render_section_html(nodes)
        assert "<script>" not in html
class TestParseRealReadme:
    """Runs ``parse_readme`` over the repository's README.md two directories above this file."""

    @pytest.fixture(autouse=True)
    def load_readme(self):
        """Read README.md and store its text and parse results on the test instance."""
        tests_dir = os.path.dirname(__file__)
        readme_path = os.path.join(tests_dir, "..", "..", "README.md")
        with open(readme_path, encoding="utf-8") as handle:
            self.readme_text = handle.read()
        parsed = parse_readme(self.readme_text)
        self.cats, self.resources = parsed

    def test_at_least_83_categories(self):
        """The README contains at least 83 categories."""
        category_total = len(self.cats)
        assert category_total >= 83

    def test_resources_has_newsletters_and_podcasts(self):
        """Both Newsletters and Podcasts appear among the resource sections."""
        resource_names = [section["name"] for section in self.resources]
        for expected in ("Newsletters", "Podcasts"):
            assert expected in resource_names

    def test_contributing_not_in_results(self):
        """Contributing is reported neither as category nor resource."""
        seen_names = [section["name"] for section in self.cats]
        seen_names += [section["name"] for section in self.resources]
        assert "Contributing" not in seen_names

    def test_first_category_is_admin_panels(self):
        """The first parsed category is Admin Panels."""
        first = self.cats[0]
        assert first["name"] == "Admin Panels"
        assert first["slug"] == "admin-panels"

    def test_last_category_is_wsgi_servers(self):
        """The last parsed category is WSGI Servers."""
        last = self.cats[-1]
        assert last["name"] == "WSGI Servers"
        assert last["slug"] == "wsgi-servers"

    def test_restful_api_slug(self):
        """Some category carries the slug ``restful-api``."""
        all_slugs = [section["slug"] for section in self.cats]
        assert "restful-api" in all_slugs

    def test_descriptions_extracted(self):
        """The first category's description is extracted as plain text."""
        first = self.cats[0]
        assert first["description"] == "Libraries for administrative interfaces."

    def test_entry_counts_nonzero(self):
        """Every category reports at least one entry."""
        for category in self.cats:
            assert category["entry_count"] > 0, f"{category['name']} has 0 entries"

    def test_previews_nonempty(self):
        """Every category has a non-empty preview."""
        for category in self.cats:
            assert category["preview"], f"{category['name']} has empty preview"

    def test_content_html_nonempty(self):
        """Every category has non-empty rendered HTML."""
        for category in self.cats:
            assert category["content_html"], f"{category['name']} has empty content_html"

    def test_algorithms_has_subcategories(self):
        """The Algorithms and Design Patterns HTML contains subcategory labels."""
        algorithms = next(
            category
            for category in self.cats
            if category["name"] == "Algorithms and Design Patterns"
        )
        assert 'class="subcat"' in algorithms["content_html"]

    def test_async_has_also_see(self):
        """The asyncio entry lists awesome-asyncio first among its also_see links."""
        async_category = next(
            category
            for category in self.cats
            if category["name"] == "Asynchronous Programming"
        )
        asyncio_entry = next(
            entry for entry in async_category["entries"] if entry["name"] == "asyncio"
        )
        related = asyncio_entry["also_see"]
        assert len(related) >= 1
        assert related[0]["name"] == "awesome-asyncio"

    def test_description_links_stripped_to_text(self):
        """Links in a category description are reduced to link text with no URL left."""
        algorithms = next(
            category
            for category in self.cats
            if category["name"] == "Algorithms and Design Patterns"
        )
        description = algorithms["description"]
        assert "awesome-algorithms" in description
        assert "https://" not in description