refactor: reformat build.py to Black style and add llms.txt output

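Reformats multi-element list, tuple, and dict literals into Black's
one-item-per-line, trailing-comma layout throughout, and joins the
GITHUB_REPO_URL_RE pattern onto a single line. Also copies README.md to
llms.txt in the site output so LLM crawlers can discover the full
content.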

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Vinta Chen
2026-03-18 21:33:51 +08:00
parent 539edc4e20
commit ab18c7e54c
+165 -75
View File
@@ -8,62 +8,144 @@ from pathlib import Path
from typing import TypedDict from typing import TypedDict
from jinja2 import Environment, FileSystemLoader from jinja2 import Environment, FileSystemLoader
from readme_parser import parse_readme, slugify from readme_parser import parse_readme, slugify
# Thematic grouping of categories. Each category name must match exactly
# as it appears in README.md (the ## heading text).
#
# Each item is a (group name, [category names]) pair; the order of the
# pairs and of the names inside each list is the order written here.
SECTION_GROUPS: list[tuple[str, list[str]]] = [
    (
        "Web & API",
        [
            "Web Frameworks",
            "RESTful API",
            "GraphQL",
            "WebSocket",
            "ASGI Servers",
            "WSGI Servers",
            "HTTP Clients",
            "Template Engine",
            "Web Asset Management",
            "Web Content Extracting",
            "Web Crawling",
        ],
    ),
    (
        "Data & ML",
        [
            "Data Analysis",
            "Data Validation",
            "Data Visualization",
            "Machine Learning",
            "Deep Learning",
            "Computer Vision",
            "Natural Language Processing",
            "Recommender Systems",
            "Science",
            "Quantum Computing",
        ],
    ),
    (
        "DevOps & Infrastructure",
        [
            "DevOps Tools",
            "Distributed Computing",
            "Task Queues",
            "Job Scheduler",
            "Serverless Frameworks",
            "Logging",
            "Processes",
            "Shell",
            "Network Virtualization",
            "RPC Servers",
        ],
    ),
    (
        "Database & Storage",
        [
            "Database",
            "Database Drivers",
            "ORM",
            "Caching",
            "Search",
            "Serialization",
        ],
    ),
    (
        "Development Tools",
        [
            "Testing",
            "Debugging Tools",
            "Code Analysis",
            "Build Tools",
            "Refactoring",
            "Documentation",
            "Editor Plugins and IDEs",
            "Interactive Interpreter",
        ],
    ),
    (
        "CLI & GUI",
        [
            "Command-line Interface Development",
            "Command-line Tools",
            "GUI Development",
        ],
    ),
    (
        "Content & Media",
        [
            "Audio",
            "Video",
            "Image Processing",
            "HTML Manipulation",
            "Text Processing",
            "Specific Formats Processing",
            "File Manipulation",
            "Downloader",
        ],
    ),
    (
        "System & Runtime",
        [
            "Asynchronous Programming",
            "Environment Management",
            "Package Management",
            "Package Repositories",
            "Distribution",
            "Implementations",
            "Built-in Classes Enhancement",
            "Functional Programming",
            "Configuration Files",
        ],
    ),
    (
        "Security & Auth",
        [
            "Authentication",
            "Cryptography",
            "Penetration Testing",
            "Permissions",
        ],
    ),
    (
        "Specialized",
        [
            "CMS",
            "Admin Panels",
            "Email",
            "Game Development",
            "Geolocation",
            "Hardware",
            "Internationalization",
            "Date and Time",
            "URL Manipulation",
            "Robotics",
            "Microsoft Windows",
            "Miscellaneous",
            "Algorithms and Design Patterns",
            "Static Site Generator",
        ],
    ),
    ("Resources", []),  # Filled dynamically from parsed resources
]
@@ -85,20 +167,24 @@ def group_categories(
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name] group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
if group_cats: if group_cats:
groups.append({ groups.append(
"name": group_name, {
"slug": slugify(group_name), "name": group_name,
"categories": group_cats, "slug": slugify(group_name),
}) "categories": group_cats,
}
)
# Any categories not in a group go into "Other" # Any categories not in a group go into "Other"
ungrouped = [c for c in categories if c["name"] not in grouped_names] ungrouped = [c for c in categories if c["name"] not in grouped_names]
if ungrouped: if ungrouped:
groups.append({ groups.append(
"name": "Other", {
"slug": "other", "name": "Other",
"categories": ungrouped, "slug": "other",
}) "categories": ungrouped,
}
)
return groups return groups
@@ -121,9 +207,7 @@ class StarData(TypedDict):
fetched_at: str fetched_at: str
# Matches an http(s) github.com URL that consists of exactly "owner/repo",
# optionally followed by ".git" and/or a single trailing slash. Group 1
# captures "owner/repo" without the ".git" suffix; URLs with any further
# path segment do not match.
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
def extract_github_repo(url: str) -> str | None: def extract_github_repo(url: str) -> str | None:
@@ -144,12 +228,14 @@ def load_stars(path: Path) -> dict[str, StarData]:
def sort_entries(entries: list[dict]) -> list[dict]:
    """Sort entries by stars descending, then name ascending. No-star entries go last.

    Args:
        entries: Dicts that each provide a ``"name"`` string and a ``"stars"``
            value that is either a number or ``None``.

    Returns:
        A new list in sorted order; the input list is left untouched.
    """

    def sort_key(entry: dict) -> tuple[int, int, str]:
        stars = entry["stars"]
        # Names are compared case-insensitively.
        name = entry["name"].lower()
        if stars is None:
            # A leading 1 places every star-less entry after all starred ones.
            return (1, 0, name)
        # Negating the count makes the ascending sort yield most stars first.
        return (0, -stars, name)

    return sorted(entries, key=sort_key)
@@ -167,17 +253,19 @@ def extract_entries(
for cat in categories: for cat in categories:
group_name = cat_to_group.get(cat["name"], "Other") group_name = cat_to_group.get(cat["name"], "Other")
for entry in cat["entries"]: for entry in cat["entries"]:
entries.append({ entries.append(
"name": entry["name"], {
"url": entry["url"], "name": entry["name"],
"description": entry["description"], "url": entry["url"],
"category": cat["name"], "description": entry["description"],
"group": group_name, "category": cat["name"],
"stars": None, "group": group_name,
"owner": None, "stars": None,
"last_commit_at": None, "owner": None,
"also_see": entry["also_see"], "last_commit_at": None,
}) "also_see": entry["also_see"],
}
)
return entries return entries
@@ -241,6 +329,8 @@ def build(repo_root: str) -> None:
if static_src.exists(): if static_src.exists():
shutil.copytree(static_src, static_dst, dirs_exist_ok=True) shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
shutil.copy(repo / "README.md", site_dir / "llms.txt")
print(f"Built single page with {len(categories)} categories + {len(resources)} resources") print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
print(f"Total entries: {total_entries}") print(f"Total entries: {total_entries}")
print(f"Output: {site_dir}") print(f"Output: {site_dir}")