From ab18c7e54c216753ed9eb61dd59a6acdd221ad0e Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Wed, 18 Mar 2026 21:33:51 +0800 Subject: [PATCH] refactor: reformat build.py to Black style and add llms.txt output Reformats dict and list literals to trailing-comma multiline style throughout. Also copies README.md to llms.txt in the site output so LLM crawlers can discover the full content. Co-Authored-By: Claude --- website/build.py | 240 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 165 insertions(+), 75 deletions(-) diff --git a/website/build.py b/website/build.py index 5b36526e..b396f753 100644 --- a/website/build.py +++ b/website/build.py @@ -8,62 +8,144 @@ from pathlib import Path from typing import TypedDict from jinja2 import Environment, FileSystemLoader - from readme_parser import parse_readme, slugify # Thematic grouping of categories. Each category name must match exactly # as it appears in README.md (the ## heading text). SECTION_GROUPS: list[tuple[str, list[str]]] = [ - ("Web & API", [ - "Web Frameworks", "RESTful API", "GraphQL", "WebSocket", - "ASGI Servers", "WSGI Servers", "HTTP Clients", "Template Engine", - "Web Asset Management", "Web Content Extracting", "Web Crawling", - ]), - ("Data & ML", [ - "Data Analysis", "Data Validation", "Data Visualization", - "Machine Learning", "Deep Learning", "Computer Vision", - "Natural Language Processing", "Recommender Systems", "Science", - "Quantum Computing", - ]), - ("DevOps & Infrastructure", [ - "DevOps Tools", "Distributed Computing", "Task Queues", - "Job Scheduler", "Serverless Frameworks", "Logging", "Processes", - "Shell", "Network Virtualization", "RPC Servers", - ]), - ("Database & Storage", [ - "Database", "Database Drivers", "ORM", "Caching", "Search", - "Serialization", - ]), - ("Development Tools", [ - "Testing", "Debugging Tools", "Code Analysis", "Build Tools", - "Refactoring", "Documentation", "Editor Plugins and IDEs", - "Interactive Interpreter", - ]), - ("CLI & GUI", [ - "Command-line Interface Development", "Command-line Tools", - "GUI Development", - ]), - ("Content & Media", [ - "Audio", "Video", "Image Processing", "HTML Manipulation", - "Text Processing", "Specific Formats Processing", - "File Manipulation", "Downloader", - ]), - ("System & Runtime", [ - "Asynchronous Programming", "Environment Management", - "Package Management", "Package Repositories", "Distribution", - "Implementations", "Built-in Classes Enhancement", - "Functional Programming", "Configuration Files", - ]), - ("Security & Auth", [ - "Authentication", "Cryptography", "Penetration Testing", - "Permissions", - ]), - ("Specialized", [ - "CMS", "Admin Panels", "Email", "Game Development", "Geolocation", - "Hardware", "Internationalization", "Date and Time", - "URL Manipulation", "Robotics", "Microsoft Windows", "Miscellaneous", - "Algorithms and Design Patterns", "Static Site Generator", - ]), + ( + "Web & API", + [ + "Web Frameworks", + "RESTful API", + "GraphQL", + "WebSocket", + "ASGI Servers", + "WSGI Servers", + "HTTP Clients", + "Template Engine", + "Web Asset Management", + "Web Content Extracting", + "Web Crawling", + ], + ), + ( + "Data & ML", + [ + "Data Analysis", + "Data Validation", + "Data Visualization", + "Machine Learning", + "Deep Learning", + "Computer Vision", + "Natural Language Processing", + "Recommender Systems", + "Science", + "Quantum Computing", + ], + ), + ( + "DevOps & Infrastructure", + [ + "DevOps Tools", + "Distributed Computing", + "Task Queues", + "Job Scheduler", + "Serverless Frameworks", + "Logging", + "Processes", + "Shell", + "Network Virtualization", + "RPC Servers", + ], + ), + ( + "Database & Storage", + [ + "Database", + "Database Drivers", + "ORM", + "Caching", + "Search", + "Serialization", + ], + ), + ( + "Development Tools", + [ + "Testing", + "Debugging Tools", + "Code Analysis", + "Build Tools", + "Refactoring", + "Documentation", + "Editor Plugins and IDEs", + "Interactive Interpreter", + ], + ), + ( + "CLI & GUI", + [ + "Command-line Interface Development", + "Command-line Tools", + "GUI Development", + ], + ), + ( + "Content & Media", + [ + "Audio", + "Video", + "Image Processing", + "HTML Manipulation", + "Text Processing", + "Specific Formats Processing", + "File Manipulation", + "Downloader", + ], + ), + ( + "System & Runtime", + [ + "Asynchronous Programming", + "Environment Management", + "Package Management", + "Package Repositories", + "Distribution", + "Implementations", + "Built-in Classes Enhancement", + "Functional Programming", + "Configuration Files", + ], + ), + ( + "Security & Auth", + [ + "Authentication", + "Cryptography", + "Penetration Testing", + "Permissions", + ], + ), + ( + "Specialized", + [ + "CMS", + "Admin Panels", + "Email", + "Game Development", + "Geolocation", + "Hardware", + "Internationalization", + "Date and Time", + "URL Manipulation", + "Robotics", + "Microsoft Windows", + "Miscellaneous", + "Algorithms and Design Patterns", + "Static Site Generator", + ], + ), ("Resources", []), # Filled dynamically from parsed resources ] @@ -85,20 +167,24 @@ def group_categories( group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name] if group_cats: - groups.append({ - "name": group_name, - "slug": slugify(group_name), - "categories": group_cats, - }) + groups.append( + { + "name": group_name, + "slug": slugify(group_name), + "categories": group_cats, + } + ) # Any categories not in a group go into "Other" ungrouped = [c for c in categories if c["name"] not in grouped_names] if ungrouped: - groups.append({ - "name": "Other", - "slug": "other", - "categories": ungrouped, - }) + groups.append( + { + "name": "Other", + "slug": "other", + "categories": ungrouped, + } + ) return groups @@ -121,9 +207,7 @@ class StarData(TypedDict): fetched_at: str -GITHUB_REPO_URL_RE = re.compile( - r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$" -) +GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$") def extract_github_repo(url: str) -> str | None: @@ -144,12 +228,14 @@ def load_stars(path: Path) -> dict[str, StarData]: def sort_entries(entries: list[dict]) -> list[dict]: """Sort entries by stars descending, then name ascending. No-star entries go last.""" + def sort_key(entry: dict) -> tuple[int, int, str]: stars = entry["stars"] name = entry["name"].lower() if stars is None: return (1, 0, name) return (0, -stars, name) + return sorted(entries, key=sort_key) @@ -167,17 +253,19 @@ def extract_entries( for cat in categories: group_name = cat_to_group.get(cat["name"], "Other") for entry in cat["entries"]: - entries.append({ - "name": entry["name"], - "url": entry["url"], - "description": entry["description"], - "category": cat["name"], - "group": group_name, - "stars": None, - "owner": None, - "last_commit_at": None, - "also_see": entry["also_see"], - }) + entries.append( + { + "name": entry["name"], + "url": entry["url"], + "description": entry["description"], + "category": cat["name"], + "group": group_name, + "stars": None, + "owner": None, + "last_commit_at": None, + "also_see": entry["also_see"], + } + ) return entries @@ -241,6 +329,8 @@ def build(repo_root: str) -> None: if static_src.exists(): shutil.copytree(static_src, static_dst, dirs_exist_ok=True) + shutil.copy(repo / "README.md", site_dir / "llms.txt") + print(f"Built single page with {len(categories)} categories + {len(resources)} resources") print(f"Total entries: {total_entries}") print(f"Output: {site_dir}")