mirror of
https://github.com/vinta/awesome-python.git
synced 2026-03-24 00:58:21 +08:00
refactor: reformat build.py to Black style and add llms.txt output
Reformats dict and list literals to trailing-comma multiline style throughout. Also copies README.md to llms.txt in the site output so LLM crawlers can discover the full content. Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
240
website/build.py
240
website/build.py
@@ -8,62 +8,144 @@ from pathlib import Path
|
||||
from typing import TypedDict
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
from readme_parser import parse_readme, slugify
|
||||
|
||||
# Thematic grouping of categories. Each category name must match exactly
|
||||
# as it appears in README.md (the ## heading text).
|
||||
SECTION_GROUPS: list[tuple[str, list[str]]] = [
|
||||
("Web & API", [
|
||||
"Web Frameworks", "RESTful API", "GraphQL", "WebSocket",
|
||||
"ASGI Servers", "WSGI Servers", "HTTP Clients", "Template Engine",
|
||||
"Web Asset Management", "Web Content Extracting", "Web Crawling",
|
||||
]),
|
||||
("Data & ML", [
|
||||
"Data Analysis", "Data Validation", "Data Visualization",
|
||||
"Machine Learning", "Deep Learning", "Computer Vision",
|
||||
"Natural Language Processing", "Recommender Systems", "Science",
|
||||
"Quantum Computing",
|
||||
]),
|
||||
("DevOps & Infrastructure", [
|
||||
"DevOps Tools", "Distributed Computing", "Task Queues",
|
||||
"Job Scheduler", "Serverless Frameworks", "Logging", "Processes",
|
||||
"Shell", "Network Virtualization", "RPC Servers",
|
||||
]),
|
||||
("Database & Storage", [
|
||||
"Database", "Database Drivers", "ORM", "Caching", "Search",
|
||||
"Serialization",
|
||||
]),
|
||||
("Development Tools", [
|
||||
"Testing", "Debugging Tools", "Code Analysis", "Build Tools",
|
||||
"Refactoring", "Documentation", "Editor Plugins and IDEs",
|
||||
"Interactive Interpreter",
|
||||
]),
|
||||
("CLI & GUI", [
|
||||
"Command-line Interface Development", "Command-line Tools",
|
||||
"GUI Development",
|
||||
]),
|
||||
("Content & Media", [
|
||||
"Audio", "Video", "Image Processing", "HTML Manipulation",
|
||||
"Text Processing", "Specific Formats Processing",
|
||||
"File Manipulation", "Downloader",
|
||||
]),
|
||||
("System & Runtime", [
|
||||
"Asynchronous Programming", "Environment Management",
|
||||
"Package Management", "Package Repositories", "Distribution",
|
||||
"Implementations", "Built-in Classes Enhancement",
|
||||
"Functional Programming", "Configuration Files",
|
||||
]),
|
||||
("Security & Auth", [
|
||||
"Authentication", "Cryptography", "Penetration Testing",
|
||||
"Permissions",
|
||||
]),
|
||||
("Specialized", [
|
||||
"CMS", "Admin Panels", "Email", "Game Development", "Geolocation",
|
||||
"Hardware", "Internationalization", "Date and Time",
|
||||
"URL Manipulation", "Robotics", "Microsoft Windows", "Miscellaneous",
|
||||
"Algorithms and Design Patterns", "Static Site Generator",
|
||||
]),
|
||||
(
|
||||
"Web & API",
|
||||
[
|
||||
"Web Frameworks",
|
||||
"RESTful API",
|
||||
"GraphQL",
|
||||
"WebSocket",
|
||||
"ASGI Servers",
|
||||
"WSGI Servers",
|
||||
"HTTP Clients",
|
||||
"Template Engine",
|
||||
"Web Asset Management",
|
||||
"Web Content Extracting",
|
||||
"Web Crawling",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Data & ML",
|
||||
[
|
||||
"Data Analysis",
|
||||
"Data Validation",
|
||||
"Data Visualization",
|
||||
"Machine Learning",
|
||||
"Deep Learning",
|
||||
"Computer Vision",
|
||||
"Natural Language Processing",
|
||||
"Recommender Systems",
|
||||
"Science",
|
||||
"Quantum Computing",
|
||||
],
|
||||
),
|
||||
(
|
||||
"DevOps & Infrastructure",
|
||||
[
|
||||
"DevOps Tools",
|
||||
"Distributed Computing",
|
||||
"Task Queues",
|
||||
"Job Scheduler",
|
||||
"Serverless Frameworks",
|
||||
"Logging",
|
||||
"Processes",
|
||||
"Shell",
|
||||
"Network Virtualization",
|
||||
"RPC Servers",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Database & Storage",
|
||||
[
|
||||
"Database",
|
||||
"Database Drivers",
|
||||
"ORM",
|
||||
"Caching",
|
||||
"Search",
|
||||
"Serialization",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Development Tools",
|
||||
[
|
||||
"Testing",
|
||||
"Debugging Tools",
|
||||
"Code Analysis",
|
||||
"Build Tools",
|
||||
"Refactoring",
|
||||
"Documentation",
|
||||
"Editor Plugins and IDEs",
|
||||
"Interactive Interpreter",
|
||||
],
|
||||
),
|
||||
(
|
||||
"CLI & GUI",
|
||||
[
|
||||
"Command-line Interface Development",
|
||||
"Command-line Tools",
|
||||
"GUI Development",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Content & Media",
|
||||
[
|
||||
"Audio",
|
||||
"Video",
|
||||
"Image Processing",
|
||||
"HTML Manipulation",
|
||||
"Text Processing",
|
||||
"Specific Formats Processing",
|
||||
"File Manipulation",
|
||||
"Downloader",
|
||||
],
|
||||
),
|
||||
(
|
||||
"System & Runtime",
|
||||
[
|
||||
"Asynchronous Programming",
|
||||
"Environment Management",
|
||||
"Package Management",
|
||||
"Package Repositories",
|
||||
"Distribution",
|
||||
"Implementations",
|
||||
"Built-in Classes Enhancement",
|
||||
"Functional Programming",
|
||||
"Configuration Files",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Security & Auth",
|
||||
[
|
||||
"Authentication",
|
||||
"Cryptography",
|
||||
"Penetration Testing",
|
||||
"Permissions",
|
||||
],
|
||||
),
|
||||
(
|
||||
"Specialized",
|
||||
[
|
||||
"CMS",
|
||||
"Admin Panels",
|
||||
"Email",
|
||||
"Game Development",
|
||||
"Geolocation",
|
||||
"Hardware",
|
||||
"Internationalization",
|
||||
"Date and Time",
|
||||
"URL Manipulation",
|
||||
"Robotics",
|
||||
"Microsoft Windows",
|
||||
"Miscellaneous",
|
||||
"Algorithms and Design Patterns",
|
||||
"Static Site Generator",
|
||||
],
|
||||
),
|
||||
("Resources", []), # Filled dynamically from parsed resources
|
||||
]
|
||||
|
||||
@@ -85,20 +167,24 @@ def group_categories(
|
||||
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
|
||||
|
||||
if group_cats:
|
||||
groups.append({
|
||||
"name": group_name,
|
||||
"slug": slugify(group_name),
|
||||
"categories": group_cats,
|
||||
})
|
||||
groups.append(
|
||||
{
|
||||
"name": group_name,
|
||||
"slug": slugify(group_name),
|
||||
"categories": group_cats,
|
||||
}
|
||||
)
|
||||
|
||||
# Any categories not in a group go into "Other"
|
||||
ungrouped = [c for c in categories if c["name"] not in grouped_names]
|
||||
if ungrouped:
|
||||
groups.append({
|
||||
"name": "Other",
|
||||
"slug": "other",
|
||||
"categories": ungrouped,
|
||||
})
|
||||
groups.append(
|
||||
{
|
||||
"name": "Other",
|
||||
"slug": "other",
|
||||
"categories": ungrouped,
|
||||
}
|
||||
)
|
||||
|
||||
return groups
|
||||
|
||||
@@ -121,9 +207,7 @@ class StarData(TypedDict):
|
||||
fetched_at: str
|
||||
|
||||
|
||||
GITHUB_REPO_URL_RE = re.compile(
|
||||
r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$"
|
||||
)
|
||||
GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$")
|
||||
|
||||
|
||||
def extract_github_repo(url: str) -> str | None:
|
||||
@@ -144,12 +228,14 @@ def load_stars(path: Path) -> dict[str, StarData]:
|
||||
|
||||
def sort_entries(entries: list[dict]) -> list[dict]:
    """Return a new list of entries ordered for display.

    Entries that have a star count come first, highest count first. Entries
    whose ``stars`` value is ``None`` are placed after all of them. Within
    each tier, ties are broken by lowercased name in ascending order. The
    input list itself is left unmodified.
    """

    def rank(item: dict) -> tuple[int, int, str]:
        star_count = item["stars"]
        lowered_name = item["name"].lower()
        unstarred = star_count is None
        # Leading flag pushes unstarred entries behind every starred one.
        tier = 1 if unstarred else 0
        # Negating the count gives descending stars under an ascending sort.
        magnitude = 0 if unstarred else -star_count
        return (tier, magnitude, lowered_name)

    return sorted(entries, key=rank)
|
||||
|
||||
|
||||
@@ -167,17 +253,19 @@ def extract_entries(
|
||||
for cat in categories:
|
||||
group_name = cat_to_group.get(cat["name"], "Other")
|
||||
for entry in cat["entries"]:
|
||||
entries.append({
|
||||
"name": entry["name"],
|
||||
"url": entry["url"],
|
||||
"description": entry["description"],
|
||||
"category": cat["name"],
|
||||
"group": group_name,
|
||||
"stars": None,
|
||||
"owner": None,
|
||||
"last_commit_at": None,
|
||||
"also_see": entry["also_see"],
|
||||
})
|
||||
entries.append(
|
||||
{
|
||||
"name": entry["name"],
|
||||
"url": entry["url"],
|
||||
"description": entry["description"],
|
||||
"category": cat["name"],
|
||||
"group": group_name,
|
||||
"stars": None,
|
||||
"owner": None,
|
||||
"last_commit_at": None,
|
||||
"also_see": entry["also_see"],
|
||||
}
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
@@ -241,6 +329,8 @@ def build(repo_root: str) -> None:
|
||||
if static_src.exists():
|
||||
shutil.copytree(static_src, static_dst, dirs_exist_ok=True)
|
||||
|
||||
shutil.copy(repo / "README.md", site_dir / "llms.txt")
|
||||
|
||||
print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
|
||||
print(f"Total entries: {total_entries}")
|
||||
print(f"Output: {site_dir}")
|
||||
|
||||
Reference in New Issue
Block a user