feat: skip fetching repos whose cache entry is still fresh

Introduce CACHE_MAX_AGE_HOURS (12 h) and filter current_repos before
the fetch loop so repos that were updated recently are not re-requested.
Prints a breakdown of the fetched vs. cached repo counts.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Vinta Chen
2026-03-18 22:55:21 +08:00
parent 484515775f
commit 6148c13c0c
2 changed files with 105 additions and 2 deletions

View File

@@ -12,6 +12,7 @@ import httpx
# Project-local helpers; presumably extract_github_repo parses an owner/repo
# slug from a URL and load_stars reads the star list — confirm in build.py.
from build import extract_github_repo, load_stars
# Re-fetch a repo's star data only when its cache entry is older than this.
CACHE_MAX_AGE_HOURS = 12
# Data directory alongside this script.
DATA_DIR = Path(__file__).parent / "data"
# JSON cache of per-repo GitHub star data (keyed by repo, per the diff below).
CACHE_FILE = DATA_DIR / "github_stars.json"
# README lives one directory above this script's directory.
README_PATH = Path(__file__).parent.parent / "README.md"
@@ -113,8 +114,18 @@ def main() -> None:
print(f"Pruned {len(cache) - len(pruned)} stale cache entries")
cache = pruned
to_fetch = sorted(current_repos)
print(f"{len(to_fetch)} repos to fetch")
# Determine which repos need fetching (missing or stale)
to_fetch = []
for repo in sorted(current_repos):
entry = cache.get(repo)
if entry and "fetched_at" in entry:
fetched = datetime.fromisoformat(entry["fetched_at"])
age_hours = (now - fetched).total_seconds() / 3600
if age_hours < CACHE_MAX_AGE_HOURS:
continue
to_fetch.append(repo)
print(f"{len(to_fetch)} repos to fetch ({len(current_repos) - len(to_fetch)} cached)")
if not to_fetch:
save_cache(cache)