mirror of
https://github.com/vinta/awesome-python.git
synced 2026-03-23 13:56:43 +08:00
feat: skip fetching repos whose cache entry is still fresh
Introduce CACHE_MAX_AGE_HOURS (12 hours) and filter current_repos before the fetch loop so repos whose cache entry was refreshed recently are not re-requested. Prints a breakdown of fetched vs. cached counts. Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ import httpx
|
|||||||
|
|
||||||
from build import extract_github_repo, load_stars
|
from build import extract_github_repo, load_stars
|
||||||
|
|
||||||
|
# Cache entries fetched within this many hours are considered fresh and skipped.
CACHE_MAX_AGE_HOURS = 12
|
||||||
DATA_DIR = Path(__file__).parent / "data"
|
DATA_DIR = Path(__file__).parent / "data"
|
||||||
CACHE_FILE = DATA_DIR / "github_stars.json"
|
CACHE_FILE = DATA_DIR / "github_stars.json"
|
||||||
README_PATH = Path(__file__).parent.parent / "README.md"
|
README_PATH = Path(__file__).parent.parent / "README.md"
|
||||||
@@ -113,8 +114,18 @@ def main() -> None:
|
|||||||
print(f"Pruned {len(cache) - len(pruned)} stale cache entries")
|
print(f"Pruned {len(cache) - len(pruned)} stale cache entries")
|
||||||
cache = pruned
|
cache = pruned
|
||||||
|
|
||||||
to_fetch = sorted(current_repos)
|
# Determine which repos need fetching (missing or stale)
|
||||||
print(f"{len(to_fetch)} repos to fetch")
|
to_fetch = []
|
||||||
|
for repo in sorted(current_repos):
|
||||||
|
entry = cache.get(repo)
|
||||||
|
if entry and "fetched_at" in entry:
|
||||||
|
fetched = datetime.fromisoformat(entry["fetched_at"])
|
||||||
|
age_hours = (now - fetched).total_seconds() / 3600
|
||||||
|
if age_hours < CACHE_MAX_AGE_HOURS:
|
||||||
|
continue
|
||||||
|
to_fetch.append(repo)
|
||||||
|
|
||||||
|
print(f"{len(to_fetch)} repos to fetch ({len(current_repos) - len(to_fetch)} cached)")
|
||||||
|
|
||||||
if not to_fetch:
|
if not to_fetch:
|
||||||
save_cache(cache)
|
save_cache(cache)
|
||||||
|
|||||||
@@ -137,3 +137,95 @@ class TestParseGraphqlResponse:
|
|||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result["a/x"]["stars"] == 100
|
assert result["a/x"]["stars"] == 100
|
||||||
assert result["b/y"]["stars"] == 200
|
assert result["b/y"]["stars"] == 200
|
||||||
|
|
||||||
|
|
||||||
|
class TestMainSkipsFreshCache:
    """Verify that main() skips fetching when all cache entries are fresh."""

    def test_skips_fetch_when_cache_is_fresh(self, tmp_path, monkeypatch, capsys):
        from datetime import datetime, timedelta, timezone

        from fetch_github_stars import main

        # Point the script at a minimal README containing a single repo link.
        readme_path = tmp_path / "README.md"
        readme_path.write_text("* [req](https://github.com/psf/requests) - HTTP.\n")
        monkeypatch.setattr("fetch_github_stars.README_PATH", readme_path)

        # Seed the cache with an entry fetched only 1 hour ago — well inside
        # CACHE_MAX_AGE_HOURS, so main() should have nothing left to fetch.
        cache_dir = tmp_path / "data"
        cache_dir.mkdir()
        cache_path = cache_dir / "github_stars.json"
        now = datetime.now(timezone.utc)
        cache_payload = {
            "psf/requests": {
                "stars": 52000,
                "owner": "psf",
                "last_commit_at": "2025-01-01T00:00:00+00:00",
                "fetched_at": (now - timedelta(hours=1)).isoformat(),
            }
        }
        cache_path.write_text(json.dumps(cache_payload), encoding="utf-8")
        monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_path)
        monkeypatch.setattr("fetch_github_stars.DATA_DIR", cache_dir)
        monkeypatch.setenv("GITHUB_TOKEN", "fake-token")

        main()

        # With every entry fresh, no HTTP client is needed and main() reports
        # an up-to-date cache.
        captured = capsys.readouterr().out
        assert "0 repos to fetch" in captured
        assert "Cache is up to date" in captured

    def test_fetches_when_cache_is_stale(self, tmp_path, monkeypatch, capsys):
        from datetime import datetime, timedelta, timezone
        from unittest.mock import MagicMock

        from fetch_github_stars import main

        # Point the script at a minimal README containing a single repo link.
        readme_path = tmp_path / "README.md"
        readme_path.write_text("* [req](https://github.com/psf/requests) - HTTP.\n")
        monkeypatch.setattr("fetch_github_stars.README_PATH", readme_path)

        # Seed the cache with an entry fetched 24 hours ago — older than
        # CACHE_MAX_AGE_HOURS, so main() must re-fetch it.
        cache_dir = tmp_path / "data"
        cache_dir.mkdir()
        cache_path = cache_dir / "github_stars.json"
        now = datetime.now(timezone.utc)
        cache_payload = {
            "psf/requests": {
                "stars": 52000,
                "owner": "psf",
                "last_commit_at": "2025-01-01T00:00:00+00:00",
                "fetched_at": (now - timedelta(hours=24)).isoformat(),
            }
        }
        cache_path.write_text(json.dumps(cache_payload), encoding="utf-8")
        monkeypatch.setattr("fetch_github_stars.CACHE_FILE", cache_path)
        monkeypatch.setattr("fetch_github_stars.DATA_DIR", cache_dir)
        monkeypatch.setenv("GITHUB_TOKEN", "fake-token")

        # Stub out httpx.Client so no real network call is made; the fake
        # response mimics the GraphQL payload shape main() parses.
        fake_response = MagicMock()
        fake_response.json.return_value = {
            "data": {
                "repo_0": {
                    "stargazerCount": 53000,
                    "owner": {"login": "psf"},
                    "defaultBranchRef": {"target": {"committedDate": "2025-06-01T00:00:00Z"}},
                }
            }
        }
        fake_response.raise_for_status = MagicMock()
        fake_client = MagicMock()
        # Support `with httpx.Client(...) as client:` usage in main().
        fake_client.__enter__ = MagicMock(return_value=fake_client)
        fake_client.__exit__ = MagicMock(return_value=False)
        fake_client.post.return_value = fake_response
        monkeypatch.setattr("fetch_github_stars.httpx.Client", lambda **kwargs: fake_client)

        main()

        captured = capsys.readouterr().out
        assert "1 repos to fetch" in captured
        assert "Done. Fetched 1 repos" in captured
        fake_client.post.assert_called_once()
|
||||||
|
|||||||
Reference in New Issue
Block a user