mirror of
https://github.com/vinta/awesome-python.git
synced 2026-05-10 11:18:26 +08:00
fix: stricter GitHub owner/repo regexes and injection tests
Split _GITHUB_NAME_RE into separate owner and repo patterns. The owner regex now allows only alphanumerics and hyphens: it rejects leading/trailing hyphens, and rejects dots and underscores anywhere (matching GitHub's actual username rules). The repo regex requires an alphanumeric first character but allows dots, hyphens, and underscores anywhere after. New tests cover GraphQL injection attempts, invalid leading chars, and valid hyphenated/underscore/dot combinations. Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -19,9 +19,10 @@ README_PATH = Path(__file__).parent.parent / "README.md"
|
||||
GRAPHQL_URL = "https://api.github.com/graphql"
|
||||
BATCH_SIZE = 50
|
||||
|
||||
# Allowlist for valid GitHub owner/repo name characters.
|
||||
# GitHub usernames and repo names only allow letters, digits, hyphens, underscores, and dots.
|
||||
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9._-]+$")
|
||||
# GitHub usernames: alphanumeric and hyphens, must start/end with alphanumeric.
|
||||
_GITHUB_OWNER_RE = re.compile(r"^[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?$")
|
||||
# GitHub repo names: alphanumeric, hyphens, underscores, dots, must start with alphanumeric.
|
||||
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$")
|
||||
|
||||
|
||||
def extract_github_repos(text: str) -> set[str]:
|
||||
@@ -50,7 +51,7 @@ def build_graphql_query(repos: list[str]) -> str:
|
||||
parts = []
|
||||
for i, repo in enumerate(repos):
|
||||
owner, name = repo.split("/", 1)
|
||||
if not _GITHUB_NAME_RE.match(owner) or not _GITHUB_NAME_RE.match(name):
|
||||
if not _GITHUB_OWNER_RE.match(owner) or not _GITHUB_NAME_RE.match(name):
|
||||
continue
|
||||
parts.append(
|
||||
f'repo_{i}: repository(owner: "{owner}", name: "{name}") '
|
||||
|
||||
Reference in New Issue
Block a user