fix: stricter GitHub owner/repo regexes and injection tests

Split _GITHUB_NAME_RE into separate owner and repo patterns.
Owner regex now rejects leading/trailing hyphens and disallows dots and
underscores entirely (matching GitHub's actual username rules). Repo
regex requires an alphanumeric start but allows dots, underscores, and
hyphens anywhere after.

New tests cover GraphQL injection attempts, invalid leading chars,
and valid hyphenated/underscore/dot combinations.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Vinta Chen
2026-03-30 15:03:06 +08:00
parent 87c5f3bde9
commit 1ae889b4fd
2 changed files with 37 additions and 4 deletions
+5 -4
View File
@@ -19,9 +19,10 @@ README_PATH = Path(__file__).parent.parent / "README.md"
GRAPHQL_URL = "https://api.github.com/graphql"
BATCH_SIZE = 50
# Allowlist for valid GitHub owner/repo name characters.
# GitHub usernames and repo names only allow letters, digits, hyphens, underscores, and dots.
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9._-]+$")
# GitHub usernames: alphanumeric and hyphens, must start/end with alphanumeric.
_GITHUB_OWNER_RE = re.compile(r"^[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?$")
# GitHub repo names: alphanumeric, hyphens, underscores, dots, must start with alphanumeric.
_GITHUB_NAME_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*$")
def extract_github_repos(text: str) -> set[str]:
@@ -50,7 +51,7 @@ def build_graphql_query(repos: list[str]) -> str:
parts = []
for i, repo in enumerate(repos):
owner, name = repo.split("/", 1)
if not _GITHUB_NAME_RE.match(owner) or not _GITHUB_NAME_RE.match(name):
if not _GITHUB_OWNER_RE.match(owner) or not _GITHUB_NAME_RE.match(name):
continue
parts.append(
f'repo_{i}: repository(owner: "{owner}", name: "{name}") '