[ci] Match symbols with changed signatures in memory impact analysis (#14600)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
J. Nick Koston
2026-03-08 17:23:58 -10:00
committed by GitHub
parent 5d3893368d
commit 088a8a4338
3 changed files with 174 additions and 0 deletions
+75
View File
@@ -160,6 +160,76 @@ def format_change(before: int, after: int, threshold: float | None = None) -> st
return f"{emoji} {delta_str} ({pct_str})"
def _sig_base(sym: str) -> str:
"""Strip argument types from a symbol name for fuzzy matching.
Removes the entire outermost parenthesized argument list (including
the parentheses) from the symbol string.
This makes, for example, "foo(int)::nested" and "foo(float)::nested"
share the same key "foo::nested", while "foo(int)" maps to "foo" and
therefore does NOT collide with "foo(int)::nested".
"""
start = sym.find("(")
if start == -1:
return sym
end = sym.rfind(")")
if end == -1:
return sym
return sym[:start] + sym[end + 1 :]
_AMBIGUOUS = object()
def _match_signature_changes(
changed_symbols: list[tuple[str, int, int, int]],
new_symbols: list[tuple[str, int]],
removed_symbols: list[tuple[str, int]],
) -> tuple[
list[tuple[str, int, int, int]],
list[tuple[str, int]],
list[tuple[str, int]],
]:
"""Match new/removed symbol pairs that only differ in argument types.
When a function's argument types change (e.g. foo(vector<>&) -> foo(Buffer&)),
it appears as a new + removed symbol. This matches them by base name and moves
them to changed_symbols. Only matches unambiguous 1:1 pairs.
"""
if not new_symbols or not removed_symbols:
return changed_symbols, new_symbols, removed_symbols
# Build base -> entry maps; mark ambiguous bases with sentinel
new_by_base: dict[str, tuple[str, int] | object] = {}
for entry in new_symbols:
base = _sig_base(entry[0])
new_by_base[base] = _AMBIGUOUS if base in new_by_base else entry
removed_by_base: dict[str, tuple[str, int] | object] = {}
for entry in removed_symbols:
base = _sig_base(entry[0])
removed_by_base[base] = _AMBIGUOUS if base in removed_by_base else entry
matched: set[str] = set() # matched base keys
for base, new_entry in new_by_base.items():
if new_entry is _AMBIGUOUS:
continue
rem_entry = removed_by_base.get(base)
if rem_entry is None or rem_entry is _AMBIGUOUS:
continue
pr_sym, pr_size = new_entry
_rm_sym, target_size = rem_entry
delta = pr_size - target_size
if delta != 0:
changed_symbols.append((pr_sym, target_size, pr_size, delta))
matched.add(base)
if matched:
new_symbols = [e for e in new_symbols if _sig_base(e[0]) not in matched]
removed_symbols = [e for e in removed_symbols if _sig_base(e[0]) not in matched]
return changed_symbols, new_symbols, removed_symbols
def prepare_symbol_changes_data(
target_symbols: dict | None, pr_symbols: dict | None
) -> dict | None:
@@ -200,6 +270,11 @@ def prepare_symbol_changes_data(
delta = pr_size - target_size
changed_symbols.append((symbol, target_size, pr_size, delta))
# Match new/removed symbols that only differ in argument types
changed_symbols, new_symbols, removed_symbols = _match_signature_changes(
changed_symbols, new_symbols, removed_symbols
)
if not changed_symbols and not new_symbols and not removed_symbols:
return None
@@ -0,0 +1,99 @@
"""Tests for script/ci_memory_impact_comment.py symbol matching."""
from pathlib import Path
import sys
# Add script directory to path so we can import the module
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "script"))
from ci_memory_impact_comment import prepare_symbol_changes_data # noqa: E402
def test_prepare_symbol_changes_signature_match() -> None:
"""Symbols with same base name but different args are matched as changed."""
target = {
"Foo::bar(std::vector<unsigned char>&, int)": 300,
"unchanged()": 50,
}
pr = {
"Foo::bar(ProtoByteBuffer&, int)": 320,
"unchanged()": 50,
}
result = prepare_symbol_changes_data(target, pr)
assert result is not None
assert len(result["changed_symbols"]) == 1
assert len(result["new_symbols"]) == 0
assert len(result["removed_symbols"]) == 0
sym, t_size, p_size, delta = result["changed_symbols"][0]
assert sym == "Foo::bar(ProtoByteBuffer&, int)"
assert t_size == 300
assert p_size == 320
assert delta == 20
def test_prepare_symbol_changes_ambiguous_overloads_not_matched() -> None:
"""Multiple overloads with same base name stay as new/removed."""
target = {
"Foo::bar(int)": 100,
"Foo::bar(float)": 200,
}
pr = {
"Foo::bar(double)": 150,
"Foo::bar(long)": 250,
}
result = prepare_symbol_changes_data(target, pr)
assert result is not None
assert len(result["changed_symbols"]) == 0
assert len(result["new_symbols"]) == 2
assert len(result["removed_symbols"]) == 2
def test_prepare_symbol_changes_no_parens_not_matched() -> None:
"""Symbols without parens (variables) are not fuzzy-matched."""
target = {"my_global_var": 100}
pr = {"my_global_var_v2": 120}
result = prepare_symbol_changes_data(target, pr)
assert result is not None
assert len(result["changed_symbols"]) == 0
assert len(result["new_symbols"]) == 1
assert len(result["removed_symbols"]) == 1
def test_prepare_symbol_changes_nested_symbols_matched_separately() -> None:
"""Nested symbols like ::__pstr__ don't collide with parent function."""
target = {
"Foo::bar(std::vector<unsigned char>&, int)": 300,
"Foo::bar(std::vector<unsigned char>&, int)::__pstr__": 19,
}
pr = {
"Foo::bar(ProtoByteBuffer&, int)": 320,
"Foo::bar(ProtoByteBuffer&, int)::__pstr__": 19,
}
result = prepare_symbol_changes_data(target, pr)
assert result is not None
# Both the function and its nested __pstr__ should be matched (not new/removed)
assert len(result["new_symbols"]) == 0
assert len(result["removed_symbols"]) == 0
# __pstr__ has delta=0 so it's silently dropped, only the function shows
assert len(result["changed_symbols"]) == 1
sym, t_size, p_size, delta = result["changed_symbols"][0]
assert sym == "Foo::bar(ProtoByteBuffer&, int)"
assert delta == 20
def test_prepare_symbol_changes_exact_match_preferred() -> None:
"""Exact name matches are found before fuzzy matching runs."""
target = {
"Foo::bar(int)": 100,
}
pr = {
"Foo::bar(int)": 120,
}
result = prepare_symbol_changes_data(target, pr)
assert result is not None
assert len(result["changed_symbols"]) == 1
assert len(result["new_symbols"]) == 0
assert len(result["removed_symbols"]) == 0
sym, t_size, p_size, delta = result["changed_symbols"][0]
assert sym == "Foo::bar(int)"
assert delta == 20