diff --git a/script/ci_memory_impact_comment.py b/script/ci_memory_impact_comment.py index a2961306457..01316da27fe 100755 --- a/script/ci_memory_impact_comment.py +++ b/script/ci_memory_impact_comment.py @@ -160,6 +160,76 @@ def format_change(before: int, after: int, threshold: float | None = None) -> st return f"{emoji} {delta_str} ({pct_str})" +def _sig_base(sym: str) -> str: + """Strip argument types from a symbol name for fuzzy matching. + + Removes the entire outermost parenthesized argument list (including + the parentheses) from the symbol string. + + This makes, for example, "foo(int)::nested" and "foo(float)::nested" + share the same key "foo::nested", while "foo(int)" maps to "foo" and + therefore does NOT collide with "foo(int)::nested". + """ + start = sym.find("(") + if start == -1: + return sym + end = sym.rfind(")") + if end == -1: + return sym + return sym[:start] + sym[end + 1 :] + + +_AMBIGUOUS = object() + + +def _match_signature_changes( + changed_symbols: list[tuple[str, int, int, int]], + new_symbols: list[tuple[str, int]], + removed_symbols: list[tuple[str, int]], +) -> tuple[ + list[tuple[str, int, int, int]], + list[tuple[str, int]], + list[tuple[str, int]], +]: + """Match new/removed symbol pairs that only differ in argument types. + + When a function's argument types change (e.g. foo(vector<>&) -> foo(Buffer&)), + it appears as a new + removed symbol. This matches them by base name and moves + them to changed_symbols. Only matches unambiguous 1:1 pairs. + """ + if not new_symbols or not removed_symbols: + return changed_symbols, new_symbols, removed_symbols + + # Build base -> entry maps; mark ambiguous bases with sentinel + new_by_base: dict[str, tuple[str, int] | object] = {} + for entry in new_symbols: + base = _sig_base(entry[0]) + new_by_base[base] = _AMBIGUOUS if base in new_by_base else entry + removed_by_base: dict[str, tuple[str, int] | object] = {} + for entry in removed_symbols: + base = _sig_base(entry[0]) + removed_by_base[base] = _AMBIGUOUS if base in removed_by_base else entry + + matched: set[str] = set() # matched base keys + for base, new_entry in new_by_base.items(): + if new_entry is _AMBIGUOUS: + continue + rem_entry = removed_by_base.get(base) + if rem_entry is None or rem_entry is _AMBIGUOUS: + continue + pr_sym, pr_size = new_entry + _rm_sym, target_size = rem_entry + delta = pr_size - target_size + if delta != 0: + changed_symbols.append((pr_sym, target_size, pr_size, delta)) + matched.add(base) + + if matched: + new_symbols = [e for e in new_symbols if _sig_base(e[0]) not in matched] + removed_symbols = [e for e in removed_symbols if _sig_base(e[0]) not in matched] + return changed_symbols, new_symbols, removed_symbols + + def prepare_symbol_changes_data( target_symbols: dict | None, pr_symbols: dict | None ) -> dict | None: @@ -200,6 +270,11 @@ def prepare_symbol_changes_data( delta = pr_size - target_size changed_symbols.append((symbol, target_size, pr_size, delta)) + # Match new/removed symbols that only differ in argument types + changed_symbols, new_symbols, removed_symbols = _match_signature_changes( + changed_symbols, new_symbols, removed_symbols + ) + if not changed_symbols and not new_symbols and not removed_symbols: return None diff --git a/tests/unit_tests/analyze_memory/__init__.py b/tests/unit_tests/analyze_memory/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/unit_tests/analyze_memory/test_ci_memory_impact_comment.py b/tests/unit_tests/analyze_memory/test_ci_memory_impact_comment.py new file mode 100644 index 00000000000..8399ac0303c --- /dev/null +++ b/tests/unit_tests/analyze_memory/test_ci_memory_impact_comment.py @@ -0,0 +1,99 @@ +"""Tests for script/ci_memory_impact_comment.py symbol matching.""" + +from pathlib import Path +import sys + +# Add script directory to path so we can import the module +sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "script")) + +from ci_memory_impact_comment import prepare_symbol_changes_data # noqa: E402 + + +def test_prepare_symbol_changes_signature_match() -> None: + """Symbols with same base name but different args are matched as changed.""" + target = { + "Foo::bar(std::vector&, int)": 300, + "unchanged()": 50, + } + pr = { + "Foo::bar(ProtoByteBuffer&, int)": 320, + "unchanged()": 50, + } + result = prepare_symbol_changes_data(target, pr) + assert result is not None + assert len(result["changed_symbols"]) == 1 + assert len(result["new_symbols"]) == 0 + assert len(result["removed_symbols"]) == 0 + sym, t_size, p_size, delta = result["changed_symbols"][0] + assert sym == "Foo::bar(ProtoByteBuffer&, int)" + assert t_size == 300 + assert p_size == 320 + assert delta == 20 + + +def test_prepare_symbol_changes_ambiguous_overloads_not_matched() -> None: + """Multiple overloads with same base name stay as new/removed.""" + target = { + "Foo::bar(int)": 100, + "Foo::bar(float)": 200, + } + pr = { + "Foo::bar(double)": 150, + "Foo::bar(long)": 250, + } + result = prepare_symbol_changes_data(target, pr) + assert result is not None + assert len(result["changed_symbols"]) == 0 + assert len(result["new_symbols"]) == 2 + assert len(result["removed_symbols"]) == 2 + + +def test_prepare_symbol_changes_no_parens_not_matched() -> None: + """Symbols without parens (variables) are not fuzzy-matched.""" + target = {"my_global_var": 100} + pr = {"my_global_var_v2": 120} + result = prepare_symbol_changes_data(target, pr) + assert result is not None + assert len(result["changed_symbols"]) == 0 + assert len(result["new_symbols"]) == 1 + assert len(result["removed_symbols"]) == 1 + + +def test_prepare_symbol_changes_nested_symbols_matched_separately() -> None: + """Nested symbols like ::__pstr__ don't collide with parent function.""" + target = { + "Foo::bar(std::vector&, int)": 300, + "Foo::bar(std::vector&, int)::__pstr__": 19, + } + pr = { + "Foo::bar(ProtoByteBuffer&, int)": 320, + "Foo::bar(ProtoByteBuffer&, int)::__pstr__": 19, + } + result = prepare_symbol_changes_data(target, pr) + assert result is not None + # Both the function and its nested __pstr__ should be matched (not new/removed) + assert len(result["new_symbols"]) == 0 + assert len(result["removed_symbols"]) == 0 + # __pstr__ has delta=0 so it's silently dropped, only the function shows + assert len(result["changed_symbols"]) == 1 + sym, t_size, p_size, delta = result["changed_symbols"][0] + assert sym == "Foo::bar(ProtoByteBuffer&, int)" + assert delta == 20 + + +def test_prepare_symbol_changes_exact_match_preferred() -> None: + """Exact name matches are found before fuzzy matching runs.""" + target = { + "Foo::bar(int)": 100, + } + pr = { + "Foo::bar(int)": 120, + } + result = prepare_symbol_changes_data(target, pr) + assert result is not None + assert len(result["changed_symbols"]) == 1 + assert len(result["new_symbols"]) == 0 + assert len(result["removed_symbols"]) == 0 + sym, t_size, p_size, delta = result["changed_symbols"][0] + assert sym == "Foo::bar(int)" + assert delta == 20