[analyze-memory] Add function call frequency analysis (#14779)

This commit is contained in:
J. Nick Koston
2026-03-13 13:20:17 -10:00
committed by GitHub
parent ab3b677113
commit 22062d79a2
2 changed files with 162 additions and 3 deletions
+53 -3
View File
@@ -1,6 +1,6 @@
"""Memory usage analyzer for ESPHome compiled binaries."""
from collections import defaultdict
from collections import Counter, defaultdict
from dataclasses import dataclass, field
import logging
from pathlib import Path
@@ -40,6 +40,15 @@ _READELF_SECTION_PATTERN = re.compile(
r"\s*\[\s*\d+\]\s+([\.\w]+)\s+\w+\s+[\da-fA-F]+\s+[\da-fA-F]+\s+([\da-fA-F]+)"
)
# Regex for extracting call targets from objdump disassembly
# Matches direct call instructions across architectures:
# Xtensa: call0/call4/call8/call12/callx0/callx4/callx8/callx12 <addr> <symbol>
# ARM: bl/blx <addr> <symbol>
# Captures the mangled symbol name inside angle brackets.
_CALL_TARGET_PATTERN = re.compile(
r"\t(?:call(?:0|4|8|12)|callx(?:0|4|8|12)|blx?)\s+[\da-fA-F]+ <([^>]+)>"
)
# Component category prefixes
_COMPONENT_PREFIX_ESPHOME = "[esphome]"
_COMPONENT_PREFIX_EXTERNAL = "[external]"
@@ -197,6 +206,8 @@ class MemoryAnalyzer:
self._lib_hash_to_name: dict[str, str] = {}
# Heuristic category to library redirect: "mdns_lib" -> "[lib]mdns"
self._heuristic_to_lib: dict[str, str] = {}
# Function call counts: mangled_name -> call_count
self._function_call_counts: Counter[str] = Counter()
def analyze(self) -> dict[str, ComponentMemory]:
"""Analyze the ELF file and return component memory usage."""
@@ -206,6 +217,7 @@ class MemoryAnalyzer:
self._categorize_symbols()
self._analyze_cswtch_symbols()
self._analyze_sdk_libraries()
self._analyze_function_calls()
return dict(self.components)
def _parse_sections(self) -> None:
@@ -384,8 +396,9 @@ class MemoryAnalyzer:
return
_LOGGER.info("Demangling %d symbols", len(symbols))
self._demangle_cache = batch_demangle(symbols, objdump_path=self.objdump_path)
_LOGGER.info("Successfully demangled %d symbols", len(self._demangle_cache))
demangled = batch_demangle(symbols, objdump_path=self.objdump_path)
self._demangle_cache.update(demangled)
_LOGGER.info("Successfully demangled %d symbols", len(demangled))
def _demangle_symbol(self, symbol: str) -> str:
"""Get demangled C++ symbol name from cache."""
@@ -1011,6 +1024,43 @@ class MemoryAnalyzer:
total_size,
)
def _analyze_function_calls(self) -> None:
"""Count function call sites by parsing disassembly output.
Parses direct call instructions (call0/call8/bl/blx) from objdump -d
to count how many times each function is called. This helps identify
inlining candidates — frequently called small functions benefit most
from inlining.
"""
result = run_tool(
[self.objdump_path, "-d", str(self.elf_path)],
timeout=60,
)
if result is None or result.returncode != 0:
_LOGGER.debug("Failed to disassemble ELF for function call analysis")
return
self._function_call_counts = Counter(
match.group(1)
for line in result.stdout.splitlines()
if (match := _CALL_TARGET_PATTERN.search(line))
)
# Demangle any call targets not already in the cache
missing = [
name
for name in self._function_call_counts
if name not in self._demangle_cache
]
if missing:
self._batch_demangle_symbols(missing)
_LOGGER.debug(
"Function call analysis: %d unique targets, %d total calls",
len(self._function_call_counts),
sum(self._function_call_counts.values()),
)
def get_unattributed_ram(self) -> tuple[int, int, int]:
"""Get unattributed RAM sizes (SDK/framework overhead).
+109
View File
@@ -231,6 +231,110 @@ class MemoryAnalyzerCLI(MemoryAnalyzer):
lines.append(f" {size:>6,} B {sym_name}")
lines.append("")
# Number of top called functions to show
TOP_CALLS_LIMIT: int = 50
# Number of inlining candidates to show
INLINE_CANDIDATES_LIMIT: int = 25
# Maximum function size in bytes to consider for inlining
INLINE_SIZE_THRESHOLD: int = 16
def _build_symbol_sizes(self) -> dict[str, int]:
"""Build a size lookup from all component symbols: mangled_name -> size."""
return {
symbol: size
for symbols in self._component_symbols.values()
for symbol, _, size, _ in symbols
}
def _format_call_row(
self, index: int, mangled: str, count: int, symbol_sizes: dict[str, int]
) -> str:
"""Format a single row for call frequency tables."""
demangled = self._demangle_cache.get(mangled, mangled)
if len(demangled) > 80:
demangled = f"{demangled[:77]}..."
size = symbol_sizes.get(mangled)
size_str = f"{size:>5,} B" if size is not None else " ?"
return f"{index:>3} {count:>5} {size_str} {demangled}"
def _add_call_table_header(self, lines: list[str]) -> None:
"""Add the header row for call frequency tables."""
lines.append(f"{'#':>3} {'Calls':>5} {'Size':>7} Function")
lines.append(f"{'---':>3} {'-----':>5} {'-------':>7} {'-' * 60}")
def _add_function_call_analysis(self, lines: list[str]) -> None:
"""Add function call frequency analysis section.
Shows the most frequently called functions by call site count.
"""
self._add_section_header(lines, "Top Called Functions")
symbol_sizes = self._build_symbol_sizes()
# Sort by call count descending
sorted_calls = sorted(
self._function_call_counts.items(), key=lambda x: x[1], reverse=True
)
self._add_call_table_header(lines)
for i, (mangled, count) in enumerate(sorted_calls[: self.TOP_CALLS_LIMIT]):
lines.append(self._format_call_row(i + 1, mangled, count, symbol_sizes))
total_calls = sum(self._function_call_counts.values())
lines.append("")
lines.append(
f"Total: {len(self._function_call_counts)} unique targets, "
f"{total_calls:,} call sites"
)
lines.append("")
def _add_inline_candidates(self, lines: list[str]) -> None:
"""Add inlining candidates section.
Shows frequently called functions that are small enough to benefit
from inlining (< 16 bytes). These are the best candidates for
reducing call overhead.
"""
self._add_section_header(
lines,
f"Inlining Candidates (<{self.INLINE_SIZE_THRESHOLD} B, by call count)",
)
symbol_sizes = self._build_symbol_sizes()
# Filter to small functions with known size, sort by call count
candidates = sorted(
(
(mangled, count)
for mangled, count in self._function_call_counts.items()
if mangled in symbol_sizes
and symbol_sizes[mangled] < self.INLINE_SIZE_THRESHOLD
),
key=lambda x: x[1],
reverse=True,
)
if not candidates:
lines.append("No candidates found.")
lines.append("")
return
self._add_call_table_header(lines)
for i, (mangled, count) in enumerate(
candidates[: self.INLINE_CANDIDATES_LIMIT]
):
lines.append(self._format_call_row(i + 1, mangled, count, symbol_sizes))
lines.append("")
lines.append(
f"Showing top {min(len(candidates), self.INLINE_CANDIDATES_LIMIT)} "
f"of {len(candidates)} functions under "
f"{self.INLINE_SIZE_THRESHOLD} B"
)
lines.append("")
def generate_report(self, detailed: bool = False) -> str:
"""Generate a formatted memory report."""
components = sorted(
@@ -533,6 +637,11 @@ class MemoryAnalyzerCLI(MemoryAnalyzer):
if self._cswtch_symbols:
self._add_cswtch_analysis(lines)
# Function call frequency analysis
if self._function_call_counts:
self._add_function_call_analysis(lines)
self._add_inline_candidates(lines)
lines.append(
"Note: This analysis covers symbols in the ELF file. Some runtime allocations may not be included."
)