[analyze-memory] Add function call frequency analysis (#14779)

This commit is contained in:
J. Nick Koston
2026-03-13 13:20:17 -10:00
committed by GitHub
parent ab3b677113
commit 22062d79a2
2 changed files with 162 additions and 3 deletions
+53 -3
View File
@@ -1,6 +1,6 @@
"""Memory usage analyzer for ESPHome compiled binaries.""" """Memory usage analyzer for ESPHome compiled binaries."""
from collections import defaultdict from collections import Counter, defaultdict
from dataclasses import dataclass, field from dataclasses import dataclass, field
import logging import logging
from pathlib import Path from pathlib import Path
@@ -40,6 +40,15 @@ _READELF_SECTION_PATTERN = re.compile(
r"\s*\[\s*\d+\]\s+([\.\w]+)\s+\w+\s+[\da-fA-F]+\s+[\da-fA-F]+\s+([\da-fA-F]+)" r"\s*\[\s*\d+\]\s+([\.\w]+)\s+\w+\s+[\da-fA-F]+\s+[\da-fA-F]+\s+([\da-fA-F]+)"
) )
# Regex for extracting call targets from objdump disassembly
# Matches direct call instructions across architectures:
# Xtensa: call0/call4/call8/call12/callx0/callx4/callx8/callx12 <addr> <symbol>
# ARM: bl/blx <addr> <symbol>
# Captures the mangled symbol name inside angle brackets.
_CALL_TARGET_PATTERN = re.compile(
r"\t(?:call(?:0|4|8|12)|callx(?:0|4|8|12)|blx?)\s+[\da-fA-F]+ <([^>]+)>"
)
# Component category prefixes # Component category prefixes
_COMPONENT_PREFIX_ESPHOME = "[esphome]" _COMPONENT_PREFIX_ESPHOME = "[esphome]"
_COMPONENT_PREFIX_EXTERNAL = "[external]" _COMPONENT_PREFIX_EXTERNAL = "[external]"
@@ -197,6 +206,8 @@ class MemoryAnalyzer:
self._lib_hash_to_name: dict[str, str] = {} self._lib_hash_to_name: dict[str, str] = {}
# Heuristic category to library redirect: "mdns_lib" -> "[lib]mdns" # Heuristic category to library redirect: "mdns_lib" -> "[lib]mdns"
self._heuristic_to_lib: dict[str, str] = {} self._heuristic_to_lib: dict[str, str] = {}
# Function call counts: mangled_name -> call_count
self._function_call_counts: Counter[str] = Counter()
def analyze(self) -> dict[str, ComponentMemory]: def analyze(self) -> dict[str, ComponentMemory]:
"""Analyze the ELF file and return component memory usage.""" """Analyze the ELF file and return component memory usage."""
@@ -206,6 +217,7 @@ class MemoryAnalyzer:
self._categorize_symbols() self._categorize_symbols()
self._analyze_cswtch_symbols() self._analyze_cswtch_symbols()
self._analyze_sdk_libraries() self._analyze_sdk_libraries()
self._analyze_function_calls()
return dict(self.components) return dict(self.components)
def _parse_sections(self) -> None: def _parse_sections(self) -> None:
@@ -384,8 +396,9 @@ class MemoryAnalyzer:
return return
_LOGGER.info("Demangling %d symbols", len(symbols)) _LOGGER.info("Demangling %d symbols", len(symbols))
self._demangle_cache = batch_demangle(symbols, objdump_path=self.objdump_path) demangled = batch_demangle(symbols, objdump_path=self.objdump_path)
_LOGGER.info("Successfully demangled %d symbols", len(self._demangle_cache)) self._demangle_cache.update(demangled)
_LOGGER.info("Successfully demangled %d symbols", len(demangled))
def _demangle_symbol(self, symbol: str) -> str: def _demangle_symbol(self, symbol: str) -> str:
"""Get demangled C++ symbol name from cache.""" """Get demangled C++ symbol name from cache."""
@@ -1011,6 +1024,43 @@ class MemoryAnalyzer:
total_size, total_size,
) )
def _analyze_function_calls(self) -> None:
"""Count function call sites by parsing disassembly output.
Parses direct call instructions (call0/call8/bl/blx) from objdump -d
to count how many times each function is called. This helps identify
inlining candidates — frequently called small functions benefit most
from inlining.
"""
result = run_tool(
[self.objdump_path, "-d", str(self.elf_path)],
timeout=60,
)
if result is None or result.returncode != 0:
_LOGGER.debug("Failed to disassemble ELF for function call analysis")
return
self._function_call_counts = Counter(
match.group(1)
for line in result.stdout.splitlines()
if (match := _CALL_TARGET_PATTERN.search(line))
)
# Demangle any call targets not already in the cache
missing = [
name
for name in self._function_call_counts
if name not in self._demangle_cache
]
if missing:
self._batch_demangle_symbols(missing)
_LOGGER.debug(
"Function call analysis: %d unique targets, %d total calls",
len(self._function_call_counts),
sum(self._function_call_counts.values()),
)
def get_unattributed_ram(self) -> tuple[int, int, int]: def get_unattributed_ram(self) -> tuple[int, int, int]:
"""Get unattributed RAM sizes (SDK/framework overhead). """Get unattributed RAM sizes (SDK/framework overhead).
+109
View File
@@ -231,6 +231,110 @@ class MemoryAnalyzerCLI(MemoryAnalyzer):
lines.append(f" {size:>6,} B {sym_name}") lines.append(f" {size:>6,} B {sym_name}")
lines.append("") lines.append("")
# Number of top called functions to show
TOP_CALLS_LIMIT: int = 50
# Number of inlining candidates to show
INLINE_CANDIDATES_LIMIT: int = 25
# Maximum function size in bytes to consider for inlining
INLINE_SIZE_THRESHOLD: int = 16
def _build_symbol_sizes(self) -> dict[str, int]:
"""Build a size lookup from all component symbols: mangled_name -> size."""
return {
symbol: size
for symbols in self._component_symbols.values()
for symbol, _, size, _ in symbols
}
def _format_call_row(
self, index: int, mangled: str, count: int, symbol_sizes: dict[str, int]
) -> str:
"""Format a single row for call frequency tables."""
demangled = self._demangle_cache.get(mangled, mangled)
if len(demangled) > 80:
demangled = f"{demangled[:77]}..."
size = symbol_sizes.get(mangled)
size_str = f"{size:>5,} B" if size is not None else " ?"
return f"{index:>3} {count:>5} {size_str} {demangled}"
def _add_call_table_header(self, lines: list[str]) -> None:
"""Add the header row for call frequency tables."""
lines.append(f"{'#':>3} {'Calls':>5} {'Size':>7} Function")
lines.append(f"{'---':>3} {'-----':>5} {'-------':>7} {'-' * 60}")
def _add_function_call_analysis(self, lines: list[str]) -> None:
"""Add function call frequency analysis section.
Shows the most frequently called functions by call site count.
"""
self._add_section_header(lines, "Top Called Functions")
symbol_sizes = self._build_symbol_sizes()
# Sort by call count descending
sorted_calls = sorted(
self._function_call_counts.items(), key=lambda x: x[1], reverse=True
)
self._add_call_table_header(lines)
for i, (mangled, count) in enumerate(sorted_calls[: self.TOP_CALLS_LIMIT]):
lines.append(self._format_call_row(i + 1, mangled, count, symbol_sizes))
total_calls = sum(self._function_call_counts.values())
lines.append("")
lines.append(
f"Total: {len(self._function_call_counts)} unique targets, "
f"{total_calls:,} call sites"
)
lines.append("")
def _add_inline_candidates(self, lines: list[str]) -> None:
"""Add inlining candidates section.
Shows frequently called functions that are small enough to benefit
from inlining (< 16 bytes). These are the best candidates for
reducing call overhead.
"""
self._add_section_header(
lines,
f"Inlining Candidates (<{self.INLINE_SIZE_THRESHOLD} B, by call count)",
)
symbol_sizes = self._build_symbol_sizes()
# Filter to small functions with known size, sort by call count
candidates = sorted(
(
(mangled, count)
for mangled, count in self._function_call_counts.items()
if mangled in symbol_sizes
and symbol_sizes[mangled] < self.INLINE_SIZE_THRESHOLD
),
key=lambda x: x[1],
reverse=True,
)
if not candidates:
lines.append("No candidates found.")
lines.append("")
return
self._add_call_table_header(lines)
for i, (mangled, count) in enumerate(
candidates[: self.INLINE_CANDIDATES_LIMIT]
):
lines.append(self._format_call_row(i + 1, mangled, count, symbol_sizes))
lines.append("")
lines.append(
f"Showing top {min(len(candidates), self.INLINE_CANDIDATES_LIMIT)} "
f"of {len(candidates)} functions under "
f"{self.INLINE_SIZE_THRESHOLD} B"
)
lines.append("")
def generate_report(self, detailed: bool = False) -> str: def generate_report(self, detailed: bool = False) -> str:
"""Generate a formatted memory report.""" """Generate a formatted memory report."""
components = sorted( components = sorted(
@@ -533,6 +637,11 @@ class MemoryAnalyzerCLI(MemoryAnalyzer):
if self._cswtch_symbols: if self._cswtch_symbols:
self._add_cswtch_analysis(lines) self._add_cswtch_analysis(lines)
# Function call frequency analysis
if self._function_call_counts:
self._add_function_call_analysis(lines)
self._add_inline_candidates(lines)
lines.append( lines.append(
"Note: This analysis covers symbols in the ELF file. Some runtime allocations may not be included." "Note: This analysis covers symbols in the ELF file. Some runtime allocations may not be included."
) )