Files
nuttx/tools/stackusage.py
Xiang Xiao fe217e533b tools/stackusage.py: use toolchain binaries instead of Python libraries
Replace pyelftools and capstone Python library dependencies with
subprocess calls to standard toolchain binaries (readelf, objdump,
addr2line). This eliminates all external Python package requirements,
removes manual DWARF parsing, and reduces the code from 818 to 563
lines.

Works with both GNU and LLVM toolchains via the -p/--prefix option:
  -p arm-none-eabi-  (GCC cross-compiler)
  -p llvm-           (LLVM/clang toolchain)

Signed-off-by: Xiang Xiao <xiaoxiang@xiaomi.com>
2026-03-17 11:26:44 -03:00

679 lines
21 KiB
Python
Executable File

#!/usr/bin/env python3
# tools/stackusage.py
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership. The
# ASF licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
"""Static stack usage analyzer using DWARF debug info and disassembly.
Extracts per-function stack frame sizes from DWARF .debug_frame CFA offsets
(via readelf -wF) and builds call graphs from objdump -d disassembly.
Reports worst-case total stack depth through the call chain, flagging
uncertain cases such as recursion and indirect calls (function pointers).
Works with both GNU and LLVM toolchains (readelf/objdump/addr2line).
No Python package dependencies required.
"""
import argparse
import os
import re
import subprocess
import sys
# Call mnemonics across all supported architectures.
# Direct vs indirect is determined by whether objdump annotates
# the target with <function_name>.
# Mnemonics are compared after lower-casing the disassembly text.
CALL_MNEMONICS = {
    "bl",
    "blx",
    "blr",  # ARM / AArch64
    "call",
    "callq",  # x86 / x86_64 / SPARC
    "jal",
    "jalr",  # MIPS / RISC-V
    "bctrl",  # PowerPC
    "jmpl",  # SPARC
    "bsr",
    "jsr",  # SuperH
    "call0",
    "call4",
    "call8",
    "call12",  # Xtensa
    "callx0",
    "callx4",
    "callx8",
    "callx12",  # Xtensa indirect
    "calli",  # TriCore
}
# Function names that indicate dynamic stack allocation.
# A call to any of these marks the caller as having a dynamic frame.
ALLOCA_NAMES = frozenset(
    {
        "alloca",
        "__alloca",
        "__builtin_alloca",
        "__builtin_alloca_with_align",
    }
)
def build_symbol_map(prefix, elf):
    """Build address<->name maps by parsing ``readelf -sW`` output.

    Args:
        prefix: toolchain prefix prepended to "readelf"
            (e.g. "arm-none-eabi-").
        elf: path to the ELF file.

    Returns:
        (addr_to_name, name_to_addr, func_sizes, is_thumb, machine) where
        addr_to_name/name_to_addr map function addresses and names,
        func_sizes maps address -> code size, is_thumb is True for 32-bit
        ARM targets, and machine is the readelf "Machine:" string.
        All-empty values are returned when readelf fails or is missing.
    """
    try:
        out = subprocess.run(
            [prefix + "readelf", "-hsW", elf],
            capture_output=True,
            text=True,
        )
    except (FileNotFoundError, OSError):
        # Toolchain binary not installed or wrong --prefix: degrade
        # cleanly instead of crashing with a traceback.
        return {}, {}, {}, False, ""
    if out.returncode != 0:
        return {}, {}, {}, False, ""
    addr_to_name = {}
    name_to_addr = {}
    func_sizes = {}
    is_thumb = False
    machine = ""
    for line in out.stdout.splitlines():
        # Detect machine type from the readelf ELF header output.
        if "Machine:" in line:
            machine = line.split(":", 1)[1].strip()
            # 32-bit ARM function addresses may carry the Thumb bit.
            if "ARM" in machine and "AArch64" not in machine:
                is_thumb = True
            continue
        # Symbol table line format (readelf -sW):
        # Num: Value Size Type Bind Vis Ndx Name
        # 1: 08000100 64 FUNC GLOBAL DEFAULT 1 main
        parts = line.split()
        if len(parts) < 8:
            continue
        if parts[3] != "FUNC":
            continue
        if parts[6] == "UND":
            continue
        try:
            addr = int(parts[1], 16)
            # readelf prints very large sizes in "0x..." form;
            # base 0 accepts both decimal and hex.
            size = int(parts[2], 0)
        except ValueError:
            continue
        name = parts[7]
        # Skip ARM mapping symbols such as "$t" / "$d".
        if name.startswith("$"):
            continue
        if is_thumb:
            # Clear the Thumb bit so addresses match the disassembly.
            addr = addr & ~1
        # Keep the longest name among aliases at the same address.
        if addr in addr_to_name and len(addr_to_name[addr]) >= len(name):
            continue
        addr_to_name[addr] = name
        name_to_addr[name] = addr
        func_sizes[addr] = size
    return addr_to_name, name_to_addr, func_sizes, is_thumb, machine
def parse_stack_sizes(prefix, elf, addr_to_name, name_to_addr, func_sizes):
    """Parse ``readelf -wF`` output for per-function CFA stack sizes.

    Args:
        prefix: toolchain prefix prepended to "readelf".
        elf: path to the ELF file.
        addr_to_name: address -> function name map.
        name_to_addr: function name -> address map.
        func_sizes: address -> code size map.

    Returns:
        funcs dict: name -> {file, line, self, code_size, qualifier,
        reasons}.  Empty when readelf is missing or emits no frame info.
    """
    try:
        out = subprocess.run(
            [prefix + "readelf", "-wF", elf],
            capture_output=True,
            text=True,
        )
    except (FileNotFoundError, OSError):
        # Missing toolchain binary: return no data rather than crash.
        return {}
    funcs = {}

    def flush(addr, max_offset):
        # Record a finished FDE as one function entry.  The code size is
        # looked up by FDE address, falling back to the symbol address
        # (they can differ when the Thumb bit was cleared).
        name = addr_to_name.get(addr)
        if not name or name in funcs:
            return
        code_size = func_sizes.get(addr, 0)
        if not code_size:
            sym_addr = name_to_addr.get(name)
            if sym_addr is not None:
                code_size = func_sizes.get(sym_addr, 0)
        funcs[name] = {
            "file": "?",
            "line": "?",
            "self": max_offset,
            "code_size": code_size,
            "qualifier": "static",
            "reasons": [],
        }

    current_addr = None
    max_offset = 0
    for line in out.stdout.splitlines():
        # FDE header: look for pc=ADDR..ADDR
        m = re.search(r"pc=([0-9a-f]+)\.\.", line, re.IGNORECASE)
        if m:
            # Flush the previous FDE before starting the new one.
            if current_addr is not None:
                flush(current_addr, max_offset)
            current_addr = int(m.group(1), 16)
            # Also try with the Thumb bit cleared.
            if current_addr not in addr_to_name:
                alt = current_addr & ~1
                if alt in addr_to_name:
                    current_addr = alt
            max_offset = 0
            continue
        # CFA row: extract offset from e.g. "sp+16" or "r13+24"
        m = re.search(r"[a-zA-Z]\w*\+(\d+)", line)
        if m and current_addr is not None:
            val = int(m.group(1))
            if val > max_offset:
                max_offset = val
    # Flush the last FDE.
    if current_addr is not None:
        flush(current_addr, max_offset)
    return funcs
def resolve_sources(prefix, elf, funcs, name_to_addr):
    """Batch-resolve source file:line via ``addr2line -fe``.

    Updates ``funcs[name]["file"]`` / ``["line"]`` in place.  The "?"
    placeholders are kept when addr2line is missing or reports no
    location for an address.
    """
    addrs = []
    names = []
    for name in funcs:
        addr = name_to_addr.get(name)
        if addr is not None:
            addrs.append("%x" % addr)
            names.append(name)
    if not addrs:
        return
    try:
        out = subprocess.run(
            [prefix + "addr2line", "-fe", elf],
            capture_output=True,
            text=True,
            input="\n".join(addrs) + "\n",
        )
    except (FileNotFoundError, OSError):
        # Missing toolchain binary: keep the "?" placeholders.
        return
    lines = out.stdout.splitlines()
    # addr2line outputs two lines per address: function_name, file:line.
    for idx, name in enumerate(names):
        loc_index = idx * 2 + 1
        if loc_index >= len(lines):
            break
        loc = lines[loc_index]
        # "??:0" / "??:?" mean no debug info for that address.
        if loc and loc not in ("??:0", "??:?"):
            parts = loc.rsplit(":", 1)
            if len(parts) == 2:
                funcs[name]["file"] = parts[0]
                funcs[name]["line"] = parts[1]
def _sp_pattern(machine):
"""Return a compiled regex matching the stack pointer register.
Architecture-specific SP register names:
ARM: sp, r13
AArch64: sp
x86: %esp, %rsp
MIPS: $sp, $29
RISC-V: sp, x2
Xtensa: a1
PowerPC: r1
SPARC: %sp, %o6
SuperH: r15
TriCore: a10, sp
"""
m = machine.upper()
if "ARM" in m and "AARCH64" not in m:
return re.compile(r"\bsp\b|\br13\b", re.IGNORECASE)
if "AARCH64" in m:
return re.compile(r"\bsp\b", re.IGNORECASE)
if "80386" in m or "X86" in m or "AMD" in m:
return re.compile(r"%[er]sp\b", re.IGNORECASE)
if "MIPS" in m:
return re.compile(r"\$sp\b|\$29\b", re.IGNORECASE)
if "RISC-V" in m:
return re.compile(r"\bsp\b|\bx2\b", re.IGNORECASE)
if "XTENSA" in m or "TENSILICA" in m:
return re.compile(r"\ba1\b", re.IGNORECASE)
if "POWER" in m or "PPC" in m:
return re.compile(r"\br1\b", re.IGNORECASE)
if "SPARC" in m:
return re.compile(r"\bsp\b|%sp\b|%o6\b", re.IGNORECASE)
if "SH" in m or "SUPER" in m or "RENESAS" in m:
return re.compile(r"\br15\b", re.IGNORECASE)
if "TRICORE" in m:
return re.compile(r"\ba10\b|\bsp\b", re.IGNORECASE)
# Fallback: common SP names
return re.compile(r"\bsp\b", re.IGNORECASE)
def _is_dynamic_sp_sub(mnemonic, operand_str, sp_re):
"""Return True if instruction dynamically adjusts the stack pointer.
Detects VLA / alloca patterns where a sub instruction adjusts SP
by a register operand (not an immediate constant).
"""
if not mnemonic.startswith("sub"):
return False
ops = operand_str.lower()
if not sp_re.search(ops):
return False
# ARM/AArch64: immediates use # prefix
if "#" in ops:
return False
# x86 AT&T syntax: $ means immediate
if "%" in ops and "$" in ops:
return False
return True
def parse_call_graph(prefix, elf, machine):
    """Build call graph by parsing ``objdump -d`` output.

    Args:
        prefix: toolchain prefix prepended to "objdump".
        elf: path to the ELF file.
        machine: readelf "Machine:" string (selects the SP register).

    Returns:
        (graph, dynamic_funcs) where:
        graph: dict mapping caller -> set of (callee_name_or_None,
        is_indirect); callee is None for indirect calls.
        dynamic_funcs: set of function names with dynamic stack
        (alloca/VLA).  Both are empty when objdump is unavailable.
    """
    sp_re = _sp_pattern(machine)
    graph = {}
    dynamic_funcs = set()
    current_func = None
    try:
        proc = subprocess.Popen(
            [prefix + "objdump", "-d", elf],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
        )
    except (FileNotFoundError, OSError):
        # Missing toolchain binary: report an empty call graph.
        return graph, dynamic_funcs
    for line in proc.stdout:
        # Function boundary: "0000abcd <func_name>:"
        m = re.match(r"^[0-9a-f]+ <(.+)>:\s*$", line)
        if m:
            current_func = m.group(1)
            graph.setdefault(current_func, set())
            continue
        if current_func is None:
            continue
        # Instruction line: " addr: bytes mnemonic operands"
        parts = line.split("\t")
        if len(parts) < 3:
            continue
        fields = parts[2].strip().split(None, 1)
        if not fields:
            # Data-in-text lines (e.g. literal pools) have no mnemonic.
            continue
        mnemonic = fields[0].lower()
        operand_str = fields[1] if len(fields) > 1 else ""
        if len(parts) > 3:
            # Some objdump versions tab-separate the symbolic target.
            operand_str += " " + parts[3]
        # Check for call instructions.
        if mnemonic in CALL_MNEMONICS:
            target_m = re.search(r"<([^>+]+)>", operand_str)
            if target_m:
                callee = target_m.group(1)
                if callee in ALLOCA_NAMES:
                    # alloca is dynamic allocation, not a real callee.
                    dynamic_funcs.add(current_func)
                else:
                    graph[current_func].add((callee, False))
            else:
                # No <symbol> annotation: indirect call via register.
                graph[current_func].add((None, True))
            continue
        # Check for dynamic stack pointer adjustment (VLA / inlined alloca).
        if _is_dynamic_sp_sub(mnemonic, operand_str, sp_re):
            dynamic_funcs.add(current_func)
    proc.wait()
    return graph, dynamic_funcs
def compute_worst_stack(funcs, graph, recursion_depth):
    """Compute worst-case total stack for every function via memoized DFS.

    Args:
        funcs: dict from parse_stack_sizes
        graph: dict from parse_call_graph
        recursion_depth: how many times a recursive cycle body is counted
            (0 = back-edges contribute nothing)

    Returns dict mapping function name to result dict with keys:
        self, total, uncertain, reasons, stack, self_reasons
    """
    # Memoization cache: func -> (total, uncertain, reasons, stack,
    # self_reasons).
    # NOTE(review): entries are cached even when the result depended on
    # the DFS path (recursion detection uses `path`), so a function inside
    # a cycle may cache a path-dependent result that is later reused from
    # a different caller — confirm this is the intended approximation.
    cache = {}

    def dfs(func, path):
        # Return (total, uncertain, reasons, stack, self_reasons) for
        # `func`; `path` is the chain of callers used for cycle detection.
        if func in cache:
            return cache[func]
        info = funcs.get(func)
        # Functions absent from DWARF contribute zero frame bytes but are
        # flagged as uncertain.
        self_size = info["self"] if info else 0
        self_reasons = set(info["reasons"]) if info else {"no DWARF data"}
        reasons = set(self_reasons)
        uncertain = bool(reasons)
        callees = graph.get(func, set())
        worst_callee = 0
        best_substack = []
        for callee, is_indirect in callees:
            if is_indirect:
                # Target unknown: the true worst case cannot be bounded.
                uncertain = True
                reasons.add("indirect call (function pointer)")
                self_reasons.add("indirect call (function pointer)")
                continue
            cur_path = path + [func]
            if callee in cur_path:
                # Recursion detected
                uncertain = True
                idx = cur_path.index(callee)
                cycle = cur_path[idx:] + [callee]
                reason = "recursion: %s" % "->".join(cycle)
                reasons.add(reason)
                self_reasons.add(reason)
                if recursion_depth > 0:
                    # Estimate the cycle cost as (sum of cycle members'
                    # own frames) x assumed depth; callees below the
                    # cycle are not included in the estimate.
                    cycle_nodes = cycle[:-1]
                    cycle_cost = sum(
                        funcs[c]["self"] if c in funcs else 0 for c in cycle_nodes
                    )
                    cycle_total = cycle_cost * recursion_depth
                    if cycle_total > worst_callee:
                        worst_callee = cycle_total
                        # Synthesize the estimated recursive frames for
                        # the displayed backtrace.
                        est_frames = []
                        for _ in range(recursion_depth):
                            for node in cycle_nodes:
                                est_frames.append((node, "recursive estimate"))
                        best_substack = est_frames
                continue
            callee_total, callee_unc, callee_reasons, callee_stack, _ = dfs(
                callee, cur_path
            )
            if callee_unc:
                # Propagate uncertainty from any callee branch.
                uncertain = True
                reasons.update(callee_reasons)
            if callee_total > worst_callee:
                worst_callee = callee_total
                best_substack = callee_stack
        total = self_size + worst_callee
        result = (
            total,
            uncertain,
            reasons,
            [(func, None)] + best_substack,
            self_reasons,
        )
        cache[func] = result
        return result

    results = {}
    # Include callers known only from the disassembly (no DWARF entry).
    all_funcs = set(funcs.keys()) | set(graph.keys())
    for func in all_funcs:
        total, uncertain, reasons, stack, self_reasons = dfs(func, [])
        # Skip symbols with no DWARF data and zero stack
        if func not in funcs and total == 0:
            continue
        results[func] = {
            "self": funcs[func]["self"] if func in funcs else 0,
            "total": total,
            "uncertain": uncertain,
            "reasons": list(reasons),
            "stack": stack,
            "self_reasons": list(self_reasons),
        }
    return results
def reason_marker(reasons):
    """Return a single-char marker for uncertainty.

    Marker priority (highest first):
    ? = no DWARF data
    * = dynamic stack (alloca/VLA)
    @ = recursion
    ^ = indirect call (function pointer)
    """
    # Ordered (marker, predicate) table: the first marker whose
    # predicate matches any reason wins.
    priority = (
        ("?", lambda r: r == "no DWARF data"),
        ("*", lambda r: r.startswith("dynamic stack")),
        ("@", lambda r: r.startswith("recursion:")),
        ("^", lambda r: r == "indirect call (function pointer)"),
    )
    for marker, matches in priority:
        if any(matches(reason) for reason in reasons):
            return marker
    return ""
def frame_info(funcs, results, frame):
    """Return (size_str, func_name, file:line) for one stack frame.

    size_str carries a marker prefix for the function's own uncertainty
    type (see reason_marker).  func_name includes the code size in
    parentheses when available, e.g. ``main(128)``.
    """
    func = frame[0]
    info = funcs.get(func)
    if info is None:
        # Unknown function: placeholders only.
        return ("?", func, "?:?")
    res = results.get(func)
    own_reasons = res["self_reasons"] if res else info["reasons"]
    marker = reason_marker(own_reasons)
    code_size = info.get("code_size", 0)
    label = "%s(%d)" % (func, code_size) if code_size else func
    size_str = "%s%d" % (marker, info["self"])
    location = "%s:%s" % (info["file"], info["line"])
    return (size_str, label, location)
def format_text(results, funcs, rank):
    """Format results as an aligned table with deepest call stack frames.

    Args:
        results: dict from compute_worst_stack.
        funcs: dict from parse_stack_sizes.
        rank: show only the top N entries by total stack (0 = all).

    Returns:
        The formatted report as a single string.
    """
    # Compute the common path prefix so the File:Line column stays short.
    paths = [
        f["file"]
        for f in funcs.values()
        if f["file"] not in ("?", "") and f["file"].startswith("/")
    ]
    if paths:
        prefix = os.path.commonpath(paths)
        if not prefix.endswith("/"):
            prefix += "/"
    else:
        prefix = ""
    maxname = 40
    items = sorted(results.items(), key=lambda x: x[1]["total"], reverse=True)
    if rank > 0:
        items = items[:rank]
    hdr = ("Total", "Self", "Backtrace", "File:Line")
    w = list(map(len, hdr))
    rows = []
    for func, info in items:
        frames = [frame_info(funcs, results, f) for f in info["stack"]]
        # Strip the common prefix from file paths.
        if prefix:
            plen = len(prefix)
            frames = [
                (
                    sz,
                    fn,
                    loc[plen:] if loc.startswith(prefix) else loc,
                )
                for sz, fn, loc in frames
            ]
        # Derive the Total marker from the displayed stack frames' own
        # reasons, not from all propagated reasons across every branch.
        stack_reasons = []
        for f, _note in info["stack"]:
            res = results.get(f)
            if res:
                stack_reasons.extend(res["self_reasons"])
            elif f not in funcs:
                stack_reasons.append("no DWARF data")
        marker = reason_marker(stack_reasons) if info["uncertain"] else ""
        total_str = "%s%d" % (marker, info["total"])
        w[0] = max(w[0], len(total_str))
        for sz, fn, loc in frames:
            w[1] = max(w[1], len(sz))
            w[2] = max(w[2], min(len(fn), maxname))
            w[3] = max(w[3], len(loc))
        rows.append((total_str, frames))

    def trunc(s):
        # Truncate over-long function names so columns stay aligned.
        return s[: maxname - 3] + "..." if len(s) > maxname else s

    fmt = "%%%ds %%%ds %%-%ds %%s" % (w[0], w[1], w[2])
    lines = [fmt % hdr]
    lines.append(fmt % ("-" * w[0], "-" * w[1], "-" * w[2], "-" * w[3]))
    for total_str, frames in rows:
        for i, (sz, fn, loc) in enumerate(frames):
            if i == 0:
                # Fix: the first frame previously had a stray "~" appended
                # to the function name, which broke column alignment (the
                # widths above are computed without it) and matched no
                # documented marker.
                lines.append(fmt % (total_str, sz, trunc(fn), loc))
            else:
                lines.append(fmt % ("", sz, trunc(fn), loc))
        if not frames:
            # Entry with no resolvable stack frames: totals only.
            lines.append(fmt % (total_str, "", "", ""))
    return "\n".join(lines)
def main():
    """Command-line driver: parse arguments and run all analysis phases."""
    parser = argparse.ArgumentParser(
        description="Analyze per-function stack usage from DWARF debug info, "
        "combined with disassembly-based call graph analysis to compute "
        "worst-case total (self + callees) stack depth.",
    )
    parser.add_argument("elf", help="path to ELF file with DWARF debug info")
    parser.add_argument(
        "-p",
        "--prefix",
        default="",
        help="toolchain prefix (e.g. arm-none-eabi- or llvm-)",
    )
    parser.add_argument(
        "-n",
        "--rank",
        type=int,
        default=0,
        help="show top N functions (default: 0 = all)",
    )
    parser.add_argument(
        "-r",
        "--recursion-depth",
        type=int,
        default=0,
        help="assumed recursion depth for stack estimation (default: 0). "
        "When > 0, recursive cycle cost is multiplied by this value.",
    )
    args = parser.parse_args()

    if not os.path.isfile(args.elf):
        # sys.exit(str) prints the message to stderr and exits with status 1.
        sys.exit("ELF file not found: %s" % args.elf)

    # Phase 1: symbol table from readelf.
    symtab = build_symbol_map(args.prefix, args.elf)
    addr_to_name, name_to_addr, func_sizes, _is_thumb, machine = symtab
    if not addr_to_name:
        sys.exit("No function symbols found in ELF")

    # Phase 2: per-function stack sizes from DWARF .debug_frame.
    funcs = parse_stack_sizes(
        args.prefix, args.elf, addr_to_name, name_to_addr, func_sizes
    )
    if not funcs:
        sys.exit("No stack size data found in DWARF .debug_frame")

    # Phase 3: source locations via addr2line.
    resolve_sources(args.prefix, args.elf, funcs, name_to_addr)

    # Phase 4: call graph via objdump.
    graph, dynamic_funcs = parse_call_graph(args.prefix, args.elf, machine)

    # Mark functions containing dynamic stack allocation (alloca/VLA).
    for name in dynamic_funcs:
        if name in funcs:
            funcs[name]["reasons"].append("dynamic stack (alloca/VLA)")

    results = compute_worst_stack(funcs, graph, args.recursion_depth)
    print(format_text(results, funcs, args.rank))
# Run the analysis when executed as a script.
if __name__ == "__main__":
    main()