Files
nuttx/tools/abi_check.py
anjiahao adddfc3bfd abi_check.py: Check ABI compatibility between different ELF versions.
1. The input consists of multiple static libraries and an ELF file. The tool searches
   for external APIs used by the static libraries, then locates these API function signatures
   in the ELF file, and outputs the results as a JSON file.
2. Using the first feature, with the static libraries unchanged,
   the tool can take a new ELF file and an old ELF file as input, output two JSON files,
   and compare the function signatures of functions with the same name in the two JSON files.
   The comparison includes return values, parameters, and if they are structures,
   it also compares the structure size, member offsets, member types, etc.
3. When the input is a single ELF file, the tool can check if structures with the same name have different members.

Signed-off-by: anjiahao <anjiahao@xiaomi.com>
2026-01-28 18:32:39 +01:00

595 lines
19 KiB
Python
Executable File

#!/usr/bin/env python3
# tools/abi_check.py
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership. The
# ASF licenses this file to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import argparse
import json
import os
import re
import subprocess
import sys
import tempfile
from itertools import zip_longest
from elftools.elf.elffile import ELFFile
# Free-form description handed to argparse as the parser description (see
# args_parse); it lists the three operating modes of the tool.
program_description = """
This tool is used to check the binary compatibility of static libraries and has the following features:
1. The input consists of multiple static libraries and an ELF file. The tool searches
for external APIs used by the static libraries, then locates these API function signatures
in the ELF file, and outputs the results as a JSON file.
2. Using the first feature, with the static libraries unchanged,
the tool can take a new ELF file and an old ELF file as input, output two JSON files,
and compare the function signatures of functions with the same name in the two JSON files.
The comparison includes return values, parameters, and if they are structures,
it also compares the structure size, member offsets, member types, etc.
3.When the input is a single ELF file, the tool can check if structures with the same name have different members.
"""
# Captures NAME from a "struct NAME {" opener.
struct_re = re.compile(r"struct\s+(\w+)\s+{")
# Captures the text between the first "{" and the first "};" that follows it.
# The match is non-greedy and DOTALL lets it span several lines.
# NOTE(review): a nested "};" inside the body would end the capture early.
member_re = re.compile(r"{(.*?)};", re.DOTALL)
# Captures "path.h:LINE" from a "/* <hex> path.h:LINE */" comment; only paths
# ending in ".h" match.
file_re = re.compile(r"/\*\s*<[\da-f]+>\s+([\w\/\.\-_]+\.h:\d+)\s*\*/", re.DOTALL)
def member_mismatch(member1, member2):
    """Return True when two struct member listings differ.

    Before the comparison every ``_Bool`` is rewritten to ``bool`` and every
    space character is removed from both listings, so those differences are
    not reported as a mismatch.
    """

    def canonical(text):
        return text.replace("_Bool", "bool").replace(" ", "")

    return canonical(member1) != canonical(member2)
def print_struct(name, member, fileinfo):
    """Print one struct definition followed by the location it came from.

    The output is three parts, each on its own line: the ``struct NAME {``
    opener, the member text as given, and ``}; at FILEINFO``.
    """
    opener = f"struct {name} {{"
    closer = f"}}; at {fileinfo}"
    print(opener, member, closer, sep="\n")
def struct_check(elf):
    """Print every struct that appears with two different member lists.

    Runs ``pahole -M --sort -I`` on ``elf`` and splits its output at each
    ``/* Used at:`` marker. For every chunk that contains a struct name, a
    member body and a header file location, the member body is compared
    (via member_mismatch) with the one previously recorded under the same
    name; on a mismatch both definitions are printed.

    Exits the process with status 1 when pahole cannot be started or
    returns a non-zero status.
    """
    try:
        completed = subprocess.run(
            ["pahole", "-M", "--sort", "-I", elf],
            env=os.environ.copy(),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError:
        # Without this the user only sees a traceback when pahole is absent.
        print("Error: pahole executable not found", file=sys.stderr)
        sys.exit(1)

    if completed.returncode != 0:
        print(f"Error: {completed.stderr}", file=sys.stderr)
        sys.exit(1)

    structs = {}
    for block in completed.stdout.split("/* Used at:"):
        name_match = struct_re.search(block)
        if not name_match:
            continue
        struct_name = name_match.group(1)
        if not struct_name:
            continue

        member_match = member_re.search(block)
        file_match = file_re.search(block)
        if not member_match or not file_match:
            continue

        members = member_match.group(1)
        fileinfo = file_match.group(1)

        known = structs.get(struct_name)
        if known is not None and member_mismatch(known[0], members):
            # Keep the first recorded definition as the reference for later
            # chunks; only report the conflict.
            print_struct(struct_name, known[0], known[1])
            print("------")
            print_struct(struct_name, members, fileinfo)
            print("")
        else:
            structs[struct_name] = (members, fileinfo)
def objfile_iter(path):
    """Yield the path of every member extracted from the given archives.

    ``path`` is an iterable of archive paths. Each archive is unpacked with
    ``ar x`` into its own sub-directory of a temporary directory, so members
    with the same name in different archives do not overwrite each other.

    The temporary directory is removed when the generator finishes or is
    closed, so the yielded paths are only valid while iterating.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        for index, archive in enumerate(path):
            extract_dir = os.path.join(temp_dir, str(index))
            os.mkdir(extract_dir)
            # ar runs with cwd=extract_dir, so a relative archive path has to
            # be made absolute first or ar would not find it.
            completed = subprocess.run(
                ["ar", "x", os.path.abspath(archive)], cwd=extract_dir
            )
            if completed.returncode != 0:
                print(f"Warning: failed to extract {archive}", file=sys.stderr)
            for filename in sorted(os.listdir(extract_dir)):
                yield os.path.join(extract_dir, filename)
def symbol_collect(path):
    """Collect undefined and defined symbol names from one object file.

    Returns ``(undef, defined)`` as two lists of names, or ``(None, None)``
    when the file has no ``.symtab`` section.

    A symbol is counted as undefined when its type is ``STT_NOTYPE`` and its
    binding is ``STB_GLOBAL``; every other symbol is counted as defined.
    NOTE(review): the section index (``SHN_UNDEF``) is not consulted, so an
    untyped global label would also land in ``undef`` - confirm intended.
    """
    undef = []
    defined = []
    # Open the stream here so it is closed on return, the same way
    # find_function_signature does.
    with open(path, "rb") as stream:
        symtab = ELFFile(stream).get_section_by_name(".symtab")
        if symtab is None:
            return (None, None)
        for symbol in symtab.iter_symbols():
            info = symbol["st_info"]
            if info["type"] == "STT_NOTYPE" and info["bind"] == "STB_GLOBAL":
                undef.append(symbol.name)
            else:
                defined.append(symbol.name)
    return undef, defined
def resolve_type(dwarfinfo, die):
    """Return ``(type name, size)`` for a DWARF type DIE.

    ``die`` may be None, which is reported as ``("void", 0)``. The size is
    the DIE's ``DW_AT_byte_size`` when present, otherwise 0, except that:

    * typedefs and qualified types report the size of the underlying type;
    * arrays report the element count taken from the first subrange child
      that has ``DW_AT_upper_bound`` (0 when there is none), not a byte size.

    const, volatile and restrict qualifiers are rendered as a prefix of the
    underlying type name. Unhandled tags are reported as ``"void"``.
    """
    if die is None:
        return ("void", 0)

    size_attr = die.attributes.get("DW_AT_byte_size")
    type_size = size_attr.value if size_attr else 0

    def named(prefix="", default="void", size=0):
        # Fall back to ``default`` for DIEs without DW_AT_name.
        name_attr = die.attributes.get("DW_AT_name")
        if not name_attr:
            return (default, size)
        return (f"{prefix}{name_attr.value.decode('utf-8')}", size)

    tag = die.tag

    if tag == "DW_TAG_base_type":
        return named(size=type_size)

    if tag == "DW_TAG_pointer_type":
        # A pointer without DW_AT_type resolves to None, which the recursive
        # call reports as "void", giving "void*".
        target_name, _ = resolve_type(dwarfinfo, resolve_referenced_die(dwarfinfo, die))
        return (f"{target_name}*", type_size)

    if tag == "DW_TAG_typedef":
        _, target_size = resolve_type(dwarfinfo, resolve_referenced_die(dwarfinfo, die))
        return named(default="unknown", size=target_size)

    if tag == "DW_TAG_structure_type":
        return named(prefix="struct ", default="anonymous struct", size=type_size)

    if tag == "DW_TAG_union_type":
        return named(prefix="union ", default="anonymous union", size=type_size)

    if tag == "DW_TAG_enumeration_type":
        return named(prefix="enum ", default="anonymous enum", size=type_size)

    if tag == "DW_TAG_subroutine_type":
        # Reported as the type the subroutine type refers to.
        return resolve_type(dwarfinfo, resolve_referenced_die(dwarfinfo, die))

    if tag == "DW_TAG_array_type":
        element_name, _ = resolve_type(dwarfinfo, resolve_referenced_die(dwarfinfo, die))
        count = 0
        for child in die.iter_children():
            if (
                child.tag == "DW_TAG_subrange_type"
                and "DW_AT_upper_bound" in child.attributes
            ):
                count = child.attributes["DW_AT_upper_bound"].value + 1
                break
        return (f"{element_name}[{count}]", count)

    qualifiers = {
        "DW_TAG_const_type": "const",
        "DW_TAG_volatile_type": "volatile",
        "DW_TAG_restrict_type": "restrict",
    }
    if tag in qualifiers:
        target_name, target_size = resolve_type(
            dwarfinfo, resolve_referenced_die(dwarfinfo, die)
        )
        return (f"{qualifiers[tag]} {target_name}", target_size)

    return ("void", type_size)
def resolve_referenced_die(dwarfinfo, die):
    """Return the DIE referenced by ``die``, or None when it references none.

    ``DW_AT_type`` is followed when present, otherwise
    ``DW_AT_specification``. The attribute value is treated as an offset
    relative to the start of the DIE's compilation unit, unless its form is
    ``DW_FORM_ref_addr``, in which case it is used as an absolute offset.
    """
    for attr_name in ("DW_AT_type", "DW_AT_specification"):
        attr = die.attributes.get(attr_name)
        if attr is None:
            continue
        offset = attr.value
        if attr.form != "DW_FORM_ref_addr":
            offset += die.cu.cu_offset
        return dwarfinfo.get_DIE_from_refaddr(offset)
    return None
def resolve_field_info(dwarfinfo, die):
    """Describe one struct/union member DIE as a dict.

    Returns None when ``die`` is not a ``DW_TAG_member``. Otherwise returns
    a dict with the keys ``name``, ``type``, ``size`` and ``offset``:

    * ``name`` is ``"unnamed"`` for members without ``DW_AT_name``;
    * ``offset`` is the ``DW_AT_data_member_location`` value, or
      ``"unknown"`` when the attribute is absent.
    """
    if die.tag != "DW_TAG_member":
        return None

    name_attr = die.attributes.get("DW_AT_name")
    # Members can lack a name; don't crash on them.
    field_name = name_attr.value.decode("utf-8") if name_attr is not None else "unnamed"

    field_type, field_size = resolve_type(
        dwarfinfo, resolve_referenced_die(dwarfinfo, die)
    )

    offset_attr = die.attributes.get("DW_AT_data_member_location")
    field_offset = offset_attr.value if offset_attr is not None else "unknown"

    return {
        "name": field_name,
        "type": field_type,
        "size": field_size,
        "offset": field_offset,
    }
def find_combination_die(dwarfinfo, die):
    """Follow ``DW_AT_type`` links from ``die`` to a struct or union DIE.

    Returns the first DIE on the chain whose tag is
    ``DW_TAG_structure_type`` or ``DW_TAG_union_type``. Returns None when
    ``die`` is None or the chain ends (a DIE without ``DW_AT_type``) before
    one is reached.
    """
    current = die
    while current is not None:
        if current.tag in ("DW_TAG_structure_type", "DW_TAG_union_type"):
            return current
        if "DW_AT_type" not in current.attributes:
            return None
        current = resolve_referenced_die(dwarfinfo, current)
    return None
def resolve_combination_type(dwarfinfo, die):
    """Return the member descriptions of the aggregate ``die`` leads to.

    The aggregate is located with find_combination_die; an empty list is
    returned when none is found. Each ``DW_TAG_member`` child is described
    with resolve_field_info, in child order.
    """
    aggregate = find_combination_die(dwarfinfo, die)
    if not aggregate:
        return []
    return [
        resolve_field_info(dwarfinfo, member)
        for member in aggregate.iter_children()
        if member.tag == "DW_TAG_member"
    ]
def get_die_file_path(die, dwarfinfo):
    """Return the ``DW_AT_name`` of the top DIE of the CU that owns ``die``.

    Returns ``"unknown file"`` when that top DIE has no ``DW_AT_name``.
    ``dwarfinfo`` is accepted but not used.
    """
    unit_name = die.cu.get_top_DIE().attributes.get("DW_AT_name")
    if not unit_name:
        return "unknown file"
    return unit_name.value.decode("utf-8")
def die_is_prototyped(die):
    """Return True for a subprogram DIE carrying the three required attributes.

    The DIE must have the tag ``DW_TAG_subprogram`` and the attributes
    ``DW_AT_external``, ``DW_AT_name`` and ``DW_AT_prototyped``.
    """
    if die.tag != "DW_TAG_subprogram":
        return False
    present = die.attributes
    required = ("DW_AT_external", "DW_AT_name", "DW_AT_prototyped")
    return all(key in present for key in required)
def die_is_real_function(die):
    """Return True for a named, external subprogram DIE with code or inline info.

    The DIE must have the tag ``DW_TAG_subprogram``, the attributes
    ``DW_AT_external`` and ``DW_AT_name``, and at least one of
    ``DW_AT_low_pc`` or ``DW_AT_inline``.
    """
    if die.tag != "DW_TAG_subprogram":
        return False
    attrs = die.attributes
    if "DW_AT_external" not in attrs or "DW_AT_name" not in attrs:
        return False
    return "DW_AT_low_pc" in attrs or "DW_AT_inline" in attrs
def _describe_type(dwarf_info, type_die):
    """Return ``(type name, size, member list)`` for ``type_die``."""
    if type_die is None:
        # No type DIE (e.g. a void return): nothing to resolve.
        return ("void", 0, [])
    type_name, type_size = resolve_type(dwarf_info, type_die)
    return (type_name, type_size, resolve_combination_type(dwarf_info, type_die))


def _build_prototype(dwarf_info, die):
    """Build the prototype dict (return, parameters, file_path) of ``die``."""
    return_die = None
    if "DW_AT_type" in die.attributes:
        return_die = resolve_referenced_die(dwarf_info, die)
    return_type, return_size, return_fields = _describe_type(dwarf_info, return_die)

    prototype = {
        "return": {
            "type": return_type,
            "size": return_size,
            "field": return_fields,
        },
        "parameters": [],
    }
    for child in die.iter_children():
        if child.tag != "DW_TAG_formal_parameter":
            continue
        param_type, param_size, param_fields = _describe_type(
            dwarf_info, resolve_referenced_die(dwarf_info, child)
        )
        name_attr = child.attributes.get("DW_AT_name")
        prototype["parameters"].append(
            {
                "name": name_attr.value.decode("utf-8") if name_attr else "unnamed",
                "type": param_type,
                "size": param_size,
                "field": param_fields,
            }
        )
    prototype["file_path"] = get_die_file_path(die, dwarf_info)
    return prototype


def find_function_signature(elf_path, function_list, die_check):
    """Look up the prototypes of the named functions in an ELF file's DWARF.

    Args:
        elf_path: path of the ELF file to read.
        function_list: names to look for; the caller's list is not modified.
        die_check: predicate called on each DIE; only DIEs it accepts are
            considered (it must guarantee ``DW_AT_name`` is present).

    Returns:
        ``(signature, remaining)`` where ``signature`` is a list of
        ``(name, prototype)`` tuples in discovery order and ``remaining`` is
        the copy of ``function_list`` with every found name removed. When the
        file has no DWARF info a message is printed and nothing is found.

    A DIE is matched by ``DW_AT_name`` first and, if that name is not wanted
    and ``DW_AT_linkage_name`` exists, by its linkage name.
    """
    signature = []
    function_list = function_list.copy()
    # Set mirror of function_list so the per-DIE membership test is O(1).
    wanted = set(function_list)

    with open(elf_path, "rb") as f:
        elffile = ELFFile(f)
        if not elffile.has_dwarf_info():
            print("No DWARF debug info found in the ELF file.")
            return (signature, function_list)

        dwarf_info = elffile.get_dwarf_info()
        for CU in dwarf_info.iter_CUs():
            if not wanted:
                # Everything was found; further DIEs cannot match.
                break
            for die in CU.iter_DIEs():
                if not die_check(die):
                    continue
                name = die.attributes["DW_AT_name"].value.decode("utf-8")
                if name not in wanted and "DW_AT_linkage_name" in die.attributes:
                    name = die.attributes["DW_AT_linkage_name"].value.decode("utf-8")
                if name not in wanted:
                    continue

                signature.append((name, _build_prototype(dwarf_info, die)))
                function_list.remove(name)
                if name not in function_list:
                    wanted.discard(name)

            # Drop pyelftools' private CU caches after each CU.
            # NOTE(review): presumably done to bound memory on large ELF
            # files; relies on pyelftools internals - confirm on upgrade.
            dwarf_info._cu_cache.clear()
            dwarf_info._cu_offsets_map.clear()

    return (signature, function_list)
def print_function_signature(function_name, prototype):
    """Print a function prototype followed by its type details.

    Output order: the C-like declaration line, the return type with its
    size (and its fields, if any), each parameter with its size (and its
    fields, if any), and finally the file the prototype was found in.
    """

    def show_fields(fields):
        for field in fields:
            print(
                f" - Name: {field['name']}, Type: {field['type']}, Size: {field['size']}, Offset: {field['offset']}"
            )

    ret = prototype["return"]
    params = prototype["parameters"]

    arguments = ", ".join(f"{param['type']} {param['name']}" for param in params)
    print(f"{ret['type']} {function_name} ({arguments});")

    print(f" Return type {ret['type']} size: {ret['size']}")
    if ret["field"]:
        print(" Return type fields:")
        show_fields(ret["field"])

    for param in params:
        print(f" Parameter {param['name']} type {param['type']} size: {param['size']}")
        if param["field"]:
            print(" Parameter fields:")
            show_fields(param["field"])

    print(f"File at {prototype['file_path']}")
def dump_result(result):
    """Print a lookup result.

    ``result[0]`` holds ``(name, prototype)`` pairs, each printed with
    print_function_signature; ``result[1]`` holds the names that were not
    found, each reported on its own line.
    """
    for entry in result[0]:
        function_name, prototype = entry
        print_function_signature(function_name, prototype)
    for missing in result[1]:
        print(f"Function {missing} not found in the elf file")
def prototype_diff(prototype1, prototype2):
    """Print the differences between two type descriptions.

    Both arguments are dicts with the keys ``type``, ``size`` and ``field``
    (a list of field dicts with ``type``, ``size`` and ``offset``).

    When both sides have fields they are compared pairwise in order; a field
    present on only one side is reported as missing from the other. When at
    least one side has no fields, only the overall sizes are compared.
    """
    if prototype1["type"] != prototype2["type"]:
        print(
            f" Prototype {prototype1['type']} is different with {prototype2['type']}"
        )

    print(f" {prototype1['type']} have Different:")
    if prototype1["field"] != [] and prototype2["field"] != []:
        # zip_longest pads the shorter list with None.
        for field1, field2 in zip_longest(prototype1["field"], prototype2["field"]):
            if field1 is None:
                print(f" Field2 {field2['type']} not found in field1")
                continue
            if field2 is None:
                print(f" Field1 {field1['type']} not found in field2")
                continue
            if field1["type"] != field2["type"]:
                print(f" Field {field1['type']} type is different")
            if field1["size"] != field2["size"]:
                print(f" Field {field1['type']} size is different")
            if field1["offset"] != field2["offset"]:
                print(f" Field {field1['type']} offset is different")
    elif prototype1["size"] != prototype2["size"]:
        print(f" size is different {prototype1['size']} != {prototype2['size']}")
def diff(result1, result2):
    """Compare two lists of ``(name, prototype)`` pairs and print differences.

    Only functions present in ``result1`` are examined. For each one the
    function reports, in order: absence from ``result2``; a differing return
    description (details via prototype_diff); a differing parameter count
    (which ends the checks for that function); and each parameter, numbered
    from 1, whose type, size or fields differ.
    """
    first = dict(result1)
    second = dict(result2)
    compared_keys = ("type", "size", "field")

    for function_name, prototype1 in first.items():
        if function_name not in second:
            print(f"Function {function_name} not found in the second result")
            continue
        prototype2 = second[function_name]

        if prototype1["return"] != prototype2["return"]:
            print(f"Function {function_name} return type is different")
            prototype_diff(prototype1["return"], prototype2["return"])
            print("")

        params1 = prototype1["parameters"]
        params2 = prototype2["parameters"]
        if len(params1) != len(params2):
            print(f"Function {function_name} parameters count is different")
            print("")
            continue

        for position, (param1, param2) in enumerate(zip(params1, params2), start=1):
            if any(param1[key] != param2[key] for key in compared_keys):
                print(f"Function {function_name} parameter {position} is different")
                prototype_diff(param1, param2)
                print("")
def find_signature_lib(args, undef):
    """Search the object files of ``args.lib`` for the prototypes in ``undef``.

    Each object file yielded by objfile_iter is scanned with
    find_function_signature using die_is_prototyped; names found in one file
    are not searched for in the following ones.

    Returns a two-element list: the collected ``(name, prototype)`` entries
    and the names that were found in no object file.
    """
    collected = []
    remaining = undef
    for objfile in objfile_iter(args.lib):
        found, remaining = find_function_signature(
            objfile, remaining, die_is_prototyped
        )
        collected += found
    return [collected, remaining]
def parse_symbols(args):
    """Resolve the external functions used by ``args.lib`` against ``args.elf``.

    Collects the symbols of every object file in the libraries, keeps the
    undefined names that no object file defines, and looks their prototypes
    up in the ELF file. The result, ``[signatures sorted by name, sorted
    names not found]``, is written to ``args.json``.

    With ``args.check`` the same names are also looked up in the libraries'
    own debug info and written to ``lib_<args.json>``. With ``args.dump``
    the ELF result is printed as well.
    """
    undef_names = set()
    defined_names = set()
    for objfile in objfile_iter(args.lib):
        u, d = symbol_collect(objfile)
        if u is not None:
            undef_names.update(u)
        if d is not None:
            defined_names.update(d)

    # Names defined by some object of the libraries are not external APIs.
    undef = [name for name in undef_names if name not in defined_names]

    signature, not_found = find_function_signature(
        args.elf, undef, die_is_real_function
    )
    result = [sorted(signature, key=lambda x: x[0]), sorted(not_found)]

    if args.check:
        lib_result = find_signature_lib(args, undef)
        with open(f"lib_{args.json}", "w") as lib_file:
            json.dump(list(lib_result), lib_file, indent=4)

    if args.dump:
        print("Dump Elf function signature")
        dump_result(result)

    with open(f"{args.json}", "w") as out_file:
        json.dump(result, out_file, indent=4)
def args_parse():
    """Parse and validate the command line; return the argparse namespace.

    Exits with status 1 when the selected mode lacks a required argument:
    the default (signature extraction) mode needs both ``--elf`` and
    ``--lib``, and ``--struct_check`` needs ``--elf``.

    A warning is printed when an output JSON file that this run will write
    already exists.
    """
    parser = argparse.ArgumentParser(
        description=program_description, formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument("-a", "--lib", nargs="+", help="Path to liba.so or lib.a")
    parser.add_argument("-e", "--elf", help="Path to elf file")
    parser.add_argument(
        "-c",
        "--check",
        action="store_true",
        help="""If the static library contains debug information,
try to find the function in the static library,
and output the result to lib_<json> file
""",
    )
    parser.add_argument("-d", "--dump", action="store_true", help="Dump result")
    parser.add_argument(
        "-j", "--json", default="out.json", help="Save result to json file"
    )
    parser.add_argument(
        "-s", "--struct_check", action="store_true", help="Dump struct different"
    )
    parser.add_argument("-i", "--input_json", nargs=2, help="Diff two json files")
    args = parser.parse_args()

    # JSON files are only produced in the default mode, i.e. when neither
    # the diff mode nor the struct check mode was requested.
    extracting = not args.input_json and not args.struct_check

    if extracting and (args.elf is None or args.lib is None):
        print("Error: elf and lib must be provided")
        sys.exit(1)
    if args.struct_check and args.elf is None:
        print("Error: elf must be provided")
        sys.exit(1)

    if extracting:
        if os.path.exists(args.json):
            print(f"Warning file {args.json} already exists, Will be overwritten")
        if args.check and os.path.exists(f"lib_{args.json}"):
            print(f"Warning file lib_{args.json} already exists, Will be overwritten")
    return args
# Entry point: pick the mode from the parsed arguments. Diffing two JSON
# files takes precedence over the struct check, which takes precedence over
# signature extraction.
if __name__ == "__main__":
    args = args_parse()
    if args.input_json:
        # Close both input files as soon as they are parsed.
        with open(args.input_json[0]) as first_json:
            result1 = json.load(first_json)
        with open(args.input_json[1]) as second_json:
            result2 = json.load(second_json)
        # Element 0 of each file is the list of (name, prototype) entries.
        diff(result1[0], result2[0])
    elif args.struct_check:
        struct_check(args.elf)
    else:
        parse_symbols(args)