GP-5308 - Decompiler flattening; some optional PDB tweaks for discussion

This commit is contained in:
ghizard
2024-12-12 07:51:23 -05:00
parent 4d9441abc8
commit ae71313b2d
4 changed files with 406 additions and 19 deletions
@@ -27,6 +27,7 @@ import ghidra.app.util.pdb.classtype.*;
import ghidra.program.model.data.*;
import ghidra.program.model.gclass.ClassID;
import ghidra.program.model.gclass.ClassUtils;
import ghidra.util.InvalidNameException;
import ghidra.util.Msg;
import ghidra.util.exception.CancelledException;
import ghidra.util.task.TaskMonitor;
@@ -43,6 +44,11 @@ public class CppCompositeType {
private static final String VIRTUAL_BASE_SPECULATIVE_COMMENT =
"Virtual Base - Speculative Placement";
// Optional behavior switches, each read once at class-load time from a JVM system property
// (Boolean.getBoolean yields false unless the property is set to "true").
// CREATE_BASE_NAMES: when true, getBaseClassName() names base-class members
// "<base type name>_base"; when false they are given an empty name.
private static final boolean CREATE_BASE_NAMES =
Boolean.getBoolean("ghidra.pdb.createBaseNames");
// CREATE_MY_DATA: when true, getStandardMembers() wraps the class's own members into a single
// "<class name>_data" structure member; when false the members are used directly.
private static final boolean CREATE_MY_DATA = Boolean.getBoolean("ghidra.pdb.createMyData");
private boolean isFinal;
private ClassKey classKey;
private String className; // String for now.
@@ -1230,7 +1236,7 @@ public class CppCompositeType {
*/
private void createClassLayout(MsVxtManager vxtManager, ObjectOrientedClassLayout layoutOptions,
TaskMonitor monitor) throws CancelledException, PdbException {
List<ClassPdbMember> selfBaseMembers = getSelfBaseClassMembers();
List<ClassPdbMember> selfBaseMembers = getSelfBaseClassMembers(monitor);
mainVft = getMainVft(vxtManager);
if (mainVft != null) {
updateMainVft();
@@ -1249,8 +1255,8 @@ public class CppCompositeType {
selfBaseMembers, msg -> Msg.warn(this, msg), monitor)) {
clearComponents(composite);
}
ClassPdbMember directClassPdbMember =
new ClassPdbMember("", selfBaseType, false, 0, SELF_BASE_COMMENT);
ClassPdbMember directClassPdbMember = new ClassPdbMember(getBaseClassName(selfBaseType),
selfBaseType, false, 0, SELF_BASE_COMMENT);
mainVbt = getMainVbt(vxtManager);
if (mainVbt != null) {
@@ -1286,7 +1292,34 @@ public class CppCompositeType {
sourceHierarchy = determineBaseSourceOrder();
composite.setDescription(sourceHierarchy);
}
/**
 * Temporary debug method.  Builds the flattened replacement for {@code structure} using
 * {@link ClassUtils#getReplacementType(Structure)} and renames the result to
 * {@code <structure name>_TEMP_Flattened}.
 * @param applicator the applicator (not used by the current implementation)
 * @param structure the input structure
 * @return the flattened structure or null if could not or didn't need to be flattened; null is
 * also returned (and a warning logged) if the flattened structure could not be renamed
 */
public static Structure createFlattenedTemp(DefaultPdbApplicator applicator,
		Structure structure) {
	Structure flattened = ClassUtils.getReplacementType(structure);
	// The concrete StructureDataType is needed for the rename below; a null replacement
	// also fails this test and falls out here
	if (!(flattened instanceof StructureDataType impl)) {
		return null;
	}
	// Alternative under consideration: ClassUtils.getClassInternalsPath(flattened)
	CategoryPath categoryPath = flattened.getCategoryPath();
	String flattenedName = structure.getName() + "_TEMP_Flattened";
	try {
		impl.setName(flattenedName);
		impl.setCategoryPath(categoryPath);
		return impl;
	}
	catch (InvalidNameException e) {
		// Best effort: report the problem instead of silently dropping the type
		Msg.warn(CppCompositeType.class,
			"Unable to name flattened structure: " + flattenedName, e);
		return null;
	}
}
// Taken from PdbUtil without change. Would have had to change access on class PdbUtil and
@@ -1316,7 +1349,8 @@ public class CppCompositeType {
* regular members
* @return the members
*/
private List<ClassPdbMember> getSelfBaseClassMembers() {
private List<ClassPdbMember> getSelfBaseClassMembers(TaskMonitor monitor)
throws CancelledException {
// Using TreeMap to get base classes and vxtptrs in the correct order. None of these
// should have the same offset unless there are zero-sized base classes in play. Found
// examples, however where some "empty" base classes were given unique offsets (e.g., 12,
@@ -1370,8 +1404,8 @@ public class CppCompositeType {
String comment = BASE_COMMENT;
Composite baseDataType = base.getSelfBaseDataType();
// This does not have attributes like "Member" does (consider changes?)
ClassPdbMember classPdbMember =
new ClassPdbMember("", baseDataType, false, offset, comment);
ClassPdbMember classPdbMember = new ClassPdbMember(getBaseClassName(baseDataType),
baseDataType, false, offset, comment);
map.put(new OffsetOrdinal(offset, ordinal++), classPdbMember);
}
hasZeroBaseSize = hasZeroParentBaseSize;
@@ -1402,14 +1436,7 @@ public class CppCompositeType {
List<ClassPdbMember> members = new ArrayList<>(map.values());
int lastOffset = members.isEmpty() ? -1 : members.getLast().getOffset();
List<ClassPdbMember> standardMembers = new ArrayList<>();
for (Member member : layoutMembers) {
ClassPdbMember classPdbMember =
new ClassPdbMember(member.getName(), member.getDataType(),
member.isFlexibleArray(), member.getOffset(), member.getComment());
standardMembers.add(classPdbMember);
//members.add(classPdbMember);
}
List<ClassPdbMember> standardMembers = getStandardMembers(monitor);
int firstStandardOffset =
standardMembers.isEmpty() ? lastOffset + 1 : standardMembers.getFirst().getOffset();
@@ -1456,7 +1483,8 @@ public class CppCompositeType {
Composite baseDataType = base.getSelfBaseDataType();
// This does not have attributes
ClassPdbMember classPdbMember =
new ClassPdbMember("", baseDataType, false, offset.intValue(), comment);
new ClassPdbMember(getBaseClassName(baseDataType), baseDataType, false,
offset.intValue(), comment);
map.put(offset, classPdbMember);
accumulatedComment = "";
}
@@ -1472,6 +1500,56 @@ public class CppCompositeType {
return map;
}
/**
 * Returns the member name to use for a base-class component.
 * @param baseDataType the base class data type
 * @return {@code <base type name>_base} when {@code CREATE_BASE_NAMES} is set; otherwise the
 * empty string
 */
private String getBaseClassName(Composite baseDataType) {
	return CREATE_BASE_NAMES ? baseDataType.getName() + "_base" : "";
}
/**
 * Converts this class's layout members into {@link ClassPdbMember}s.
 * <p>
 * When {@code CREATE_MY_DATA} is not set, each layout member is converted at its own offset.
 * When it is set, the members are rebased to the smallest member offset, applied to a new
 * {@code <composite name>_data} structure, and a single member of that structure type,
 * placed at the smallest member offset, is returned instead.
 * @param monitor the task monitor handed to the member-application routine
 * @return the members; empty if there are no layout members
 * @throws CancelledException propagated from the member-application routine
 */
private List<ClassPdbMember> getStandardMembers(TaskMonitor monitor)
		throws CancelledException {
	List<ClassPdbMember> result = new ArrayList<>();
	if (layoutMembers.isEmpty()) {
		return result;
	}

	if (!CREATE_MY_DATA) {
		// Plain conversion; offsets are left as they are
		for (Member layoutMember : layoutMembers) {
			result.add(new ClassPdbMember(layoutMember.getName(), layoutMember.getDataType(),
				layoutMember.isFlexibleArray(), layoutMember.getOffset(),
				layoutMember.getComment()));
		}
		return result;
	}

	// Find where the class's own data starts
	int dataOffset = Integer.MAX_VALUE;
	for (Member layoutMember : layoutMembers) {
		dataOffset = Math.min(dataOffset, layoutMember.getOffset());
	}

	// Rebase every member so that the data structure starts at offset zero
	for (Member layoutMember : layoutMembers) {
		result.add(new ClassPdbMember(layoutMember.getName(), layoutMember.getDataType(),
			layoutMember.isFlexibleArray(), layoutMember.getOffset() - dataOffset,
			layoutMember.getComment()));
	}

	DataTypePath selfBasePath = createSelfBaseCategoryPath(this); // use same path as self
	String dataName = composite.getName() + "_data";
	Composite dataStruct = new StructureDataType(selfBasePath.getCategoryPath(), dataName, 0,
		composite.getDataTypeManager());
	dataStruct.setDescription("Data of " + selfBasePath.getDataTypeName());

	boolean applied = DefaultCompositeMember.applyDataTypeMembers(dataStruct, false, false, 0,
		result, msg -> Msg.warn(this, msg), monitor);
	if (!applied) {
		// NOTE(review): on failure this clears the class composite rather than the data
		// structure the members were applied to -- confirm that this is intended
		clearComponents(composite);
	}

	// Replace the individual members with the single wrapping member
	result.clear();
	result.add(new ClassPdbMember(dataName, dataStruct, false, dataOffset, ""));
	return result;
}
/**
* Finds all virtual base and virtual function pointers in the hierarchy of this class's
* self base.
@@ -81,6 +81,9 @@ public class DefaultPdbApplicator implements PdbApplicator {
private static final String THUNK_NAME_PREFIX = "[thunk]:";
// Experimental switch read from the JVM system property "ghidra.pdb.createFlattenedClasses"
// (false unless the property is set to "true").  When true, doTempResearch() creates and
// resolves flattened copies of the class structures.
private static final boolean CREATE_FLATTENED_CLASSES =
Boolean.getBoolean("ghidra.pdb.createFlattenedClasses");
//==============================================================================================
private static final String PDB_ANALYSIS_LOOKUP_STATE = "PDB_UNIVERSAL_ANALYSIS_STATE";
@@ -373,6 +376,7 @@ public class DefaultPdbApplicator implements PdbApplicator {
processTypes();
processSymbols();
vxtManager.createTables(dataTypeManager, ClearDataMode.CLEAR_ALL_CONFLICT_DATA);
doTempResearch();
break;
default:
throw new PdbException("PDB: Invalid Application Control: " +
@@ -381,6 +385,20 @@ public class DefaultPdbApplicator implements PdbApplicator {
Msg.info(this, "PDB Types and Main Symbols Processing Terminated Normally");
}
/**
 * Temporary research hook.  When {@code CREATE_FLATTENED_CLASSES} is set, creates a flattened
 * copy of every class type whose composite is a {@link Structure} and resolves each copy
 * that could be created.
 */
private void doTempResearch() {
	if (CREATE_FLATTENED_CLASSES) {
		for (CppCompositeType cppType : classTypeByMsTypeNum.values()) {
			if (!(cppType.getComposite() instanceof Structure struct)) {
				continue;
			}
			Structure flattened = CppCompositeType.createFlattenedTemp(this, struct);
			if (flattened == null) {
				continue;
			}
			resolve(flattened);
		}
	}
}
private void doDisassemblyWork() throws PdbException, CancelledException {
if (program != null) {
disassembleFunctions();
@@ -15,11 +15,19 @@
*/
package ghidra.program.model.gclass;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import ghidra.app.util.SymbolPath;
import ghidra.program.model.data.*;
/**
* Utility class for Class-related software modeling.
* <p>
* This class is experimental and subject to unannounced changes, including changes to processing
* philosophies and removal of methods
*/
public class ClassUtils {
@@ -303,4 +311,262 @@ public class ClassUtils {
return new CategoryPath(category, symbolPath.getName());
}
/**
* Finds and returns list of replacement pointer types for the specified owner class structure
* @param dtm the data type manager
* @param type the class structure type
* @return the map of offset to owner replacement types
*/
public static Map<Long, Pointer> getReplacementPointers(DataTypeManager dtm,
Structure type) {
CategoryPath path = getClassPath(type);
Map<Long, Pointer> results = new HashMap<>();
Category category = dtm.getCategory(path);
if (category == null) {
return results;
}
for (DataType dt : category.getDataTypes()) {
if (!(dt instanceof Structure struct)) {
continue;
}
Long offset =
ClassUtils.validateVtableDescriptionOffsetTag(struct.getDescription());
if (offset == null) {
continue;
}
Pointer vxtptr = new PointerDataType(struct);
results.put(offset, vxtptr);
}
return results;
}
/**
 * Record pairing a replacement field name with a replacement pointer data type, as returned by
 * {@link #getReplacementType(DataTypeComponent, long, Map)}
 * @param name the replacement field name
 * @param pointer the replacement pointer type (declared as a general {@link DataType})
 */
public static record NameAndPointer(String name, DataType pointer) {}
/**
* Tries to provide an appropriate data type replacement for special components, particularly
* for class objects such as virtual function table and virtual base table pointers within
* a flattened class structure
* @param component the component to be checked
* @param accumulatedOffset the accumulated offset of the component due to flattening
* @param ownerVxtptrs the map of offset to owner vxtptr types
* @return the replacement data type or the original type if there is no replacement needed
*/
public static NameAndPointer getReplacementType(DataTypeComponent component,
long accumulatedOffset, Map<Long, Pointer> ownerVxtptrs) {
if (!hasReplaceAttribute(component)) {
return null;
}
String fieldName = component.getFieldName();
Pointer vxtptr = ownerVxtptrs.get(accumulatedOffset);
if (vxtptr == null) {
return null;
}
DataType dt = vxtptr.getDataType();
String dtName = dt.getName(); // We are not using the full path name
String newFieldName;
if (dtName.startsWith(ClassUtils.VTABLE)) {
if (!ClassUtils.VTPTR.equals(fieldName)) {
return null;
}
newFieldName = fieldName + dtName.substring(VTABLE.length()); //crash if not more char
}
else if (dtName.startsWith(ClassUtils.VBTABLE)) {
if (!ClassUtils.VBPTR.equals(fieldName)) {
return null;
}
newFieldName = fieldName + dtName.substring(VFTABLE.length()); //crash if not more char
}
else if (dtName.startsWith(ClassUtils.VFTABLE)) {
if (!ClassUtils.VFPTR.equals(fieldName)) {
return null;
}
newFieldName = fieldName + dtName.substring(VBTABLE.length()); //crash if not more char
}
else {
return null;
}
return new NameAndPointer(newFieldName, vxtptr);
}
/**
* Tries to provide an appropriate data type replacement for special components, particularly
* for class objects such as virtual function table and virtual base table pointers within
* a flattened class structure. The {@code structure} argument becomes the return type if
* {@code enabled} is {@code false}, if the argument structure does not have class
* attributes, or if there is no suitable replacement for it
* @param structure the structure to process
* @param enabled {@code false} will immediately return the argument type
* @return the replacement data type or null if could not or did not need to be replaced
*/
public static Structure getReplacementType(Structure structure, boolean enabled) {
if (!enabled) {
return structure;
}
if (!hasClassAttribute(structure)) {
return structure;
}
Structure replacement = getReplacementType(structure);
return replacement == null ? structure : replacement;
}
/**
* Tries to provide an appropriate data type replacement for special components, particularly
* for class objects such as virtual function table and virtual base table pointers within
* a flattened class structure
* @param structure the structure to process
* @return the replacement data type or null if could not or did not need to be replaced
*/
public static Structure getReplacementType(Structure structure) {
DataTypeManager dtm = structure.getDataTypeManager();
Map<Long, Pointer> vxtptrs = ClassUtils.getReplacementPointers(dtm, structure);
StructureDataType newStruct = new StructureDataType(structure.getCategoryPath(),
structure.getName(), 0, structure.getDataTypeManager());
newStruct.setPackingEnabled(false);
// Future: consider whether we need to strip the class attribute from the description
// of the resultant type. Decompiler might still want/need it; but we probably don't
// want it if it allows us to do replacement again (unless it doesn't really do
// anything on another pass of replacement). This comment is really for while we are
// using the description field to hold an attribute; this comment can be deleted once
// we are not using the field for holding an attribute.
newStruct.setDescription(structure.getDescription());
try {
if (processComponents(structure, newStruct, 0, vxtptrs)) {
newStruct.setLength(structure.getLength());
// The original structure should be packed, so we can use its alignment
// as the alignment of our flattened structure. We do not want to turn on
// packing for the flattened structure unless we supply appropriate padding
newStruct.align(structure.getAlignment());
return newStruct;
}
}
catch (InvalidDataTypeException e) {
// squelch
}
return null;
}
/**
 * Copies the defined components of {@code type} into {@code newType}, flattening and replacing
 * as it goes.  A component whose data type is a structure and which has the flatten attribute
 * is not copied itself; its components are copied recursively at the accumulated offset.
 * Zero-length components are skipped.  Every other component is inserted at its accumulated
 * offset, using the replacement name and pointer type when
 * {@link #getReplacementType(DataTypeComponent, long, Map)} supplies one
 * @param type the structure to process
 * @param newType the new structure being created
 * @param baseOffset the offset within {@code newType} at which {@code type} starts
 * @param ownerVxtptrs the map of offset to owner vxtptr types
 * @return {@code true} if any component was flattened or replaced
 * @throws InvalidDataTypeException if an inserted component does not land at its expected
 * offset (may presumably also come from the bit-field insertion call)
 */
private static boolean processComponents(Structure type, StructureDataType newType,
int baseOffset, Map<Long, Pointer> ownerVxtptrs) throws InvalidDataTypeException {
DataTypeComponent[] comps = type.getDefinedComponents();
boolean mod = false;
for (DataTypeComponent comp : comps) {
int accumulatedOffset = baseOffset + comp.getOffset();
if ((comp.getDataType() instanceof Structure struct && hasFlattenAttribute(comp))) {
// Expand the nested structure in place.  The recursive result is not consulted; the
// flattening itself counts as a modification
processComponents(struct, newType, accumulatedOffset, ownerVxtptrs);
mod = true;
continue;
}
if (comp.getLength() == 0) {
continue;
}
// NOTE(review): this tests the component object itself, not comp.getDataType().  If
// BitFieldDataType is a data type and not a DataTypeComponent (as its name suggests), this
// branch never runs and bit-fields go through insertAtOffset below -- confirm the intent
if (comp instanceof BitFieldDataType bfComp) {
DataTypeComponent bfdtc = newType.insertBitFieldAt(accumulatedOffset,
bfComp.getBaseTypeSize(), bfComp.getBitOffset(), comp.getDataType(),
bfComp.getBitSize(), comp.getFieldName(), comp.getComment());
if (bfdtc.getOffset() != accumulatedOffset) {
throw new InvalidDataTypeException();
}
continue;
}
ClassUtils.NameAndPointer nap =
ClassUtils.getReplacementType(comp, accumulatedOffset, ownerVxtptrs);
String fieldName;
DataType fieldType;
if (nap == null) {
// No replacement: carry the component over unchanged
fieldName = comp.getFieldName();
fieldType = comp.getDataType();
}
else {
fieldName = nap.name();
fieldType = nap.pointer();
mod = true;
}
// Fall back to the component's default name when there is no usable name
if (fieldName == null || fieldName.length() == 0) {
fieldName = comp.getDefaultFieldName();
}
DataTypeComponent dtc =
newType.insertAtOffset(accumulatedOffset, fieldType, fieldType.getLength(),
fieldName, comp.getComment());
// The component must end up exactly where it was in the flattened layout
if (dtc.getOffset() != accumulatedOffset) {
throw new InvalidDataTypeException();
}
}
return mod;
}
/**
* This method returns true if the argument structure has a class attribute
* @param structure the structure under question
* @return {@code true} if has a class attribute
*/
public static boolean hasClassAttribute(Structure structure) {
// Future: Check attribute on structure
String description = structure.getDescription();
if (StringUtils.isEmpty(description)) {
return false;
}
return true; // true for now... later do the next line
//return description.contains("{{class}}");
}
/**
* We hope to have the ability to set and use a "flatten" attribute on the component of
* the structure
* <p> This method is temporary for investigations... should rely on a real component attribute
* @param component the member to check
* @return {@code true} if has flatten attribute
*/
private static boolean hasFlattenAttribute(DataTypeComponent component) {
// Future: Check ComponentMutationEnum/Mode
String comment = component.getComment();
if (comment == null) {
return false;
}
if (comment.startsWith("Base") || comment.startsWith("Self Base") ||
comment.startsWith("Virtual Base")) {
return true;
}
return comment.contains("{{flatten}}");
}
/**
* We hope to have the ability to set and use a "replace" attribute on the component of
* the structure
* <p> This method is temporary for investigations... should rely on a real component attribute
* @param component the member to check
* @return {@code true} if has replace attribute
*/
private static boolean hasReplaceAttribute(DataTypeComponent component) {
// Future: Check ComponentMutationEnum/Mode
DataType fieldType = component.getDataType();
if (!(fieldType instanceof Pointer ptr) || ptr.getDataType() != null) {
return false;
}
String fieldName = component.getFieldName();
if (ClassUtils.VFPTR.equals(fieldName) || ClassUtils.VBPTR.equals(fieldName)) {
return true;
}
String comment = component.getComment();
if (comment == null) {
return false;
}
return comment.contains("{{replace}}");
}
}
@@ -28,6 +28,7 @@ import ghidra.program.database.data.PointerTypedefInspector;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.data.*;
import ghidra.program.model.data.Enum;
import ghidra.program.model.gclass.ClassUtils;
import ghidra.program.model.lang.CompilerSpec;
import ghidra.program.model.lang.DecompilerLanguage;
import ghidra.program.model.listing.Program;
@@ -38,7 +39,7 @@ import ghidra.xml.XmlParseException;
/**
*
* Class for marshaling DataType objects to and from the Decompiler.
*
*
*/
public class PcodeDataTypeManager {
@@ -119,6 +120,17 @@ public class PcodeDataTypeManager {
private TypeMap byteMap;
private int pointerWordSize; // Wordsize to assign to all pointer datatypes
// If we continue down this path, the following will probably get replaced with a tool option
// and might eventually be eliminated, such that we always do or never do type replacement
// in this manner.
private static final boolean TYPE_REPLACEMENT_ENABLED =
Boolean.getBoolean("ghidra.decompiler.typeReplacement");
/**
* Constructor
* @param prog the program for the p-code data type manager
* @param simplifier the name transformer to be used
*/
public PcodeDataTypeManager(Program prog, NameTransformer simplifier) {
program = prog;
@@ -134,14 +146,26 @@ public class PcodeDataTypeManager {
pointerWordSize = ((SleighLanguage) prog.getLanguage()).getDefaultPointerWordSize();
}
/**
* Returns the program associated with this PcodeDataTypeManager
* @return the program
*/
public Program getProgram() {
return program;
}
/**
* Returns the name transformer
* @return the name transformer
*/
public NameTransformer getNameTransformer() {
return nameTransformer;
}
/**
* Sets the name transformer
* @param newTransformer the name transformer to set to this manager
*/
public void setNameTransformer(NameTransformer newTransformer) {
nameTransformer = newTransformer;
}
@@ -184,7 +208,7 @@ public class PcodeDataTypeManager {
* Decode a data-type from the stream
* @param decoder is the stream decoder
* @return the decoded data-type object
* @throws DecoderException for invalid encodings
* @throws DecoderException for invalid encodings
*/
public DataType decodeDataType(Decoder decoder) throws DecoderException {
int el = decoder.openElement();
@@ -484,6 +508,7 @@ public class PcodeDataTypeManager {
encoder.writeString(ATTRIB_METATYPE, "struct");
encoder.writeSignedInteger(ATTRIB_SIZE, sz);
encoder.writeSignedInteger(ATTRIB_ALIGNMENT, type.getAlignment());
type = ClassUtils.getReplacementType(type, TYPE_REPLACEMENT_ENABLED);
DataTypeComponent[] comps = type.getDefinedComponents();
for (DataTypeComponent comp : comps) {
if (comp.getLength() == 0) {
@@ -1082,7 +1107,7 @@ public class PcodeDataTypeManager {
/**
* Encode information for a data-type to the stream
*
*
* @param encoder is the stream encoder
* @param type is the data-type to encode
* @param size is the size of the data-type
@@ -1159,7 +1184,7 @@ public class PcodeDataTypeManager {
/**
* Build the list of core data-types. Data-types that are always available to the Decompiler
* and are associated with a (metatype,size) pair.
*
*
*/
private void generateCoreTypes() {
voidDt = new VoidDataType(progDataTypes);