GP-4717 - Add DemangledNamespaceNode and refine MDMangGhidra namespace processing, including setting anonymous namespace names to their underlying name

This commit is contained in:
ghizard
2024-07-15 15:15:22 -04:00
parent 947709ef0a
commit 08c95d2d8c
8 changed files with 457 additions and 26 deletions
@@ -0,0 +1,120 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.app.util.demangler;
import org.apache.commons.lang3.StringUtils;
import ghidra.program.model.symbol.Namespace;
/**
 * A {@link Demangled} element that stands for an ordinary namespace component, as opposed to
 * a type or a method.
 */
public class DemangledNamespaceNode implements Demangled {

    // Ideally this is the narrowest slice of the overall mangled string that describes this
    // node.  Whether such a slice can be isolated is up to the creator of the node; when it
    // cannot, a larger enclosing piece is stored instead.
    protected String mangled;
    private String originalDemangled;
    private String demangledName; // name exactly as given to setName()
    private String name; // 'safe' name
    protected Demangled namespace;

    /**
     * Creates a namespace node.
     * @param mangled the most refined piece of the (larger) original mangled stream that the
     * caller is able to provide; frequently only the larger piece is available
     * @param originalDemangled the original demangled string corresponding to the mangled
     * string, with the same caveats
     * @param name the name of the namespace node; must not be blank
     * @throws IllegalArgumentException if the name is blank
     */
    public DemangledNamespaceNode(String mangled, String originalDemangled, String name) {
        this.mangled = mangled;
        this.originalDemangled = originalDemangled;
        setName(name);
    }

    /**
     * Sets both the demangled name (stored as given) and the 'safe' name, which is the given
     * name with superfluous signature spaces stripped and every remaining space replaced with
     * an underscore.
     * @param name the new name; must not be blank
     * @throws IllegalArgumentException if the name is blank
     */
    @Override
    public void setName(String name) {
        if (StringUtils.isBlank(name)) {
            throw new IllegalArgumentException("Name cannot be blank");
        }
        demangledName = name;
        String compacted = DemanglerUtil.stripSuperfluousSignatureSpaces(name);
        this.name = compacted.replace(' ', '_');
    }

    /**
     * Returns the 'safe' name of this node.
     */
    @Override
    public String getName() {
        return name;
    }

    @Override
    public String getMangledString() {
        return mangled;
    }

    @Override
    public String getOriginalDemangled() {
        return originalDemangled;
    }

    /**
     * Returns the name exactly as it was last passed to {@link #setName(String)}.
     */
    @Override
    public String getDemangledName() {
        return demangledName;
    }

    @Override
    public void setNamespace(Demangled ns) {
        namespace = ns;
    }

    @Override
    public Demangled getNamespace() {
        return namespace;
    }

    /**
     * Returns the demangled (not the 'safe') name of this node, prefixed with the namespace
     * string of the parent namespace and the namespace delimiter when a parent has been set.
     */
    @Override
    public String getNamespaceString() {
        if (namespace == null) {
            return demangledName;
        }
        return namespace.getNamespaceString() + Namespace.DELIMITER + demangledName;
    }

    /**
     * Returns the 'safe' name of this node without any parent namespace.
     */
    @Override
    public String getNamespaceName() {
        return name;
    }

    @Override
    public String getSignature() {
        return getNamespaceName();
    }

    @Override
    public String toString() {
        return getNamespaceString();
    }
}
@@ -37,9 +37,9 @@ public class DemangledType implements Demangled {
private boolean isConst;
private boolean isVolatile;
public DemangledType(String mangled, String originaDemangled, String name) {
public DemangledType(String mangled, String originalDemangled, String name) {
this.mangled = mangled;
this.originalDemangled = originaDemangled;
this.originalDemangled = originalDemangled;
setName(name);
}
@@ -94,35 +94,81 @@ public class MDMangGhidra extends MDMang {
return returnedType;
}
public DemangledType processNamespace(MDQualifiedName qualifiedName) {
public Demangled processNamespace(MDQualifiedName qualifiedName) {
return processNamespace(qualifiedName.getQualification());
}
private DemangledType processNamespace(MDQualification qualification) {
private Demangled processNamespace(MDQualification qualification) {
Iterator<MDQualifier> it = qualification.iterator();
if (!it.hasNext()) {
return null;
}
MDQualifier qual = it.next();
DemangledType type = new DemangledType(mangledSource, demangledSource, qual.toString());
DemangledType parentType = type;
Demangled type = getDemangled(qual);
Demangled current = type;
// Note that qualifiers come in reverse order, from most refined to root being the last
while (it.hasNext()) {
qual = it.next();
DemangledType newType;
if (qual.isNested()) {
String subMangled = qual.getNested().getMangled();
newType = new DemangledType(subMangled, demangledSource, qual.toString());
}
else {
newType = new DemangledType(mangledSource, demangledSource, qual.toString());
}
parentType.setNamespace(newType);
parentType = newType;
Demangled parent = getDemangled(qual);
current.setNamespace(parent);
current = parent;
}
return type;
}
/**
 * Creates the {@link Demangled} node that represents a single qualifier of a qualified name.
 * <p>
 * A nested qualifier whose nested object has a data type, and a qualifier that reports a
 * "C" name, are represented with a {@link DemangledType}.  Every other qualifier is
 * represented with a {@link DemangledNamespaceNode}.  The namespace of the returned node is
 * not set here.
 * @param qual the qualifier to convert
 * @return the new node
 */
private Demangled getDemangled(MDQualifier qual) {
    String qualText = qual.toString();

    if (qual.isNested()) {
        String subMangled = qual.getNested().getMangled();
        MDObjectCPP nestedObject = qual.getNested().getNestedObject();
        MDType nestedType = nestedObject.getTypeInfo().getMDType();
        if (nestedType instanceof MDDataType) {
            return new DemangledType(subMangled, qualText, qualText);
        }
        // We currently cannot handle functions as part of a namespace, so a nested function
        // (MDFunctionType) is treated, like any other remaining nested kind, as a plain
        // namespace named with its demangled string.
        return new DemangledNamespaceNode(subMangled, qualText, qualText);
    }

    if (qual.isAnon()) {
        // qual.toString() yields "`anonymous namespace'" for every anonymous qualifier.  The
        // node is instead named from qual.getAnonymousName(), the underlying name of the
        // form "A0xfedcba98", passed through createStandardAnonymousNamespaceNode() so that
        // the result is a standardized name distinguishable from other anonymous names.
        // This is especially important when there are sibling anonymous names.
        String anonName =
            MDMangUtils.createStandardAnonymousNamespaceNode(qual.getAnonymousName());
        return new DemangledNamespaceNode(mangledSource, qualText, anonName);
    }

    if (qual.isInterface() || qual.isNameQ()) {
        // TODO: need to do better; setting namespace for now (a "Q" name looks like an
        // interface)
        return new DemangledNamespaceNode(mangledSource, qualText, qualText);
    }

    if (qual.isNameC()) {
        // TODO: need to do better; setting type for now, but not processed yet and not sure
        // what it is
        return new DemangledType(mangledSource, qualText, qualText);
    }

    // This takes care of plain and local namespaces
    return new DemangledNamespaceNode(mangledSource, qualText, qualText);
}
private DemangledObject processItem() {
objectResult = null;
if (item instanceof MDObjectReserved) {
@@ -17,8 +17,13 @@ package mdemangler;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import ghidra.app.util.SymbolPath;
import ghidra.app.util.SymbolPathParser;
import mdemangler.datatype.complex.MDComplexType;
import mdemangler.datatype.modifier.MDModifierType;
import mdemangler.naming.*;
@@ -97,13 +102,20 @@ public class MDMangUtils {
myParts.addAll(0, nestedParts);
}
else if (qual.isAnon()) {
myParts.add(0, qual.getAnonymousName());
// Instead of using the standard qual.toString() method, which returns
// "`anonymous namespace'" for anonymous qualifiers, we use qual.getAnonymousName()
// which will have the underlying anonymous name of the form "A0xfedcba98" to create
// a standardized anonymous name that is distinguishable from other anonymous names.
// The standardized name comes from createStandardAnonymousNamespaceNode(). This
// is especially important when there are sibling anonymous names.
String anon = createStandardAnonymousNamespaceNode(qual.getAnonymousName());
myParts.add(0, anon);
}
else {
myParts.add(0, qual.toString());
myParts.add(0, stripTags(qual.toString()));
}
}
myParts.add(name);
myParts.add(stripTags(name));
parts.addAll(myParts);
}
@@ -115,4 +127,185 @@ public class MDMangUtils {
return item;
}
/**
 * Creates a standardized anonymous namespace name from an underlying anonymous name.
 * <p>
 * The input is considered standard when it begins with {@code "A0x"} (the under-the-hood
 * MDMang name) or with a back tick (the form found with the MDQuestionModifierType suffix)
 * and the remainder is an unsigned hexadecimal number.  Such an input is converted to
 * {@code anon_} followed by the number in upper-case hex, zero-padded to at least 8 digits.
 * @param anon the underlying anonymous namespace name
 * @return the standardized anonymous namespace component, or the original string if the
 * input is not in one of the standard forms
 */
public static String createStandardAnonymousNamespaceNode(String anon) {
    /*
     * Note that we are converting to upper case and doing zero padding to 8 hex digits.
     * Rationale: In analyzing mangled symbols with anonymous namespaces, we found an LLVM
     * PDB that had a mix of anonymous namespaces:
     *   that used only lower case a-f hex digits
     *   that used only upper case A-F hex digits
     *   that had zero-padding, leading zeros to 8 hex digits
     *   that did not have zero-padding, leading zeros to 8 hex digits
     * There were matching namespaces between upper-case-only hex and lower-case-only that
     * were found often enough to be beyond coincidence.
     * There was only one anon NS node that had the zero-padding and this is one that also
     * had the 8-hex-digit suffix that we (have initially) parsed in the
     * MDQuestionModifierType.  Knowing that this matches has, for all practical purposes,
     * confirmed that the suffix is to represent an anonymous namespace.
     * Since there was only one anon NS with zero padding, we could not convince ourselves
     * completely that a "short" namespace and one with leading zeros that shared the
     * meaningful numeric part were essentially the same, but it would make sense to assume
     * this is true, especially in the context of the 8-hex digit suffix case (the need for
     * this suffix is probably what causes the namespace to be created, and it uses
     * formatting that provides the zero-padding; later, when the A0x namespace is needed,
     * it uses the name already given)
     * TODO: probably want to wind this into special MDMang processing as some sort of option
     * and possibly an optional user-specified format.
     */
    String hex;
    if (anon.startsWith("A0x")) {
        hex = anon.substring(3);
    }
    else if (anon.startsWith("`")) {
        hex = anon.substring(1);
    }
    else {
        return anon;
    }
    long value;
    try {
        // Parsed as an unsigned long: a signed int parse rejects any 8-digit value whose
        // high bit is set (e.g., "fedcba98"), which is a normal anonymous namespace value.
        value = Long.parseUnsignedLong(hex, 16);
    }
    catch (NumberFormatException e) {
        // The text after the prefix is not a hex number, so the input is not a standard
        // form (e.g., "`anonymous namespace'"); hand it back unchanged, as documented.
        return anon;
    }
    return String.format("anon_%08X", value);
}
// The two arrays below are parallel: the text at index i of searchList is replaced with the
// text at index i of replacementList.  Each type tag is removed while the character that
// introduces it ('<', '(', '`' or ',') is kept; the trailing modifiers are removed entirely.
// @formatter:off
private static final String[] searchList = {
    "<class ", "<struct ", "<union ", "<coclass ", "<cointerface ", "<enum ",
    "(class ", "(struct ", "(union ", "(coclass ", "(cointerface ", "(enum ",
    "`class ", "`struct ", "`union ", "`coclass ", "`cointerface ", "`enum ",
    ",class ", ",struct ", ",union ", ",coclass ", ",cointerface ", ",enum ",
    " __ptr64", "__unaligned ", " __restrict"}; // purposeful trailing space on "__unaligned "
private static final String[] replacementList = {
    "<", "<", "<", "<", "<", "<",
    "(", "(", "(", "(", "(", "(",
    "`", "`", "`", "`", "`", "`",
    ",", ",", ",", ",", ",", ",",
    "", "", ""};
// @formatter:on

// Quick and dirty way to do this... We need to work on MDMang object model
// and then add control (MDControl) to emit methods (insert(), append(), other,
// and probably need to rework/replace these too)
/**
 * Removes type tags (class, struct, union, coclass, cointerface, enum) that directly follow
 * a '<', '(', '`' or ',' character, as well as the " __ptr64", "__unaligned " and
 * " __restrict" modifiers, from the given string.
 * @param str the string to process
 * @return the string with the tags and modifiers removed
 */
private static String stripTags(String str) {
    return StringUtils.replaceEach(str, searchList, replacementList);
}
/**
 * Builds a symbol path from the namespace parts gathered from the parsable item, combined
 * with the parts of a regular path name.
 * <p>
 * All of the parts gathered from the parsable item are used, in order, as the trailing parts
 * of the result.  When the regular path name has more parts than were gathered, the extra
 * leading regular parts are placed in front; any of these that is exactly
 * {@code `anonymous-namespace'} is converted to {@code `anonymous namespace'}.
 * @param parsableItem the item from which the demangled parts are gathered
 * @param regularPathName the regular path name, which is parsed into parts
 * @param simple passed to the namespace recursion; when true the nested hierarchy is
 * recursed so that its names are pulled up to the main namespace level
 * @return the consolidated symbol path or null if there are no parts
 */
public static SymbolPath consolidateSymbolPath(MDParsableItem parsableItem,
        String regularPathName, boolean simple) {
    List<String> demangledParts = new ArrayList<>();
    recurseNamespace(demangledParts, parsableItem, simple);
    List<String> regularParts = SymbolPathParser.parse(regularPathName);

    // The demangled parts are preferred; regular parts only contribute the leading levels
    // for which there is no demangled counterpart.  Could get more sophisticated and decide
    // to use regular parts at the other levels too.
    int numLeadingRegular = regularParts.size() - demangledParts.size();

    SymbolPath path = null;
    for (int index = 0; index < numLeadingRegular; index++) {
        String regular = regularParts.get(index);
        String part = regular.equals("`anonymous-namespace'") ? "`anonymous namespace'"
                : regular;
        path = new SymbolPath(path, part);
    }
    for (String demangled : demangledParts) {
        path = new SymbolPath(path, demangled);
    }
    return path;
}
// Matches a string that consists entirely of "__l" followed by one or more decimal digits;
// group 1 is the digits
private static final Pattern LOCAL_NS_PATTERN = Pattern.compile("^__l([0-9]+)$");
// Matches "::__l" + digits + "::" anywhere within a string; group 1 is the digits
private static final Pattern EMBEDDED_LOCAL_NS_PATTERN = Pattern.compile("::__l([0-9]+)::");
// Matches a string that consists entirely of a back tick, one or more decimal digits, and an
// apostrophe; group 1 is the digits
private static final Pattern DEMANGLED_LOCAL_NS_PATTERN = Pattern.compile("^`([0-9]+)'$");
// Matches "::`" + digits + "'::" anywhere within a string; group 1 is the digits
private static final Pattern DEMANGLED_EMBEDDED_LOCAL_NS_PATTERN =
Pattern.compile("::`([0-9]+)'::");
/**
 * Standardize a SymbolPath.  For now this replaces the local namespace {@code __l#} pattern
 * with the {@code `#'} pattern, both for a part that is entirely a local namespace and for
 * local namespaces embedded (delimited by {@code ::}) within a part, and it replaces
 * {@code `anonymous-namespace'} with {@code `anonymous namespace'}.
 * <p> Ultimately, this method should be moved to a different utility class, but putting it
 * here for now (probably with the template work)
 * @param symbolPath the symbol path to standardize
 * @return the standardized symbol path
 */
public static SymbolPath standarizeSymbolPath(SymbolPath symbolPath) {
    List<String> parts = symbolPath.asList();
    for (int i = 0; i < parts.size(); i++) {
        // These anonymous namespaces are those that come in the clear (non-mangled).
        // Strings are immutable, so the result of the replacement must be kept.
        String part = StringUtils.replace(parts.get(i), "`anonymous-namespace'",
            "`anonymous namespace'");
        Matcher wholePart = LOCAL_NS_PATTERN.matcher(part);
        if (wholePart.find()) {
            // the pattern is anchored at both ends, so the whole part is replaced
            part = "`" + wholePart.group(1) + "'";
        }
        else {
            part = EMBEDDED_LOCAL_NS_PATTERN.matcher(part).replaceAll("::`$1'::");
        }
        parts.set(i, part);
    }
    return new SymbolPath(parts);
}
/**
 * Standardize a SymbolPath.  Alternative: this replaces the local namespace {@code `#'}
 * pattern with the {@code __l#} pattern, both for a part that is entirely a local namespace
 * and for local namespaces embedded (delimited by {@code ::}) within a part, and it replaces
 * {@code `anonymous-namespace'} with {@code `anonymous namespace'}.
 * <p> Ultimately, this method should be moved to a different utility class, but putting it
 * here for now (probably with the template work)
 * @param symbolPath the symbol path to standardize
 * @return the standardized symbol path
 */
public static SymbolPath standarizeSymbolPathAlt(SymbolPath symbolPath) {
    List<String> parts = symbolPath.asList();
    for (int i = 0; i < parts.size(); i++) {
        // These anonymous namespaces are those that come in the clear (non-mangled).
        // Strings are immutable, so the result of the replacement must be kept.
        String part = StringUtils.replace(parts.get(i), "`anonymous-namespace'",
            "`anonymous namespace'");
        Matcher wholePart = DEMANGLED_LOCAL_NS_PATTERN.matcher(part);
        if (wholePart.find()) {
            // the pattern is anchored at both ends, so the whole part is replaced
            part = "__l" + wholePart.group(1);
        }
        else {
            part = DEMANGLED_EMBEDDED_LOCAL_NS_PATTERN.matcher(part).replaceAll("::__l$1::");
        }
        parts.set(i, part);
    }
    return new SymbolPath(parts);
}
}
@@ -26,6 +26,9 @@ import mdemangler.datatype.MDDataTypeParser;
*/
public class MDQuestionModifierType extends MDModifierType {
// TODO: Decide on whether parsing this suffix belongs here... from PDB namespace investigation,
// it is reasoned that this suffix identifies an anonymous namespace. See comments in
// MDMangUtils.createStandardAnonymousNamespaceNode(String anon) method.
private String suffix;
public MDQuestionModifierType(MDMang dmang) {
@@ -22,22 +22,29 @@ import mdemangler.*;
* Microsoft mangled symbol.
*/
public class MDNumberedNamespace extends MDParsableItem {
private String name;
private MDEncodedNumber num;
public MDNumberedNamespace(MDMang dmang) {
super(dmang);
}
/**
 * Returns the encoded number of this numbered namespace.
 * @return the value of the num field; NOTE(review): presumably null until the item has
 * been parsed -- confirm
 */
public MDEncodedNumber getNumber() {
return num;
}
/**
 * Returns the name of this numbered namespace, which is the string form of the encoded
 * number placed between a back tick and an apostrophe.
 * @return the name
 */
public String getName() {
return "`" + num + "'";
}
@Override
protected void parseInternal() throws MDException {
MDEncodedNumber num = new MDEncodedNumber(dmang);
num = new MDEncodedNumber(dmang);
num.parse();
name = "`" + num + "'";
}
@Override
public void insert(StringBuilder builder) {
dmang.insertString(builder, name);
dmang.insertString(builder, getName());
}
}
@@ -48,6 +48,18 @@ public class MDQualifier extends MDParsableItem {
return (nameAnonymous != null);
}
/**
 * Indicates whether this qualifier holds a numbered (local) namespace.
 * @return true if nameNumbered is set (non-null)
 */
public boolean isLocalNamespace() {
return (nameNumbered != null);
}
/**
 * Indicates whether this qualifier holds a "C" name.
 * @return true if nameC is set (non-null)
 */
public boolean isNameC() {
return (nameC != null);
}
/**
 * Indicates whether this qualifier holds a "Q" name.
 * @return true if nameQ is set (non-null)
 */
public boolean isNameQ() {
return (nameQ != null);
}
public MDNestedName getNested() {
return nameNested;
}
@@ -56,6 +68,22 @@ public class MDQualifier extends MDParsableItem {
return nameAnonymous.getName();
}
/**
 * Returns the name reported by the numbered (local) namespace of this qualifier.
 * NOTE(review): nameNumbered is dereferenced without a null check; presumably callers are
 * expected to check isLocalNamespace() first -- confirm.
 * @return the local namespace name
 */
public String getLocalNamespace() {
return nameNumbered.getName();
}
/**
 * Returns the string form of the number of the numbered (local) namespace of this
 * qualifier.
 * NOTE(review): nameNumbered and its number are dereferenced without null checks;
 * presumably callers are expected to check isLocalNamespace() first -- confirm.
 * @return the local namespace number as a string
 */
public String getLocalNamespaceNumber() {
return nameNumbered.getNumber().toString();
}
/**
 * Returns the "C" name of this qualifier.
 * @return the value of nameC, which is null when isNameC() is false
 */
public String getNameC() {
return nameC;
}
/**
 * Returns the "Q" name of this qualifier.
 * @return the value of nameQ, which is null when isNameQ() is false
 */
public String getNameQ() {
return nameQ;
}
@Override
public void insert(StringBuilder builder) {
// Only one of these will hit.
@@ -17,6 +17,8 @@ package mdemangler;
import static org.junit.Assert.*;
import java.util.Arrays;
import org.junit.Test;
import generic.test.AbstractGenericTest;
@@ -35,7 +37,7 @@ public class MDMangUtilsTest extends AbstractGenericTest {
// with nested types that were causing problems for PDB
String mangled = ".?AV<lambda_0>@?0??name0@name1@@YA?AUname2@2@Uname3@2@Uname4@2@@Z@";
String expected =
"`struct name1::name2 __cdecl name1::name0(struct name1::name3,struct name1::name4)'::`1'::<lambda_0>";
"`name1::name2 __cdecl name1::name0(name1::name3,name1::name4)'::`1'::<lambda_0>";
String simpleExpected = "name1::name0::`1'::<lambda_0>";
String expectedDemangled =
"class `struct name1::name2 __cdecl name1::name0(struct name1::name3,struct name1::name4)'::`1'::<lambda_0>";
@@ -59,7 +61,7 @@ public class MDMangUtilsTest extends AbstractGenericTest {
String mangled =
".?AU?$name0@$$QEAV<lambda_0>@?0??name1@name2@?Aname3@name4@@UEAAXVname5@4@HAEBVname6@4@@Z@@name7@name8@@";
String expected =
"name8::name7::name0<class `public: virtual void __cdecl name4::`anonymous namespace'::name2::name1(class Aname3::name5,int,class Aname3::name6 const & __ptr64) __ptr64'::`1'::<lambda_0> && __ptr64>";
"name8::name7::name0<`public: virtual void __cdecl name4::`anonymous namespace'::name2::name1(Aname3::name5,int,Aname3::name6 const &)'::`1'::<lambda_0> &&>";
// See MDMangUtils.getSimpleSymbolPath(item) javadoc to understand why expected and
// simpleExpected are the same
String simpleExpected = expected;
@@ -84,7 +86,7 @@ public class MDMangUtilsTest extends AbstractGenericTest {
public void testTypeNamespaceSimpleConversionDoesNotApply2() throws Exception {
String mangled = ".?AU?$name0@$$QEAV<lambda_0>@?0???1Aname1@name2@@UEAA@XZ@@name3@name4@@";
String expected =
"name4::name3::name0<class `public: virtual __cdecl name2::Aname1::~Aname1(void) __ptr64'::`1'::<lambda_0> && __ptr64>";
"name4::name3::name0<`public: virtual __cdecl name2::Aname1::~Aname1(void)'::`1'::<lambda_0> &&>";
// See MDMangUtils.getSimpleSymbolPath(item) javadoc to understand why expected and
// simpleExpected are the same
String simpleExpected = expected;
@@ -105,4 +107,36 @@ public class MDMangUtilsTest extends AbstractGenericTest {
assertEquals(expectedDemangled, demangled);
}
// A part that is entirely a "__l#" local namespace is converted to the "`#'" form
@Test
public void testStandarizeSymbolPath() throws Exception {
    SymbolPath input = new SymbolPath(Arrays.asList("name0", "__l1", "name2"));
    String standardized = MDMangUtils.standarizeSymbolPath(input).toString();
    assertEquals("name0::`1'::name2", standardized);
}
// A "__l#" local namespace embedded inside a part is converted along with a stand-alone one
@Test
public void testStandarizeSymbolPathWithEmbedded() throws Exception {
    SymbolPath input =
        new SymbolPath(Arrays.asList("name0", "__l1", "name2(name3::__l4::name5)"));
    String standardized = MDMangUtils.standarizeSymbolPath(input).toString();
    assertEquals("name0::`1'::name2(name3::`4'::name5)", standardized);
}
// A part that is entirely a "`#'" local namespace is converted to the "__l#" form
@Test
public void testStandarizeSymbolPathAlt() throws Exception {
    SymbolPath input = new SymbolPath(Arrays.asList("name0", "`1'", "name2"));
    String standardized = MDMangUtils.standarizeSymbolPathAlt(input).toString();
    assertEquals("name0::__l1::name2", standardized);
}
// A "`#'" local namespace embedded inside a part is converted along with a stand-alone one
@Test
public void testStandarizeSymbolPathWithEmbeddedAlt() throws Exception {
    SymbolPath input =
        new SymbolPath(Arrays.asList("name0", "`1'", "name2(name3::`4'::name5)"));
    String standardized = MDMangUtils.standarizeSymbolPathAlt(input).toString();
    assertEquals("name0::__l1::name2(name3::__l4::name5)", standardized);
}
}