diff --git a/Ghidra/Features/Base/src/main/help/help/topics/AutoAnalysisPlugin/AutoAnalysis.htm b/Ghidra/Features/Base/src/main/help/help/topics/AutoAnalysisPlugin/AutoAnalysis.htm index d9799704d8..98aa03fe84 100644 --- a/Ghidra/Features/Base/src/main/help/help/topics/AutoAnalysisPlugin/AutoAnalysis.htm +++ b/Ghidra/Features/Base/src/main/help/help/topics/AutoAnalysisPlugin/AutoAnalysis.htm @@ -450,6 +450,18 @@

Started By: Importing or adding to a program, Auto Analyze command

+

Format String Analyzer

+ +
+

This analyzer detects variadic function calls in the body of every function that intersects + the current selection. It then parses their format string arguments to infer the correct function + call signatures. Currently, this analyzer only supports printf, scanf, and their variants (e.g., snprintf, fscanf). + If the current selection is empty, it searches through every function within the binary. Once + the signatures are inferred, they are overridden.

+ +

Started By: Importing or adding to a program, Auto Analyze command

+ +

Image Analyzer

diff --git a/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatArgument.java b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatArgument.java new file mode 100644 index 0000000000..f71909e425 --- /dev/null +++ b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatArgument.java @@ -0,0 +1,63 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.core.string.variadic; +/** + * Represents a single format argument of a variadic function as a length modifier / conversion specifier pair + */ +public class FormatArgument { + + private String lengthModifier; + private String conversionSpecifier; + + /** + * Constructor for a FormatArgument + * + * @param lengthModifier length modifier of a format argument + * @param conversionSpec conversion specifier of a format argument + */ + public FormatArgument(String lengthModifier, String conversionSpec) { + this.lengthModifier = lengthModifier; + this.conversionSpecifier = conversionSpec; + } + + /** + * Returns the length modifier exactly as it was passed to the constructor + * + * @return lengthModifier + */ + public String getLengthModifier() { + return this.lengthModifier; + } + + /** + * Returns the conversion specifier exactly as it was passed to the constructor + * + * @return conversionSpecifier + */ + public String getConversionSpecifier() { + return this.conversionSpecifier; + } + + /** + * Converts this FormatArgument to a String of the form "[lengthModifier, conversionSpecifier]" + * + * @return FormatArgument as String + */ + public String toString() { + + return String.format("[%s, %s]", this.lengthModifier, this.conversionSpecifier); + } +} diff --git a/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatStringAnalyzer.java b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatStringAnalyzer.java new file mode 100644 index 0000000000..9d9cd23198 --- /dev/null +++ b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatStringAnalyzer.java @@ -0,0 +1,392 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.core.string.variadic; + +import java.util.*; + +import org.apache.commons.collections4.IteratorUtils; + +import ghidra.app.decompiler.*; +import ghidra.app.decompiler.parallel.*; +import ghidra.app.services.*; +import ghidra.app.util.importer.MessageLog; +import ghidra.framework.options.Options; +import ghidra.program.model.address.Address; +import ghidra.program.model.address.AddressSetView; +import ghidra.program.model.data.*; +import ghidra.program.model.listing.*; +import ghidra.program.model.pcode.HighFunctionDBUtil; +import ghidra.program.model.pcode.PcodeOpAST; +import ghidra.program.util.DefinedDataIterator; +import ghidra.util.Msg; +import ghidra.util.exception.CancelledException; +import ghidra.util.exception.InvalidInputException; +import ghidra.util.task.TaskMonitor; + +/** + * Analyzer that locates calls to printf/scanf-style variadic functions, parses the format + * string passed at each call site, and writes a call-site signature override whose parameters + * match the arguments implied by that format string. + */ +public class FormatStringAnalyzer extends AbstractAnalyzer { + + // Array of substrings of variadic function names that are searched for + private static final String[] VARIADIC_SUBSTRINGS = { "printf", "scanf" }; + private static final String NAME = "Variadic Function Signature Override"; + private static final String DESCRIPTION = + "Detects variadic function calls in the bodies of each function that intersect the " + + "current selection and parses their format string arguments to infer the correct " + + "signatures. Currently, this analyzer only supports printf, scanf, and their variants " + + "(e.g., snprintf, fscanf). If the current selection is empty, it searches through " + + "every function. 
Once the correct signatures are inferred, they are overridden."; + private static final boolean OPTION_DEFAULT_CREATE_BOOKMARKS_ENABLED = false; + private static final String OPTION_NAME_CREATE_BOOKMARKS = "Create Analysis Bookmarks"; + private static final String OPTION_DESCRIPTION_CREATE_BOOKMARKS = + "Select this check box if you want this analyzer to create analysis bookmarks " + + "when items of interest are created/identified by the analyzer."; + + private boolean createBookmarksEnabled = OPTION_DEFAULT_CREATE_BOOKMARKS_ENABLED; + + // Any function name containing this substring is determined to be an input type function + private static final String INPUT_FUNCTION_SUBSTRING = "scanf"; + private Program currentProgram = null; + private FormatStringParser parser; + + public FormatStringAnalyzer() { + super(NAME, DESCRIPTION, AnalyzerType.FUNCTION_SIGNATURES_ANALYZER); + setSupportsOneTimeAnalysis(); + setPriority(AnalysisPriority.LOW_PRIORITY); + setDefaultEnablement(false); + setPrototype(); + } + + @Override + public boolean canAnalyze(Program program) { + return true; + } + + // Lazily creates the parser for the current program; synchronized because the + // decompiler callback may request it from more than one thread + private synchronized FormatStringParser getParser() { + if (parser == null) { + parser = new FormatStringParser(currentProgram); + } + return parser; + } + + private synchronized void disposeParser() { + parser = null; + } + + @Override + public boolean added(Program program, AddressSetView set, TaskMonitor monitor, MessageLog log) { + this.currentProgram = program; + try { + run(set, monitor); + } + catch (CancelledException e) { + // User cancelled analysis + } + finally { + disposeParser(); + } + return true; + } + + /** + * Collects defined strings containing '%', identifies candidate variadic functions, finds + * the functions that call them, and decompiles those callers so overrides can be applied. + * + * @param selection addresses restricting the caller search, or null for all non-stub functions + * @param monitor task monitor used for cancellation + * @throws CancelledException if analysis is cancelled + */ + private void run(AddressSetView selection, TaskMonitor monitor) + throws CancelledException { + + DefinedDataIterator dataIterator = DefinedDataIterator.definedStrings(currentProgram); + Map<Address, Data> stringsByAddress = new HashMap<>(); + for (Data data : dataIterator) { + String s = data.getDefaultValueRepresentation(); + if (s.contains("%")) { + 
stringsByAddress.put(data.getAddress(), data); + } + monitor.checkCanceled(); + } + + FunctionIterator functionIterator = currentProgram.getListing().getFunctions(true); + FunctionIterator externalIterator = currentProgram.getListing().getExternalFunctions(); + Iterator<Function> programFunctionIterator = IteratorUtils.chainedIterator(functionIterator,externalIterator); + Map<String, List<DataType>> namesToParameters = new HashMap<>(); + + Map<String, DataType> namesToReturn = new HashMap<>(); + Set<Function> toDecompile = new HashSet<>(); + Set<String> variadicFunctionNames = new HashSet<>(); + + // Find variadic function names and their parameter data types + for (Function function : IteratorUtils.asIterable(programFunctionIterator)) { + String name = function.getName().strip(); + if (usesVariadicFormatString(function)) { + for (String variadicSubstring : VARIADIC_SUBSTRINGS) { + if (name.contains(variadicSubstring)) { + variadicFunctionNames.add(name); + namesToParameters.put(name, getParameters(function)); + namesToReturn.put(name, function.getReturnType()); + break; + } + } + } + monitor.checkCanceled(); + } + + Iterator<Function> functionsToSearchIterator = selection != null + ? 
currentProgram.getFunctionManager() + .getFunctionsOverlapping(selection) + : currentProgram.getFunctionManager().getFunctionsNoStubs(true); + + // Find functions that call variadic functions + while (functionsToSearchIterator.hasNext()) { + Function function = functionsToSearchIterator.next(); + Set<Function> calledFunctions = function.getCalledFunctions(monitor); + for (Function calledFunction : calledFunctions) { + // If this function calls a variadic function, add it to functions to decompile. + // The name is stripped so the lookup matches how the keys were stored above. + if (namesToParameters.containsKey(calledFunction.getName().strip())) { + toDecompile.add(function); + break; + } + } + monitor.checkCanceled(); + } + + decompile(currentProgram, monitor, stringsByAddress, variadicFunctionNames, + namesToParameters, + namesToReturn, + toDecompile); + } + + /** + * Decompiles each function in toDecompile in parallel and applies signature overrides to the + * variadic calls found in it. Logs and returns without decompiling when the set is empty. + */ + private void decompile(Program program, TaskMonitor monitor, + Map<Address, Data> stringsByAddress, + Set<String> variadicFunctionNames, + Map<String, List<DataType>> namesToParameters, Map<String, DataType> namesToReturn, + Set<Function> toDecompile) { + + // Check for work before creating the callback so it is never left undisposed + if (toDecompile.isEmpty()) { + Msg.info(this, "No functions detected that make variadic function calls with " + + "format strings containing format specifiers"); + return; + } + DecompilerCallback<Void> callback = initDecompilerCallback(program, stringsByAddress, + variadicFunctionNames, namesToParameters, namesToReturn); + try { + ParallelDecompiler.decompileFunctions(callback, toDecompile, monitor); + } + catch (Exception e) { + Msg.error(this, "Error: could not decompile functions with ParallelDecompiler", e); + } + finally { + callback.dispose(); + } + } + + private DecompilerCallback<Void> initDecompilerCallback(Program program, + Map<Address, Data> stringsByAddress, + Set<String> variadicFuncNames, Map<String, List<DataType>> namesToParameters, + Map<String, DataType> namesToReturn) { + return new DecompilerCallback<>(program, + new VariadicSignatureDecompileConfigurer()) { + @Override + public Void process(DecompileResults results, TaskMonitor tMonitor) throws Exception { + if (results == null) { + return null; + } + Function function = results.getFunction(); + PcodeFunctionParser 
pcodeParser = new PcodeFunctionParser(program); + if (results.getHighFunction() == null || + results.getHighFunction().getPcodeOps() == null) { + return null; + } + Iterator<PcodeOpAST> pcodeOpASTIterator = results.getHighFunction().getPcodeOps(); + List<PcodeOpAST> pcodeOpASTs = new ArrayList<>(); + if ((results.getHighFunction() != null) && pcodeOpASTIterator != null) { + while (pcodeOpASTIterator.hasNext()) { + PcodeOpAST pcodeAST = pcodeOpASTIterator.next(); + pcodeOpASTs.add(pcodeAST); + } + } + List<FunctionCallData> functionCallDataList = pcodeParser.parseFunctionForCallData( + pcodeOpASTs, stringsByAddress, variadicFuncNames); + if (functionCallDataList != null && functionCallDataList.size() > 0) { + overrideCallList(program, function, functionCallDataList, namesToParameters, + namesToReturn); + } + tMonitor.checkCanceled(); + return null; + } + }; + } + + private List<DataType> getParameters(Function function) { + // NOTE: Currently only considers variadic functions with format string + // arguments. + List<DataType> dataTypes = new ArrayList<>(); + for (ParameterDefinition pd : function.getSignature().getArguments()) { + dataTypes.add(pd.getDataType()); + } + return dataTypes; + } + + // True when the function has varargs and its last declared parameter is a char pointer + private boolean usesVariadicFormatString(Function function) { + int paramCount = function.getParameterCount(); + return function.hasVarArgs() && paramCount > 0 && + isCharPointer(function.getParameters()[paramCount - 1].getDataType()); + } + + private boolean isCharPointer(DataType dataType) { + if (dataType instanceof TypeDef) { + dataType = ((TypeDef) dataType).getBaseDataType(); + } + if (!(dataType instanceof Pointer)) { + return false; + } + DataType dt = ((Pointer) dataType).getDataType(); + return dt instanceof CharDataType || dt instanceof WideCharDataType || + dt instanceof WideChar16DataType || dt instanceof WideChar32DataType; + } + + private class VariadicSignatureDecompileConfigurer implements DecompileConfigurer { + + // DecompInterface allows for control of decompilation processes + @Override + public void 
configure(DecompInterface decompiler) { + decompiler.toggleCCode(true); // Produce C code + decompiler.toggleSyntaxTree(true); // Produce syntax tree + decompiler.openProgram(currentProgram); + decompiler.setSimplificationStyle("normalize"); + DecompileOptions options = new DecompileOptions(); + options.grabFromProgram(currentProgram); + decompiler.setOptions(options); + } + } + + private ParameterDefinition[] parseParameters(Function function, + Address address, + String callFunctionName, String formatString, + Map<String, List<DataType>> namesToParameters) { + + Program functionProgram = function.getProgram(); + + // Named formatParser so the local does not shadow the parser field + FormatStringParser formatParser = getParser(); + + // DataTypes of arguments are treated differently when the variadic function + // looks like scanf since it takes in inputs. We need this information + // so that the correct DataType arguments are generated + boolean isOutputType = !callFunctionName.contains(INPUT_FUNCTION_SUBSTRING); + List<FormatArgument> formatArguments = + formatParser.convertToFormatArgumentList(formatString, isOutputType); + + DataType[] dataTypes = isOutputType ? 
formatParser.convertToOutputDataTypes(formatArguments) + : formatParser.convertToInputDataTypes(formatArguments); + + if (dataTypes == null) { + + currentProgram.getBookmarkManager() + .setBookmark(address, BookmarkType.ANALYSIS, "Unrecognized format string", + "Format string could not be parsed: " + formatString); + return null; + } + ParameterDefinition[] paramDefs = + createParameters(callFunctionName, dataTypes, functionProgram, namesToParameters); + return paramDefs; + } + + /** + * Builds the override parameter list: the called function's declared parameters followed by + * one parameter per data type inferred from the format string. + * + * @return the parameter definitions, or null if the called function is unknown or the + * resulting list would be empty + */ + private ParameterDefinition[] createParameters(String callFunctionName, DataType[] dataTypes, + Program program, Map<String, List<DataType>> namesToParameters) { + List<DataType> initialFunctionParameters = namesToParameters.get(callFunctionName); + if (initialFunctionParameters == null) { + return null; // No declared parameters were recorded for this name + } + int numberOfParameters = initialFunctionParameters.size() + dataTypes.length; + if (numberOfParameters == 0) { + return null; // Invalid function + } + ParameterDefinition[] parameterDefinitions = new ParameterDefinition[numberOfParameters]; + for (int i = 0; i < numberOfParameters; i++) { + if (i < initialFunctionParameters.size()) { + parameterDefinitions[i] = + new ParameterDefinitionImpl("param" + i, initialFunctionParameters.get(i), ""); + } + else { + parameterDefinitions[i] = new ParameterDefinitionImpl("param" + i, + dataTypes[i - initialFunctionParameters.size()], ""); + } + } + return parameterDefinitions; + } + + private FunctionSignature initSignature(Function function, Address address, + String callFunctionName, String formatString, + Map<String, List<DataType>> namesToParameters, Map<String, DataType> namesToReturn) { + ParameterDefinition[] parameterDefinitions = + parseParameters(function, address, callFunctionName, formatString, namesToParameters); + if (parameterDefinitions == null || parameterDefinitions.length == 0) { + return null; + } + + FunctionDefinitionDataType signature = new FunctionDefinitionDataType(callFunctionName); + signature.setArguments(parameterDefinitions); + signature.setReturnType(namesToReturn.get(callFunctionName)); + return signature; + } + + private void overrideCallList(Program 
program, Function function, + List<FunctionCallData> functionCallDataList, + Map<String, List<DataType>> namesToParameters, Map<String, DataType> namesToReturn) { + if (function == null || functionCallDataList == null) { + return; + } + for (FunctionCallData data : functionCallDataList) { + overrideFunctionCall(program, function, data.getAddressOfCall(), data.getCallFuncName(), + data.getFormatString(), namesToParameters, namesToReturn); + } + } + + /** + * Infers the signature for one variadic call from its format string and writes it as an + * override at the call address. Does nothing if any required input is null or no signature + * can be inferred. + */ + private void overrideFunctionCall(Program program, Function function, Address address, + String callFunctionName, String formatString, + Map<String, List<DataType>> namesToParameters, + Map<String, DataType> namesToReturn) { + // Validate inputs before initSignature(), which dereferences function + if (formatString == null || function == null || address == null) { + return; + } + FunctionSignature functionSignature = initSignature(function, address, callFunctionName, + formatString, namesToParameters, namesToReturn); + if (functionSignature == null) { + return; + } + + try { + if (createBookmarksEnabled) { + BookmarkManager bookmark = program.getBookmarkManager(); + bookmark.setBookmark(address, BookmarkType.ANALYSIS, + "Function Signature Override", + "Override for call to function " + callFunctionName); + } + HighFunctionDBUtil.writeOverride(function, address, functionSignature); + } + catch (InvalidInputException e) { + Msg.error(this, "Error: invalid input given to writeOverride()", e); + } + } + + @Override + public boolean removed(Program program, AddressSetView set, TaskMonitor monitor, MessageLog log) + throws CancelledException { + return false; + } + + @Override + public void registerOptions(Options options, Program program) { + options.registerOption(OPTION_NAME_CREATE_BOOKMARKS, createBookmarksEnabled, null, + OPTION_DESCRIPTION_CREATE_BOOKMARKS); + } + + @Override + public void optionsChanged(Options options, Program program) { + createBookmarksEnabled = + options.getBoolean(OPTION_NAME_CREATE_BOOKMARKS, createBookmarksEnabled); + } +} diff --git a/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatStringParser.java 
b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatStringParser.java new file mode 100644 index 0000000000..957d25b555 --- /dev/null +++ b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FormatStringParser.java @@ -0,0 +1,961 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.core.string.variadic; + +import java.util.*; +import java.util.stream.Collectors; + +import ghidra.program.model.data.*; +import ghidra.program.model.listing.Program; +import ghidra.util.Msg; + +/** + * Class for parsing a variadic function's format String to determine the proper + * number of arguments and their DataTypes. It analyzes format strings from variadic functions. + * Parses format strings adhering to docs https://pubs.opengroup.org/onlinepubs/009695399/functions/fprintf.html + * and https://en.cppreference.com/w/c/io/fscanf. If a format string doesn't adhere properly + * to what is specified in the docs, the string will not continue to be parsed since this is + * undefined behavior. + *
+ * The standard C formats may make optional use of the following extended precision types + * which may be defined as a {@link TypeDef} the appropriate Datatype implementation. + * If a format string is encountered which refers to one of these types which has not + * previously been defined, a TypeDef will be fabricated although it may not be correct. + *
    + *
  • intmax_t - maximum sized signed integer (default: long long)
  • + *
  • uintmax_t - maximum size unsigned integer (default: unsigned long long)
  • + *
  • size_t - unsigned integer type corresponding to sizeof (default: varies)
  • + *
  • ptrdiff_t - signed integer type (default: varies)
  • + *
+ */ +public class FormatStringParser { + + public static final String INTMAX_T_NAME = "intmax_t"; + public static final String UINTMAX_T_NAME = "uintmax_t"; + public static final String SIZE_T_NAME = "size_t"; + public static final String PTRDIFF_T_NAME = "ptrdiff_t"; + + private DataTypeManager dataTypeManager; + + private TypeDef intmax_t; + private TypeDef uintmax_t; + private TypeDef size_t; + private TypeDef ptrdiff_t; + + /** + * Constructor for FormatStringParser. + *
+ * NOTE: Warning messages will be logged once per instantiation when + * appropriate required TypeDef (intmax_t, uintmax_t, size_t, ptrdif_t) + * has not been predefined. + * + * @param program currentProgram + */ + public FormatStringParser(Program program) { + this.dataTypeManager = program.getDataTypeManager(); + } + + /** + * This function takes in a format string and returns List of Strings each holding + * format data. Each String is a substring of the given format string that corresponds to one + * or more DataTypes. These DataTypes determine which arguments need to be given to the variadic + * function. For instance, given the format String "%d %4.2s", this function will return + * the List ["d", "4.2s"] + * + * @param formatString format String + * @return List of substrings of formatStr + */ + private List parseFormatString(String formatString) { + + List formatArgumentList = new ArrayList<>(); + String current = ""; + for (int i = 0; i < formatString.length(); i++) { + char c = formatString.charAt(i); + if (c == '%') { + if (emitPercent(formatString, i)) { + ++i; + } + else { + ++i; + c = formatString.charAt(i); + while (!isConversionSpecifier(c)) { + current += c; + ++i; + if (i >= formatString.length()) { + return null; + } + c = formatString.charAt(i); + } + formatArgumentList.add(current + c); + current = ""; + } + } + } + return formatArgumentList; + } + + /** + * Takes in a single String from parseFormatString's output List and converts it to + * the corresponding FormatArgument(s) and populates the formatArgumentList List. + * isOutputType is true when using a format string for a function that "outputs" + * Strings (e.g., printf, fprintf, etc.). 
When it's false, it evaluates the + * String's data types as if the function "inputs" Strings (e.g., scanf) + * + * @param formatString Format String + * @param formatArgumentList List of FormatArgument that will be written to + * @param isOutputType Type of variadic function + * @return True if format string successfully parsed + */ + private boolean convertToFormatArguments(String formatString, + List formatArgumentList, boolean isOutputType) { + + FormatParsingData data = new FormatParsingData(); + for (int i = 0; i < formatString.length(); i++) { + char c = formatString.charAt(i); + i = preprocessChar(formatString, i, isOutputType); + if (i == -1) { + return false; + } + if (isFlag(c)) { + continue; + } + if (data.getLengthModifier() != null) { + return addArgumentWithModifier(c, data, formatArgumentList); + } + data.setLengthModifier(detectLengthModifier(c)); + if (data.getLengthModifier() == null) { + data.setConversionSpecifier(detectConversionSpecifier(c)); + if (data.getConversionSpecifier() != null) { + if (!verifyConversionPair(data.getLengthModifier(), + data.getConversionSpecifier())) { + return false; + } + formatArgumentList.add(new FormatArgument(data.getLengthModifier(), + data.getConversionSpecifier())); + return true; + } + // If length modifier and conversion specs aren't present + // and we get an unknown char, format string is invalid + if (data.isPrecisionComplete()) { + return false; + } + if (!Character.isDigit(c) && c != '.' && c != '*') { + return false; + } + if (isOutputType) { + // At this point c is either a number, '*', or '.' 
+ i = handleOutputConversionArgument(formatString, i, data, formatArgumentList); + if (i == -1) { + return false; + } + } + else { + i = handleInputConversionArgument(formatString, i, data, formatArgumentList); + if (i == -1) { + return false; + } + } + } + else if (i + 1 < formatString.length()) { + i = initiateLengthModifierExtension(formatString, i, data); + } + } + return true; + } + + private int preprocessChar(String formatString, int i, boolean isOutputType) { + + char c = formatString.charAt(i); + if (c == '$') { + return -1; + } + if (isFlag(c)) { + if (!isOutputType) { + return -1; + } + i = skipFlags(formatString, i); + } + return i; + } + + private int initiateLengthModifierExtension(String formatString, int i, + FormatParsingData data) { + String tmpLengthModifier = + extendLengthModifier(data.getLengthModifier(), formatString.charAt(i + 1)); + if (tmpLengthModifier != null) { + ++i; + data.setLengthModifier(tmpLengthModifier); + } + return i; + } + + private boolean addArgumentWithModifier(char c, FormatParsingData data, + List formatArgumentList) { + data.setConversionSpecifier(detectConversionSpecifier(c)); + if ((data.getConversionSpecifier() == null) || + !(verifyConversionPair(data.getLengthModifier(), data.getConversionSpecifier()))) { + return false; // Problem with format string + } + formatArgumentList + .add(new FormatArgument(data.getLengthModifier(), data.getConversionSpecifier())); + return true; + + } + + private int handleOutputConversionArgument(String formatString, int i, FormatParsingData data, + List formatArgumentList) { + char c = formatString.charAt(i); + if (!data.isPrecisionComplete() && !data.isFieldWidthComplete() && c != '.') { + if (c == '*') { + formatArgumentList.add(new FormatArgument(null, "*")); + } + else { + i = skipIntegers(formatString, i); + } + if (i == -1) { + return i; + } + data.setFieldWidthComplete(true); + } + else if (data.isFieldWidthComplete() && c != '.') { + return -1; + } + else if 
(!data.isPrecisionComplete() && c == '.') { + if (i + 1 < formatString.length() && formatString.charAt(i + 1) == '*') { + ++i; + formatArgumentList.add(new FormatArgument(null, "*")); + } + else { + i = skipIntegers(formatString, i + 1); + } + if (i == -1) { + return i; + } + data.setPrecisionComplete(true); + } + else { + return -1; + } + return i; + } + + private int handleInputConversionArgument(String formatString, int i, FormatParsingData data, + List formatArgumentList) { + char c = formatString.charAt(i); + if (c == '*') { + formatArgumentList.add(new FormatArgument(null, "*")); + } + else if (Character.isDigit(c)) { + i = skipIntegers(formatString, i + 1); + if (i == -1) { + return i; + } + data.setPrecisionComplete(true); + } + else { + return -1; + } + return i; + } + + /** + * Takes in a String and converts it to a List of FormatArgument with each FormatArgument + * corresponding to an additional argument. isOutputType is true when using a + * format string for output data types (e.g. printf, fprintf, etc.). When it's + * false, it evaluates the String's data types as if they were input types (e.g. + * scanf) + * + * @param formatString format String + * @param isOutputType Type of variadic function + * @return List of FormatArgument + */ + + public List convertToFormatArgumentList(String formatString, + boolean isOutputType) { + + if (formatString == null) { + return null; + } + List formatStrArgumentList = parseFormatString(formatString); + if (formatStrArgumentList == null) { + return null; + } + List formatArgumentList = new ArrayList<>(); + for (String formatStrArgument : formatStrArgumentList) { + boolean status = + convertToFormatArguments(formatStrArgument, formatArgumentList, isOutputType); + if (!status) { + if (formatStrArgumentList.stream() + .filter(str -> str.contains("$")) + .findAny() + .isPresent()) { + return analyzeFormatStringWithParameters(formatString); + } + return null; + } + } + return formatArgumentList.contains(null) ? 
null : formatArgumentList; + } + + /** + * + * Handles format Strings with parameters. In this parser, we define a format + * String parameter to be an integer n provided in the form: "%n$" or "*n$", where n is + * the index of the referred argument. If a placeholder uses a format + * argument parameter, all other placeholders must also have a parameter. Also, + * all gaps between format argument indices are not supported. For instance, if + * the first and third arguments are used, there must also be a parameter for a + * second argument. Any parameter pattern beginning with % or * and ending with + * $ must have integer in between. Failing to adhere by the format string + * parameter requirements returns null. + * + * @param formatString format String + * @return List of FormatArgument + * + * + * TODO: What if multiple conversion specs refer to the same placeholder + * with different types? Ex: "%1$*1$x" (uses unsigned int and int) + * Currently just overwrites previous type + * + */ + public List analyzeFormatStringWithParameters(String formatString) { + + FormatParsingData data = new FormatParsingData(); + Map formatArgumentMap = new HashMap<>(); + for (int i = 0; i < formatString.length(); i++) { + char c = formatString.charAt(i); + if (c == ' ') { + continue; + } + if (c == '%') { + if (emitPercent(formatString, i)) { + ++i; + } + else { + data.setInConversion(true); + data.clearData(); + data.setParameterIndex(locateParameterIndex(formatString, i)); + if (data.getParameterIndex() == 0) { + return null; // $ operand number is required + } + i += Integer.toString(data.getParameterIndex()).length() + 1; // i should be at $ + if (isFlag(formatString.charAt(i + 1))) { + i = skipFlags(formatString, i + 1); + } + continue; + } + } + if (data.isInConversion()) { + if (data.getLengthModifier() != null) { + data.setConversionSpecifier(detectConversionSpecifier(c)); + if (data.getConversionSpecifier() == null) { + return null; // Problem with format string + } + 
formatArgumentMap.put(data.getParameterIndex(), new FormatArgument( + data.getLengthModifier(), data.getConversionSpecifier())); + data.setInConversion(false); + continue; + } + data.setLengthModifier(detectLengthModifier(c)); + if (data.getLengthModifier() == null) { + i = searchWithNullModifier(formatString, i, data, formatArgumentMap); + if (i == -1) { + return null; + } + } + } + } + return convertMapToList(formatArgumentMap); + } + + // Continue format String conversion parsing for when the length modifier is null + private int searchWithNullModifier(String formatString, int i, FormatParsingData data, + Map formatArgumentMap) { + char c = formatString.charAt(i); + data.setConversionSpecifier(detectConversionSpecifier(c)); + if (data.getConversionSpecifier() != null) { + formatArgumentMap.put(data.getParameterIndex(), + new FormatArgument(data.getLengthModifier(), data.getConversionSpecifier())); + data.setInConversion(false); + } + else { + if (data.isPrecisionComplete()) { + return -1; + } + if (!Character.isDigit(c) && c != '.' && c != '*') { + return -1; + } + // At this point c is either a number, '*', or '.' 
+ if (!data.isPrecisionComplete() && !data.isFieldWidthComplete() && c != '.') { + i = handleOutputConversionForParameters(formatString, i, data, formatArgumentMap); + if (i == -1) { + return -1; + } + } + else if (data.isFieldWidthComplete() && c != '.') { + return -1; + } + else if (!data.isPrecisionComplete() && c == '.') { + i = handlePrecisionForParameters(formatString, i, data, formatArgumentMap); + if (i == -1) { + return -1; + } + } + else { + return -1; + } + } + return i; + } + + // Takes care of optional precision indicated by a period ('.') and followed by an + // asterick or series of integers + private int handlePrecisionForParameters(String formatString, int i, FormatParsingData data, + Map formatArgumentMap) { + + if (i + 1 < formatString.length() && formatString.charAt(i + 1) == '*') { + ++i; + int precisionIdx = locateParameterIndex(formatString, i); + if (precisionIdx == 0) { + return -1; + } + i += Integer.toString(precisionIdx).length() + 1; + // i should be at $ + formatArgumentMap.put(precisionIdx, new FormatArgument(null, "d")); + } + else { + i = skipIntegers(formatString, i + 1); // i should be at last number + if (i == -1) { + return -1; + } + } + data.setPrecisionComplete(true); + return i; + } + + private int handleOutputConversionForParameters(String formatString, int i, + FormatParsingData data, Map formatArgumentMap) { + char c = formatString.charAt(i); + if (c == '*') { + int fieldWidthIdx = locateParameterIndex(formatString, i); + if (fieldWidthIdx == 0) { + return i; + } + i += Integer.toString(fieldWidthIdx).length() + 1; + // i should be at $ + formatArgumentMap.put(fieldWidthIdx, new FormatArgument(null, "d")); + } + else { + i = skipIntegers(formatString, i); + if (i == -1) { + return i; + } + } + data.setFieldWidthComplete(true); + return i; + + } + + private List convertMapToList(Map formatArgumentMap) { + List formatArgumentList = new ArrayList<>(); + for (int i = 1; i <= formatArgumentMap.size(); i++) { + FormatArgument 
formatArgument = formatArgumentMap.get(i); + if (formatArgument == null) { + return null; + } + formatArgumentList.add(formatArgument); + } + return formatArgumentList; + } + + /** + * In a format string with format argument parameters, retrieve that parameter. + * In other words, in the following cases: "%n$" and "*n$", return n where n is + * the index of the referred argument. n cannot be less than 1; return 0 if + * there's a problem. + * + * @param formatString format String + * @param i index within formatStr + * @return formar argument parameter + */ + private int locateParameterIndex(String formatString, int i) { + + char c = formatString.charAt(i); + if (c == '%' || c == '*') { + ++i; + c = formatString.charAt(i); + } + else { + return 0; + } + String paramIndexString = ""; + while (Character.isDigit(c)) { + paramIndexString += Character.toString(c); + ++i; + c = formatString.charAt(i); + } + return c != '$' || paramIndexString.length() == 0 || Integer.parseInt(paramIndexString) == 0 + ? 0 + : Integer.parseInt(paramIndexString); + } + + /** + * Skips a series of flags within a format String. returns the index of the + * format string at the last digit before another non-digit character + * + * @param formatString format String + * @param i index into formatStr + * @return new index into formatStr + */ + private int skipFlags(String formatString, int i) { + for (; isFlag(formatString.charAt(i)); i++) { + // Iterate through chars until all flags are skipped + } + return i - 1; + } + + /** + * Skips a series of numbers (field width or precision) within a format String. 
+ * returns the index of the format String at the last digit before another + * non-digit character + * + * @param formatString format String + * @param i index into formatStr + * @return new index into formatString + */ + private int skipIntegers(String formatString, int i) { + char c = formatString.charAt(i); + if (!Character.isDigit(c)) { + if (isLengthModifier(c) || isConversionSpecifier(c)) { + return i - 1; + } + return -1; + } + for (; Character.isDigit(formatString.charAt(i)); i++) { + // Skip chars until a non-integer is found + } + return i - 1; + } + + // If there are two consecutive '%' signs, do not evaluate the data types + private boolean emitPercent(String formatString, int i) { + if (formatString.charAt(i) == '%' && i + 1 < formatString.length() && + formatString.charAt(i + 1) == '%') { + return true; + } + return false; + } + + public DataType[] convertToOutputDataTypes(List formatArguments) { + if (formatArguments == null) { + return null; + } + List dataTypeList = formatArguments.stream().map(argument -> { + String conversionSpecifier = argument.getConversionSpecifier(); + DataType dt = convertPairToDataType(argument.getLengthModifier(), + conversionSpecifier.equals("*") ? "d" : conversionSpecifier); + return dt; + }).collect(Collectors.toList()); + return dataTypeList.contains(null) ? 
null + : dataTypeList.toArray(DataType[]::new); + } + + public DataType[] convertToInputDataTypes(List formatArguments) { + if (formatArguments == null) { + return null; + } + + List dataTypesList = new ArrayList<>(); + for (int i = 0; i < formatArguments.size(); i++) { + FormatArgument argument = formatArguments.get(i); + // * means to skip + if (argument.getConversionSpecifier().equals("*")) { + if (formatArguments.get(i + 1).getConversionSpecifier().equals("*")) { + return null; + } + ++i; + continue; + } + DataType dt = convertPairToDataType(argument.getLengthModifier(), + argument.getConversionSpecifier()); + if (dt == null) { + return null; + } + if (!(dt instanceof PointerDataType) || + isVoidPointer(argument.getConversionSpecifier())) { + dataTypesList.add(dataTypeManager.getPointer(dt)); + } + else { + dataTypesList.add(dt); + } + } + return dataTypesList.stream().toArray(size -> new DataType[size]); + } + + private boolean verifyConversionPair(String lengthModifier, String conversionSpecifier) { + if (lengthModifier == null || lengthModifier.equals("l")) { + return true; + } + if ((lengthModifier.equals("L") && isDouble(conversionSpecifier)) || + (!lengthModifier.equals("L") && + (isInteger(conversionSpecifier) || isIntegerPointer(conversionSpecifier)))) { + return true; + } + return false; + } + + private DataType convertPairToDataType(String lengthModifier, String conversionSpecifier) { + + if (lengthModifier == null || conversionSpecifier.equals("c") || + conversionSpecifier.equals("s") || + conversionSpecifier.equals("C") || + conversionSpecifier.equals("S")) { + return conversionSpecifierToDataType(conversionSpecifier); + } + switch (lengthModifier) { + case "h": + return shortLengthModification(conversionSpecifier); + case "hh": + return charLengthModification(conversionSpecifier); + case "l": + return longLengthModification(conversionSpecifier); + case "ll": + case "q": + return longLongLengthModification(conversionSpecifier); + case "j": + return 
intmax_t_LengthModification(conversionSpecifier); + case "z": + return size_t_LengthModification(conversionSpecifier); + case "t": + return ptrdiff_t_LengthModification(conversionSpecifier); + case "L": + return longDoubleLengthModification(conversionSpecifier); + default: + return null; + } + } + + private DataType conversionSpecifierToDataType(String conversionSpecifier) { + switch (conversionSpecifier.charAt(0)) { + case 'd': + case 'i': + return new IntegerDataType(dataTypeManager); + case 'o': + case 'u': + case 'x': + case 'X': + return new UnsignedIntegerDataType(dataTypeManager); + case 'p': + return dataTypeManager.getPointer(DataType.VOID); + case 's': + return dataTypeManager.getPointer(new CharDataType(dataTypeManager)); + case 'n': + return dataTypeManager.getPointer(new IntegerDataType(dataTypeManager)); + case 'c': + return new UnsignedCharDataType(dataTypeManager); + case 'a': + case 'A': + case 'g': + case 'G': + case 'e': + case 'E': + case 'f': + return new DoubleDataType(dataTypeManager); + case 'S': + case 'C': + return dataTypeManager.getPointer(new WideCharDataType(dataTypeManager)); + default: + return null; + } + } + + private DataType longLengthModification(String conversionSpecifier) { + if (isIntegerPointer(conversionSpecifier)) { + return dataTypeManager.getPointer(new LongDataType(dataTypeManager)); + } + if (conversionSpecifier.contentEquals("s") || conversionSpecifier.contentEquals("c")) { + return dataTypeManager.getPointer(new WideCharDataType(dataTypeManager)); + } + return isSignedInteger(conversionSpecifier) ? new LongDataType(dataTypeManager) + : new UnsignedLongDataType(dataTypeManager); + } + + private DataType longLongLengthModification(String conversionSpecifier) { + if (isIntegerPointer(conversionSpecifier)) { + return dataTypeManager.getPointer(new LongLongDataType(dataTypeManager)); + } + return isSignedInteger(conversionSpecifier) + ? 
new LongLongDataType(dataTypeManager) + : new UnsignedLongLongDataType(dataTypeManager); + } + + private DataType shortLengthModification(String conversionSpecifier) { + if (isIntegerPointer(conversionSpecifier)) { + return dataTypeManager.getPointer(new ShortDataType(dataTypeManager)); + } + return isSignedInteger(conversionSpecifier) + ? new ShortDataType(dataTypeManager) + : new UnsignedShortDataType(dataTypeManager); + } + + private DataType charLengthModification(String conversionSpecifier) { + if (isIntegerPointer(conversionSpecifier)) { + return dataTypeManager.getPointer(new CharDataType(dataTypeManager)); + } + return isSignedInteger(conversionSpecifier) ? new CharDataType(dataTypeManager) + : new UnsignedCharDataType(dataTypeManager); + } + + private TypeDef lookupTypeDef(String name) { + List typeList = new ArrayList<>(); + dataTypeManager.findDataTypes(name, typeList); + for (DataType dt : typeList) { + if (!(dt instanceof TypeDef)) { + continue; + } + TypeDef td = (TypeDef) dt; + if (td.getBaseDataType() instanceof AbstractIntegerDataType) { + return td; + } + } + return null; + } + + private TypeDef getIntMaxT() { + if (intmax_t != null) { + return intmax_t; + } + intmax_t = lookupTypeDef(INTMAX_T_NAME); + if (intmax_t == null) { + intmax_t = new TypedefDataType(INTMAX_T_NAME, new LongLongDataType(dataTypeManager)); + Msg.warn(this, INTMAX_T_NAME + " not defined. Generated as `" + intmax_t + "'"); + } + return intmax_t; + } + + private TypeDef getUIntMaxT() { + if (uintmax_t != null) { + return uintmax_t; + } + uintmax_t = lookupTypeDef(UINTMAX_T_NAME); + if (uintmax_t == null) { + uintmax_t = + new TypedefDataType(UINTMAX_T_NAME, new UnsignedLongLongDataType(dataTypeManager)); + Msg.warn(this, UINTMAX_T_NAME + " not defined. 
Generated as `" + uintmax_t + "'"); + } + return uintmax_t; + } + + private AbstractIntegerDataType getIntegralPointerType(boolean signed) { + DataOrganization dataOrganization = dataTypeManager.getDataOrganization(); + int size = dataOrganization.getPointerSize(); + if (size < dataOrganization.getLongSize() && size >= dataOrganization.getIntegerSize()) { + return signed ? new IntegerDataType(dataTypeManager) + : new UnsignedIntegerDataType(dataTypeManager); + } + return signed ? new LongDataType(dataTypeManager) + : new UnsignedLongDataType(dataTypeManager); + } + + private TypeDef getSizeT() { + if (size_t != null) { + return size_t; + } + size_t = lookupTypeDef(SIZE_T_NAME); + if (size_t == null) { + size_t = new TypedefDataType(SIZE_T_NAME, getIntegralPointerType(false)); + Msg.warn(this, SIZE_T_NAME + " not defined. Generated as `" + size_t + "'"); + } + return size_t; + } + + private TypeDef getPtrDiffT() { + if (ptrdiff_t != null) { + return ptrdiff_t; + } + ptrdiff_t = lookupTypeDef(PTRDIFF_T_NAME); + if (ptrdiff_t == null) { + ptrdiff_t = new TypedefDataType(PTRDIFF_T_NAME, getIntegralPointerType(true)); + Msg.warn(this, PTRDIFF_T_NAME + " not defined. Generated as `" + ptrdiff_t + "'"); + } + return ptrdiff_t; + } + + private DataType intmax_t_LengthModification(String conversionSpecifier) { + TypeDef intType = isUnsignedInteger(conversionSpecifier) ? getUIntMaxT() : getIntMaxT(); + return isIntegerPointer(conversionSpecifier) + ? dataTypeManager.getPointer(intType) + : intType; + } + + private DataType size_t_LengthModification(String conversionSpecifier) { + TypeDef sizeType = getSizeT(); + return isIntegerPointer(conversionSpecifier) + ? dataTypeManager.getPointer(sizeType) + : sizeType; + } + + private DataType ptrdiff_t_LengthModification(String conversionSpecifier) { + TypeDef type = isUnsignedInteger(conversionSpecifier) ? getSizeT() : getPtrDiffT(); + return isIntegerPointer(conversionSpecifier) + ? 
dataTypeManager.getPointer(type) + : type; + } + + private DataType longDoubleLengthModification(String conversionSpecifier) { + return new LongDoubleDataType(dataTypeManager); + } + + private boolean isInteger(String conversionSpecifier) { + return isUnsignedInteger(conversionSpecifier) || isSignedInteger(conversionSpecifier); + } + + private boolean isDouble(String conversionSpecifier) { + char c = conversionSpecifier.charAt(0); + String doubleConversionSpecifierSet = "aAeEfFgG"; + return doubleConversionSpecifierSet.indexOf(c) != -1; + } + + private boolean isUnsignedInteger(String conversionSpecifier) { + char c = conversionSpecifier.charAt(0); + String unsignedIntSpecifierSet = "ouxX"; + return unsignedIntSpecifierSet.indexOf(c) != -1; + } + + private boolean isSignedInteger(String conversionSpecifier) { + char c = conversionSpecifier.charAt(0); + String signedIntSpecifierSet = "di"; + return signedIntSpecifierSet.indexOf(c) != -1; + } + + private boolean isIntegerPointer(String conversionSpecifier) { + char c = conversionSpecifier.charAt(0); + String pointerSpecifierSet = "n"; + return pointerSpecifierSet.indexOf(c) != -1; + } + + private boolean isVoidPointer(String conversionSpecifier) { + char c = conversionSpecifier.charAt(0); + String voidPointerSpecifierSet = "p"; + return voidPointerSpecifierSet.indexOf(c) != -1; + } + + private boolean isFlag(char c) { + String flagSpecifierSet = "0+ -#'"; + return flagSpecifierSet.indexOf(c) != -1; + } + + private String extendLengthModifier(String lengthModifier, char nextChar) { + if ((lengthModifier.equals("h") && nextChar == 'h') || + (lengthModifier.equals("l") && nextChar == 'l')) { + return lengthModifier + Character.toString(nextChar); + } + return null; + } + + private boolean isConversionSpecifier(char c) { + return detectConversionSpecifier(c) != null; + } + + private boolean isLengthModifier(char c) { + return detectLengthModifier(c) != null; + } + + private String detectLengthModifier(char c) { + String 
lengthModifierSet = "hljztLq"; + return lengthModifierSet.indexOf(c) != -1 ? Character.toString(c) : null; + } + + private String detectConversionSpecifier(char c) { + String conversionSpecifierSet = "diuofeaFEApcsxXgGnCS"; + return conversionSpecifierSet.indexOf(c) != -1 ? Character.toString(c) : null; + } + + public int skipToNextWhitespace(String formatStr, int i) { + char c = formatStr.charAt(i); + while (c != ' ') { + ++i; + c = formatStr.charAt(i); + } + return i; + } + + private class FormatParsingData { + + private String conversionSpecifier = null; + private String lengthModifier = null; + private boolean fieldWidthComplete = false; + private boolean precisionComplete = false; + private boolean inConversion = false; + private int parameterIndex = 0; + + private void setParameterIndex(int parameterIndex) { + this.parameterIndex = parameterIndex; + } + + private int getParameterIndex() { + return this.parameterIndex; + } + + private void setConversionSpecifier(String conversionSpecifier) { + this.conversionSpecifier = conversionSpecifier; + } + + private String getConversionSpecifier() { + return this.conversionSpecifier; + } + + private void setLengthModifier(String lengthModifier) { + this.lengthModifier = lengthModifier; + } + + private String getLengthModifier() { + return this.lengthModifier; + } + + private boolean isFieldWidthComplete() { + return this.fieldWidthComplete; + } + + private void setFieldWidthComplete(boolean fieldWidthComplete) { + this.fieldWidthComplete = fieldWidthComplete; + } + + private boolean isPrecisionComplete() { + return this.precisionComplete; + } + + private void setPrecisionComplete(boolean precisionComplete) { + this.precisionComplete = precisionComplete; + } + + private void setInConversion(boolean inConversion) { + this.inConversion = inConversion; + } + + private boolean isInConversion() { + return this.inConversion; + } + + private void clearData() { + this.precisionComplete = false; + this.fieldWidthComplete = false; 
+ this.lengthModifier = null; + this.conversionSpecifier = null; + } + } + +} diff --git a/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FunctionCallData.java b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FunctionCallData.java new file mode 100644 index 0000000000..a8c82bb132 --- /dev/null +++ b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/FunctionCallData.java @@ -0,0 +1,68 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.core.string.variadic; + +import ghidra.program.model.address.*; + +/** + * Class for encapsulating a variadic function call + */ +public class FunctionCallData { + + private Address addressOfCall; + private String callFunctionName; + private String formatString; + + /** + * Constructore for FuncCallData + * + * @param addressOfCall Address of function call + * @param callFunctionName variadic function name + * @param formatString format String + */ + public FunctionCallData(Address addressOfCall, String callFunctionName, String formatString) { + this.addressOfCall = addressOfCall; + this.callFunctionName = callFunctionName; + this.formatString = formatString; + } + + /** + * addressOfCall getter + * + * @return addressOfCall + */ + public Address getAddressOfCall() { + return this.addressOfCall; + } + + /** + * callFunctionName getter + * + * @return callFunctionName + */ + public String getCallFuncName() { + return this.callFunctionName; + } + + /** + * formatString getter + * + * @return formatString + */ + public String getFormatString() { + return this.formatString; + } +} diff --git a/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/PcodeFunctionParser.java b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/PcodeFunctionParser.java new file mode 100644 index 0000000000..1eaaab6080 --- /dev/null +++ b/Ghidra/Features/DecompilerDependent/src/main/java/ghidra/app/plugin/core/string/variadic/PcodeFunctionParser.java @@ -0,0 +1,184 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.core.string.variadic; + +import java.util.*; + +import ghidra.docking.settings.SettingsImpl; +import ghidra.program.model.address.Address; +import ghidra.program.model.data.StringDataInstance; +import ghidra.program.model.data.StringDataType; +import ghidra.program.model.listing.*; +import ghidra.program.model.mem.MemoryBufferImpl; +import ghidra.program.model.pcode.PcodeOpAST; +import ghidra.program.model.pcode.Varnode; + +/** + * Class for parsing functions' Pcode representations and finding variadic + * functions being called + * + */ +public class PcodeFunctionParser { + + // All values within the range [32, 126] are ascii readable + private static final int READABLE_ASCII_LOWER_BOUND = 32; + private static final int READABLE_ASCII_UPPER_BOUND = 126; + // How many bytes to read from a memory address when initial format + // String cannot be found. This normally only happens for short format + // Strings with lengths less than 5 + private static final int BUFFER_LENGTH = 20; + private static final String CALL_INSTRUCTION = "CALL"; + + private Program program; + + public PcodeFunctionParser(Program program) { + this.program = program; + } + + /** + * Takes pcode ops of a function and parses them to determine whether there are + * any calls to variadic functions that use format Strings. 
+ * + * @param pcodeOps List of PcodeOpAST for a function + * @param addressToCandidateData map of Addresses to format String data + * @param variadicFunctionNames Set of variadic functions to look for + * @return List of variadic functions that the current function calls + */ + public List parseFunctionForCallData(List pcodeOps, + Map addressToCandidateData, Set variadicFunctionNames) { + + if (pcodeOps == null || addressToCandidateData == null || variadicFunctionNames == null || + this.program == null) { + return null; + } + List functionCallDataList = new ArrayList<>(); + for (PcodeOpAST ast : pcodeOps) { + Varnode firstNode = ast.getInput(0); + if (firstNode == null) { + continue; + } + if (ast.getMnemonic().contentEquals(CALL_INSTRUCTION)) { + + FunctionManager functionManager = this.program.getFunctionManager(); + Function function = functionManager.getFunctionAt(firstNode.getAddress()); + if (function == null) { + return null; + } + String functionName = function.getName(); + if (variadicFunctionNames.contains(functionName)) { + Varnode[] inputs = ast.getInputs(); + if (inputs.length > 0) { + boolean hasDefinedFormatString = searchForVariadicCallData(ast, + addressToCandidateData, functionCallDataList, functionName); + if (!hasDefinedFormatString) { + searchForHiddenFormatStrings(ast, functionCallDataList, functionName); + } + } + } + } + } + return functionCallDataList; + } + + private boolean searchForVariadicCallData(PcodeOpAST ast, + Map addressToCandidateData, List functionCallDataList, + String functionName) { + + boolean hasDefinedFormatString = false; + Varnode[] inputs = ast.getInputs(); + for (int i = 1; i < inputs.length; i++) { + Varnode v = inputs[i]; + Data data = null; + Address ramSpaceAddress = convertAddressToRamSpace(v.getAddress()); + if (addressToCandidateData.containsKey(ramSpaceAddress)) { + data = addressToCandidateData.get(ramSpaceAddress); + functionCallDataList.add(new FunctionCallData(ast.getSeqnum().getTarget(), + functionName, 
data.getDefaultValueRepresentation())); + hasDefinedFormatString = true; + } + } + return hasDefinedFormatString; + } + + // If addrToCandidateData doesn't have format String data for this call + // and we are calling a variadic function, parse the String to determine + // whether it's a format String. + private void searchForHiddenFormatStrings(PcodeOpAST ast, + List functionCallDataList, String functionName) { + + Varnode[] inputs = ast.getInputs(); + // Initialize i = 1 to skip first input + for (int i = 1; i < inputs.length; ++i) { + Varnode v = inputs[i]; + String formatStringCandidate = findFormatString(v.getAddress()); + if (formatStringCandidate == null) { + continue; + } + if (formatStringCandidate.contains("%")) { + functionCallDataList.add(new FunctionCallData(ast.getSeqnum().getTarget(), + functionName, formatStringCandidate)); + } + break; + } + } + + private Address convertAddressToRamSpace(Address address) { + + String addressString = address.toString(false); + return this.program.getAddressFactory().getAddress(addressString); + } + + /** + * Looks at bytes at given address and converts to format String + * + * @param address Address of format String + * @return format String + */ + private String findFormatString(Address address) { + + if (!address.getAddressSpace().isConstantSpace()) { + return null; + } + + // Old address associated with constant space which doesn't work + Address ramSpaceAddress = convertAddressToRamSpace(address); + + MemoryBufferImpl memoryBuffer = + new MemoryBufferImpl(this.program.getMemory(), ramSpaceAddress); + SettingsImpl settings = new SettingsImpl(); + + StringDataInstance stringDataInstance = StringDataInstance + .getStringDataInstance(new StringDataType(), memoryBuffer, settings, BUFFER_LENGTH); + String stringValue = stringDataInstance.getStringValue(); + if (stringValue == null) { + return null; + } + + String formatStringCandidate = ""; + for (int i = 0; i < stringValue.length(); i++) { + if 
(!isAsciiReadable(stringValue.charAt(i))) { + break; + } + formatStringCandidate += stringValue.charAt(i); + } + return formatStringCandidate; + } + + private boolean isAsciiReadable(char c) { + + return c >= READABLE_ASCII_LOWER_BOUND && c <= READABLE_ASCII_UPPER_BOUND; + } +} diff --git a/Ghidra/Features/DecompilerDependent/src/test/java/ghidra/app/plugin/core/string/variadic/FormatStringParserTest.java b/Ghidra/Features/DecompilerDependent/src/test/java/ghidra/app/plugin/core/string/variadic/FormatStringParserTest.java new file mode 100644 index 0000000000..a6aceccd47 --- /dev/null +++ b/Ghidra/Features/DecompilerDependent/src/test/java/ghidra/app/plugin/core/string/variadic/FormatStringParserTest.java @@ -0,0 +1,332 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.core.string.variadic; + +import static org.junit.Assert.*; + +import java.util.List; + +import org.junit.Before; +import org.junit.Test; + +import generic.test.AbstractGenericTest; +import ghidra.program.database.ProgramBuilder; +import ghidra.program.database.ProgramDB; +import ghidra.program.database.data.ProgramDataTypeManager; +import ghidra.program.model.data.*; + +public class FormatStringParserTest extends AbstractGenericTest { + + private ProgramBuilder builder; + private ProgramDB program; + + @Before + public void setUp() throws Exception { + + builder = new ProgramBuilder("FormatStringParserTest", ProgramBuilder._TOY, this); + assertNotNull(builder); + program = builder.getProgram(); + assertNotNull(program); + + } + + // Determines whether null is properly returned for + // invalid format Strings. Each String is invalid due to + // either (1) invalid conversion specifier, (2) invalid + // length modifier, or (3) placeholder incorrectly used + @Test + public void testInvalidFormatString() { + + runFormatTest("%r", null, true); // r is not a conversion specifier + runFormatTest("%%%lw", null, true); // w is not a conversion specifier + runFormatTest("%#0*.*ld", null, false); // scanf doesn't use flags or period + runFormatTest("%d::%%%ld%z", null, true); // z is not a conversion specifier + runFormatTest("thisisatest%%%#**u", null, true); // two consecutive astericks + runFormatTest("%#0'*rd", null, true); // r is not length modifier + runFormatTest("%%%#'*md", null, true); // m is not length modifier + runFormatTest("%*.**d", null, true); // two consecutive astericks + runFormatTest("%lD", null, true); // D is not a conversion specifier + runFormatTest("%-0+**d", null, false); // scanf doesn't use flags, two consecutive astericks + runFormatTest("%-0+*.*d", null, false); // scanf doesn't use flags or period + runFormatTest("%2.3d", null, false); // scanf doesn't use period + runFormatTest("%*1$d %d\n", null, true); // If 
one placeholder specifies parameter, the others must too + runFormatTest("%2$d %d\n", null, true); // If one placeholder specifies parameter, the others must too + + } + + // Tests format strings for scanf which have expected types of pointers instead + // of standard format strings + @Test + public void testScanfFormatString() { + + DataType[] expectedTypes1 = + { program.getDataTypeManager().getPointer(new IntegerDataType()) }; + runFormatTest("%d", expectedTypes1, false); + DataType[] expectedTypes2 = + { program.getDataTypeManager().getPointer(new IntegerDataType()), + program.getDataTypeManager().getPointer(new ShortDataType()) }; + + runFormatTest("%d%hi", expectedTypes2, false); + + DataType[] expectedTypes3 = + { program.getDataTypeManager().getPointer(new PointerDataType(DataType.VOID)), + program.getDataTypeManager().getPointer(new CharDataType()) }; + runFormatTest("%p%*d%s", expectedTypes3, false); + + DataType[] expectedTypes4 = + { program.getDataTypeManager().getPointer(new LongDoubleDataType()), + program.getDataTypeManager().getPointer(new CharDataType()), + program.getDataTypeManager().getPointer(new PointerDataType(DataType.VOID)) }; + + runFormatTest("!:%12La%*d+=%2s%3p%*20d", expectedTypes4, false); + + } + + // Tests format strings that are more complex, containing less commonly + // used format patterns and more '%' characters + @Test + public void testComplexFormatString() { + DataType[] expectedTypes1 = + { program.getDataTypeManager().getPointer(new IntegerDataType()), }; + runFormatTest("#12%n\nd2", expectedTypes1, true); + + DataType[] expectedTypes2 = + { program.getDataTypeManager().getPointer(new CharDataType()), new LongDataType() }; + runFormatTest("#thisisatest%+-4.12s%#.1lin\nd2", expectedTypes2, true); + + DataType[] expectedTypes3 = + { new PointerDataType(DataType.VOID), new LongDoubleDataType(), + new UnsignedCharDataType() }; + runFormatTest("%01.3pp%%%#1.2Lg%%%%%hhXxn2", expectedTypes3, true); + + DataType[] expectedTypes4 = 
{ new IntegerDataType(), new IntegerDataType(), + new UnsignedCharDataType(), new IntegerDataType(), new LongDoubleDataType() }; + runFormatTest("%0#+-*.*hhX%%%.*La", expectedTypes4, true); + DataType[] expectedTypes5 = { new IntegerDataType(), + + program.getDataTypeManager().getPointer(new IntegerDataType()), new IntegerDataType(), + program.getDataTypeManager().getPointer(new WideCharDataType()), new IntegerDataType(), + new LongDoubleDataType() }; + runFormatTest("%.*n%*C%%%%%.*LE", expectedTypes5, true); + + } + + // Tests format strings that use astericks to add another int + // argument to determine field width or precision + @Test + public void testAsterickFormatString() { + DataType[] expectedTypes1 = { new IntegerDataType(), new IntegerDataType() }; + runFormatTest("%*d", expectedTypes1, true); + + DataType[] expectedTypes2 = { new IntegerDataType(), new LongDataType() }; + runFormatTest("%.*ld", expectedTypes2, true); + + DataType[] expectedTypes3 = + { new IntegerDataType(), new IntegerDataType(), new IntegerDataType() }; + runFormatTest("%*.*d", expectedTypes3, true); + DataType[] expectedTypes4 = + { new IntegerDataType(), new IntegerDataType(), new IntegerDataType() }; + runFormatTest("*%%%+-*.*d", expectedTypes4, true); + + } + + // Test simple format strings with different length modifiers + @Test + public void testLengthModifierFormatString() { + DataType[] expectedTypes1 = + { new LongDataType(), new PointerDataType(LongDataType.dataType) }; + runFormatTest("%ld %ln", expectedTypes1, true); + + DataType[] expectedTypes2 = + { new ShortDataType(), new CharDataType(), new PointerDataType(ShortDataType.dataType), + new PointerDataType(CharDataType.dataType) }; + runFormatTest("%hd %hhi %hn %hhn", expectedTypes2, true); + + DataType[] expectedTypes3 = { new UnsignedShortDataType(), new UnsignedCharDataType() }; + runFormatTest("%hx %hhu", expectedTypes3, true); + + DataType[] expectedTypes4 = + { new UnsignedLongDataType(), new LongLongDataType(), 
new UnsignedLongLongDataType(), + new PointerDataType(LongLongDataType.dataType) }; + runFormatTest("%lX %lld %llx %lln", expectedTypes4, true); + + DataType[] expectedTypes5 = + { new LongDoubleDataType(), new LongLongDataType(), new UnsignedLongLongDataType(), + new UnsignedShortDataType(), new UnsignedCharDataType() }; + runFormatTest("%LE %lli %llX %hu %hhX", expectedTypes5, true); + } + + // Test simple format strings with different special length modifiers + // using generated default typedefs + @Test + public void testSpecialLengthModifierFormatStringDefault() { + DataType[] expectedTypes1 = + { new TypedefDataType("size_t", UnsignedLongDataType.dataType) }; + runFormatTest("%zd", expectedTypes1, true); + + DataType[] expectedTypes2 = + { new TypedefDataType("size_t", UnsignedLongDataType.dataType) }; + runFormatTest("%zu", expectedTypes2, true); + + DataType[] expectedTypes3 = { new TypedefDataType("ptrdiff_t", LongDataType.dataType) }; + runFormatTest("%td", expectedTypes3, true); + + DataType[] expectedTypes4 = + { new TypedefDataType("size_t", UnsignedLongDataType.dataType) }; + runFormatTest("%tu", expectedTypes4, true); + + DataType[] expectedTypes5 = { new TypedefDataType("intmax_t", LongLongDataType.dataType) }; + runFormatTest("%jd", expectedTypes5, true); + + DataType[] expectedTypes6 = + { new TypedefDataType("uintmax_t", UnsignedLongLongDataType.dataType) }; + runFormatTest("%ju", expectedTypes6, true); + + DataType[] expectedTypes7 = + { new PointerDataType(new TypedefDataType("intmax_t", LongLongDataType.dataType)) }; + runFormatTest("%jn", expectedTypes7, true); + } + + // Test simple format strings with different special length modifiers + // using predefined typedefs + @Test + public void testSpecialLengthModifierFormatStringPredefined() { + + int txId = program.startTransaction("Add TypeDefs"); + try { + ProgramDataTypeManager dtm = program.getDataTypeManager(); + DataType sizetDt = + dtm.resolve(new TypedefDataType("size_t", 
UnsignedLongLongDataType.dataType), null); + DataType ptrdiftDt = + dtm.resolve(new TypedefDataType("ptrdiff_t", LongLongDataType.dataType), null); + DataType intmaxtDt = + dtm.resolve(new TypedefDataType("intmax_t", LongDataType.dataType), null); + DataType uintmaxtDt = + dtm.resolve(new TypedefDataType("uintmax_t", UnsignedLongDataType.dataType), null); + + DataType[] expectedTypes1 = { sizetDt }; + runFormatTest("%zd", expectedTypes1, true); + + DataType[] expectedTypes2 = { sizetDt }; + runFormatTest("%zu", expectedTypes2, true); + + DataType[] expectedTypes3 = { ptrdiftDt }; + runFormatTest("%td", expectedTypes3, true); + + DataType[] expectedTypes4 = { sizetDt }; + runFormatTest("%tu", expectedTypes4, true); + + DataType[] expectedTypes5 = { intmaxtDt }; + runFormatTest("%jd", expectedTypes5, true); + + DataType[] expectedTypes6 = { uintmaxtDt }; + runFormatTest("%ju", expectedTypes6, true); + + DataType[] expectedTypes7 = { new PointerDataType(intmaxtDt) }; + runFormatTest("%jn", expectedTypes7, true); + + } + finally { + program.endTransaction(txId, true); + } + } + + // Test simple format Strings with different conversion specifiers + @Test + public void testConversionSpecFormatString() { + DataType[] expectedTypes1 = { new IntegerDataType() }; + runFormatTest("%d", expectedTypes1, true); + + DataType[] expectedTypes2 = + { new IntegerDataType(), new IntegerDataType(), new UnsignedIntegerDataType(), + program.getDataTypeManager().getPointer(new CharDataType()) }; + runFormatTest("%i %i %x %s", expectedTypes2, true); + + DataType[] expectedTypes3 = { new IntegerDataType(), new IntegerDataType(), + program.getDataTypeManager().getPointer(new CharDataType()) }; + runFormatTest("%d %d %s", expectedTypes3, true); + + DataType[] expectedTypes4 = { new DoubleDataType(), new DoubleDataType(), + new DoubleDataType(), new DoubleDataType(), new UnsignedCharDataType() }; + runFormatTest("%e %f %E %G %c", expectedTypes4, true); + + DataType[] expectedTypes5 = { new 
UnsignedIntegerDataType(), new UnsignedIntegerDataType(), + new UnsignedIntegerDataType(), new DoubleDataType(), new DoubleDataType() }; + runFormatTest("%u %x %X %e %g", expectedTypes5, true); + DataType[] expectedTypes6 = { new IntegerDataType() }; + runFormatTest("%.d", expectedTypes6, true); + } + + // Format Strings with field widths indicated by the sequence "*m$" + // where m is an integer that determines the position in the argument + // list of an integer argument + @Test + public void testFormatParameters() { + DataType[] expectedTypes1 = { new IntegerDataType() }; + runFormatTest("%1$d", expectedTypes1, true); + + DataType[] expectedTypes2 = { new IntegerDataType(), new IntegerDataType() }; + runFormatTest("%1$*2$d", expectedTypes2, true); + + DataType[] expectedTypes3 = { new IntegerDataType(), new IntegerDataType() }; + runFormatTest("%1$.*2$d", expectedTypes3, true); + + DataType[] expectedTypes4 = { new IntegerDataType(), new IntegerDataType(), + new IntegerDataType(), new IntegerDataType() }; + runFormatTest("%1$d:%2$.*3$d:%4$.*3$d\n", expectedTypes4, true); + DataType[] expectedTypes5 = + { new UnsignedIntegerDataType(), new UnsignedIntegerDataType() }; + + runFormatTest("%2$d %2$#x; %1$d %1$#x", expectedTypes5, true); + + DataType[] expectedTypes6 = + { new UnsignedIntegerDataType(), new IntegerDataType(), new IntegerDataType() }; + runFormatTest("%2$+#*3$d:%2$#x;0-:'.~%1$0*2$d:!2%1$#x", expectedTypes6, true); + DataType[] expectedTypes7 = + { new UnsignedLongLongDataType(), new DoubleDataType(), new IntegerDataType() }; + runFormatTest("%2$+#*3$f:*;`2!%1$#qu", expectedTypes7, true); + } + + private void runFormatTest(String testString, DataType[] expected, boolean runOutputAnalyzer) { + + FormatStringParser parser = new FormatStringParser(program); + List formatArguments = + parser.convertToFormatArgumentList(testString, runOutputAnalyzer); + DataType[] dataTypes = runOutputAnalyzer ? 
parser.convertToOutputDataTypes(formatArguments) + : parser.convertToInputDataTypes(formatArguments); + assertEquivalent(dataTypes, expected); + + } + + private void assertEquivalent(DataType[] actual, DataType[] expected) { + + if (expected == null) { + assertNull(actual); + return; + } + assertNotNull("Expected args were not produced", actual); + assertNotNull("Unexpected args were produced", expected); + assertEquals("Expected arg count differs from actual", actual.length, expected.length); + + for (int i = 0; i < actual.length; i++) { + assertNotNull("Unexpected null arg returned", actual[i]); + if (!actual[i].isEquivalent(expected[i])) { + fail("Expected: " + expected[i] + ", Actual: " + actual[i]); + } + } + } + +}