diff --git a/Ghidra/Debug/Framework-TraceModeling/src/test/java/ghidra/pcode/exec/trace/TracePcodeEmulatorTest.java b/Ghidra/Debug/Framework-TraceModeling/src/test/java/ghidra/pcode/exec/trace/TracePcodeEmulatorTest.java index 364a4e3186..c306412aa6 100644 --- a/Ghidra/Debug/Framework-TraceModeling/src/test/java/ghidra/pcode/exec/trace/TracePcodeEmulatorTest.java +++ b/Ghidra/Debug/Framework-TraceModeling/src/test/java/ghidra/pcode/exec/trace/TracePcodeEmulatorTest.java @@ -38,6 +38,7 @@ import ghidra.program.model.lang.*; import ghidra.program.model.listing.Instruction; import ghidra.test.AbstractGhidraHeadlessIntegrationTest; import ghidra.trace.database.ToyDBTraceBuilder; +import ghidra.trace.database.context.DBTraceRegisterContextManager; import ghidra.trace.model.memory.TraceMemoryFlag; import ghidra.trace.model.memory.TraceMemoryManager; import ghidra.trace.model.thread.TraceThread; @@ -864,17 +865,13 @@ public class TracePcodeEmulatorTest extends AbstractGhidraHeadlessIntegrationTes try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "x86:LE:64:default")) { Language lang = tb.trace.getBaseLanguage(); Register ctxReg = lang.getContextBaseRegister(); - Register opsizeReg = lang.getRegister("opsize"); - Register addrsizeReg = lang.getRegister("addrsize"); Register longModeReg = lang.getRegister("longMode"); RegisterValue ctxVal = new RegisterValue(ctxReg) - .assign(opsizeReg, BigInteger.ONE) - .assign(addrsizeReg, BigInteger.ONE) .assign(longModeReg, BigInteger.ZERO); + DBTraceRegisterContextManager ctxManager = tb.trace.getRegisterContextManager(); try (UndoableTransaction tid = tb.startTransaction()) { - tb.trace.getRegisterContextManager() - .setValue(lang, ctxVal, Range.atLeast(0L), - tb.range(0x00400000, 0x00400002)); + ctxManager.setValue(lang, ctxVal, Range.atLeast(0L), + tb.range(0x00400000, 0x00400002)); } TraceThread thread = initTrace(tb, List.of( @@ -891,6 +888,8 @@ public class TracePcodeEmulatorTest extends 
AbstractGhidraHeadlessIntegrationTes TracePcodeEmulator emu = new TracePcodeEmulator(tb.trace, 0); PcodeThread emuThread = emu.newThread(thread.getPath()); + // TODO: Seems the Trace-bound thread ought to know to do this in reInitialize() + ctxVal = ctxManager.getValueWithDefault(lang, ctxReg, 0, tb.addr(0x00400000)); emuThread.overrideContext(ctxVal); emuThread.stepInstruction(); emuThread.stepInstruction(); diff --git a/Ghidra/Features/Base/ghidra_scripts/AssemblyThrasherDevScript.java b/Ghidra/Features/Base/ghidra_scripts/AssemblyThrasherDevScript.java index 91d3624597..d3bb842409 100644 --- a/Ghidra/Features/Base/ghidra_scripts/AssemblyThrasherDevScript.java +++ b/Ghidra/Features/Base/ghidra_scripts/AssemblyThrasherDevScript.java @@ -63,7 +63,7 @@ public class AssemblyThrasherDevScript extends GhidraScript { } @Override - public AssemblyResolvedConstructor select(AssemblyResolutionResults rr, + public AssemblyResolvedPatterns select(AssemblyResolutionResults rr, AssemblyPatternBlock ctx) throws AssemblySemanticException { StringBuilder sb = new StringBuilder(); boolean gotOne = false; @@ -72,7 +72,7 @@ public class AssemblyThrasherDevScript extends GhidraScript { if (ar.isError()) { continue; } - AssemblyResolvedConstructor can = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns can = (AssemblyResolvedPatterns) ar; if (can.getContext().combine(ctx) == null) { continue; } diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/assembler/AssemblyDualTextField.java b/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/assembler/AssemblyDualTextField.java index 64a4147d2f..1b039ebfc9 100644 --- a/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/assembler/AssemblyDualTextField.java +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/plugin/core/assembler/AssemblyDualTextField.java @@ -704,7 +704,7 @@ public class AssemblyDualTextField { * @param existing the instruction, if any, currently under the user's cursor * @return a 
preference */ - protected int computePreference(AssemblyResolvedConstructor rc, Instruction existing) { + protected int computePreference(AssemblyResolvedPatterns rc, Instruction existing) { if (existing == null) { return 0; } @@ -763,7 +763,7 @@ public class AssemblyDualTextField { //result.add(new AssemblyError("", ar.toString())); continue; } - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar; for (byte[] ins : rc.possibleInsVals(ctx)) { result.add(new AssemblyInstruction(text, Arrays.copyOf(ins, ins.length), computePreference(rc, existing))); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/Assembler.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/Assembler.java index 6e9728ecbc..0c0e30d50b 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/Assembler.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/Assembler.java @@ -42,7 +42,8 @@ public interface Assembler { * refer to pseudo instructions. * *

- * NOTE: There must be an active transaction on the bound program for this method to succeed. + * NOTE: There must be an active transaction on the bound program for this method to + * succeed. * * @param at the location where the resulting instructions should be placed * @param listing a new-line separated or array sequence of instructions @@ -119,8 +120,8 @@ public interface Assembler { * results. * *

- * NOTE: The resolved instructions are given as masks and values. Where the mask does not cover, - * you can choose any value. + * NOTE: The resolved instructions are given as masks and values. Where the mask does not + * cover, you can choose any value. * * @param parse a parse result giving a valid tree * @param at the location of the start of the instruction @@ -139,8 +140,8 @@ public interface Assembler { * results. * *

- * NOTE: The resolved instructions are given as masks and values. Where the mask does not cover, - * you can choose any value. + * NOTE: The resolved instructions are given as masks and values. Where the mask does not + * cover, you can choose any value. * * @param parse a parse result giving a valid tree * @param at the location of the start of the instruction @@ -192,7 +193,7 @@ public interface Assembler { * @return the new {@link Instruction} code unit * @throws MemoryAccessException there is an issue writing the result to program memory */ - public Instruction patchProgram(AssemblyResolvedConstructor res, Address at) + public Instruction patchProgram(AssemblyResolvedPatterns res, Address at) throws MemoryAccessException; /** diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblerBuilder.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblerBuilder.java index 530d3442c4..c29b8cee42 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblerBuilder.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblerBuilder.java @@ -25,18 +25,21 @@ import ghidra.program.model.listing.Program; public interface AssemblerBuilder { /** * Get the ID of the language for which this instance builds an assembler + * * @return the language ID */ public LanguageID getLanguageID(); /** * Get the language for which this instance builds an assembler + * * @return the language */ public Language getLanguage(); /** * Build an assembler with the given selector callback + * * @param selector the selector callback * @return the built assembler */ @@ -44,6 +47,7 @@ public interface AssemblerBuilder { /** * Build an assembler with the given selector callback and program binding + * * @param selector the selector callback * @param program the bound program * @return the built assembler diff --git 
a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySelector.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySelector.java index 7c0ee4ec89..88ec0ceac7 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySelector.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySelector.java @@ -19,19 +19,20 @@ import java.util.*; import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult; import ghidra.app.plugin.assembler.sleigh.sem.*; -import ghidra.app.plugin.assembler.sleigh.util.SleighUtil; +import ghidra.app.plugin.assembler.sleigh.util.AsmUtil; /** - * Provides a mechanism for pruning and selecting binary assembled instructions from the results - * of parsing textual assembly instructions. There are two opportunities: After parsing, but before - * semantic resolution, and after resolution. In the first opportunity, filtering is optional --- - * the user may discard any or all parse trees. The second is required, since only one instruction - * may be placed at the desired address --- the user must select one instruction among the many - * results, and if a mask is present, decide on a value for the omitted bits. + * Provides a mechanism for pruning and selecting binary assembled instructions from the results of + * parsing textual assembly instructions. There are two opportunities: After parsing, but before + * prototype generation, and after machine code generation. In the first opportunity, filtering is + * optional --- the user may discard any or all parse trees. The second is required, since only one + * instruction may be placed at the desired address --- the user must select one instruction among + * the many results, and if a mask is present, decide on a value for the omitted bits. * + *

* Extensions of this class are also suitable for collecting diagnostic information about attempted - * assemblies. For example, an implementation may employ the syntax errors in order to produce - * code completion suggestions in a GUI. + * assemblies. For example, an implementation may employ the syntax errors in order to produce code + * completion suggestions in a GUI. */ public class AssemblySelector { protected Set syntaxErrors = new TreeSet<>(); @@ -40,7 +41,7 @@ public class AssemblySelector { /** * A comparator on instruction length (shortest first), then bits lexicographically */ - protected Comparator compareBySizeThenBits = (a, b) -> { + protected Comparator compareBySizeThenBits = (a, b) -> { int result; result = a.getInstructionLength() - b.getInstructionLength(); if (result != 0) { @@ -48,7 +49,7 @@ public class AssemblySelector { } result = - SleighUtil.compareArrays(a.getInstruction().getVals(), b.getInstruction().getVals()); + AsmUtil.compareArrays(a.getInstruction().getVals(), b.getInstruction().getVals()); if (result != 0) { return result; } @@ -58,16 +59,20 @@ public class AssemblySelector { /** * Filter a collection of parse trees. * - * Generally, the assembly resolver considers every possible parsing of an assembly - * instruction. If, for some reason, the user wishes to ignore certain trees (perhaps for - * efficiency, or perhaps because a certain form of instruction is desired), entire parse - * trees may be pruned here. + *

+ * Generally, the assembly resolver considers every possible parsing of an assembly instruction. + * If, for some reason, the user wishes to ignore certain trees (perhaps for efficiency, or + * perhaps because a certain form of instruction is desired), entire parse trees may be pruned + * here. * - * It's possible that no trees pass the filter. In this case, this method ought to throw an - * {@link AssemblySyntaxException}. Another option is to pass the erroneous result on for semantic - * analysis, in which case, the error is simply copied into an erroneous semantic result. - * Depending on preferences, this may simplify the overall filtering and error-handling logic. + *

+ * It is possible that no trees pass the filter. In this case, this method ought to throw an + * {@link AssemblySyntaxException}. Another option is to pass the erroneous result on for + * semantic analysis, in which case, the error is simply copied into an erroneous semantic + * result. Depending on preferences, this may simplify the overall filtering and error-handling + * logic. * + *

* By default, no filtering is applied. If all the trees produce syntax errors, an exception is * thrown. * @@ -95,10 +100,12 @@ public class AssemblySelector { /** * Select an instruction from the possible results. * - * Must select precisely one resolved constructor from the results given back by the assembly - * resolver. Precisely one. That means the mask of the returned result must consist of all 1s. - * Also, if no selection is suitable, an exception must be thrown. + *

+ * This must select precisely one resolved constructor from the results given back by the + * assembly resolver. This further implies the mask of the returned result must consist of all + * 1s. If no selection is suitable, this must throw an exception. * + *

* By default, this method selects the shortest instruction that is compatible with the given * context and takes 0 for bits that fall outside the mask. If all possible resolutions produce * errors, an exception is thrown. @@ -106,18 +113,18 @@ public class AssemblySelector { * @param rr the collection of resolved constructors * @param ctx the applicable context. * @return a single resolved constructor with a full instruction mask. - * @throws AssemblySemanticException + * @throws AssemblySemanticException */ - public AssemblyResolvedConstructor select(AssemblyResolutionResults rr, + public AssemblyResolvedPatterns select(AssemblyResolutionResults rr, AssemblyPatternBlock ctx) throws AssemblySemanticException { - List sorted = new ArrayList<>(); + List sorted = new ArrayList<>(); // Select only non-erroneous results whose contexts are compatible. for (AssemblyResolution ar : rr) { if (ar.isError()) { semanticErrors.add((AssemblyResolvedError) ar); continue; } - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar; sorted.add(rc); } if (sorted.isEmpty()) { @@ -127,9 +134,9 @@ public class AssemblySelector { sorted.sort(compareBySizeThenBits); // Pick just the first - AssemblyResolvedConstructor res = sorted.get(0); + AssemblyResolvedPatterns res = sorted.get(0); // Just set the mask to ffs (effectively choosing 0 for the omitted bits) return AssemblyResolution.resolved(res.getInstruction().fillMask(), res.getContext(), - "Selected", null); + "Selected", null, null, null); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySemanticException.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySemanticException.java index 57c710ba40..8dce8ad56a 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySemanticException.java +++ 
b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySemanticException.java @@ -26,6 +26,7 @@ import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedError; /** * Thrown when all resolutions of an assembly instruction result in semantic errors. * + *

* For SLEIGH, semantic errors amount to incompatible contexts */ public class AssemblySemanticException extends AssemblyException { @@ -37,6 +38,7 @@ public class AssemblySemanticException extends AssemblyException { /** * Construct a semantic exception with the associated semantic errors + * * @param errors the associated semantic errors */ public AssemblySemanticException(Set errors) { @@ -46,6 +48,7 @@ public class AssemblySemanticException extends AssemblyException { /** * Get the collection of associated semantic errors + * * @return the collection */ public Collection getErrors() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySyntaxException.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySyntaxException.java index 8ed2683b5e..9e6e8abf2e 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySyntaxException.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySyntaxException.java @@ -35,6 +35,7 @@ public class AssemblySyntaxException extends AssemblyException { /** * Construct a syntax exception with the associated syntax errors + * * @param errors the associated syntax errors */ public AssemblySyntaxException(Set errors) { @@ -44,6 +45,7 @@ public class AssemblySyntaxException extends AssemblyException { /** * Get the collection of associated syntax errors + * * @return the collection */ public Collection getErrors() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssembler.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssembler.java index 0eaedd1eb1..6a7aca2be3 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssembler.java +++ 
b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssembler.java @@ -17,11 +17,12 @@ package ghidra.app.plugin.assembler.sleigh; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.*; +import java.util.Collection; import ghidra.app.plugin.assembler.*; import ghidra.app.plugin.assembler.sleigh.parse.*; import ghidra.app.plugin.assembler.sleigh.sem.*; +import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNumericSymbols; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer; import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.program.disassemble.Disassembler; @@ -32,17 +33,16 @@ import ghidra.program.model.lang.RegisterValue; import ghidra.program.model.listing.*; import ghidra.program.model.mem.Memory; import ghidra.program.model.mem.MemoryAccessException; -import ghidra.program.model.symbol.*; import ghidra.util.task.TaskMonitor; /** * An {@link Assembler} for a {@link SleighLanguage}. * - * To obtain one of these, please use {@link SleighAssemblerBuilder}, or better yet, the static - * methods of {@link Assemblers}. + *

+ * For documentation on how the SLEIGH assembler works, see {@link SleighAssemblerBuilder}. To use + * the assembler, please use {@link Assemblers#getAssembler(Program)} or similar. */ public class SleighAssembler implements Assembler { - public static final int DEFAULT_MAX_RECURSION_DEPTH = 2; // TODO: Toss this protected static final DbgTimer dbg = DbgTimer.INACTIVE; protected AssemblySelector selector; @@ -75,7 +75,8 @@ public class SleighAssembler implements Assembler { /** * Construct a SleighAssembler. * - * NOTE: This variant does not permit {@link #assemble(Address, String...)}. + *

+ * NOTE: This variant does not permit {@link #assemble(Address, String...)}. * * @param selector a method of selecting one result from many * @param lang the SLEIGH language (must be same as to create the parser) @@ -93,7 +94,7 @@ public class SleighAssembler implements Assembler { } @Override - public Instruction patchProgram(AssemblyResolvedConstructor res, Address at) + public Instruction patchProgram(AssemblyResolvedPatterns res, Address at) throws MemoryAccessException { if (!res.getInstruction().isFullMask()) { throw new AssemblySelectionError("Selected instruction must have a full mask."); @@ -157,7 +158,7 @@ public class SleighAssembler implements Assembler { @Override public Collection parseLine(String line) { - return parser.parse(line, getProgramLabels()); + return parser.parse(line, getNumericSymbols()); } @Override @@ -173,13 +174,13 @@ public class SleighAssembler implements Assembler { if (parse.isError()) { AssemblyResolutionResults results = new AssemblyResolutionResults(); AssemblyParseErrorResult err = (AssemblyParseErrorResult) parse; - results.add(AssemblyResolution.error(err.describeError(), "Parsing", null)); + results.add(AssemblyResolution.error(err.describeError(), "Parsing")); return results; } AssemblyParseAcceptResult acc = (AssemblyParseAcceptResult) parse; AssemblyTreeResolver tr = - new AssemblyTreeResolver(lang, at.getOffset(), acc.getTree(), ctx, ctxGraph); + new AssemblyTreeResolver(lang, at, acc.getTree(), ctx, ctxGraph); return tr.resolve(); } @@ -219,7 +220,7 @@ public class SleighAssembler implements Assembler { public byte[] assembleLine(Address at, String line, AssemblyPatternBlock ctx) throws AssemblySemanticException, AssemblySyntaxException { AssemblyResolutionResults results = resolveLine(at, line, ctx); - AssemblyResolvedConstructor res = selector.select(results, ctx); + AssemblyResolvedPatterns res = selector.select(results, ctx); if (res == null) { throw new AssemblySelectionError( "Must select exactly one 
instruction. Report errors via AssemblySemanticError"); @@ -234,37 +235,15 @@ public class SleighAssembler implements Assembler { } /** - * A convenience to obtain a map of program labels strings to long values + * A convenience to obtain assembly symbols * * @return the map - * - * {@literal TODO Use a Map instead so that, if possible, symbol values can be checked} - * lest they be an invalid substitution for a given operand. */ - protected Map getProgramLabels() { - Map labels = new HashMap<>(); - for (Register reg : lang.getRegisters()) { - // TODO/HACK: There ought to be a better mechanism describing suitable symbolic - // substitutions for a given operand. - if (!"register".equals(reg.getAddressSpace().getName())) { - labels.put(reg.getName(), (long) reg.getOffset()); - } - } + protected AssemblyNumericSymbols getNumericSymbols() { if (program != null) { - final SymbolIterator it = program.getSymbolTable().getAllSymbols(false); - while (it.hasNext()) { - Symbol sym = it.next(); - if (sym.isExternal()) { - continue; // skip externals - will generally be referenced indirectly not directly - } - SymbolType symbolType = sym.getSymbolType(); - if (symbolType != SymbolType.LABEL && symbolType != SymbolType.FUNCTION) { - continue; - } - labels.put(sym.getName(), sym.getAddress().getOffset()); - } + return AssemblyNumericSymbols.fromProgram(program); } - return labels; + return AssemblyNumericSymbols.fromLanguage(lang); } @Override diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssemblerBuilder.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssemblerBuilder.java index b61f114907..2c1612ecc4 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssemblerBuilder.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/SleighAssemblerBuilder.java @@ -24,8 +24,7 @@ import 
ghidra.app.plugin.assembler.*; import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar; import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential; import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParser; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyContextGraph; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyDefaultContext; +import ghidra.app.plugin.assembler.sleigh.sem.*; import ghidra.app.plugin.assembler.sleigh.symbol.*; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx; @@ -43,57 +42,267 @@ import ghidra.util.SystemUtilities; /** * An {@link AssemblerBuilder} capable of supporting almost any {@link SleighLanguage} * + *

* To build an assembler, please use a static method of the {@link Assemblers} class. * - * SLEIGH-based assembly is a bit of an experimental feature at this time. Nevertheless, it seems to - * have come along quite nicely. It's not quite as fast as disassembly, since after all, that's what - * SLEIGH was designed to do. + *

+ * SLEIGH-based assembly is a bit temperamental, since it essentially runs the disassembler + * backwards. The process is tenuous, but works well enough for interactive single-instruction + * assembly. It is not nearly as fast as disassembly, since after all, SLEIGH was not designed for + * assembly. The assembler is great for interactive patching and for building small samples in unit + * tests. For other cases, a real tool chain is likely more appropriate. * - * Overall, the method is fairly simple, though its implementation is a bit more complex. First, we - * gather every pair of pattern and constructor by traversing the decision tree used by disassembly. - * We then use the "print pieces" to construct a context-free grammar. Each production is associated - * with the one-or-more constructors with the same sequence of print pieces. We then build a LALR(1) - * parser for the generated grammar. This now constitutes a generic parser for the given language. - * Note that this step takes some time, and may be better suited as a build-time step. Because - * SLEIGH specifications are not generally concerned with eliminating ambiguity of printed - * instructions (rather, it only does so for instruction bytes), we must consider that the grammar - * could be ambiguous. To handle this, the action/goto table is permitted multiple entries per cell, - * and we allow backtracking. There are also cases where tokens are not actually separated by - * spaces. For example, in the {@code ia.sinc} file, there is JMP ... and J^cc, meaning, the lexer - * must consider J as a token as well as JMP, introducing another source of possible backtracking. - * Despite that, parsing is completed fairly quickly. + *

A Review of Disassembly

* - * To assemble, we first parse the textual instruction, yielding zero or more parse trees. No parse - * trees implies an error. For each parse tree, we attempt to resolve the instruction bytes, - * starting at the leaves and working upwards while tracking and solving context changes. The - * context changes must be considered in reverse. We read the context register of the - * children (a disassembler would write). We then assume there is at most one variable in the - * expression, solve for it, and write the solution to the appropriate field (a - * disassembler would read). If no solution exists, a semantic error is logged. Since it's possible - * a production in the parse tree is associated with multiple constructors, different combinations - * of constructors are explored as we move upward in the tree. If all possible combinations yield - * semantic errors, then the overall result is an error. + *

+ * Before diving into assembly, it may be helpful to review SLEIGH and disassembly, at least as far + * as I understand. SLEIGH is really a specification of three distinct things, all related by trees + * of "constructors." 1) A mnemonic grammar, 2) A machine-code grammar, 3) Run-time semantics, i.e., + * p-code. The third is consumed primarily by the decompiler, the emulator, and other analysis, and + * is of little concern to the (dis)assembler. All three are tightly bound. A single constructor + * specifies a production in both grammars, constraints for selecting the production, as well as the + * generated run-time semantics. Consider an example: * - * Some productions are "purely recursive," e.g., {@code :^instruction} lines in the SLEIGH. These - * are ignored during parser construction. Let such a production be given as I => I. When resolving - * the parse tree to bytes, and we encounter a production with I on the left hand side, we then - * consider the possible application of the production I => I and its consequential constructors. - * Ideally, we could repeat this indefinitely, stopping when all further applications result in - * semantic errors; however, there is no guarantee in the SLEIGH specification that such an - * algorithm will actually halt, so a maximum number (default of 1) of applications are attempted. + *

+ * :ADD regD,imm8 is op=5 & regD & imm8 { regD = regD + imm8; }
+ * 
* - * After all the context changes and operands are resolved, we apply the constructor patterns and - * proceed up the tree. Thus, each branch yields zero or more "resolved constructors," which each - * specify two masked blocks of data: one for the instruction, and one for the context. These are - * passed up to the parent production, which, having obtained results from all its children, - * attempts to apply the corresponding constructors. + *

+ * The colon indicates this constructor applies to the root "instruction" table. The mnemonic + * production precedes the {@code is} keyword. The machine-code constraints and production + * follow. Finally, the semantics appear within braces. * 

+ * To support bitfield parsing, a list of token formats and the fields within them must be declared. The + * machine-code production may specify constraints in terms of those fields. Such constraints become + * patterns that the parser uses to choose a constructor. For example, we may have + * {@code op=(0,3);regD=(4,7);imm8=(8,15)}. In little endian, this would indicate a 2-byte + * token: + * 

+ *  +-4----+-4----+-8----------+
+ *  | regD |  op  |    imm8    |
+ *  +------+------+------------+
+ * 
+ * + *

+ * Thus, this constructor is assigned the pattern 0101...., which handles + * op=5. regD and imm8 remain as operands. The operands of + * the machine-code production refer to fields and subtables. During disassembly, those operands are + * parsed in the order named: left to right, depth first. For the (root) instruction table and each + * subtable, the disassembler selects exactly one constructor. The parser may only examine one + * machine-code token at a time; however, the token can be large (32 bits is common), and it may + * make several sub-table decisions based on fields within a single token, essentially allowing it + * to look ahead and parse those fields out of order. In the example, the parser will technically + * examine the op field before parsing regD. + * + *

+ * When parsing a table or subtable, if no constructor's constraints can be matched, parsing fails. + * Each token is some number of bytes in size. The parser advances to the next token when it + * encounters a semicolon in the machine-code production. Note that when the parser returns to a + * parent constructor, i.e., the PDA pops its stack, the parser may return to a previous token. If + * that behavior is not desired, a machine-code production may contain ellipses, causing the parser + * to advance to the next token, even considering those tokens already examined by operands to the + * ellipses' left. Once all operands of the selected instruction constructor have been parsed, the + * resulting constructor tree ("prototype") is recorded and returned. + * + *

+ * To display the instruction's mnemonic, the prototype is walked, generating the tokens ("print + * pieces") from the mnemonic production of each constructor. The walk is ordered according to that + * mnemonic production. The mnemonic grammar consists of syntactic text and symbols. Any symbols it + * uses must also appear in the machine-code production. Where the symbol is a sub-table, it behaves + * like a non-terminal in the grammar: It generates the print pieces of the constructor selected for + * the sub-table. Where the symbol is a field, it behaves like a terminal. It displays the numeric + * value of the field, or in the case of attached names, e.g., register names, it displays the name. + * + *

+ * To complicate matters, but greatly increase the capability of the disassembler, SLEIGH introduces + * temporary symbols and context to the disassembler. A temporary symbol allows the computation of + * displayed values from fields. (The value may also be used by the p-code generator.) For example, + * a language may permit the expression of immediates as a value and a shift. Temporary symbols + * permit the effective value to be computed and displayed. Thus, a temporary symbol is valid in the + * mnemonic production. Context serves at least two purposes: 1) To propagate auxiliary information + * to sub-tables during disassembly, and 2) To handle persistent state changes in a processor that + * modify its decoder, e.g., ARM in THUMB mode. The latter is accomplished by marking regions of + * memory with this contextual information. Context is implemented by introducing a context + * register. It behaves like a special mutable token, initialized from the disassembler's memory, + * the context marked at the instruction's start address, or the language's default context. Like + * token fields, context fields can be referred to by a constructor's machine-code production, + * either to form constraints or to parse as operands. Fields may be modified by including mutations + * in the constructor. Mutations and temporary symbols are defined by assigning an expression to the + * field or symbol. Those expressions may refer to other fields and temporary symbols in the scope + * of that constructor. Since mutations are meant to be propagated down, they must be applied in + * pre-order during parsing. Note that context is not saved on any sort of stack, thus it is + * possible for context mutations in a sub-table operand (and its sub-table operands) to affect + * parsing of sibling sub-table operands to the right. + * + *

+ * When disassembling entire subroutines, the disassembler must propagate context changes from + * instruction to instruction. Some bits of the context register are marked "global." Those bits, + * when instruction parsing succeeds, are taken as the "output context" of the resulting + * instruction. Propagation follows from a recursive traversal disassembly strategy, i.e., it heeds + * the branch targets of the instruction. The generated p-code is used to determine whether the + * instruction has branches and/or fall-through. If the output context differs from the default + * context, the disassembler saves it as the initial context for the next instruction. If the + * instruction has a branch target, the output context is marked at the target address. + * + *

Assembly

+ * + *

+ * Conceptually, assembly is a straightforward reversal of the disassembly process; however, the + * actual implementation is far more complex. To assemble an instruction there are three distinct + * phases: 1) Parsing, 2) Prototype generation, 3) Machine code generation. Each phase may take + * advantage of pre-computed artifacts. + * + *

Parsing Assembly Mnemonics

+ * + *

+ * To parse, we pre-compute a LALR(1) parser based on mnemonic grammar. Because different + * constructors may specify the same mnemonic production as others in the same table, we have to + * associate all such constructors to the production. This step takes some time, and may be better + * suited as a build-time step. Because SLEIGH specifications are not generally concerned with + * eliminating ambiguity of printed instructions (rather, it only does so for instruction bytes), we + * must consider that the grammar could be ambiguous. To handle this, the action/goto table is + * permitted multiple entries per cell, and we allow backtracking. There are also cases where tokens + * are not actually separated by spaces. For example, in the {@code ia.sinc} file, there is JMP, and + * J^cc, meaning, the lexer must consider J as a token as well as JMP, introducing another source of + * possible backtracking. Despite that, parsing an instruction is fairly quick, since the sentences + * are rather short. The pre-compute part of this process is implemented in {@link #buildGrammar()} + * and {@link #buildParser()}. Parsing is then encapsulated in {@link AssemblyParser}. + * + *

Prototype Generation

+ * + *

+ * To generate prototypes, we examine each resulting parse tree. If there are no parse trees, then a + * syntax error is reported. Otherwise, for each tree, starting at the root production, we consider + * all associated constructors, matching each print piece to its corresponding operand on the + * machine-code side. For sub-table operands, the production substituted for the associated + * non-terminal guides generation, recursively. For other operands, the associated terminal provides + * the value or name. To mimic the token advancement of the disassembler, a shift is computed and + * stored for each operand. Computing the shift requires computing each operand's length, and so + * once the root of each prototype is generated, the instruction length is also known. Patterns and + * mutations are applied to mimic the disassembly process: pre-ordered, depth first, left to right, + * heeding the computed shift. If a pattern or mutation for a constructor conflicts with what's been + * generated so far, the constructor is pruned. If all possible constructors for a sub-table operand + * are pruned, then the containing constructor is also pruned. + * + *

+ * In some cases, an operand appears in the machine-code production, but not the mnemonic + * production: so-called "hidden operands." These pose a potential issue for the assembler, because + * nothing syntactic can guide prototype generation. For hidden sub-table operands, we must consider + * all constructors in the table. Furthermore, all operands of those constructors are considered + * "hidden," and so we exhaust recursively. For other hidden operands, the value is left + * unspecified. The prototype generation process is encapsulated in + * {@link AssemblyConstructStateGenerator}. + * + *

Machine Code Generation

+ * + *

+ * Machine code generation is a complex process, but it follows a straightforward reversal of the + * disassembler's parse phase. For each prototype, we start at the leaves (non-sub-table operands) + * and proceed upwards. This is still a depth-first traversal, but unlike disassembly, generation + * proceeds in post-order and right to left, as follows. Starting at the root: + * + *

    + *
  1. Resolve operands from right to left, descending into sub-table operands. + *
  2. Solve context mutations, in reverse order. + *
  3. Apply the required patterns + *
+ * + *

+ * Note that for a single prototype, a constructor has already been selected for each sub-table + * operand. The resolution of sub-table operands follows the same process as for the root + * constructor. + * + *

+ * For other operands, resolution proceeds by solving the operand's defining expression set equal to + * the value specified by the terminal. The resulting values are written into their respective token + * or context fields, generating an "assembly pattern." An assembly pattern is simply a masked bit + * sequence recording what is expected in the instruction buffer and context register. Each bit is + * 0, 1, or unspecified. In many cases, the "defining expression" is simply a field, so "solving" + * degenerates simply to "writing" the specified value into the field. Solving expressions is only + * required when a terminal defines the value of a temporary symbol. If the value is unspecified, + * i.e., it is a hidden operand, then no fields are written. Thus, hidden non-sub-table operands + * generate empty patterns. + * + *

+ * As machine code generation proceeds right to left in a constructor, the resulting assembly + * patterns are accumulated. If a generated pattern conflicts with that accumulated so far, the + * pattern is pruned, likely halting generation of the current prototype. Once all operands have + * been successfully resolved, the constructor's context mutations are solved. These tend to get + * complicated since some fields may have values defined by the accumulated pattern, and some may + * not. The changes are processed in reverse order from specified in the constructor, since fields + * may be mutated in a way that forms data dependences among them. To solve, the field on the + * left-hand side of the mutation is read, then it is set equal to the right-hand side and passed to + * the solver. Because, from the disassembly perspective, the left-hand side is about to be written, + * its value is cleared before being passed to the solver. If successful, the solver returns patterns that + * satisfy the equation. Resolution accumulates the patterns. If solving fails, or the patterns + * conflict, it is pruned. Finally, the patterns required to select the constructor are applied, + * again pruning conflicts. Note that a constructor may specify multiple patterns, e.g., if a + * constraint is op == 5 || op == 6. Thus, overall, it is possible a single prototype + * will generate multiple assembly patterns. This process is encapsulated in + * {@link AssemblyConstructState}. + * + *

Handling Context and Prefixes

+ * + *

+ * Once the root constructor has been completely resolved, the resulting instruction patterns + * comprise the generated instruction bytes. However, we must consider the context pattern, too. In + * practice, the assembler is invoked at a particular address, and the program database may provide + * an initial context (as marked during previous disassembly). In other words, when patching an + * instruction, we have to keep any persistent context in place. Thus, we can further cull patterns + * whose context does not match. This intuition is frustrated by the possibility of constructors + * with the mnemonic production ^instruction, though. These "pure recursive" + * constructors are often (ab)used to handle instruction prefixes, e.g.: + * + * + * :^instruction is prefixed=0 & byte=0xff; instruction [ prefixed=1; ] {} + * + * + *

+ * There are no syntactic elements that would cue the assembly parser to use this constructor. + * Instead, we rely on the context register. Were it not for these kinds of constructors, we could + * use the saved context as input to the prototype generation phase; however, we cannot. Instead, we + * use the empty context and delay this step until after machine code generation. During assembler + * construction, we pre-compute a "context transition graph." The mnemonic production + * [instruction] => [instruction] has associated with it all pure recursive + * constructors. Naturally, that production cannot be included in the parser, as it would generate + * increasingly deep parse trees ad infinitum. The graph starts with a seed node: the + * language's default context. Then each pure recursive constructor is considered as an edge, + * leading to the node resulting from applying that constructor, mimicking disassembly. This + * proceeds for each unvisited node until no new nodes are produced. This component is encapsulated + * in {@link AssemblyContextGraph}. + * + *

+ * To generate prefixes, we seek the shortest paths from nodes whose context pattern matches the + * initial context to nodes whose context pattern matches the generated assembly pattern. Note that + * the shortest path may be the zero-length path. If no paths are found, assembly fails. Machine + * code generation then proceeds by considering each path, and resolving the constructors in + * reverse, in the same manner as constructors from the prototype are resolved. Note that the + * patterns may need to be shifted to accommodate prefix tokens. This is accomplished by examining + * the shift of the nested instruction operand for each constructor. This process is implemented in + * {@link AssemblyTreeResolver#resolveRootRecursion(AssemblyResolutionResults)}. + * + *

Final Steps

+ * + *

+ * As a final fail safe, the generated instructions are fed back through the disassembler and the + * resulting constructor trees are compared. If not equivalent, the instruction is dropped. It is + * possible (common in fact) that the generated assembly instruction pattern is not fully defined. + * By default, the assembler will substitute 0 for each undefined bit. However, the assembler API + * allows the retrieval of the generated pattern, since a user may wish to substitute other values. + * + *

+ * If, in the end, no instructions are generated, a semantic error is reported. Often, the + * description is unwieldy, since it comprises a list of reasons each pattern was pruned. From the + * user side, it is usually sufficient to say, "sorry." From the language developer side, it may be + * useful to manually reconstruct the prototype and discover the conflicts. To that end, the + * implementation includes optional diagnostics, but even then, decoding them takes some familiarity + * and expertise. */ public class SleighAssemblerBuilder implements AssemblerBuilder { - protected static final DbgTimer dbg = SystemUtilities.isInTestingBatchMode() ? DbgTimer.INACTIVE : DbgTimer.ACTIVE; + protected static final DbgTimer dbg = + SystemUtilities.isInTestingBatchMode() ? DbgTimer.INACTIVE : DbgTimer.ACTIVE; protected SleighLanguage lang; protected AssemblyGrammar grammar; @@ -220,6 +429,7 @@ public class SleighAssemblerBuilder implements AssemblerBuilder { /** * Convert the given operand symbol to an {@link AssemblySymbol} * + *

* For subtables, this results in a non-terminal, for all others, the result in a terminal. * * @param cons the constructor to which the operand belongs @@ -242,7 +452,9 @@ public class SleighAssemblerBuilder implements AssemblerBuilder { return built; } if (defsym == null) { - built = new AssemblyNumericTerminal(name, getBitSize(cons, opsym)); + HandleTpl htpl = getHandleTpl(cons, opsym); + built = htpl == null ? new AssemblyNumericTerminal(name, 0, null) + : new AssemblyNumericTerminal(name, htpl.getSize(), htpl.getAddressSpace()); } else if (defsym instanceof SubtableSymbol) { built = new AssemblyNonTerminal(name); @@ -268,39 +480,40 @@ public class SleighAssemblerBuilder implements AssemblerBuilder { } /** - * Obtain the size in bits of a textual operand. + * Obtain the p-code result handle for the given operand * - * This is a little odd, since the variables in pattern expressions do not have an explicit - * size. However, the value exported by a constructor's pCode may have an explicit size given - * (in bytes). Thus, there is a special case, where a constructor prints just one operand and - * exports that same operand with an explicit size. In that case, the size of the operand is - * printed according to that exported size. + *

+ * This handles a special case, where a constructor prints just one operand and exports that + * same operand, often with an explicit size, or as an address in a given space. In such cases, + * the listing displays that operand according to that exported size. * - * For disassembly, this information is used simply to truncate the bits before they are - * displayed. For assembly, we must do two things: 1) Ensure that the provided value fits in the - * given size, and 2) Mask the goal when solving the pattern expression for the operand. + *

+ * For assembly, this gives a few opportunities: 1) We can/must ensure the specified value fits, + * by checking the size. 2) We can/must mask the goal when solving the defining pattern + * expression for the operand. 3) We can/must check that a label's address space matches that + * represented by the operand, when used for a numeric terminal. * * @param cons the constructor from which the production is being derived * @param opsym the operand symbol corresponding to the grammatical symbol, whose size we wish * to determine. * @return the size of the operand in bits */ - protected int getBitSize(Constructor cons, OperandSymbol opsym) { + protected HandleTpl getHandleTpl(Constructor cons, OperandSymbol opsym) { ConstructTpl ctpl = cons.getTempl(); if (null == ctpl) { // No pcode, no size specification - return 0; + return null; } HandleTpl htpl = ctpl.getResult(); if (null == htpl) { // If nothing is exported, the size is unspecified - return 0; + return null; } if (opsym.getIndex() != htpl.getOffsetOperandIndex()) { // If the export is not of the same operand, it does not specify its size - return 0; + return null; } - return htpl.getSize(); + return htpl; } /** @@ -326,30 +539,10 @@ public class SleighAssemblerBuilder implements AssemblerBuilder { if (sym.takesOperandIndex()) { indices.add(index); } - rhs.add(sym); + rhs.addSymbol(sym); } else { - String tstr = str.trim(); - if (tstr.equals("")) { - rhs.addWS(); - } - else { - char first = tstr.charAt(0); - if (!str.startsWith(tstr)) { - rhs.addWS(); - } - if (!Character.isLetterOrDigit(first)) { - rhs.addWS(); - } - rhs.add(new AssemblyStringTerminal(str.trim())); - char last = tstr.charAt(tstr.length() - 1); - if (!str.endsWith(tstr)) { - rhs.addWS(); - } - if (!Character.isLetterOrDigit(last)) { - rhs.addWS(); - } - } + rhs.addSeparators(str); } } } @@ -384,7 +577,7 @@ public class SleighAssemblerBuilder implements AssemblerBuilder { // Ignore. We don't do pcode. 
} else if (sym instanceof OperandSymbol) { - // Ignore. These are terminals, or will be produced by there defining symbol + // Ignore. These are terminals, or will be produced by their defining symbols } else if (sym instanceof ValueSymbol) { // Ignore. These are now terminals diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractBinaryExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractBinaryExpressionSolver.java index 9c92585a26..2a4222597c 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractBinaryExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractBinaryExpressionSolver.java @@ -19,12 +19,12 @@ import java.util.Map; import java.util.Set; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import ghidra.app.plugin.processors.sleigh.expression.BinaryExpression; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; /** - * A solver that handles expressions of the form A [OP] B + * A solver that handles expressions of the form {@code A [OP] B} * * @param the type of expression solved (the operator) */ @@ -37,10 +37,10 @@ public abstract class AbstractBinaryExpressionSolver @Override public AssemblyResolution solve(T exp, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, Set hints, - String description) throws NeedsBackfillException { - MaskedLong lval = solver.getValue(exp.getLeft(), vals, res, cur); - MaskedLong rval = solver.getValue(exp.getRight(), vals, res, cur); + AssemblyResolvedPatterns cur, Set hints, String description) + throws NeedsBackfillException { + MaskedLong lval = solver.getValue(exp.getLeft(), vals, 
cur); + MaskedLong rval = solver.getValue(exp.getRight(), vals, cur); if (lval != null && !lval.isFullyDefined()) { if (!lval.isFullyUndefined()) { @@ -61,23 +61,23 @@ public abstract class AbstractBinaryExpressionSolver return ConstantValueSolver.checkConstAgrees(cval, goal, description); } else if (lval != null) { - return solveRightSide(exp.getRight(), lval, goal, vals, res, cur, hints, + return solveRightSide(exp.getRight(), lval, goal, vals, cur, hints, description); } else if (rval != null) { - return solveLeftSide(exp.getLeft(), rval, goal, vals, res, cur, hints, description); + return solveLeftSide(exp.getLeft(), rval, goal, vals, cur, hints, description); } else { // Each solver may provide a strategy for solving expression where both sides are // variable, e.g., two fields being concatenated via OR. - return solveTwoSided(exp, goal, vals, res, cur, hints, description); + return solveTwoSided(exp, goal, vals, cur, hints, description); } } catch (NeedsBackfillException e) { throw e; } catch (SolverException e) { - return AssemblyResolution.error(e.getMessage(), description, null); + return AssemblyResolution.error(e.getMessage(), description); } catch (AssertionError e) { dbg.println("While solving: " + exp + " (" + description + ")"); @@ -86,30 +86,30 @@ public abstract class AbstractBinaryExpressionSolver } protected AssemblyResolution solveLeftSide(PatternExpression lexp, MaskedLong rval, - MaskedLong goal, Map vals, Map res, - AssemblyResolvedConstructor cur, Set hints, String description) + MaskedLong goal, Map vals, AssemblyResolvedPatterns cur, + Set hints, String description) throws NeedsBackfillException, SolverException { - return solver.solve(lexp, computeLeft(rval, goal), vals, res, cur, hints, description); + return solver.solve(lexp, computeLeft(rval, goal), vals, cur, hints, description); } protected AssemblyResolution solveRightSide(PatternExpression rexp, MaskedLong lval, - MaskedLong goal, Map vals, Map res, - AssemblyResolvedConstructor 
cur, Set hints, String description) + MaskedLong goal, Map vals, AssemblyResolvedPatterns cur, + Set hints, String description) throws NeedsBackfillException, SolverException { - return solver.solve(rexp, computeRight(lval, goal), vals, res, cur, hints, description); + return solver.solve(rexp, computeRight(lval, goal), vals, cur, hints, description); } protected AssemblyResolution solveTwoSided(T exp, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, Set hints, - String description) throws NeedsBackfillException, SolverException { + AssemblyResolvedPatterns cur, Set hints, String description) + throws NeedsBackfillException, SolverException { throw new NeedsBackfillException("_two_sided_"); } @Override - public MaskedLong getValue(T exp, Map vals, Map res, - AssemblyResolvedConstructor cur) throws NeedsBackfillException { - MaskedLong lval = solver.getValue(exp.getLeft(), vals, res, cur); - MaskedLong rval = solver.getValue(exp.getRight(), vals, res, cur); + public MaskedLong getValue(T exp, Map vals, AssemblyResolvedPatterns cur) + throws NeedsBackfillException { + MaskedLong lval = solver.getValue(exp.getLeft(), vals, cur); + MaskedLong rval = solver.getValue(exp.getRight(), vals, cur); if (lval != null && rval != null) { MaskedLong cval = compute(lval, rval); return cval; @@ -130,7 +130,9 @@ public abstract class AbstractBinaryExpressionSolver /** * Compute the right-hand-side value given that the result and the left are known * - * NOTE: Assumes commutativity by default + *

+ * NOTE: Assumes commutativity by default + * * @param lval the left-hand-side value * @param goal the result * @return the right-hand-side value solution @@ -150,16 +152,17 @@ public abstract class AbstractBinaryExpressionSolver public abstract MaskedLong compute(MaskedLong lval, MaskedLong rval); @Override - public int getInstructionLength(T exp, Map res) { - int ll = solver.getInstructionLength(exp.getLeft(), res); - int lr = solver.getInstructionLength(exp.getRight(), res); + public int getInstructionLength(T exp) { + int ll = solver.getInstructionLength(exp.getLeft()); + int lr = solver.getInstructionLength(exp.getRight()); return Math.max(ll, lr); } @Override - public MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc) { - MaskedLong lval = solver.valueForResolution(exp.getLeft(), rc); - MaskedLong rval = solver.valueForResolution(exp.getRight(), rc); + public MaskedLong valueForResolution(T exp, Map vals, + AssemblyResolvedPatterns rc) { + MaskedLong lval = solver.valueForResolution(exp.getLeft(), vals, rc); + MaskedLong rval = solver.valueForResolution(exp.getRight(), vals, rc); return compute(lval, rval); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractExpressionSolver.java index a00a640a09..d2ba1886ab 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractExpressionSolver.java @@ -19,7 +19,7 @@ import java.util.Map; import java.util.Set; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import 
ghidra.app.plugin.assembler.sleigh.util.DbgTimer; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; @@ -49,14 +49,13 @@ public abstract class AbstractExpressionSolver { * @param exp the expression to solve * @param goal the desired value of the expression * @param vals values of defined symbols - * @param res the results of subconstructor resolutions (used for lengths) * @param hints describes techniques applied by calling solvers * @param description the description to give to resolved solutions * @return the resolution * @throws NeedsBackfillException if the expression refers to an undefined symbol */ public abstract AssemblyResolution solve(T exp, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, Set hints, + AssemblyResolvedPatterns cur, Set hints, String description) throws NeedsBackfillException; /** @@ -64,33 +63,34 @@ public abstract class AbstractExpressionSolver { * * @param exp the expression * @param vals values of defined symbols - * @param res the results of subconstructor resolutions (used for lengths) * @return the constant value, or null if it depends on a variable * @throws NeedsBackfillException if the expression refers to an undefined symbol */ - public abstract MaskedLong getValue(T exp, Map vals, Map res, - AssemblyResolvedConstructor cur) throws NeedsBackfillException; + public abstract MaskedLong getValue(T exp, Map vals, + AssemblyResolvedPatterns cur) throws NeedsBackfillException; /** * Determines the length of the subconstructor that would be returned had the expression not * depended on an undefined symbol. * + *

* This is used by the backfilling process to ensure values are written to the correct offset * * @param exp the expression - * @param res the results of subconstructor resolutions (used for lengths) * @return the length of filled in token field(s). */ - public abstract int getInstructionLength(T exp, Map res); + public abstract int getInstructionLength(T exp); /** * Compute the value of the expression given the (possibly-intermediate) resolution * * @param exp the expression to evaluate + * @param vals values of defined symbols * @param rc the resolution on which to evaluate it * @return the result */ - public abstract MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc); + public abstract MaskedLong valueForResolution(T exp, Map vals, + AssemblyResolvedPatterns rc); /** * Register this particular solver with the general expression solver diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractUnaryExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractUnaryExpressionSolver.java index c673ee10ea..9d68e92bdf 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractUnaryExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/AbstractUnaryExpressionSolver.java @@ -19,11 +19,11 @@ import java.util.Map; import java.util.Set; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import ghidra.app.plugin.processors.sleigh.expression.UnaryExpression; /** - * A solver that handles expressions of the form [OP]A + * A solver that handles expressions of the form {@code [OP]A} * * @param the type of expression solved (the operator) */ @@ -36,9 +36,9 @@ public abstract class 
AbstractUnaryExpressionSolver @Override public AssemblyResolution solve(T exp, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, Set hints, - String description) throws NeedsBackfillException { - MaskedLong uval = solver.getValue(exp.getUnary(), vals, res, cur); + AssemblyResolvedPatterns cur, Set hints, String description) + throws NeedsBackfillException { + MaskedLong uval = solver.getValue(exp.getUnary(), vals, cur); try { if (uval != null && uval.isFullyDefined()) { MaskedLong cval = compute(uval); @@ -46,7 +46,7 @@ public abstract class AbstractUnaryExpressionSolver return ConstantValueSolver.checkConstAgrees(cval, goal, description); } } - return solver.solve(exp.getUnary(), computeInverse(goal), vals, res, cur, hints, + return solver.solve(exp.getUnary(), computeInverse(goal), vals, cur, hints, description); } /* @@ -60,9 +60,9 @@ public abstract class AbstractUnaryExpressionSolver } @Override - public MaskedLong getValue(T exp, Map vals, Map res, - AssemblyResolvedConstructor cur) throws NeedsBackfillException { - MaskedLong val = solver.getValue(exp.getUnary(), vals, res, cur); + public MaskedLong getValue(T exp, Map vals, AssemblyResolvedPatterns cur) + throws NeedsBackfillException { + MaskedLong val = solver.getValue(exp.getUnary(), vals, cur); if (val != null) { return compute(val); } @@ -72,7 +72,9 @@ public abstract class AbstractUnaryExpressionSolver /** * Compute the input value given that the result is known * - * NOTE: Assumes an involution by default + *

+ * NOTE: Assumes an involution by default + * * @param goal the result * @return the input value solution */ @@ -89,13 +91,14 @@ public abstract class AbstractUnaryExpressionSolver public abstract MaskedLong compute(MaskedLong val); @Override - public int getInstructionLength(T exp, Map res) { - return solver.getInstructionLength(exp.getUnary(), res); + public int getInstructionLength(T exp) { + return solver.getInstructionLength(exp.getUnary()); } @Override - public MaskedLong valueForResolution(T exp, AssemblyResolvedConstructor rc) { - MaskedLong val = solver.valueForResolution(exp.getUnary(), rc); + public MaskedLong valueForResolution(T exp, Map vals, + AssemblyResolvedPatterns rc) { + MaskedLong val = solver.valueForResolution(exp.getUnary(), vals, rc); return compute(val); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/ConstantValueSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/ConstantValueSolver.java index 76c6b538ae..7bfff47f42 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/ConstantValueSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/ConstantValueSolver.java @@ -19,12 +19,13 @@ import java.util.Map; import java.util.Set; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import ghidra.app.plugin.processors.sleigh.expression.ConstantValue; /** * "Solves" constant expressions * + *

* Essentially, this either evaluates successfully when asked for a constant value, or checks that * the goal is equal to the constant. Otherwise, there is no solution. */ @@ -36,25 +37,26 @@ public class ConstantValueSolver extends AbstractExpressionSolver @Override public AssemblyResolution solve(ConstantValue cv, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, Set hints, + AssemblyResolvedPatterns cur, Set hints, String description) { - MaskedLong value = getValue(cv, vals, res, cur); + MaskedLong value = getValue(cv, vals, cur); return checkConstAgrees(value, goal, description); } @Override - public MaskedLong getValue(ConstantValue cv, Map vals, Map res, - AssemblyResolvedConstructor cur) { + public MaskedLong getValue(ConstantValue cv, Map vals, + AssemblyResolvedPatterns cur) { return MaskedLong.fromLong(cv.getValue()); } @Override - public int getInstructionLength(ConstantValue cv, Map res) { + public int getInstructionLength(ConstantValue cv) { return 0; } @Override - public MaskedLong valueForResolution(ConstantValue cv, AssemblyResolvedConstructor rc) { + public MaskedLong valueForResolution(ConstantValue cv, Map vals, + AssemblyResolvedPatterns rc) { return MaskedLong.fromLong(cv.getValue()); } @@ -62,9 +64,8 @@ public class ConstantValueSolver extends AbstractExpressionSolver String description) { if (!value.agrees(goal)) { return AssemblyResolution.error( - "Constant value " + value + " does not agree with child requirements", description, - null); + "Constant value " + value + " does not agree with child requirements", description); } - return AssemblyResolution.nop(description, null); + return AssemblyResolution.nop(description, null, null); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/ContextFieldSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/ContextFieldSolver.java index d1d9dd5d04..a232dd2677 100644 --- 
a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/ContextFieldSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/ContextFieldSolver.java @@ -24,6 +24,7 @@ import ghidra.app.plugin.processors.sleigh.expression.ContextField; /** * Solves expressions of a context register field * + *

* Essentially, this just encodes the goal into the field, if it can be represented in the given * space and format. Otherwise, there is no solution. */ @@ -35,33 +36,33 @@ public class ContextFieldSolver extends AbstractExpressionSolver { @Override public AssemblyResolution solve(ContextField cf, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, Set hints, - String description) { + AssemblyResolvedPatterns cur, Set hints, String description) { assert cf.minValue() == 0; // In case someone decides to do signedness there. if (!goal.isInRange(cf.maxValue(), cf.hasSignbit())) { return AssemblyResolution.error("Value " + goal + " is not valid for " + cf, - description, null); + description); } AssemblyPatternBlock block = AssemblyPatternBlock.fromContextField(cf, goal); - return AssemblyResolution.contextOnly(block, description, null); + return AssemblyResolution.contextOnly(block, description); } @Override - public MaskedLong getValue(ContextField cf, Map vals, Map res, - AssemblyResolvedConstructor cur) { + public MaskedLong getValue(ContextField cf, Map vals, + AssemblyResolvedPatterns cur) { if (cur == null) { return null; } - return valueForResolution(cf, cur); + return valueForResolution(cf, vals, cur); } @Override - public int getInstructionLength(ContextField cf, Map res) { + public int getInstructionLength(ContextField cf) { return 0; // this is a context field, not an instruction (token) field } @Override - public MaskedLong valueForResolution(ContextField cf, AssemblyResolvedConstructor rc) { + public MaskedLong valueForResolution(ContextField cf, Map vals, + AssemblyResolvedPatterns rc) { int size = cf.getByteEnd() - cf.getByteStart() + 1; MaskedLong res = rc.readContext(cf.getByteStart(), size); res = res.shiftRight(cf.getShift()); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/DefaultSolverHint.java 
b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/DefaultSolverHint.java index 20c2b9e774..9b3845103d 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/DefaultSolverHint.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/DefaultSolverHint.java @@ -24,8 +24,8 @@ public enum DefaultSolverHint implements SolverHint { */ GUESSING_REPETITION, /** - * A boolean or solver which matches a circular shift is solving the value having guessed a - * shift + * A boolean {@code or} solver which matches a circular shift is solving the value having + * guessed a shift */ GUESSING_CIRCULAR_SHIFT_AMOUNT, /** diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/DivExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/DivExpressionSolver.java index c53b5bdd21..21de40438e 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/DivExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/DivExpressionSolver.java @@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr; import ghidra.app.plugin.processors.sleigh.expression.DivExpression; /** - * Solves expressions of the form A / B + * Solves expressions of the form {@code A / B} */ public class DivExpressionSolver extends AbstractBinaryExpressionSolver { @@ -37,7 +37,8 @@ public class DivExpressionSolver extends AbstractBinaryExpressionSolver * Works like the constant solver, but takes the value of {@code inst_next}, which is given by the * assembly address and the resulting instruction length. * - * NOTE: This solver requires backfill. + *

+ * NOTE: This solver requires backfill, since the value of {@code inst_next} is not known + * until possible prefixes have been considered. */ public class EndInstructionValueSolver extends AbstractExpressionSolver { @@ -37,32 +40,38 @@ public class EndInstructionValueSolver extends AbstractExpressionSolver vals, - Map res, AssemblyResolvedConstructor cur, Set hints, - String description) { + AssemblyResolvedPatterns cur, Set hints, String description) { throw new AssertionError( "INTERNAL: Should never be asked to solve for " + AssemblyTreeResolver.INST_NEXT); } @Override public MaskedLong getValue(EndInstructionValue iv, Map vals, - Map res, AssemblyResolvedConstructor cur) - throws NeedsBackfillException { + AssemblyResolvedPatterns cur) throws NeedsBackfillException { Long instNext = vals.get(AssemblyTreeResolver.INST_NEXT); if (instNext == null) { throw new NeedsBackfillException(AssemblyTreeResolver.INST_NEXT); } - return MaskedLong.fromLong(vals.get(AssemblyTreeResolver.INST_NEXT)); + return MaskedLong.fromLong(instNext); } @Override - public int getInstructionLength(EndInstructionValue iv, Map res) { + public int getInstructionLength(EndInstructionValue iv) { return 0; } @Override - public MaskedLong valueForResolution(EndInstructionValue exp, AssemblyResolvedConstructor rc) { - // Would need to pass in symbol values, and perhaps consider child resolutions. - throw new UnsupportedOperationException( - "The solver should never ask for this value given a resolved constructor."); + public MaskedLong valueForResolution(EndInstructionValue exp, Map vals, + AssemblyResolvedPatterns rc) { + Long instNext = vals.get(AssemblyTreeResolver.INST_NEXT); + if (instNext == null) { + /** + * This method is used in forward state construction, so just leave unknown. This may + * cause unresolvable trees to get generated, but we can't know that until we try to + * resolve them. 
+ */ + return MaskedLong.UNKS; + } + return MaskedLong.fromLong(instNext); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/LeftShiftExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/LeftShiftExpressionSolver.java index 7db45517c0..2092fed3c0 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/LeftShiftExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/LeftShiftExpressionSolver.java @@ -19,12 +19,12 @@ import java.util.Map; import java.util.Set; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import ghidra.app.plugin.processors.sleigh.expression.LeftShiftExpression; import ghidra.util.Msg; /** - * {@literal Solves expressions of the form A << B} + * Solves expressions of the form {@code A << B} */ public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver { @@ -61,13 +61,12 @@ public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver vals, Map res, AssemblyResolvedConstructor cur, - Set hints, String description) - throws NeedsBackfillException, SolverException { + Map vals, AssemblyResolvedPatterns cur, Set hints, + String description) throws NeedsBackfillException, SolverException { // Do not guess the same parameter recursively if (hints.contains(DefaultSolverHint.GUESSING_LEFT_SHIFT_AMOUNT)) { // NOTE: Nested left shifts ought to be written as a left shift by a sum - return super.solveTwoSided(exp, goal, vals, res, cur, hints, description); + return super.solveTwoSided(exp, goal, vals, cur, hints, description); } // Count the number of zeros to the right, and consider this the maximum shift value // Any higher shift amount would 
produce too many zeros to the right @@ -76,24 +75,41 @@ public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver hintsWithLShift = SolverHint.with(hints, DefaultSolverHint.GUESSING_LEFT_SHIFT_AMOUNT); + if (maxShift == 64) { + // If the goal is 0s, then any shift will do, so long as the shifted value is 0 + try { + // NB. goal is already 0s, so just use it as subgoal for lhs of shift + AssemblyResolution lres = + solver.solve(exp.getLeft(), goal, vals, cur, hintsWithLShift, description); + if (lres.isError()) { + throw new SolverException("Solving left:=0 failed"); + } + // If this works, then the rhs can have any value, so nothing to solve for + return lres; + } + catch (SolverException | UnsupportedOperationException e) { + Msg.trace(this, "Trying left:=0 in shift resulted in " + e); + // Fall through to the guessing method + } + } for (int shift = maxShift; shift >= 0; shift--) { try { MaskedLong reqr = MaskedLong.fromLong(shift); MaskedLong reql = computeLeft(reqr, goal); AssemblyResolution lres = - solver.solve(exp.getLeft(), reql, vals, res, cur, hintsWithLShift, description); + solver.solve(exp.getLeft(), reql, vals, cur, hintsWithLShift, description); if (lres.isError()) { throw new SolverException("Solving left failed"); } AssemblyResolution rres = - solver.solve(exp.getRight(), reqr, vals, res, cur, hints, description); + solver.solve(exp.getRight(), reqr, vals, cur, hints, description); if (rres.isError()) { throw new SolverException("Solving right failed"); } - AssemblyResolvedConstructor lsol = (AssemblyResolvedConstructor) lres; - AssemblyResolvedConstructor rsol = (AssemblyResolvedConstructor) rres; - AssemblyResolvedConstructor sol = lsol.combine(rsol); + AssemblyResolvedPatterns lsol = (AssemblyResolvedPatterns) lres; + AssemblyResolvedPatterns rsol = (AssemblyResolvedPatterns) rres; + AssemblyResolvedPatterns sol = lsol.combine(rsol); if (sol == null) { throw new SolverException( "Left and right solutions conflict for shift=" 
+ shift); @@ -105,6 +121,6 @@ public class LeftShiftExpressionSolver extends AbstractBinaryExpressionSolver { /** * Create a masked value from a mask and a long * + *

* Any positions in {@code msk} set to 0 create an {@code x} in the corresponding position of * the result. Otherwise, the position takes the corresponding bit from {@code val}. * @@ -92,6 +93,7 @@ public class MaskedLong implements Comparable { /** * Get the mask as a long * + *

* Positions with a defined bit are {@code 1}; positions with an undefined bit are {@code 0}. * * @return the mask as a long @@ -126,6 +128,7 @@ public class MaskedLong implements Comparable { /** * Apply an additional mask to this masked long * + *

* Any {@code 0} bit in {@code msk} will result in an undefined bit in the result. {@code 1} * bits result in a copy of the corresponding bit in the result. * @@ -139,6 +142,7 @@ public class MaskedLong implements Comparable { /** * Sign extend the masked value, according to its mask, to a full long * + *

* The leftmost defined bit is taken as the sign bit, and extended to the left. * * @return the sign-extended masked long @@ -151,6 +155,7 @@ public class MaskedLong implements Comparable { /** * Zero extend the masked value, according to its mask, to a full long * + *

* All bits to the left of the leftmost defined bit are set to 0. * * @return the zero-extended masked long @@ -199,6 +204,7 @@ public class MaskedLong implements Comparable { /** * Combine this and another masked long into one, by taking defined bits from either * + *

* If this masked long agrees with the other, then the two are combined. For each bit position * in the result, the defined bit from either corresponding position is taken. If neither is * defined, then the position is undefined in the result. If both are defined, they must agree. @@ -217,6 +223,7 @@ public class MaskedLong implements Comparable { /** * Shift {@code size} bits {@code n} positions circularly in a given direction * + *

* The shifted bits are the least significant {@code size} bits. The remaining bits are * unaffected. * @@ -247,6 +254,7 @@ public class MaskedLong implements Comparable { /** * Shift {@code size} bits {@code n} positions circularly in a given direction * + *

* The shifted bits are the least significant {@code size} bits. The remaining bits are * unaffected. * @@ -265,6 +273,7 @@ public class MaskedLong implements Comparable { /** * Shift the bits {@code n} positions left * + *

* This implements both a signed and unsigned shift. * * @param n the number of positions. @@ -282,6 +291,7 @@ public class MaskedLong implements Comparable { /** * Shift the bits {@code n} positions left * + *

* This implements both a signed and unsigned shift. * * @param n the number of positions. @@ -297,6 +307,7 @@ public class MaskedLong implements Comparable { /** * Invert a left shift of {@code n} positions, that is shift right * + *

* This is different from a normal shift right, in that it inserts unknowns at the left. The * normal right shift inserts zeros or sign bits. Additionally, if any ones would fall off the * right, the inversion is undefined. @@ -319,6 +330,7 @@ public class MaskedLong implements Comparable { /** * Invert a left shift of {@code n} positions, that is shift right * + *

* This is different from a normal shift right, in that it inserts unknowns at the left. The * normal right shift inserts zeros or sign bits. Additionally, if any ones would fall off the * right, the inversion is undefined. @@ -337,6 +349,7 @@ public class MaskedLong implements Comparable { /** * Shift the bits arithmetically {@code n} positions right * + *

* This implements a signed shift. * * @param n the number of positions. @@ -352,6 +365,7 @@ public class MaskedLong implements Comparable { /** * Shift the bits arithmetically {@code n} positions right * + *

* This implements a signed shift. * * @param n the number of positions. @@ -367,6 +381,7 @@ public class MaskedLong implements Comparable { /** * Invert an arithmetic right shift of {@code n} positions, that is shift left * + *

* This is different from a normal shift left, in that it inserts unknowns at the right. The * normal left shift inserts zeros. Additionally, all bits that fall off the left must match the * resulting sign bit, or else the inversion is undefined. @@ -400,6 +415,7 @@ public class MaskedLong implements Comparable { /** * Invert an arithmetic right shift of {@code n} positions, that is shift left * + *

* This is different from a normal shift left, in that it inserts unknowns at the right. The * normal left shift inserts zeros. Additionally, all bits that fall off the left must match the * resulting sign bit, or else the inversion is undefined. @@ -418,6 +434,7 @@ public class MaskedLong implements Comparable { /** * Shift the bits logically {@code n} positions right * + *

* This implements an unsigned shift. * * @param n the number of positions. @@ -435,6 +452,7 @@ public class MaskedLong implements Comparable { /** * Shift the bits logically {@code n} positions right * + *

* This implements an unsigned shift. * * @param n the number of positions. @@ -451,6 +469,7 @@ public class MaskedLong implements Comparable { /** * Shift the bits positionally {@code n} positions right * + *

* This fills the left with unknown bits * * @param n @@ -463,6 +482,7 @@ public class MaskedLong implements Comparable { /** * Invert a logical right shift of {@code n} positions, that is shift left * + *

* This is different from a normal shift left, in that it inserts unknowns at the right. The * normal left shift inserts zeros. Additionally, if any ones would fall off the left, the * inversion is undefined. @@ -486,6 +506,7 @@ public class MaskedLong implements Comparable { /** * Invert a logical right shift of {@code n} positions, that is shift left * + *

* This is different from a normal shift left, in that it inserts unknowns at the right. The * normal left shift inserts zeros. Additionally, if any ones would fall off the left, the * inversion is undefined. @@ -504,6 +525,7 @@ public class MaskedLong implements Comparable { /** * Reverse the least significant {@code n} bytes * + *

* This interprets the bits as an {@code n}-byte value and changes the endianness. Any bits * outside of the interpretation are truncated, i.e., become unknown. * @@ -517,16 +539,17 @@ public class MaskedLong implements Comparable { /** * Compute the bitwise AND of this and another masked long * + *

* To handle unknown bits, the result is derived from the following truth table: * - *

{@literal
+	 * 
 	 *   0 x 1 <= A (this)
 	 * 0 0 0 0
 	 * x 0 x x
 	 * 1 0 x 1
 	 * ^
 	 * B (that)
-	 * }
+ *
* * @param that the other masked long ({@code B}). * @return the result. @@ -547,18 +570,19 @@ public class MaskedLong implements Comparable { /** * Solves the expression {@code A & B = C, for B, given C and A} + * *

* To handle unknown bits, the solution is derived from the following truth table, where * {@code *} indicates no solution: * - *

{@literal
+	 * 
 	 *   0 x 1 <= A (that)
 	 * 0 x x 0
 	 * x x x x
 	 * 1 * 1 1
 	 * ^
 	 * B (this)
-	 * }
+ *
* * @param that the other masked long ({@code B}). * @return the result. @@ -587,16 +611,17 @@ public class MaskedLong implements Comparable { /** * Compute the bitwise OR of this and another masked long * + *

* To handle unknown bits, the result is derived from the following truth table: * - *

{@literal
+	 * 
 	 *   0 x 1 <= A (this)
 	 * 0 0 x 1
 	 * x x x 1
 	 * 1 1 1 1
 	 * ^
 	 * B (that)
-	 * }
+ *
* * @param that the other masked long ({@code B}). * @return the result. @@ -620,17 +645,18 @@ public class MaskedLong implements Comparable { /** * Solves the expression A | B = C, for B, given C and A * + *

* To handle unknown bits, the solution is derived from the following truth table, where * {@code *} indicates no solution: * - *

{@literal
+	 * 
 	 *   0 x 1 <= A (that)
 	 * 0 0 0 *
 	 * x x x x
 	 * 1 1 x x
 	 * ^
 	 * B (this)
-	 * }
+ *
* * @param that the other masked long ({@code B}). * @return the result. @@ -658,16 +684,17 @@ public class MaskedLong implements Comparable { /** * Compute the bitwise XOR of this and another masked long * + *

* To handle unknown bits, the result is derived from the following truth table: * - *

{@literal
+	 * 
 	 *   0 x 1 <= A (this)
 	 * 0 0 x 1
 	 * x x x x
 	 * 1 1 x 0
 	 * ^
 	 * B (that)
-	 * }
+ *
* * @param that the other masked long ({@code B}). * @return the result. @@ -696,12 +723,13 @@ public class MaskedLong implements Comparable { /** * Compute the bitwise NOT * + *

* To handle unknown bits, the result is derived from the following truth table: * - *

{@literal
+	 * 
 	 * 0 x 1 <= A (this)
 	 * 1 x 0
-	 * }
+ *
* * @return the result. */ @@ -769,7 +797,7 @@ public class MaskedLong implements Comparable { if (lmv == 2 || rmv == 2) { return 2; } - else if (lmv == 3 || rmv == 3) { + else if (lmv == 3 && rmv == 3) { return 3; } return 0; @@ -893,6 +921,7 @@ public class MaskedLong implements Comparable { /** * Compute the arithmetic quotient as a solution to unsigned multiplication * + *

* This is slightly different than {@link #divideUnsigned(MaskedLong)} in its treatment of * unknowns. * @@ -924,6 +953,7 @@ public class MaskedLong implements Comparable { /** * Checks if this and another masked long agree * + *

* Two masked longs agree iff their corresponding defined bit positions are equal. Where either * or both positions are undefined, no check is applied. In the case that both masked longs are * fully-defined, this is the same as an equality check on the values. @@ -942,6 +972,7 @@ public class MaskedLong implements Comparable { /** * Checks if this and a long agree * + *

* The masked long agrees with the given long iff the masked long's defined bit positions agree * with the corresponding bit positions in the given long. Where there are undefined bits, no * check is applied. In the case that the masked long is fully-defined, this is the same as an @@ -978,10 +1009,12 @@ public class MaskedLong implements Comparable { /** * Check if the masked value falls within a given range * + *

* The range is defined by a maximum and a signedness. The maximum must be one less than a * positive power of 2. In other words, it defines a maximum number of bits, including the sign * bit if applicable. * + *

* The defined bits of this masked long are then checked to fall in the given range. The * effective value is derived by sign/zero extending the value according to its mask. In * general, if any {@code 1} bits exist outside of the given max, the value is rejected, unless @@ -1013,6 +1046,7 @@ public class MaskedLong implements Comparable { /** * "Compare" two masked longs * + *

* This is not meant to reflect a numerical comparison. Rather, this is just to impose an * ordering for the sake of storing these in sorted collections. */ @@ -1038,6 +1072,7 @@ public class MaskedLong implements Comparable { /** * Check for equality * + *

* This will only return true if the other object is a masked long, even if this one is * fully-defined, and the value is equal to a given long (or {@link Long}). The other masked * long must have the same mask and value to be considered equal. For other sorts of "equality" diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/MinusExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/MinusExpressionSolver.java index 141fa6f63f..067e6da8d6 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/MinusExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/MinusExpressionSolver.java @@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr; import ghidra.app.plugin.processors.sleigh.expression.MinusExpression; /** - * Solves expressions of the form -A + * Solves expressions of the form {@code -A} */ public class MinusExpressionSolver extends AbstractUnaryExpressionSolver { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/MultExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/MultExpressionSolver.java index e40f2ec39c..7e7e435add 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/MultExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/MultExpressionSolver.java @@ -19,12 +19,12 @@ import java.util.Map; import java.util.Set; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import ghidra.app.plugin.processors.sleigh.expression.MultExpression; import 
ghidra.app.plugin.processors.sleigh.expression.PatternExpression; /** - * Solves expressions of the form A * B + * Solves expressions of the form {@code A * B} */ public class MultExpressionSolver extends AbstractBinaryExpressionSolver { @@ -103,25 +103,24 @@ public class MultExpressionSolver extends AbstractBinaryExpressionSolver vals, Map res, - AssemblyResolvedConstructor cur, Set hints, String description) - throws NeedsBackfillException { + MaskedLong goal, Map vals, AssemblyResolvedPatterns cur, + Set hints, String description) throws NeedsBackfillException { MaskedLong lval = repGoal.divideUnsigned(rval); if (lval.multiply(rval).agrees(goal)) { - return solver.solve(lexp, lval, vals, res, cur, hints, description); + return solver.solve(lexp, lval, vals, cur, hints, description); } return null; } @Override protected AssemblyResolution solveLeftSide(PatternExpression lexp, MaskedLong rval, - MaskedLong goal, Map vals, Map res, - AssemblyResolvedConstructor cur, Set hints, String description) + MaskedLong goal, Map vals, AssemblyResolvedPatterns cur, + Set hints, String description) throws NeedsBackfillException, SolverException { // Try the usual case first ResultTracker tracker = new ResultTracker(); AssemblyResolution sol = tracker.trySolverFunc(() -> { - return super.solveLeftSide(lexp, rval, goal, vals, res, cur, hints, description); + return super.solveLeftSide(lexp, rval, goal, vals, cur, hints, description); }); if (sol != null) { return sol; @@ -151,8 +150,8 @@ public class MultExpressionSolver extends AbstractBinaryExpressionSolver 0) { MaskedLong repRightGoal = MaskedLong.fromMaskAndValue(repMsk, repVal); sol = tracker.trySolverFunc(() -> { - return tryRep(lexp, rval, repRightGoal, goal, vals, res, cur, - hintsWithRepetition, description); + return tryRep(lexp, rval, repRightGoal, goal, vals, cur, hintsWithRepetition, + description); }); if (sol != null) { return sol; @@ -169,8 +168,8 @@ public class MultExpressionSolver extends 
AbstractBinaryExpressionSolver>> i; MaskedLong repLeftGoal = MaskedLong.fromMaskAndValue(repMsk, repVal); sol = tracker.trySolverFunc(() -> { - return tryRep(lexp, rval, repLeftGoal, goal, vals, res, cur, - hintsWithRepetition, description); + return tryRep(lexp, rval, repLeftGoal, goal, vals, cur, hintsWithRepetition, + description); }); if (sol != null) { return sol; @@ -182,10 +181,10 @@ public class MultExpressionSolver extends AbstractBinaryExpressionSolver vals, Map res, - AssemblyResolvedConstructor cur, Set hints, String description) + MaskedLong goal, Map vals, AssemblyResolvedPatterns cur, + Set hints, String description) throws NeedsBackfillException, SolverException { - return solveLeftSide(rexp, lval, goal, vals, res, cur, hints, description); + return solveLeftSide(rexp, lval, goal, vals, cur, hints, description); } @Override diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/NeedsBackfillException.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/NeedsBackfillException.java index c7665fdc71..3cf1031d33 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/NeedsBackfillException.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/NeedsBackfillException.java @@ -18,13 +18,15 @@ package ghidra.app.plugin.assembler.sleigh.expr; /** * An exception to indicate that the solution of an expression is not yet known * + *

* Furthermore, it cannot be determined whether or not the expression is even solvable. When this - * exception is thrown, a backfill record is placed on the encoded resolution indicating that + * exception is thrown, a backfill record is placed on the encoded resolution indicating that the * resolver must attempt to solve the expression again, once the encoding is otherwise complete. * This is needed, most notably, when an encoding depends on the address of the next * instruction, because the length of the current instruction is not known until resolution has * finished. * + *

* Backfill becomes a possibility when an expression depends on a symbol that is not (yet) defined. * Thus, as a matter of good record keeping, the exception takes the name of the missing symbol. */ @@ -33,6 +35,7 @@ public class NeedsBackfillException extends SolverException { /** * Construct a backfill exception, resulting from the given missing symbol name + * * @param symbol the missing symbol name */ public NeedsBackfillException(String symbol) { @@ -42,6 +45,7 @@ public class NeedsBackfillException extends SolverException { /** * Retrieve the missing symbol name from the original solution attempt + * * @return the missing symbol name */ public String getSymbol() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/NotExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/NotExpressionSolver.java index 4c228c05e8..62e09878e2 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/NotExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/NotExpressionSolver.java @@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr; import ghidra.app.plugin.processors.sleigh.expression.NotExpression; /** - * Solves expressions of the form ~A + * Solves expressions of the form {@code ~A} */ public class NotExpressionSolver extends AbstractUnaryExpressionSolver { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/OperandValueSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/OperandValueSolver.java index 23add124ca..6e6345a7f3 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/OperandValueSolver.java +++ 
b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/OperandValueSolver.java @@ -28,6 +28,7 @@ import ghidra.app.plugin.processors.sleigh.symbol.TripleSymbol; /** * Solves expressions of an operand value * + *

* These are a sort of named sub-expression, but they may also specify a shift in encoding. */ public class OperandValueSolver extends AbstractExpressionSolver { @@ -39,12 +40,13 @@ public class OperandValueSolver extends AbstractExpressionSolver { /** * Obtains the "defining expression" * + *

* This is either the symbols assigned defining expression, or the expression associated with * its defining symbol. * * @return the defining expression, or null if neither is available */ - protected PatternExpression getDefiningExpression(OperandSymbol sym) { + public static PatternExpression getDefiningExpression(OperandSymbol sym) { PatternExpression patexp = sym.getDefiningExpression(); if (patexp != null) { return patexp; @@ -59,62 +61,63 @@ public class OperandValueSolver extends AbstractExpressionSolver { @Override public AssemblyResolution solve(OperandValue ov, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, Set hints, - String description) throws NeedsBackfillException { + AssemblyResolvedPatterns cur, Set hints, String description) + throws NeedsBackfillException { Constructor cons = ov.getConstructor(); OperandSymbol sym = cons.getOperand(ov.getIndex()); PatternExpression patexp = getDefiningExpression(sym); if (patexp == null) { if (goal.equals(MaskedLong.ZERO)) { - return AssemblyResolution.nop(description, null); + return AssemblyResolution.nop(description, null, null); } return AssemblyResolution.error("Operand " + sym.getName() + - " is undefined and does not agree with child requirements", description, null); + " is undefined and does not agree with child requirements", description); } - AssemblyResolution result = solver.solve(patexp, goal, vals, res, cur, hints, description); + AssemblyResolution result = solver.solve(patexp, goal, vals, cur, hints, description); if (result.isError()) { AssemblyResolvedError err = (AssemblyResolvedError) result; return AssemblyResolution.error(err.getError(), "Solution to " + sym.getName() + " := " + goal + " = " + patexp, - List.of(result)); + List.of(result), null); } // TODO: Shifting here seems like a hack to me. 
// I assume this only comes at the top of an expression - AssemblyResolvedConstructor con = (AssemblyResolvedConstructor) result; - int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res); + AssemblyResolvedPatterns con = (AssemblyResolvedPatterns) result; + int shamt = AssemblyTreeResolver.computeOffset(sym, cons); return con.shift(shamt); } @Override - public MaskedLong getValue(OperandValue ov, Map vals, Map res, - AssemblyResolvedConstructor cur) throws NeedsBackfillException { + public MaskedLong getValue(OperandValue ov, Map vals, + AssemblyResolvedPatterns cur) throws NeedsBackfillException { Constructor cons = ov.getConstructor(); OperandSymbol sym = cons.getOperand(ov.getIndex()); PatternExpression patexp = getDefiningExpression(sym); if (patexp == null) { return MaskedLong.ZERO; } - int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res); + int shamt = AssemblyTreeResolver.computeOffset(sym, cons); cur = cur == null ? null : cur.truncate(shamt); - MaskedLong result = solver.getValue(patexp, vals, res, cur); + MaskedLong result = solver.getValue(patexp, vals, cur); return result; } @Override - public int getInstructionLength(OperandValue ov, Map res) { + public int getInstructionLength(OperandValue ov) { Constructor cons = ov.getConstructor(); OperandSymbol sym = cons.getOperand(ov.getIndex()); PatternExpression patexp = sym.getDefiningExpression(); if (patexp == null) { return 0; } - int length = solver.getInstructionLength(patexp, res); - int shamt = AssemblyTreeResolver.computeOffset(sym, cons, res); + int length = solver.getInstructionLength(patexp); + int shamt = AssemblyTreeResolver.computeOffset(sym, cons); return length + shamt; } @Override - public MaskedLong valueForResolution(OperandValue ov, AssemblyResolvedConstructor rc) { + public MaskedLong valueForResolution(OperandValue ov, Map vals, + AssemblyResolvedPatterns rc) { Constructor cons = ov.getConstructor(); OperandSymbol sym = cons.getOperand(ov.getIndex()); PatternExpression 
patexp = sym.getDefiningExpression(); @@ -135,7 +138,7 @@ public class OperandValueSolver extends AbstractExpressionSolver { // Since I'm using this just for context, ignore shifting for now. //int shamt = AssemblyTreeResolver.computeOffset(sym, cons, rc.children); // Children would be null here, anyway. - return solver.valueForResolution(patexp, rc); + return solver.valueForResolution(patexp, vals, rc); // NOTE: To be paranoid, I could check for the existence of TokenField in the expression // And also check if a shift would be performed. } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/OrExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/OrExpressionSolver.java index e33e72b27b..84955d3596 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/OrExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/OrExpressionSolver.java @@ -17,35 +17,27 @@ package ghidra.app.plugin.assembler.sleigh.expr; import java.util.*; +import ghidra.app.plugin.assembler.sleigh.expr.match.ExpressionMatcher; import ghidra.app.plugin.assembler.sleigh.sem.*; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx; -import ghidra.app.plugin.processors.sleigh.ParserWalker; -import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.plugin.processors.sleigh.expression.*; -import ghidra.program.model.mem.MemoryAccessException; import ghidra.util.Msg; -import ghidra.xml.XmlPullParser; /** - * Solves expressions of the form A | B + * Solves expressions of the form {@code A | B} */ public class OrExpressionSolver extends AbstractBinaryExpressionSolver { - static final PatternExpression DUMMY = new PatternExpression() { - @Override - public long getValue(ParserWalker walker) throws MemoryAccessException { - return 0; - } + protected static class Matchers 
implements ExpressionMatcher.Context { + protected ExpressionMatcher val = var(ConstantValue.class); + protected ExpressionMatcher size = var(ConstantValue.class); + protected ExpressionMatcher fld = fldSz(size); - @Override - public void restoreXml(XmlPullParser parser, SleighLanguage lang) { - // Dummy intentionally left empty - } + protected ExpressionMatcher neqConst = or( + and(shr(sub(opnd(fld), val), size), cv(1)), + and(shr(sub(val, opnd(fld)), size), cv(1))); + } - @Override - public String toString() { - return null; - } - }; + protected static final Matchers MATCHERS = new Matchers(); public OrExpressionSolver() { super(OrExpression.class); @@ -62,8 +54,8 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver vals, Map res, AssemblyResolvedConstructor cur, - Set hints, String description) throws SolverException { + Map vals, AssemblyResolvedPatterns cur, Set hints, + String description) throws SolverException { /* * If OR is being used to concatenate fields, then we can solve with some symbolic * manipulation. 
We'll descend to see if this is a tree of ORs with SHIFTs or fields at the @@ -71,12 +63,12 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver fields = new TreeMap<>(); - fields.put(0L, new ConstantValue(0)); - collectComponentsOr(exp, 0, fields, vals, res, cur); + collectComponentsOr(exp, 0, fields, vals, cur); + fields.computeIfAbsent(0L, __ -> new ConstantValue(0)); fields.put(64L, new ConstantValue(0)); long lo = 0; PatternExpression fieldExp = null; - AssemblyResolvedConstructor result = AssemblyResolution.nop(description, null); + AssemblyResolvedPatterns result = AssemblyResolution.nop(description); try (DbgCtx dc = dbg.start("Trying solution of field catenation")) { dbg.println("Original: " + goal + ":= " + exp); for (Map.Entry ent : fields.entrySet()) { @@ -89,12 +81,12 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver vals, Map res, AssemblyResolvedConstructor cur, - Set hints, String description) throws SolverException { + Map vals, AssemblyResolvedPatterns cur, Set hints, + String description) throws SolverException { // If OR is being used to accomplish a circular shift, then we can apply a clever solver. 
// We'll match against the patterns: (f << (C - g)) | (f >> g) // (f >> (C - g)) | (f << g) @@ -144,7 +136,7 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver vals, - Map res, AssemblyResolvedConstructor cur, Set hints, - String description) throws NeedsBackfillException, SolverException { - MaskedLong valValue = solver.getValue(expValue, vals, res, cur); - MaskedLong valShift = solver.getValue(expShift, vals, res, cur); + AssemblyResolvedPatterns cur, Set hints, String description) + throws NeedsBackfillException, SolverException { + MaskedLong valValue = solver.getValue(expValue, vals, cur); + MaskedLong valShift = solver.getValue(expShift, vals, cur); if (valValue != null && !valValue.isFullyDefined()) { if (!valValue.isFullyUndefined()) { @@ -202,12 +194,12 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver, PatternExpression> match = MATCHERS.neqConst.match(exp); + if (match != null) { + long value = MATCHERS.val.get(match).getValue(); + PatternValue field = MATCHERS.fld.get(match); + // Solve for equals, then either return that, or forbid it, depending on goal + AssemblyResolution solution = + solver.solve(field, MaskedLong.fromLong(value), vals, cur, hints, description); + if (goal.equals(MaskedLong.fromMaskAndValue(0, 1))) { + return solution; + } + if (goal.equals(MaskedLong.fromMaskAndValue(1, 1))) { + if (solution.isError()) { + return AssemblyResolution.nop(description); + } + if (solution.isBackfill()) { + throw new AssertionError(); + } + AssemblyResolvedPatterns forbidden = (AssemblyResolvedPatterns) solution; + forbidden = forbidden.withDescription("Solved 'not equals'"); + return AssemblyResolution.nop(description).withForbids(Set.of(forbidden)); + } + } + throw new SolverException("Could not solve two-sided OR"); } void collectComponents(PatternExpression exp, long shift, Map components, Map vals, - Map res, AssemblyResolvedConstructor cur) throws SolverException { + AssemblyResolvedPatterns cur) 
throws SolverException { if (exp instanceof OrExpression) { - collectComponentsOr((OrExpression) exp, shift, components, vals, res, cur); + collectComponentsOr((OrExpression) exp, shift, components, vals, cur); } else if (exp instanceof LeftShiftExpression) { - collectComponentsLeft((LeftShiftExpression) exp, shift, components, vals, res, cur); + collectComponentsLeft((LeftShiftExpression) exp, shift, components, vals, cur); } else if (exp instanceof RightShiftExpression) { - collectComponentsRight((RightShiftExpression) exp, shift, components, vals, res, cur); + collectComponentsRight((RightShiftExpression) exp, shift, components, vals, cur); } else { assert shift < 64; - components.put(shift, exp); + PatternExpression conflict = components.put(shift, exp); + if (conflict != null) { + throw new SolverException("Two 'fields' at the same shift indicates conflict"); + } } } void collectComponentsOr(OrExpression exp, long shift, Map components, - Map vals, Map res, AssemblyResolvedConstructor cur) + Map vals, AssemblyResolvedPatterns cur) throws SolverException { - collectComponents(exp.getLeft(), shift, components, vals, res, cur); - collectComponents(exp.getRight(), shift, components, vals, res, cur); + collectComponents(exp.getLeft(), shift, components, vals, cur); + collectComponents(exp.getRight(), shift, components, vals, cur); } void collectComponentsLeft(LeftShiftExpression exp, long shift, Map components, Map vals, - Map res, AssemblyResolvedConstructor cur) throws SolverException { + AssemblyResolvedPatterns cur) throws SolverException { MaskedLong adj; try { - adj = solver.getValue(exp.getRight(), vals, res, cur); + adj = solver.getValue(exp.getRight(), vals, cur); } catch (NeedsBackfillException e) { throw new SolverException("Variable shifts break field catenation solver", e); @@ -335,15 +353,15 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver components, Map vals, - Map res, AssemblyResolvedConstructor cur) throws SolverException 
{ + AssemblyResolvedPatterns cur) throws SolverException { MaskedLong adj; try { - adj = solver.getValue(exp.getRight(), vals, res, cur); + adj = solver.getValue(exp.getRight(), vals, cur); } catch (NeedsBackfillException e) { throw new SolverException("Variable shifts break field catenation solver", e); @@ -351,6 +369,6 @@ public class OrExpressionSolver extends AbstractBinaryExpressionSolver { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/RecursiveDescentSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/RecursiveDescentSolver.java index e2d785f45a..1f7df75fef 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/RecursiveDescentSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/RecursiveDescentSolver.java @@ -18,24 +18,30 @@ package ghidra.app.plugin.assembler.sleigh.expr; import java.util.*; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; /** * This singleton class seeks solutions to {@link PatternExpression}s * - * It is called naive, because it does not perform algebraic transformations. Rather, it attempts to - * fold constants, assuming there is a single variable in the expression, modifying the goal as it + *

+ * It is rather naive. It does not perform algebraic transformations. Instead, it attempts to fold + * constants, assuming there is a single variable in the expression, modifying the goal as it * descends toward that variable. If it finds a variable, i.e., token or context field, it encodes * the solution, positioned in the field. If the expression is constant, it checks that the goal - * agrees. If not, an error is returned. + * agrees. If not, an error is returned. There are some common cases where it is forced to solve + * expressions involving multiple variables. Those cases are addressed in the derivatives of + * {@link AbstractBinaryExpressionSolver} where the situation can be detected. One common example is + * field concatenation using the {@code (A << 4) | B} pattern. * - * TODO This whole mechanism ought to just be factored directly into {@link PatternExpression}. + *

+ * TODO: Perhaps this whole mechanism ought to just be factored directly into + * {@link PatternExpression}. */ public class RecursiveDescentSolver { - protected static final DbgTimer dbg = DbgTimer.INACTIVE; - private static final RecursiveDescentSolver solver = new RecursiveDescentSolver(); + protected static final DbgTimer DBG = DbgTimer.INACTIVE; + private static final RecursiveDescentSolver INSTANCE = new RecursiveDescentSolver(); // A mapping from each subclass of PatternExpression to the appropriate solver protected Map, AbstractExpressionSolver> registry = new HashMap<>(); @@ -67,7 +73,7 @@ public class RecursiveDescentSolver { * @return the singleton instance */ public static RecursiveDescentSolver getSolver() { - return solver; + return INSTANCE; } /** @@ -103,59 +109,52 @@ public class RecursiveDescentSolver { * @param exp the expression to solve * @param goal the desired output (modulo a mask) of the expression * @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next}) - * @param res resolved subconstructors, by operand index (see method details) * @param hints describes techniques applied by calling solvers * @param description a description to attached to the encoded solution * @return the encoded solution * @throws NeedsBackfillException a solution may exist, but a required symbol is missing */ protected AssemblyResolution solve(PatternExpression exp, MaskedLong goal, - Map vals, Map res, AssemblyResolvedConstructor cur, - Set hints, String description) throws NeedsBackfillException { + Map vals, AssemblyResolvedPatterns cur, Set hints, + String description) throws NeedsBackfillException { try { - return getRegistered(exp.getClass()).solve(exp, goal, vals, res, cur, hints, - description); + return getRegistered(exp.getClass()).solve(exp, goal, vals, cur, hints, description); } catch (UnsupportedOperationException e) { - dbg.println("Error solving " + exp + " = " + goal); + DBG.println("Error solving " + exp + " = " + 
goal); throw e; } } /** - * Solve a given expression, assuming it outputs a given masked value + * Solve a given expression, given a masked-value goal * + *

* From a simplified perspective, we need only the expression and the desired value to solve it. - * Generally speaking, the expression may have only contain a single variable, and the encoded - * result represents that single variable. It must be absorbed into the overall instruction - * and/or context encoding. + * Generally speaking, the expression may only contain a single field, and the encoded result + * specifies the bits of the solved field. It must be absorbed into the overall assembly + * pattern. * - * More realistically, however, these expressions may depend on quite a bit of extra - * information. For example, PC-relative encodings (i.e., those involving {@code inst_start} or + *

+ * More realistically, these expressions may depend on quite a bit of extra information. For + * example, PC-relative encodings (i.e., those involving {@code inst_start} or * {@code inst_next}, need to know the starting address of the resulting instruction. {@code * inst_start} must be provided to the solver by the assembler. {@code inst_next} cannot be * known until the instruction length is known. Thus, expressions using it always result in a * {@link NeedsBackfillException}. The symbols, when known, are provided to the solver via the * {@code vals} parameter. * - * Expressions involving {@link OperandValueSolver}s are a little more complicated, because they - * specify an offset that affects its encoding in the instruction. To compute this offset, the - * lengths of other surrounding operands must be known. Thus, when solving a context change for - * a given constructor, its resolved subconstructors must be provided to the solver via the - * {@code res} parameter. - * * @param exp the expression to solve * @param goal the desired output (modulo a mask) of the expression * @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next}) - * @param res resolved subconstructors, by operand index (see method details) * @param description a description to attached to the encoded solution * @return the encoded solution * @throws NeedsBackfillException a solution may exist, but a required symbol is missing */ public AssemblyResolution solve(PatternExpression exp, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, String description) + AssemblyResolvedPatterns cur, String description) throws NeedsBackfillException { - return solve(exp, goal, vals, res, cur, Set.of(), description); + return solve(exp, goal, vals, cur, Set.of(), description); } /** @@ -163,45 +162,44 @@ public class RecursiveDescentSolver { * * @param exp the (sub-)expression to fold * @param vals any defined symbols (usually {@code inst_start}, and {@code 
inst_next}) - * @param res resolved subconstructors, by operand index (see - * {@link #solve(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, String)}) * @return the masked solution * @throws NeedsBackfillException it may be folded, but a required symbol is missing */ protected MaskedLong getValue(T exp, Map vals, - Map res, AssemblyResolvedConstructor cur) - throws NeedsBackfillException { - MaskedLong value = getRegistered(exp.getClass()).getValue(exp, vals, res, cur); - dbg.println("Expression: " + value + " =: " + exp); + AssemblyResolvedPatterns cur) throws NeedsBackfillException { + MaskedLong value = getRegistered(exp.getClass()).getValue(exp, vals, cur); + DBG.println("Expression: " + value + " =: " + exp); return value; } /** * Determine the length of the instruction part of the encoded solution to the given expression * + *

* This is used to keep operands in their appropriate position when backfilling becomes * applicable. Normally, the instruction length is taken from the encoding of a solution, but if * the solution cannot be determined yet, the instruction length must still be obtained. * + *

* The length can be determined by finding token fields in the expression. * * @param exp the expression, presumably containing a token field - * @param res resolved subconstructors, by operand index (see - * {@link #solve(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, String)}) * @return the anticipated length, in bytes, of the instruction encoding */ - public int getInstructionLength(PatternExpression exp, Map res) { - return getRegistered(exp.getClass()).getInstructionLength(exp, res); + public int getInstructionLength(PatternExpression exp) { + return getRegistered(exp.getClass()).getInstructionLength(exp); } /** * Compute the value of an expression given a (possibly-intermediate) resolution * * @param exp the expression to evaluate - * @param rc the resolution on which to evalute it + * @param vals values of defined symbols + * @param rc the resolution on which to evaluate it * @return the result */ - public MaskedLong valueForResolution(PatternExpression exp, AssemblyResolvedConstructor rc) { - return getRegistered(exp.getClass()).valueForResolution(exp, rc); + public MaskedLong valueForResolution(PatternExpression exp, Map vals, + AssemblyResolvedPatterns rc) { + return getRegistered(exp.getClass()).valueForResolution(exp, vals, rc); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/RightShiftExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/RightShiftExpressionSolver.java index 2d17254bea..0da326e314 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/RightShiftExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/RightShiftExpressionSolver.java @@ -19,12 +19,12 @@ import java.util.Map; import java.util.Set; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import 
ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import ghidra.app.plugin.processors.sleigh.expression.RightShiftExpression; import ghidra.util.Msg; /** - * {@literal Solves expressions of the form A >> B} + * Solves expressions of the form {@code A >> B} */ public class RightShiftExpressionSolver extends AbstractBinaryExpressionSolver { @@ -62,15 +62,14 @@ public class RightShiftExpressionSolver @Override protected AssemblyResolution solveTwoSided(RightShiftExpression exp, MaskedLong goal, - Map vals, Map res, AssemblyResolvedConstructor cur, - Set hints, String description) - throws NeedsBackfillException, SolverException { + Map vals, AssemblyResolvedPatterns cur, Set hints, + String description) throws NeedsBackfillException, SolverException { // Do the similar thing as in {@link LeftShiftExpressionSolver} // Do not guess the same parameter recursively if (hints.contains(DefaultSolverHint.GUESSING_RIGHT_SHIFT_AMOUNT)) { // NOTE: Nested right shifts ought to be written as a right shift by a sum - return super.solveTwoSided(exp, goal, vals, res, cur, hints, description); + return super.solveTwoSided(exp, goal, vals, cur, hints, description); } int maxShift = Long.numberOfLeadingZeros(goal.val); @@ -82,18 +81,18 @@ public class RightShiftExpressionSolver MaskedLong reql = computeLeft(reqr, goal); AssemblyResolution lres = - solver.solve(exp.getLeft(), reql, vals, res, cur, hintsWithRShift, description); + solver.solve(exp.getLeft(), reql, vals, cur, hintsWithRShift, description); if (lres.isError()) { throw new SolverException("Solving left failed"); } AssemblyResolution rres = - solver.solve(exp.getRight(), reqr, vals, res, cur, hints, description); + solver.solve(exp.getRight(), reqr, vals, cur, hints, description); if (rres.isError()) { throw new SolverException("Solving right failed"); } - AssemblyResolvedConstructor lsol = (AssemblyResolvedConstructor) lres; - 
AssemblyResolvedConstructor rsol = (AssemblyResolvedConstructor) rres; - AssemblyResolvedConstructor sol = lsol.combine(rsol); + AssemblyResolvedPatterns lsol = (AssemblyResolvedPatterns) lres; + AssemblyResolvedPatterns rsol = (AssemblyResolvedPatterns) rres; + AssemblyResolvedPatterns sol = lsol.combine(rsol); if (sol == null) { throw new SolverException( "Left and right solutions conflict for shift=" + shift); @@ -105,6 +104,6 @@ public class RightShiftExpressionSolver // try the next } } - return super.solveTwoSided(exp, goal, vals, res, cur, hints, description); + return super.solveTwoSided(exp, goal, vals, cur, hints, description); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/SolverHint.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/SolverHint.java index 8ec0067a98..98d2517baa 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/SolverHint.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/SolverHint.java @@ -20,11 +20,13 @@ import java.util.*; /** * A type for solver hints * - * Hints inform "sub-"solvers of the techniques already being applied by the calling solvers. This + *

+ * Hints inform sub-solvers of the techniques already being applied by the calling solvers. This * helps prevent situations where, e.g., two multiplication solvers (applied to repeated or nested * multiplication) both attempt to synthesize new goals for repetition. This sort of expression is * common when decoding immediates in the AArch64 specification. * + *

* Using an interface implemented by an enumeration (instead of just using the enumeration directly) * eases expansion by extension without modifying the core code. * diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/StartInstructionValueSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/StartInstructionValueSolver.java index c4c1371636..b21e98952a 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/StartInstructionValueSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/StartInstructionValueSolver.java @@ -24,6 +24,7 @@ import ghidra.app.plugin.processors.sleigh.expression.StartInstructionValue; /** * "Solves" expression of {@code inst_start} * + *

* Works like the constant solver, but takes the value of {@code inst_start}, which is given by the * assembly address. */ @@ -35,28 +36,26 @@ public class StartInstructionValueSolver extends AbstractExpressionSolver vals, Map res, AssemblyResolvedConstructor cur, - Set hints, String description) { + Map vals, AssemblyResolvedPatterns cur, Set hints, + String description) { throw new AssertionError( "INTERNAL: Should never be asked to solve for " + AssemblyTreeResolver.INST_START); } @Override public MaskedLong getValue(StartInstructionValue iv, Map vals, - Map res, AssemblyResolvedConstructor cur) { + AssemblyResolvedPatterns cur) { return MaskedLong.fromLong(vals.get(AssemblyTreeResolver.INST_START)); } @Override - public int getInstructionLength(StartInstructionValue exp, Map res) { + public int getInstructionLength(StartInstructionValue exp) { return 0; } @Override - public MaskedLong valueForResolution(StartInstructionValue exp, - AssemblyResolvedConstructor rc) { - // Would need to pass in symbol values. 
- throw new UnsupportedOperationException( - "The solver should never ask for this value given a resolved constructor."); + public MaskedLong valueForResolution(StartInstructionValue exp, Map vals, + AssemblyResolvedPatterns rc) { + return MaskedLong.fromLong(vals.get(AssemblyTreeResolver.INST_START)); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/SubExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/SubExpressionSolver.java index 0801c0252a..5eed18943e 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/SubExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/SubExpressionSolver.java @@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr; import ghidra.app.plugin.processors.sleigh.expression.SubExpression; /** - * Solves expressions of the form A - B + * Solves expressions of the form {@code A - B} */ public class SubExpressionSolver extends AbstractBinaryExpressionSolver { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/TokenFieldSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/TokenFieldSolver.java index 823ee17ec3..667a98a05d 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/TokenFieldSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/TokenFieldSolver.java @@ -24,6 +24,7 @@ import ghidra.app.plugin.processors.sleigh.expression.TokenField; /** * Solves expressions of a token (instruction encoding) field * + *

* Essentially, this just encodes the goal into the field, if it can be represented in the given * space and format. Otherwise, there is no solution. */ @@ -35,33 +36,33 @@ public class TokenFieldSolver extends AbstractExpressionSolver { @Override public AssemblyResolution solve(TokenField tf, MaskedLong goal, Map vals, - Map res, AssemblyResolvedConstructor cur, Set hints, - String description) { + AssemblyResolvedPatterns cur, Set hints, String description) { assert tf.minValue() == 0; // In case someone decides to do signedness there. if (!goal.isInRange(tf.maxValue(), tf.hasSignbit())) { return AssemblyResolution.error("Value " + goal + " is not valid for " + tf, - description, null); + description); } AssemblyPatternBlock block = AssemblyPatternBlock.fromTokenField(tf, goal); - return AssemblyResolution.instrOnly(block, description, null); + return AssemblyResolution.instrOnly(block, description); } @Override - public MaskedLong getValue(TokenField tf, Map vals, Map res, - AssemblyResolvedConstructor cur) { + public MaskedLong getValue(TokenField tf, Map vals, + AssemblyResolvedPatterns cur) { if (cur == null) { return null; } - return valueForResolution(tf, cur); + return valueForResolution(tf, vals, cur); } @Override - public int getInstructionLength(TokenField tf, Map res) { + public int getInstructionLength(TokenField tf) { return tf.getByteEnd() + 1; } @Override - public MaskedLong valueForResolution(TokenField tf, AssemblyResolvedConstructor rc) { + public MaskedLong valueForResolution(TokenField tf, Map vals, + AssemblyResolvedPatterns rc) { int size = tf.getByteEnd() - tf.getByteStart() + 1; MaskedLong res = rc.readInstruction(tf.getByteStart(), size); if (!tf.isBigEndian()) { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/XorExpressionSolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/XorExpressionSolver.java index 8c5ad88657..c05711e525 100644 --- 
a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/XorExpressionSolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/XorExpressionSolver.java @@ -18,7 +18,7 @@ package ghidra.app.plugin.assembler.sleigh.expr; import ghidra.app.plugin.processors.sleigh.expression.XorExpression; /** - * Solves expressions of the form A $xor B + * Solves expressions of the form {@code A $xor B} */ public class XorExpressionSolver extends AbstractBinaryExpressionSolver { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/AbstractExpressionMatcher.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/AbstractExpressionMatcher.java new file mode 100644 index 0000000000..203e9ffde9 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/AbstractExpressionMatcher.java @@ -0,0 +1,122 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.assembler.sleigh.expr.match; + +import java.util.Map; +import java.util.Set; + +import ghidra.app.plugin.processors.sleigh.expression.*; + +/** + * Base implementation for expression matchers + * + * @param the type of expression matched + */ +public abstract class AbstractExpressionMatcher + implements ExpressionMatcher { + protected final Set> ops; + + public AbstractExpressionMatcher(Set> ops) { + this.ops = Set.copyOf(ops); + } + + public AbstractExpressionMatcher(Class cls) { + this.ops = Set.of(cls); + } + + protected T opMatches(PatternExpression expression) { + return ops.stream() + .filter(op -> op.isInstance(expression)) + .map(op -> op.cast(expression)) + .findAny() + .orElse(null); + } + + protected abstract boolean matchDetails(T expression, + Map, PatternExpression> result); + + @Override + public boolean match(PatternExpression expression, + Map, PatternExpression> result) { + T t = opMatches(expression); + if (t == null) { + return false; + } + if (!matchDetails(t, result)) { + return false; + } + return recordResult(t, result); + } + + /** + * Record the expression substituted for this matcher + * + * <p> + * If this matcher is already bound in the given map, the existing binding is kept (it is + * never overwritten) and the new expression must be identically defined. Keeping the first + * binding ensures a failed attempt cannot corrupt it for a later retry using the same map, + * e.g., the swapped attempt of {@link BinaryExpressionMatcher.Commutative}. + * + * @param expression the matched expression + * @param result the map of matchers to substituted expressions + * @return true if newly bound, or if consistent with the existing binding + */ + protected boolean recordResult(PatternExpression expression, + Map, PatternExpression> result) { + PatternExpression already = result.putIfAbsent(this, expression); + if (already == null) { + return true; + } + return expressionsIdenticallyDefined(already, expression); + } + + protected static boolean expressionsIdenticallyDefined(PatternExpression a, + PatternExpression b) { + if (a.getClass() != b.getClass()) { + return false; + } + if (a instanceof EndInstructionValue) { + return true; + } + if (a instanceof StartInstructionValue) { + return true; + } + if (a instanceof ConstantValue) { + ConstantValue ca = (ConstantValue) a; + ConstantValue cb = (ConstantValue) b; + return ca.getValue() == cb.getValue(); + } + if (a instanceof UnaryExpression) { + UnaryExpression ua = (UnaryExpression) a; + UnaryExpression ub = (UnaryExpression) b; + return expressionsIdenticallyDefined(ua.getUnary(), 
ub.getUnary()); + } + if (a instanceof BinaryExpression) { + BinaryExpression ba = (BinaryExpression) a; + BinaryExpression bb = (BinaryExpression) b; + return expressionsIdenticallyDefined(ba.getLeft(), bb.getLeft()) && + expressionsIdenticallyDefined(ba.getRight(), bb.getRight()); + } + if (a instanceof TokenField) { + TokenField ta = (TokenField) a; + TokenField tb = (TokenField) b; + return ta.getBitStart() == tb.getBitStart() && + ta.getBitEnd() == tb.getBitEnd() && + ta.hasSignbit() == tb.hasSignbit(); + } + if (a instanceof ContextField) { + ContextField ca = (ContextField) a; + ContextField cb = (ContextField) b; + return ca.getStartBit() == cb.getStartBit() && + ca.getEndBit() == cb.getEndBit() && + ca.hasSignbit() == cb.hasSignbit(); + } + if (a instanceof OperandValue) { + OperandValue va = (OperandValue) a; + OperandValue vb = (OperandValue) b; + return va.getConstructor() == vb.getConstructor() && + va.getIndex() == vb.getIndex(); + } + throw new AssertionError(); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/AnyMatcher.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/AnyMatcher.java new file mode 100644 index 0000000000..fab7a9ff0a --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/AnyMatcher.java @@ -0,0 +1,50 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.expr.match; + +import java.util.Map; +import java.util.Set; + +import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; + +/** + * A matcher which accepts any expression of the required type + * + *

+ * This requires no further consideration of the expressions operands. If the type matches, the + * expression matches. + * + * @param the type to match + */ +public class AnyMatcher extends AbstractExpressionMatcher { + public static AnyMatcher any() { + return new AnyMatcher<>(PatternExpression.class); + } + + public AnyMatcher(Set> ops) { + super(ops); + } + + public AnyMatcher(Class cls) { + super(cls); + } + + @Override + protected boolean matchDetails(T expression, + Map, PatternExpression> result) { + return true; + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/BinaryExpressionMatcher.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/BinaryExpressionMatcher.java new file mode 100644 index 0000000000..fbd5930867 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/BinaryExpressionMatcher.java @@ -0,0 +1,91 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.expr.match; + +import java.util.*; + +import ghidra.app.plugin.processors.sleigh.expression.BinaryExpression; +import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; + +/** + * A matcher for a binary expression + * + *

+ * If the required type matches, the matching descends to the left then right operands. + * + * @param the type of expression matched + */ +public class BinaryExpressionMatcher + extends AbstractExpressionMatcher { + + /** + * A matcher for binary expression allowing commutativity + * + *

+ * This behaves the same as {@link BinaryExpressionMatcher}, but if the first attempt fails, the + * operand match is re-attempted with the operands swapped. + * + * @param the type of expression matched + */ + public static class Commutative extends BinaryExpressionMatcher { + public Commutative(Set> ops, + ExpressionMatcher leftMatcher, ExpressionMatcher rightMatcher) { + super(ops, leftMatcher, rightMatcher); + } + + public Commutative(Class cls, ExpressionMatcher leftMatcher, + ExpressionMatcher rightMatcher) { + super(cls, leftMatcher, rightMatcher); + } + + @Override + protected boolean matchDetails(T expression, + Map, PatternExpression> result) { + Set> reset = new HashSet<>(result.keySet()); + if (leftMatcher.match(expression.getLeft(), result) && + rightMatcher.match(expression.getRight(), result)) { + return true; + } + result.keySet().retainAll(reset); + return rightMatcher.match(expression.getLeft(), result) && + leftMatcher.match(expression.getRight(), result); + } + } + + protected final ExpressionMatcher leftMatcher; + protected final ExpressionMatcher rightMatcher; + + public BinaryExpressionMatcher(Set> ops, + ExpressionMatcher leftMatcher, ExpressionMatcher rightMatcher) { + super(ops); + this.leftMatcher = leftMatcher; + this.rightMatcher = rightMatcher; + } + + public BinaryExpressionMatcher(Class cls, ExpressionMatcher leftMatcher, + ExpressionMatcher rightMatcher) { + super(cls); + this.leftMatcher = leftMatcher; + this.rightMatcher = rightMatcher; + } + + @Override + protected boolean matchDetails(T expression, + Map, PatternExpression> result) { + return leftMatcher.match(expression.getLeft(), result) && + rightMatcher.match(expression.getRight(), result); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/ConstantValueMatcher.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/ConstantValueMatcher.java new file mode 100644 index 
0000000000..875f585c5d --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/ConstantValueMatcher.java @@ -0,0 +1,39 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.expr.match; + +import java.util.Map; + +import ghidra.app.plugin.processors.sleigh.expression.ConstantValue; +import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; + +/** + * A matcher for a given constant value + */ +public class ConstantValueMatcher extends AbstractExpressionMatcher { + protected final long value; + + public ConstantValueMatcher(long value) { + super(ConstantValue.class); + this.value = value; + } + + @Override + protected boolean matchDetails(ConstantValue expression, + Map, PatternExpression> result) { + return expression.getValue() == value; + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/ExpressionMatcher.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/ExpressionMatcher.java new file mode 100644 index 0000000000..76e13dc6a5 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/ExpressionMatcher.java @@ -0,0 +1,309 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except 
in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.expr.match; + +import java.util.HashMap; +import java.util.Map; + +import ghidra.app.plugin.processors.sleigh.expression.*; + +/** + * A matcher for a form of pattern expression + * + *

+ * Some solvers may need to apply sophisticated heuristics to recognize certain forms that commonly + * occur in pattern expressions. These can certainly be programmed manually, but for many cases, the + * form recognition can be accomplished by describing the form as an expression matcher. For a + * shorter syntax to construct such matchers, see {@link Context}. + * + * @param the type of expression matched + */ +public interface ExpressionMatcher { + + /** + * Attempt to match the given expression, recording the substitutions if successful + * + * @param expression the expression to match + * @return a map of matchers to substituted expressions, or null if unsuccessful + */ + default Map, PatternExpression> match(PatternExpression expression) { + Map, PatternExpression> result = new HashMap<>(); + if (match(expression, result)) { + return result; + } + return null; + } + + /** + * Retrieve the expression substituted for this matcher from a previous successful match + * + *

+ * Calling this on the root matcher is relatively useless, as it would simply return the + * expression passed to {@link #match(PatternExpression)}. Instead, sub-matchers should be saved + * in a variable, allowing their values to be retrieved. See {@link Context}, for an example. + * + * @param results the previous match results + * @return the substituted expression + */ + @SuppressWarnings("unchecked") + default T get(Map, PatternExpression> results) { + return (T) results.get(this); + } + + /** + * Attempt to match the given expression, recording substitutions in the given map + * + *

+ * Even if the match was unsuccessful, the result map may contain attempted substitutions. Thus, + * the map should be discarded if unsuccessful. + * + * @param expression the expression to match + * @param result a map to store matchers to substituted expressions + * @return true if successful, false if not + */ + boolean match(PatternExpression expression, + Map, PatternExpression> result); + + /** + * A context for defining expression matchers succinctly + * + *

+ * Implementations of this interface have easy access to factory methods for each kind of + * {@link PatternExpression}. Additionally, the class itself provides a convenient container for + * saving important sub-matchers, so that important sub-expressions can be readily retrieved. For + * example: + * + *

+	 * static class MyMatchers implements ExpressionMatcher.Context {
+	 * 	ExpressionMatcher&lt;ConstantValue&gt; shamt = var(ConstantValue.class);
+	 * 	ExpressionMatcher&lt;?&gt; exp = shl(var(), shamt);
+	 * }
+	 * 
+	 * static final MyMatchers MATCHERS = new MyMatchers();
+	 * 
+	 * public long getConstantShift(PatternExpression expression) {
+	 * 	Map&lt;ExpressionMatcher&lt;?&gt;, PatternExpression&gt; result = MATCHERS.exp.match(expression);
+	 * 	if (result == null) {
+	 * 		return -1;
+	 * 	}
+	 * 	return MATCHERS.shamt.get(result).getValue();
+	 * }
+	 * 
+ * + *

+ * Saving a sub-matcher to a field (as in the example) also permits that sub-matcher to appear + * in multiple places. In that case, the sub-matcher must match identical expressions wherever + * it appears. For example, if {@code cv} matches any constant value, then {@code plus(cv, cv)} + * would match {@code 2 + 2}, but not {@code 2 + 3}. + */ + interface Context { + + /** + * Match the form {@code L & R} or {@code R & L} + * + * @param left the matcher for the left operand + * @param right the matcher for the right operand + * @return the matcher + */ + default ExpressionMatcher and(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher.Commutative<>(AndExpression.class, left, right); + } + + /** + * Match the form {@code L / R} + * + * @param left the matcher for the dividend + * @param right the matcher for the divisor + * @return the matcher for the quotient + */ + default ExpressionMatcher div(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher<>(DivExpression.class, left, right); + } + + /** + * Match the form {@code L << R} + * + * @param left the matcher for the left operand + * @param right the matcher for the shift amount + * @return the matcher + */ + default ExpressionMatcher shl(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher<>(LeftShiftExpression.class, left, right); + } + + /** + * Match the form {@code L * R} or {@code R * L} + * + * @param left the matcher for the left factor + * @param right the matcher for the right factor + * @return the matcher for the product + */ + default ExpressionMatcher mul(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher.Commutative<>(MultExpression.class, left, right); + } + + /** + * Match the form {@code L | R} or {@code R | L} + * + * @param left the matcher for the left operand + * @param right the matcher for the right operand + * @return the matcher + */ + 
default ExpressionMatcher or(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher.Commutative<>(OrExpression.class, left, right); + } + + /** + * Match the form {@code L + R} or {@code R + L} + * + * @param left the matcher for the left term + * @param right the matcher for the right term + * @return the matcher for the sum + */ + default ExpressionMatcher plus(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher.Commutative<>(PlusExpression.class, left, right); + } + + /** + * Match the form {@code L >> R} + * + * @param left the matcher for the left operand + * @param right the matcher for the shift amount + * @return the matcher + */ + default ExpressionMatcher shr(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher<>(RightShiftExpression.class, left, right); + } + + /** + * Match the form {@code L - R} + * + * @param left the matcher for the left term + * @param right the matcher for the right term + * @return the matcher for the difference + */ + default ExpressionMatcher sub(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher<>(SubExpression.class, left, right); + } + + /** + * Match the form {@code L $xor R} or {@code R $xor L} + * + * @param left the matcher for the left operand + * @param right the matcher for the right operand + * @return the matcher + */ + default ExpressionMatcher xor(ExpressionMatcher left, + ExpressionMatcher right) { + return new BinaryExpressionMatcher.Commutative<>(XorExpression.class, left, right); + } + + /** + * Match a given constant value + * + *

+ * NOTE: To match an unspecified constant value, use {@link #var(Class)} with + * {@link ConstantValue}. + * + * @param value the value to match + * @return the matcher + */ + default ExpressionMatcher cv(long value) { + return new ConstantValueMatcher(value); + } + + /** + * Match any expression + * + *

+ * This matches any expression without consideration of its operands, except insofar as, when it + * appears in multiple places, it will check that subsequent matches are identical to the + * first. + * + * @return the matcher + */ + default ExpressionMatcher var() { + return AnyMatcher.any(); + } + + /** + * Match any expression of the given type + * + * @param the type of expression to match + * @param cls the class of expression to match + * @return the matcher + */ + default ExpressionMatcher var(Class cls) { + return new AnyMatcher<>(cls); + } + + /** + * Match an operand value + * + *

+ * Typically, this must wrap any use of a field, since that field is considered an operand + * from the constructor's perspective. + * + * @param def the matcher for the operand's defining expression. + * @return the operand matcher + */ + default ExpressionMatcher opnd(ExpressionMatcher def) { + return new OperandValueMatcher(def); + } + + /** + * Match a field by its size + * + *

+ * This matches either a {@link TokenField} or a {@link ContextField}. If matched, it then + * passes a {@link ConstantValue} of the field's size (in bits) into the given size matcher. + * + * @param size the matcher for the field's size + * @return the field matcher + */ + default ExpressionMatcher fldSz(ExpressionMatcher size) { + return new FieldSizeMatcher(size); + } + + /** + * Match the form {@code -U} + * + * @param unary the child matcher + * @return the matcher + */ + default ExpressionMatcher neg(ExpressionMatcher unary) { + return new UnaryExpressionMatcher<>(MinusExpression.class, unary); + } + + /** + * Match the form {@code ~U} + * + * @param unary the child matcher + * @return the matcher + */ + default ExpressionMatcher not(ExpressionMatcher unary) { + return new UnaryExpressionMatcher<>(NotExpression.class, unary); + } + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/FieldSizeMatcher.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/FieldSizeMatcher.java new file mode 100644 index 0000000000..3b8a2cd6c2 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/FieldSizeMatcher.java @@ -0,0 +1,49 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.assembler.sleigh.expr.match; + +import java.util.Map; +import java.util.Set; + +import ghidra.app.plugin.processors.sleigh.expression.*; + +/** + * A matcher for a token or context field, constrained by its size in bits + */ +public class FieldSizeMatcher extends AbstractExpressionMatcher { + protected final ExpressionMatcher sizeMatcher; + + public FieldSizeMatcher(ExpressionMatcher sizeMatcher) { + super(Set.of(ContextField.class, TokenField.class)); + this.sizeMatcher = sizeMatcher; + } + + @Override + protected boolean matchDetails(PatternValue expression, + Map, PatternExpression> result) { + if (expression instanceof ContextField) { + ContextField cf = (ContextField) expression; + long size = cf.getEndBit() - cf.getStartBit() + 1; + return sizeMatcher.match(new ConstantValue(size), result); + } + if (expression instanceof TokenField) { + TokenField tf = (TokenField) expression; + long size = tf.getBitEnd() - tf.getBitStart() + 1; + return sizeMatcher.match(new ConstantValue(size), result); + } + return false; + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/OperandValueMatcher.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/OperandValueMatcher.java new file mode 100644 index 0000000000..33f4fbab3d --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/OperandValueMatcher.java @@ -0,0 +1,42 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.expr.match; + +import java.util.Map; + +import ghidra.app.plugin.assembler.sleigh.expr.OperandValueSolver; +import ghidra.app.plugin.processors.sleigh.expression.OperandValue; +import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; +import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; + +/** + * A matcher for a constructor's operand value, constrained by its defining expression + */ +public class OperandValueMatcher extends AbstractExpressionMatcher { + protected final ExpressionMatcher defMatcher; + + public OperandValueMatcher(ExpressionMatcher defMatcher) { + super(OperandValue.class); + this.defMatcher = defMatcher; + } + + @Override + protected boolean matchDetails(OperandValue expression, + Map, PatternExpression> result) { + OperandSymbol opSym = expression.getConstructor().getOperand(expression.getIndex()); + return defMatcher.match(OperandValueSolver.getDefiningExpression(opSym), result); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/UnaryExpressionMatcher.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/UnaryExpressionMatcher.java new file mode 100644 index 0000000000..e595944e8c --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/expr/match/UnaryExpressionMatcher.java @@ -0,0 +1,51 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may 
not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.expr.match; + +import java.util.Map; +import java.util.Set; + +import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; +import ghidra.app.plugin.processors.sleigh.expression.UnaryExpression; + +/** + * A matcher for a unary expression + * + *

+ * If the required type matches, the matching descends to the child operand. + * + * @param the type of expression matched + */ +public class UnaryExpressionMatcher + extends AbstractExpressionMatcher { + protected final ExpressionMatcher unaryMatcher; + + public UnaryExpressionMatcher(Set> ops, ExpressionMatcher unaryMatcher) { + super(ops); + this.unaryMatcher = unaryMatcher; + } + + public UnaryExpressionMatcher(Class cls, ExpressionMatcher unaryMatcher) { + super(cls); + this.unaryMatcher = unaryMatcher; + } + + @Override + protected boolean matchDetails(T expression, + Map, PatternExpression> result) { + return unaryMatcher.match(expression.getUnary(), result); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AbstractAssemblyGrammar.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AbstractAssemblyGrammar.java index 2fd4bfa150..664f538535 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AbstractAssemblyGrammar.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AbstractAssemblyGrammar.java @@ -34,16 +34,17 @@ import ghidra.generic.util.datastruct.TreeSetValuedTreeMap; /** * Defines a context-free grammar, usually for the purpose of parsing mnemonic assembly instructions * - * As in classic computer science, a CFG consists of productions of non-terminals and terminals. - * The left-hand side of the a production must be a single non-terminal, but the right-hand side - * may be any string of symbols. To avoid overloading the term "String," here we call it a - * "Sentential." + *

+ * As in classic computer science, a CFG consists of productions of non-terminals and terminals. The + * left-hand side of a production must be a single non-terminal, but the right-hand side may be + * any string of symbols. To avoid overloading the term "String," here we call it a "Sentential." * + *

* To define a grammar, simply construct an appropriate subclass (probably {@link AssemblyGrammar}) * and call {@link #addProduction(AbstractAssemblyProduction)} or - * {@link #addProduction(AssemblyNonTerminal, AssemblySentential)}. The grammar object will collect - * the non-terminals and terminals. + * {@link #addProduction(AssemblyNonTerminal, AssemblySentential)}. * + *

* By default, the start symbol is taken from the left-hand side of the first production added to * the grammar. * @@ -71,6 +72,7 @@ public abstract class AbstractAssemblyGrammar that) { @@ -190,6 +201,7 @@ public abstract class AbstractAssemblyGrammar + * The grammar is consistent if every non-terminal appearing in the grammar also appears as the + * left-hand side of some production. If not, such non-terminals are said to be undefined. + * * @throws AssemblyGrammarException the grammar is inconsistent, i.e., contains undefined - * non-terminals. + * non-terminals. */ public void verify() throws AssemblyGrammarException { if (!productions.containsKey(startName)) { throw new AssemblyGrammarException("Start symbol has no defining production"); } for (P prod : productions.values()) { - for (AssemblySymbol sym : prod) { + for (AssemblySymbol sym : prod.getRHS()) { if (sym instanceof AssemblyNonTerminal) { AssemblyNonTerminal nt = (AssemblyNonTerminal) sym; if (!(productions.containsKey(nt.getName()))) { @@ -233,6 +247,7 @@ public abstract class AbstractAssemblyGrammar nonTerminals() { @@ -241,6 +256,7 @@ public abstract class AbstractAssemblyGrammar terminals() { @@ -249,6 +265,7 @@ public abstract class AbstractAssemblyGrammar the type of non-terminals */ public abstract class AbstractAssemblyProduction - extends AbstractListDecorator implements Comparable> { private final NT lhs; private final AssemblySentential rhs; @@ -38,6 +32,7 @@ public abstract class AbstractAssemblyProduction /** * Construct a production with the given LHS and RHS + * * @param lhs the left-hand side * @param rhs the right-hand side */ @@ -47,16 +42,13 @@ public abstract class AbstractAssemblyProduction this.rhs = rhs; } - @Override - protected List decorated() { - return rhs; - } - /** * Get the index of the production * - * Instead of using deep comparison, the index is often used as the identify of the production + *

+ * Instead of using deep comparison, the index is often used as the identity of the production * within a grammar. + * * @return the index */ public int getIndex() { @@ -65,6 +57,7 @@ public abstract class AbstractAssemblyProduction /** * Get the left-hand side + * * @return the LHS */ public NT getLHS() { @@ -73,6 +66,7 @@ public abstract class AbstractAssemblyProduction /** * Get the right-hand side + * * @return the RHS */ public AssemblySentential getRHS() { @@ -123,15 +117,12 @@ public abstract class AbstractAssemblyProduction return result; } - @Override - public AssemblySentential subList(int fromIndex, int toIndex) { - return rhs.subList(fromIndex, toIndex); - } - /** * Get the "name" of this production * + *

* This is mostly just notional and for debugging. The name is taken as the name of the LHS. + * * @return the name of the LHS */ public String getName() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyExtendedGrammar.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyExtendedGrammar.java index 6aad89c2e2..629c7130d7 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyExtendedGrammar.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyExtendedGrammar.java @@ -20,9 +20,10 @@ import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyExtendedNonTerminal; /** * Defines an "extended" grammar * - * "Extended grammar" as in a grammar extended with state numbers from an LR0 parser. - * See LALR(1) Parsing from Stephen Jackson - * of Dalhousie University, Halifax, Nova Scotia, Canada. + *

+ * "Extended grammar" as in a grammar extended with state numbers from an LR0 parser. See + * LALR(1) Parsing from Stephen Jackson of + * Dalhousie University, Halifax, Nova Scotia, Canada. */ public class AssemblyExtendedGrammar extends AbstractAssemblyGrammar { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyExtendedProduction.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyExtendedProduction.java index d5eb06e902..e33f5f8feb 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyExtendedProduction.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyExtendedProduction.java @@ -29,6 +29,7 @@ public class AssemblyExtendedProduction /** * Construct an extended production based on the given ancestor + * * @param lhs the extended left-hand side * @param rhs the extended right-hand side * @param finalState the end state of the final symbol of the RHS @@ -49,6 +50,7 @@ public class AssemblyExtendedProduction /** * Get the final state of this production + * * @return the end state of the last symbol of the RHS */ public int getFinalState() { @@ -57,6 +59,7 @@ public class AssemblyExtendedProduction /** * Get the original production from which this production was derived + * * @return the original production */ public AssemblyProduction getAncestor() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyGrammar.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyGrammar.java index 1d765d774c..19bbe3b76f 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyGrammar.java +++ 
b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblyGrammar.java @@ -17,8 +17,6 @@ package ghidra.app.plugin.assembler.sleigh.grammars; import java.util.*; -import org.apache.commons.collections4.map.LazyMap; - import ghidra.app.plugin.assembler.sleigh.sem.AssemblyConstructorSemantic; import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal; import ghidra.app.plugin.processors.sleigh.Constructor; @@ -27,6 +25,7 @@ import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern; /** * Defines a context free grammar, used to parse mnemonic assembly instructions * + *

* This stores the CFG and the associated semantics for each production. It also has mechanisms for * tracking "purely recursive" productions. These are productions of the form I => I, and they * necessarily create ambiguity. Thus, when constructing a parser, it is useful to identify them @@ -35,8 +34,10 @@ import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern; public class AssemblyGrammar extends AbstractAssemblyGrammar { // a nested map of semantics by production, by constructor - protected final Map> semantics = - LazyMap.lazyMap(new TreeMap<>(), () -> new TreeMap<>()); + protected final Map> semanticsByProduction = + new TreeMap<>(); + protected final Map semanticsByConstructor = + new HashMap<>(); // a map of purely recursive, e.g., I => I, productions by name of LHS protected final Map pureRecursive = new TreeMap<>(); @@ -58,6 +59,7 @@ public class AssemblyGrammar /** * Add a production associated with a SLEIGH constructor semantic + * * @param lhs the left-hand side * @param rhs the right-hand side * @param pattern the pattern associated with the constructor @@ -68,27 +70,32 @@ public class AssemblyGrammar DisjointPattern pattern, Constructor cons, List indices) { AssemblyProduction prod = newProduction(lhs, rhs); addProduction(prod); - Map map = semantics.get(prod); - AssemblyConstructorSemantic sem = map.get(cons); - if (sem == null) { - sem = new AssemblyConstructorSemantic(cons, indices); - map.put(cons, sem); - } - else if (!indices.equals(sem.getOperandIndices())) { + Map map = + semanticsByProduction.computeIfAbsent(prod, p -> new TreeMap<>()); + AssemblyConstructorSemantic sem = + map.computeIfAbsent(cons, c -> new AssemblyConstructorSemantic(cons, indices)); + if (!indices.equals(sem.getOperandIndices())) { throw new IllegalStateException( "Productions of the same constructor must have same operand indices"); } + semanticsByConstructor.put(cons, sem); sem.addPattern(pattern); } /** * Get the semantics associated with a given production + 
* * @param prod the production * @return all semantics associated with the given production */ public Collection getSemantics(AssemblyProduction prod) { - return Collections.unmodifiableCollection(semantics.get(prod).values()); + return Collections.unmodifiableCollection( + semanticsByProduction.computeIfAbsent(prod, p -> new TreeMap<>()).values()); + } + + public AssemblyConstructorSemantic getSemantic(Constructor cons) { + return semanticsByConstructor.get(cons); } @Override @@ -96,13 +103,15 @@ public class AssemblyGrammar super.combine(that); if (that instanceof AssemblyGrammar) { AssemblyGrammar ag = (AssemblyGrammar) that; - this.semantics.putAll(ag.semantics); + this.semanticsByProduction.putAll(ag.semanticsByProduction); + this.semanticsByConstructor.putAll(ag.semanticsByConstructor); this.pureRecursive.putAll(ag.pureRecursive); } } /** * Get all productions in the grammar that are purely recursive + * * @return */ public Collection getPureRecursive() { @@ -111,6 +120,7 @@ public class AssemblyGrammar /** * Obtain, if present, the purely recursive production having the given LHS + * * @param lhs the left-hand side * @return the desired production, or null */ diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblySentential.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblySentential.java index 1a5388ddc8..4a00aaef25 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblySentential.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/grammars/AssemblySentential.java @@ -16,8 +16,9 @@ package ghidra.app.plugin.assembler.sleigh.grammars; import java.util.*; - -import org.apache.commons.collections4.list.AbstractListDecorator; +import java.util.function.Consumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import 
ghidra.app.plugin.assembler.sleigh.symbol.*; import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken; @@ -25,29 +26,29 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken; /** * A "string" of symbols * - * To avoid overloading the word "String", we call this a "sentential". Technically, to be a + *

+ * To avoid overloading the word "string", we call this a "sentential". Technically, to be a * "sentential" in the classic sense, it must be a possible element in the derivation of a sentence * in the grammar starting with the start symbol. We ignore that if only for the sake of naming. * * @param the type of non-terminals */ -public class AssemblySentential extends - AbstractListDecorator implements Comparable> { +public class AssemblySentential + implements Comparable>, Iterable { private List symbols; + private final List unmodifiableSymbols; private boolean finished = false; public static final AssemblyStringTerminal WHITE_SPACE = new WhiteSpace(); + private static final Pattern PAT_COMMA_WS = Pattern.compile(",\\s+"); /** * Construct a string from the given list of symbols + * * @param symbols */ public AssemblySentential(List symbols) { this.symbols = new ArrayList<>(symbols); - } - - @Override - protected List decorated() { - return symbols; + this.unmodifiableSymbols = Collections.unmodifiableList(symbols); } /** @@ -58,19 +59,22 @@ public class AssemblySentential extends */ public AssemblySentential() { this.symbols = new ArrayList<>(); + this.unmodifiableSymbols = Collections.unmodifiableList(symbols); } /** * Construct a string from any number of symbols + * * @param syms */ public AssemblySentential(AssemblySymbol... syms) { this.symbols = Arrays.asList(syms); + this.unmodifiableSymbols = Collections.unmodifiableList(symbols); } @Override public String toString() { - if (symbols.size() == 0) { + if (symbols.isEmpty()) { return "e"; } Iterator symIt = symbols.iterator(); @@ -117,6 +121,7 @@ public class AssemblySentential extends /** * A "whitespace" terminal * + *

* This terminal represents "optional" whitespace. "Optional" because in certain circumstances, * whitespace is not actually required, i.e., before or after a special character. */ @@ -132,7 +137,7 @@ public class AssemblySentential extends @Override public Collection match(String buffer, int pos, AssemblyGrammar grammar, - Map labels) { + AssemblyNumericSymbols symbols) { if (buffer.length() == 0) { return Collections.singleton(new WhiteSpaceParseToken(grammar, this, "")); } @@ -158,7 +163,7 @@ public class AssemblySentential extends } @Override - public Collection getSuggestions(String got, Map labels) { + public Collection getSuggestions(String got, AssemblyNumericSymbols symbols) { return Collections.singleton(" "); } } @@ -175,6 +180,7 @@ public class AssemblySentential extends /** * The token consumed by a whitespace terminal when it anticipates the end of input * + *

* "Expected" tokens given by a parse machine when this is the last token it has consumed are * not valid suggestions. The machine should instead suggest a whitespace character. */ @@ -185,7 +191,18 @@ public class AssemblySentential extends } /** - * Add "optional" whitespace, if not already preceded by whitespace + * Add a symbol to the right of this sentential + * + * @param symbol the symbol to add + * @return true + */ + public boolean addSymbol(AssemblySymbol symbol) { + return symbols.add(symbol); + } + + /** + * Add optional whitespace, if not already preceded by whitespace + * * @return true if whitespace was added */ public boolean addWS() { @@ -193,7 +210,95 @@ public class AssemblySentential extends if (last != null) { return false; } - return add(WHITE_SPACE); + return addSymbol(WHITE_SPACE); + } + + /** + * Add a comma followed by optional whitespace. + */ + public void addCommaWS() { + addSymbol(new AssemblyStringTerminal(",")); + addWS(); + } + + /** + * Add a syntactic terminal element, but with consideration for optional whitespace surrounding + * special characters + * + * @param str the expected terminal + */ + public void addSeparatorPart(String str) { + String tstr = str.trim(); + if (tstr.equals("")) { + addWS(); + return; + } + char first = tstr.charAt(0); + if (!str.startsWith(tstr)) { + addWS(); + } + if (!Character.isLetterOrDigit(first)) { + addWS(); + } + addSymbol(new AssemblyStringTerminal(tstr)); + char last = tstr.charAt(tstr.length() - 1); + if (!str.endsWith(tstr)) { + addWS(); + } + if (!Character.isLetterOrDigit(last)) { + addWS(); + } + } + + /** + * Get the symbols in this sentential + * + * @return the symbols; + */ + public List getSymbols() { + return unmodifiableSymbols; + } + + public AssemblySymbol getSymbol(int pos) { + return symbols.get(pos); + } + + /** + * Split the given string into pieces matched by the pattern, and the pieces between + * + *

+ * This invokes the given callbacks as the string is processed from left to right. + * + * @param str the string to split + * @param pat the pattern to match + * @param matched the callback for matched portions + * @param unmatched the callback for unmatched portions + */ + private static void forMatchUnmatch(String str, Pattern pat, Consumer matched, + Consumer unmatched) { + int startU = 0; + Matcher mat = pat.matcher(str); + while (mat.find()) { + if (startU < mat.start()) { + unmatched.accept(str.substring(startU, mat.start())); + } + matched.accept(mat.group()); + startU = mat.end(); + } + if (startU < str.length()) { + unmatched.accept(str.substring(startU)); + } + } + + /** + * Add a syntactic terminal element, but considering that commas contained within may be + * followed by optional whitespace + * + * @param str the expected terminal + */ + public void addSeparators(String str) { + // NB. When displaying print pieces, the disassembler replaces all ",\\s+" with "," + forMatchUnmatch(str, PAT_COMMA_WS, matched -> addCommaWS(), this::addSeparatorPart); } // If the right-most symbol is whitespace, return it @@ -209,18 +314,31 @@ public class AssemblySentential extends } /** - * Trim leading and trailing whitespace, and make the string immutable + * Trim leading and trailing whitespace, and make the sentential immutable */ public void finish() { if (finished) { return; } - symbols = Collections.unmodifiableList(symbols); + symbols = unmodifiableSymbols; finished = true; } @Override - public AssemblySentential subList(int fromIndex, int toIndex) { + public Iterator iterator() { + return unmodifiableSymbols.iterator(); + } + + public AssemblySentential sub(int fromIndex, int toIndex) { return new AssemblySentential<>(symbols.subList(fromIndex, toIndex)); } + + /** + * Get the number of symbols, including whitespace, in this sentential + * + * @return the number of symbols + */ + public int size() { + return symbols.size(); + } } diff --git 
a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyFirstFollow.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyFirstFollow.java index 5c3b7ba19e..e35a5e0238 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyFirstFollow.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyFirstFollow.java @@ -28,6 +28,7 @@ import ghidra.generic.util.datastruct.TreeSetValuedTreeMap; /** * A class to compute the first and follow of every non-terminal in a grammar * + *

* See Alfred V. Aho, Monica S. Lam, Ravi Sethi, Jeffrey D. Ullman, Compilers: Principles, * Techniques, & Tools. Bostom, MA: Pearson, 2007, pp. 220-2. */ @@ -43,6 +44,7 @@ public class AssemblyFirstFollow { /** * Compute the first and follow sets for every non-terminal in the given grammar + * * @param grammar the grammar */ public AssemblyFirstFollow(AbstractAssemblyGrammar grammar) { @@ -61,7 +63,7 @@ public class AssemblyFirstFollow { while (changed) { changed = false; for (AbstractAssemblyProduction prod : grammar) { - if (nullable.containsAll(prod)) { + if (nullable.containsAll(prod.getRHS().getSymbols())) { changed |= nullable.add(prod.getLHS()); } } @@ -81,7 +83,7 @@ public class AssemblyFirstFollow { // Add the first of all each symbol // Terminate after a terminal or non-nullable symbol for (AbstractAssemblyProduction prod : grammar) { - for (AssemblySymbol sym : prod) { + for (AssemblySymbol sym : prod.getRHS()) { if (sym instanceof AssemblyNonTerminal) { AssemblyNonTerminal nt = (AssemblyNonTerminal) sym; changed |= first.putAll(prod.getLHS(), first.get(nt)); @@ -116,13 +118,13 @@ public class AssemblyFirstFollow { // Finish the subwalk after a terminal or non-nullable symbol // If you hit the end, add follow(LHS) to follow the current symbol for (AbstractAssemblyProduction prod : grammar) { - nextX: for (int i = 0; i < prod.size(); i++) { - AssemblySymbol px = prod.get(i); + nextX: for (int i = 0; i < prod.getRHS().size(); i++) { + AssemblySymbol px = prod.getRHS().getSymbol(i); if (px instanceof AssemblyNonTerminal) { AssemblyNonTerminal X = (AssemblyNonTerminal) px; int j; - for (j = i + 1; j < prod.size(); j++) { - AssemblySymbol B = prod.get(j); + for (j = i + 1; j < prod.getRHS().size(); j++) { + AssemblySymbol B = prod.getRHS().getSymbol(j); if (B instanceof AssemblyNonTerminal) { AssemblyNonTerminal nt = (AssemblyNonTerminal) B; changed |= follow.putAll(X, first.get(nt)); @@ -149,7 +151,9 @@ public class AssemblyFirstFollow { /** * Get the 
nullable set * + *

* That is the set of all non-terminals, which through some derivation, can produce epsilon. + * * @return the set */ public Collection getNullable() { @@ -159,8 +163,10 @@ public class AssemblyFirstFollow { /** * Get the first set for a given non-terminal * + *

* That is the set of all terminals, which through some derivation from the given non-terminal, * can appear first in a sentential form. + * * @param nt the non-terminal * @return the set */ @@ -171,8 +177,10 @@ public class AssemblyFirstFollow { /** * Get the follow set for a given non-terminal * + *

* That is the set of all terminals, which through some derivation from the start symbol, can * appear immediately after the given non-terminal in a sentential form. + * * @param nt the non-terminal * @return the set */ @@ -182,6 +190,7 @@ public class AssemblyFirstFollow { /** * For debugging, print out the computed sets to the given stream + * * @param out the stream */ public void print(PrintStream out) { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseAcceptResult.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseAcceptResult.java index 6bf4f3c5ae..554e06a216 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseAcceptResult.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseAcceptResult.java @@ -40,6 +40,7 @@ public class AssemblyParseAcceptResult extends AssemblyParseResult { /** * Get the tree + * * @return the tree */ public AssemblyParseBranch getTree() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseActionGotoTable.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseActionGotoTable.java index 8d60f65e4b..9a19f68fd3 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseActionGotoTable.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseActionGotoTable.java @@ -30,11 +30,11 @@ import ghidra.generic.util.datastruct.TreeSetValuedTreeMap; /** * The Action/Goto table for a LALR(1) parser * + *

* This table is unconventional in that it permits a single cell to be populated by more than one - * action. Typically, such a situation would indicate an ambiguity, or the need for a longer - * look-ahead value. Because we do not presume to control the grammar (which was automatically - * derived from another source), the parsing algorithm will simply branch, eventually trying both - * options. + * action. Typically, such a situation would indicate ambiguity, or the need for a longer look-ahead + * value. Because we do not presume to control the grammar (which was automatically derived from + * another source), the parsing algorithm will simply branch, eventually trying both options. */ public class AssemblyParseActionGotoTable { // A map representing the actual (sparse) table @@ -45,6 +45,7 @@ public class AssemblyParseActionGotoTable { /** * Add an action entry to the given cell + * * @param fromState the state (row) in the table * @param next the symbol (column) in the table * @param action the entry to add to the cell @@ -59,6 +60,7 @@ public class AssemblyParseActionGotoTable { /** * Add a SHIFT (Sn) entry to the given cell + * * @param fromState the state (row) in the table * @param next the symbol (column) in the table * @param newState the state (n) after the shift is applied @@ -70,6 +72,7 @@ public class AssemblyParseActionGotoTable { /** * Add a REDUCE (Rn) entry to the given cell + * * @param fromState the state (row) in the table * @param next the symbol (column) in the table * @param prod the production (having index n) associated with the reduction @@ -81,6 +84,7 @@ public class AssemblyParseActionGotoTable { /** * Add a GOTO entry to the given cell + * * @param fromState the state (row) in the table * @param next the symbol (column) in the table * @param newState the target state @@ -92,6 +96,7 @@ public class AssemblyParseActionGotoTable { /** * Add an ACCEPT entry for the given state at the end of input + * * @param fromState the state (row) in 
the table * @return true, if the state does not already accept on end of input */ @@ -101,6 +106,7 @@ public class AssemblyParseActionGotoTable { /** * Get the terminals that are expected, i.e., have entries for the given state + * * @param fromState the state (row) in the table * @return the collection of populated columns (terminals) for the given state */ @@ -110,6 +116,7 @@ public class AssemblyParseActionGotoTable { /** * Get all entries in a given cell + * * @param fromState the state (row) in the table * @param next the symbol (column) in the table * @return all action entries in the given cell diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseMachine.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseMachine.java index 8fc4721d69..7c950981cd 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseMachine.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseMachine.java @@ -24,16 +24,18 @@ import ghidra.app.plugin.assembler.sleigh.grammars.AssemblySentential.TruncatedW import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseActionGotoTable.*; import ghidra.app.plugin.assembler.sleigh.symbol.*; import ghidra.app.plugin.assembler.sleigh.tree.*; +import ghidra.app.plugin.assembler.sleigh.util.AsmUtil; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx; -import ghidra.app.plugin.assembler.sleigh.util.SleighUtil; /** * A class that implements the LALR(1) parsing algorithm * - * Instances of this class store a parse state. In order to work correctly, the class must be - * given a properly-constructed Action/Goto table. + *

+ * Instances of this class store a parse state. In order to work correctly, the class must be given + * a properly-constructed Action/Goto table. * + *

* This implementation is somewhat unconventional. First, instead of strictly tokenizing and then * parsing, each terminal is given the opportunity to match a token in the input. If none match, it * results in a syntax error (equivalent to the token type having an empty cell in the classical @@ -62,8 +64,7 @@ public class AssemblyParseMachine implements Comparable { // The last token we consumed (i.e., last terminal pushed to the stack) protected AssemblyParseToken lastTok; - // A set of labels that identify valid tokens for some terminals - protected final Map labels; // used for label -> number substitution + protected final AssemblyNumericSymbols symbols; // used for symbol -> number substitution protected boolean accepted = false; // the machine is in the accepted state protected int error = ERROR_NONE; // non-zero if the machine is in an error state @@ -74,24 +75,25 @@ public class AssemblyParseMachine implements Comparable { static int nextMachineId = 0; - static final DbgTimer dbg = DbgTimer.INACTIVE; + static final DbgTimer DBG = DbgTimer.INACTIVE; /** * Construct a new parse state + * * @param parser the parser driving this machine * @param input the full input line * @param pos the position in the line identifying the next characters to parse * @param labels a map of valid tokens to number for numeric terminals */ public AssemblyParseMachine(AssemblyParser parser, String input, int pos, - AssemblyParseToken lastTok, Map labels) { + AssemblyParseToken lastTok, AssemblyNumericSymbols symbols) { this.parser = parser; this.stack.push(0); this.buffer = input; this.pos = pos; this.lastTok = lastTok; this.id = nextMachineId++; - this.labels = labels; + this.symbols = symbols; } /* ******************************************************************************************** @@ -155,12 +157,12 @@ public class AssemblyParseMachine implements Comparable { return result; } - result = SleighUtil.compareInOrder(this.stack, that.stack); + result = 
AsmUtil.compareInOrder(this.stack, that.stack); if (result != 0) { return result; } - result = SleighUtil.compareInOrder(this.output, that.output); + result = AsmUtil.compareInOrder(this.output, that.output); if (result != 0) { return result; } @@ -182,11 +184,13 @@ public class AssemblyParseMachine implements Comparable { /** * Duplicate this machine state * + *

* This is used extensively when branching + * * @return the duplicate */ public AssemblyParseMachine copy() { - AssemblyParseMachine c = new AssemblyParseMachine(parser, buffer, pos, lastTok, labels); + AssemblyParseMachine c = new AssemblyParseMachine(parser, buffer, pos, lastTok, symbols); // leave labels copied by reference c.output.clear(); @@ -201,25 +205,25 @@ public class AssemblyParseMachine implements Comparable { c.accepted = accepted; c.error = error; - dbg.println("Copied " + id + " to " + c.id); + DBG.println("Copied " + id + " to " + c.id); return c; } /** * Perform a given action and continue parsing, exhausting all results after the action + * + *

+ * The visited list prevents infinite loops or stack overflows resulting from consuming epsilon + * and going to the same state. Such loops may involve many states. + * * @param a the action * @param tok the token given by the terminal (column) of the entry containing this action * @param results a place to store all the parsing results (each must be accept or error state) * @param visited a collection of machine states already visited - * - * The visited "collection" prevents infinite loops or stack overflows resulting from - * "consuming" epsilon and going to the same state. Such loops may involve many states. It is - * also defined as a map here for debugging purposes, so that when a loop is detected, we can - * print the ID of the first visit. */ protected void doAction(Action a, AssemblyParseToken tok, Set results, Deque visited) { - try (DbgCtx dc = dbg.start("Action: " + a)) { + try (DbgCtx dc = DBG.start("Action: " + a)) { if (a instanceof ShiftAction) { AssemblyParseMachine m = copy(); m.stack.push(((ShiftAction) a).newStateNum); @@ -233,15 +237,15 @@ public class AssemblyParseMachine implements Comparable { AssemblyParseBranch branch = new AssemblyParseBranch(parser.grammar, prod); AssemblyParseMachine m = copy(); m.output.add(prod.getIndex()); - dbg.println("Prod: " + prod); + DBG.println("Prod: " + prod); for (@SuppressWarnings("unused") - AssemblySymbol sym : prod) { + AssemblySymbol sym : prod.getRHS()) { m.stack.pop(); branch.addChild(m.treeStack.pop()); } for (Action aa : m.parser.actions.get(m.stack.peek(), prod.getLHS())) { GotoAction ga = (GotoAction) aa; - dbg.println("Goto: " + ga); + DBG.println("Goto: " + ga); AssemblyParseMachine n = m.copy(); n.stack.push(ga.newStateNum); n.treeStack.push(branch); @@ -258,6 +262,7 @@ public class AssemblyParseMachine implements Comparable { /** * Consume a given terminal (and corresponding token) and continue parsing + * * @param t the terminal * @param tok the corresponding token * @param results a place 
to store all the parsing results @@ -265,10 +270,10 @@ public class AssemblyParseMachine implements Comparable { */ protected void consume(AssemblyTerminal t, AssemblyParseToken tok, Set results, Deque visited) { - try (DbgCtx dc = dbg.start("Matched " + t + " " + tok)) { + try (DbgCtx dc = DBG.start("Matched " + t + " " + tok)) { Collection as = parser.actions.get(stack.peek(), t); assert !as.isEmpty(); - dbg.println("Actions: " + as); + DBG.println("Actions: " + as); for (Action a : as) { doAction(a, tok, results, visited); } @@ -278,7 +283,9 @@ public class AssemblyParseMachine implements Comparable { /** * Look for previous machine states having the same stack and position * + *

* This would imply we have gone in a loop without consuming anything. We need to prune. + * * @param machine the machine state to check * @param visited the stack of previous machine states * @return if there is a loop, the machine state proving it, null otherwise @@ -307,15 +314,16 @@ public class AssemblyParseMachine implements Comparable { /** * Parse (or continue parsing) all possible trees from this machine state + * * @param results a place to store all the parsing results * @param visited a collection of machine states already visited */ protected void exhaust(Set results, Deque visited) { - try (DbgCtx dc = dbg.start("Exhausting machine " + id)) { - dbg.println("Machine: " + this); + try (DbgCtx dc = DBG.start("Exhausting machine " + id)) { + DBG.println("Machine: " + this); AssemblyParseMachine loop = findLoop(this, visited); if (loop != null) { - dbg.println("Pruned. Loop of " + loop.id); + DBG.println("Pruned. Loop of " + loop.id); return; } try (DequePush push = DequePush.push(visited, this)) { @@ -332,7 +340,7 @@ public class AssemblyParseMachine implements Comparable { } Set unmatched = new TreeSet<>(terms); for (AssemblyTerminal t : terms) { - for (AssemblyParseToken tok : t.match(buffer, pos, parser.grammar, labels)) { + for (AssemblyParseToken tok : t.match(buffer, pos, parser.grammar, symbols)) { unmatched.remove(t); assert buffer.regionMatches(pos, tok.getString(), 0, tok.getString().length()); @@ -350,9 +358,9 @@ public class AssemblyParseMachine implements Comparable { newExpected = new TreeSet<>(); newExpected.add(AssemblySentential.WHITE_SPACE); } - dbg.println("Syntax Error: "); - dbg.println(" Expected: " + newExpected); - dbg.println(" Got: " + buffer.substring(pos)); + DBG.println("Syntax Error: "); + DBG.println(" Expected: " + newExpected); + DBG.println(" Got: " + buffer.substring(pos)); m.error = ERROR_SYNTAX; m.got = buffer.substring(pos); m.expected = newExpected; @@ -365,6 +373,7 @@ public class AssemblyParseMachine implements 
Comparable { /** * Parse (or continue parsing) all possible trees from this machine state + * * @return the set of all possible trees and errors */ public Set exhaust() { @@ -376,6 +385,7 @@ public class AssemblyParseMachine implements Comparable { /** * If in the accepted state, get the resulting parse tree for this machine + * * @return the parse tree */ public AssemblyParseBranch getTree() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseResult.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseResult.java index c6051f1c08..1c8b4b6386 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseResult.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParseResult.java @@ -22,6 +22,7 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseBranch; /** * A result of parsing a sentence * + *

* If the sentence was accepted, this yields a parse tree. If not, this describes the error and * provides suggestions to correct the error. */ @@ -29,6 +30,7 @@ public abstract class AssemblyParseResult implements Comparable + * Each item consists of a kernel and an implied closure. Only the kernel is necessary to define the + * item, but the whole closure must be considered when deriving new states. The kernel can be + * retrieved and mutated via {@link #getKernel()}, then the closure derived from it via + * {@link #getClosure()}. */ -public class AssemblyParseState extends AbstractSetDecorator - implements Comparable { +public class AssemblyParseState implements Comparable { private final AssemblyGrammar grammar; private final Set kernel = new LinkedHashSet<>(); private Set closure; /** * Construct a new state associated with the given grammar + * * @param grammar the grammar */ public AssemblyParseState(AssemblyGrammar grammar) { @@ -44,6 +45,7 @@ public class AssemblyParseState extends AbstractSetDecorator decorated() { + /** + * Get the (mutable) kernel for this state + * + * @return the kernel + */ + public Set getKernel() { return kernel; } /** * Get the closure of this item, caching the result + * * @return the closure */ public Set getClosure() { @@ -93,7 +100,7 @@ public class AssemblyParseState extends AbstractSetDecorator * An item is a production with a dot indicating a position while parsing */ public class AssemblyParseStateItem implements Comparable { @@ -32,6 +33,7 @@ public class AssemblyParseStateItem implements Comparable prod.size()) { + if (pos > prod.getRHS().size()) { throw new AssertionError("INTERNAL: Attempt to advance beyond end of RHS"); } } /** * Advance the dot by one position, producing a new item + * * @return the new item */ public AssemblyParseStateItem read() { @@ -63,20 +67,24 @@ public class AssemblyParseStateItem implements Comparable * This is the symbol which must be matched to advance the dot. 
+ * * @return the symbol, or null if the item is completed, i.e., the dot is at the far right */ public AssemblySymbol getNext() { if (completed()) { return null; } - return prod.get(pos); + return prod.getRHS().getSymbol(pos); } /** * "Fill" one step out to close a state containing this item * + *

* To compute the full closure, you must continue stepping out until no new items are generated + * * @param grammar the grammar containing the production * @return a subset of items in the closure of a state containing this item */ @@ -137,8 +145,9 @@ public class AssemblyParseStateItem implements Comparable prec = prod.subList(0, pos); - AssemblySentential proc = prod.subList(pos, prod.size()); + AssemblySentential rhs = prod.getRHS(); + AssemblySentential prec = rhs.sub(0, pos); + AssemblySentential proc = rhs.sub(pos, rhs.size()); StringBuilder sb = new StringBuilder(prod.getIndex() + ". " + prod.getLHS() + " => "); if (prec.size() != 0) { sb.append(prec + " "); @@ -153,18 +162,22 @@ public class AssemblyParseStateItem implements Comparable * The item is completed if all symbols have been matched, i.e., the dot is at the far right of * the production. + * * @return true iff the item is completed */ public boolean completed() { - return (pos == prod.size()); + return (pos == prod.getRHS().size()); } /** * Get the position of the dot * + *

* The position is the number of symbols to the left of the dot. + * * @return */ public int getPos() { @@ -173,6 +186,7 @@ public class AssemblyParseStateItem implements Comparable + * NOTE: Generally, if this returns non-null, something is probably wrong with your LR(0) + * machine generator + * * @param fromState the source state * @param next the symbol that is matched * @param newState the destination state * @return the previous value for newState - * - * NOTE: Generally, if this return non-null, something is probably wrong with your LR(0) - * machine generator */ public Integer put(int fromState, AssemblySymbol next, int newState) { return map.put(new TableEntryKey(fromState, next), newState); @@ -46,6 +48,7 @@ public class AssemblyParseTransitionTable { /** * Get an entry from the state machine + * * @param fromState the source state * @param next the symbol that has been matched * @return the destination state @@ -56,6 +59,7 @@ public class AssemblyParseTransitionTable { /** * Traverse every entry in the table, invoking {@link Consumer#accept(Object)} on each + * * @param consumer the callback */ public void forEach(Consumer> consumer) { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParser.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParser.java index ae48977961..6f2b712974 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParser.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/parse/AssemblyParser.java @@ -31,16 +31,20 @@ import ghidra.app.plugin.assembler.sleigh.util.TableEntry; /** * A class to encapsulate LALR(1) parsing for a given grammar * - * This class constructs the Action/Goto table (and all the other trappings) of a LALR(1) parser - * and provides a {@link #parse(String)} method to parse actual sentences. + *

+ * This class constructs the Action/Goto table (and all the other trappings) of a LALR(1) parser and + * provides a {@link #parse(String)} method to parse actual sentences. * + *

* This implementation is somewhat unconventional in that it permits ambiguous grammars. Instead of * complaining, it produces the set of all possible parse trees. Of course, this comes at the cost * of some efficiency. * + *

* See Alfred V. Aho, Monica S. Lam, Ravi Sethi, Jeffrey D. Ullman, Compilers: Principles, - * Techniques, & Tools. Bostom, MA: Pearson, 2007. + * Techniques, & Tools. Boston, MA: Pearson, 2007. * + *

* See Jackson, Stephen. LALR(1) Parsing. * Halifax, Nova Scotia, Canada: Dalhousie University. * <http://web.cs.dal.ca/~sjackson/lalr1.html> @@ -61,15 +65,15 @@ public class AssemblyParser { // the LALR(1) Action/Goto table protected AssemblyParseActionGotoTable actions; - /** A convenience to specify no labels in {@link #parse(String, Map)} */ - public static final Map EMPTY_LABELS = - Collections.unmodifiableMap(new HashMap()); - - protected static final DbgTimer dbg = DbgTimer.INACTIVE; - protected static final boolean dbg_detail = false; + /** + * Change this to {@link DbgTimer#ACTIVE} for verbose diagnostics + */ + protected static final DbgTimer DBG = DbgTimer.INACTIVE; + protected static final boolean DBG_DETAIL = false; /** * Construct a LALR(1) parser from the given grammar + * * @param grammar the grammar */ public AssemblyParser(AssemblyGrammar grammar) { @@ -86,39 +90,39 @@ public class AssemblyParser { grammar.addProduction(start, new AssemblySentential<>(grammar.getStart(), AssemblyEOI.EOI)); grammar.setStart(start); - try (DbgCtx dc = dbg.start("Computing First/Follow for General Grammar")) { + try (DbgCtx dc = DBG.start("Computing First/Follow for General Grammar")) { this.ff = new AssemblyFirstFollow(grammar); - if (dbg_detail) { - printGeneralFF(dbg); + if (DBG_DETAIL) { + printGeneralFF(DBG); } } - try (DbgCtx dc = dbg.start("Computing LR0 States and Transition Table")) { + try (DbgCtx dc = DBG.start("Computing LR0 States and Transition Table")) { buildLR0Machine(); - if (dbg_detail) { - printLR0States(dbg); - printLR0TransitionTable(dbg); + if (DBG_DETAIL) { + printLR0States(DBG); + printLR0TransitionTable(DBG); } } - try (DbgCtx dc = dbg.start("Computing Extended Grammar")) { + try (DbgCtx dc = DBG.start("Computing Extended Grammar")) { buildExtendedGrammar(); - if (dbg_detail) { - printExtendedGrammar(dbg); + if (DBG_DETAIL) { + printExtendedGrammar(DBG); } } - try (DbgCtx dc = dbg.start("Computing First/Follow for Extended Grammar")) { + try 
(DbgCtx dc = DBG.start("Computing First/Follow for Extended Grammar")) { this.extff = new AssemblyFirstFollow(extendedGrammar); - if (dbg_detail) { - printExtendedFF(dbg); + if (DBG_DETAIL) { + printExtendedFF(DBG); } } - try (DbgCtx dc = dbg.start("Computing Parse Table")) { + try (DbgCtx dc = DBG.start("Computing Parse Table")) { buildActionGotoTable(); - if (dbg_detail) { - printParseTable(dbg); + if (DBG_DETAIL) { + printParseTable(DBG); } } } @@ -145,7 +149,7 @@ public class AssemblyParser { AssemblySymbol sym = item.getNext(); if (sym != null) { AssemblyParseStateItem ni = item.read(); - go.get(sym).add(ni); + go.get(sym).getKernel().add(ni); } } // Now, add the appropriate entries to the transition table @@ -158,10 +162,12 @@ public class AssemblyParser { /** * Add a newly-constructed LR0 state, and return it's assigned number + * + *

+ * If the state already exists, this just returns its previously assigned number + * * @param state the newly-constructed state * @return the assigned number - * - * If the state already exists, this just returns its previously assigned number */ protected int addLR0State(AssemblyParseState state) { int num = states.indexOf(state); @@ -188,6 +194,7 @@ public class AssemblyParser { /** * Extend a production, using the given LR0 start state + * * @param prod the production to extend * @param start the starting LR0 state * @return the extended production, if the start state is valid for it @@ -195,17 +202,17 @@ public class AssemblyParser { protected AssemblyExtendedProduction extend(AssemblyProduction prod, int start) { AssemblySentential extR = new AssemblySentential<>(); int curState = start; - for (AssemblySymbol sym : prod) { + for (AssemblySymbol sym : prod.getRHS()) { int nextState = table.get(curState, sym); if (sym instanceof AssemblyTerminal) { - extR.add(sym); + extR.addSymbol(sym); } else if (sym instanceof AssemblyNonTerminal) { - extR.add(new AssemblyExtendedNonTerminal(curState, (AssemblyNonTerminal) sym, + extR.addSymbol(new AssemblyExtendedNonTerminal(curState, (AssemblyNonTerminal) sym, nextState)); } else { - throw new RuntimeException( + throw new AssertionError( "Internal error: all AssemblySymbols must be either terminal or non-terminal"); } curState = nextState; @@ -265,7 +272,7 @@ public class AssemblyParser { // Make $ accept on any state with a completed start item. 
nextState: for (i = 0; i < states.size(); i++) { AssemblyParseState state = states.get(i); - for (AssemblyParseStateItem item : state) { + for (AssemblyParseStateItem item : state.getKernel()) { if (item.completed() && item.getProduction().getLHS().getName().equals("$S")) { actions.putAccept(i); continue nextState; @@ -340,24 +347,28 @@ public class AssemblyParser { /** * Parse the given sentence + * * @param input the sentence to parse * @return all possible parse trees (and possible errors) */ public Iterable parse(final String input) { - return parse(input, EMPTY_LABELS); + return parse(input, AssemblyNumericSymbols.EMPTY); } /** * Parse the given sentence with the given defined labels + * + *

+ * The tokenizer for numeric terminals also accepts any key in {@code labels}. In such cases, + * the resulting token is assigned the value of the label. + * * @param input the sentence to parser * @param labels a map of label to number substitutions * @return all possible parse results (trees and errors) - * - * The tokenizer for numeric terminals also accepts any key in {@code labels.} In such cases, - * the resulting token is assigned the value of the label. */ - public Collection parse(final String input, Map labels) { - AssemblyParseMachine init = new AssemblyParseMachine(this, input, 0, null, labels); + public Collection parse(final String input, + AssemblyNumericSymbols symbols) { + AssemblyParseMachine init = new AssemblyParseMachine(this, input, 0, null, symbols); Set results = init.exhaust(); Set ret = new LinkedHashSet<>(); @@ -368,7 +379,7 @@ public class AssemblyParser { else if (m.error != 0) { Set suggestions = new TreeSet<>(); for (AssemblyTerminal t : m.expected) { - suggestions.addAll(t.getSuggestions(m.got, labels)); + suggestions.addAll(t.getSuggestions(m.got, symbols)); } ret.add(AssemblyParseResult.error(m.got, suggestions)); } @@ -395,11 +406,11 @@ public class AssemblyParser { for (int i = 0; i < states.size(); i++) { AssemblyParseState state = states.get(i); out.println("I" + i); - for (AssemblyParseStateItem item : state) { + for (AssemblyParseStateItem item : state.getKernel()) { out.println("K: " + item); } for (AssemblyParseStateItem item : state.getClosure()) { - if (!state.contains(item)) { + if (!state.getKernel().contains(item)) { out.println("C: " + item); } } @@ -519,6 +530,7 @@ public class AssemblyParser { /** * Get the grammar used to construct this parser + * * @return the grammar */ public AssemblyGrammar getGrammar() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyState.java 
b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyState.java new file mode 100644 index 0000000000..249786dfe4 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyState.java @@ -0,0 +1,88 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.Collection; +import java.util.List; +import java.util.stream.Stream; + +import ghidra.app.plugin.assembler.sleigh.util.DbgTimer; + +/** + * Base for a node in an assembly prototype + */ +public abstract class AbstractAssemblyState { + protected static final DbgTimer DBG = AssemblyTreeResolver.DBG; + + protected final AssemblyTreeResolver resolver; + protected final List path; + protected final int shift; + protected final int length; + + protected final int hash; + + /** + * Construct a node + * + * @param resolver the resolver + * @param path the path to this node for diagnostics + * @param shift the (right) shift in bytes for this operand + * @param length the length of this operand + */ + protected AbstractAssemblyState(AssemblyTreeResolver resolver, + List path, int shift, int length) { + this.resolver = resolver; + this.path = path; + this.shift = shift; + this.length = length; + + this.hash = computeHash(); + } + + @Override + public int hashCode() { + return hash; + } + + /** + * Pre compute this 
nodes hash + * + * @return the hash + */ + public abstract int computeHash(); + + @Override + public abstract boolean equals(Object obj); + + /** + * Generate machine (partial) code for this node + * + * @param fromRight the accumulated patterns thus far, from the right sibling or left-most child + * @param errors a place to collect error reports + * @return the stream of generated patterns, as accumulated + */ + protected abstract Stream resolve(AssemblyResolvedPatterns fromRight, + Collection errors); + + /** + * Get the length in bytes of the operand represented by this node + * + * @return the length + */ + public int getLength() { + return length; + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyStateGenerator.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyStateGenerator.java new file mode 100644 index 0000000000..6fc28bace6 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyStateGenerator.java @@ -0,0 +1,112 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseTreeNode; +import ghidra.app.plugin.assembler.sleigh.util.DbgTimer; + +/** + * Base class for generating prototype nodes ("states") from a parse tree node + * + * @param the type of parse tree node to process + */ +public abstract class AbstractAssemblyStateGenerator { + protected static final DbgTimer DBG = AssemblyTreeResolver.DBG; + + /** + * Context to pass along as states are generated + */ + protected static class GeneratorContext { + + /** + * Render the path as a printable string + * + * @param path the path + * @return the string + */ + public static String pathToString(List path) { + return "[" + + path.stream().map(sem -> sem.getLocation()).collect(Collectors.joining(",")) + "]"; + } + + final List path; + final int shift; + + /** + * Construct a context + * + * @param path the path of constructors, for diagnostics + * @param shift the (right) shift in bytes of the operand whose state is being generated + */ + public GeneratorContext(List path, int shift) { + this.path = List.copyOf(path); + this.shift = shift; + } + + /** + * Construct a context suitable for descent into an operand + * + * @param cons the parent constructor + * @param shift the shift offset of the operand + * @return the context + */ + public GeneratorContext push(AssemblyConstructorSemantic cons, int shift) { + List path = new ArrayList<>(this.path); + path.add(cons); + return new GeneratorContext(path, this.shift + shift); + } + + /** + * Print a debug line + * + * @param string the message + */ + public void dbg(String string) { + DBG.println(pathToString(path) + ":" + string); + } + } + + protected final AssemblyTreeResolver resolver; + protected final N node; + protected final AssemblyResolvedPatterns fromLeft; + + /** + * Construct a generator 
+ * + * @param resolver the resolver + * @param node the node from which to generate states + * @param fromLeft the accumulated patterns from the left sibling or the parent + */ + public AbstractAssemblyStateGenerator(AssemblyTreeResolver resolver, N node, + AssemblyResolvedPatterns fromLeft) { + this.resolver = resolver; + this.node = node; + this.fromLeft = fromLeft; + } + + /** + * Generate states + * + * @param gc the generator context for this node + * @return the stream of prototypes, each including accumulated patterns + */ + public abstract Stream generate(GeneratorContext gc); +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructState.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructState.java new file mode 100644 index 0000000000..157985208c --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructState.java @@ -0,0 +1,214 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import ghidra.app.plugin.processors.sleigh.ConstructState; + +/** + * The state corresponding to a sub-table operand + * + *

+ * This is roughly analogous to {@link ConstructState}, but for assembly. It records the assembly + * semantic, i.e., SLEIGH constructor, and the child states, one for each operand in the + * constructor. Its implementation of {@link #resolve(AssemblyResolvedPatterns, Collection)} + * encapsulates, perhaps the very kernel of, machine-code generation. Operands can have their own + * complexity, but most of the core machine-code concepts of SLEIGH are handled by constructors. + */ +public class AssemblyConstructState extends AbstractAssemblyState { + + /** + * Compute the farthest end byte (exclusive) among the given operands + * + * @param operands the operands + * @return the farthest end byte + */ + protected static int computeEnd(List operands) { + return operands.stream() + .map(s -> s.shift + s.length) + .reduce(0, Integer::max); + } + + protected final AssemblyConstructorSemantic sem; + protected final List children; + + /** + * Construct the state for a selected SLEIGH constructor of a sub-table operand + * + *

+ * The operand's length is computed from the constructor's length and the shifts and lengths of + * its generated operands. + * + * @param resolver the resolver + * @param path the path for diagnostics + * @param shift the (right) shift of this operand + * @param sem the selected SLEIGH constructor + * @param children the child state for each operand in the constructor + */ + public AssemblyConstructState(AssemblyTreeResolver resolver, + List path, int shift, + AssemblyConstructorSemantic sem, List children) { + super(resolver, path, shift, + Integer.max(computeEnd(children) - shift, sem.cons.getMinimumLength())); + this.sem = sem; + this.children = children; + } + + @Override + public int computeHash() { + return Objects.hash(getClass(), shift, sem, children); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof AssemblyConstructState)) { + return false; + } + AssemblyConstructState that = (AssemblyConstructState) obj; + if (this.resolver != that.resolver) { + return false; + } + if (this.shift != that.shift) { + return false; + } + if (!Objects.equals(this.sem, that.sem)) { + return false; + } + if (!Objects.equals(this.children, that.children)) { + return false; + } + return true; + } + + @Override + public String toString() { + return sem.getLocation() + "[" + + children.stream().map(s -> s.toString()).collect(Collectors.joining(",")) + "]"; + } + + /** + * {@inheritDoc} + * + *

+ * Currently, this is used to generate machine-code from a generated assembly instruction + * prototype, but it is not used to apply recursive constructors, i.e., for prefix generation. + * TODO: That should change. This performs the reverse of the machine-code parsing process, both + * in concept and in implementation. First, it descends to the children. Each child is a + * {@link AbstractAssemblyState}, i.e., either another constructor, or a value operand. (There + * are also specializations for dealing with hidden constructor and value operands.) Then it + * solves context changes, in the reverse order of the specification. Finally, it applies the + * patterns, in order to satisfy the constraints specified by the constructor. As a final + * detail, it records, for diagnostic purposes, the intermediate child patterns into the parent + * pattern. + */ + @Override + protected Stream resolve(AssemblyResolvedPatterns fromRight, + Collection errors) { + String desc = "Resolving constructor: " + sem.getLocation(); + return resolveRemainingChildren(fromRight, errors, children) + .flatMap(fromChildren -> resolveMutations(fromChildren, errors)) + .flatMap(fromMutations -> resolvePatterns(fromMutations, errors)) + .map(pat -> pat.parent(desc, children.size()).withConstructor(sem.cons)); + } + + /** + * Apply each possible pattern for the selected constructor + * + * @param fromMutations the assembly pattern after mutations were solved + * @param errors a place to collect errors + * @return the stream of patterns, as accumulated with {@code fromMutations} + */ + protected Stream resolvePatterns( + AssemblyResolvedPatterns fromMutations, Collection errors) { + return sem.getPatterns() + .stream() + .map(pat -> { + DBG.println(path + ": Constructor pattern: " + pat.lineToString()); + DBG.println(path + ": Current pattern: " + fromMutations.lineToString()); + AssemblyResolvedPatterns combined = fromMutations.combine(pat.shift(shift)); + //DBG.println("Combined pattern: " + 
combined); + return combined; + }) + .filter(ar -> { + if (ar == null) { + errors.add(AssemblyResolution.error("Pattern conflict", + "Resolving " + sem.getLocation() + " in " + path)); + return false; + } + return true; + }); + } + + /** + * Solve the mutations for the selected constructor + * + * @param fromChildren the assembly pattern as accumulated from the left-most child + * @param errors a place to collect errors + * @return the stream of patterns, as accumulated with {@code fromChildren} + */ + protected Stream resolveMutations( + AssemblyResolvedPatterns fromChildren, Collection errors) { + AssemblyResolution ar = sem.solveContextChanges(fromChildren, resolver.vals); + if (ar.isError()) { + errors.add((AssemblyResolvedError) ar); + return Stream.of(); + } + if (ar.isBackfill()) { + throw new AssertionError(); + } + AssemblyResolvedPatterns pat = (AssemblyResolvedPatterns) ar; + return Stream.of(pat.solveContextChangesForForbids(sem, resolver.vals)); + } + + /** + * A recursive function for resolving all children right-to-left and accumulating the patterns + * + *

+ * This pops the right-most child in {@code children}, resolves it, and then recurses, passing + * the accumulated patterns in as {@code fromRight} with the remaining children. + * {@link Stream#flatMap(java.util.function.Function)} makes this somewhat fluent, given the + * possibility of multiple resolutions. + * + * @param fromRight the assembly pattern as accumulated from the right sibling. If this is the + * right-most sibling, then this is the pattern accumulated from the parent's right + * sibling, and so on. If no such sibling exists, it is the unrestricted (empty) + * pattern. + * @param errors a place to collect errors + * @param children the remaining children to resolve + * @return the stream of accumulated patterns + */ + protected Stream resolveRemainingChildren( + AssemblyResolvedPatterns fromRight, Collection errors, + List children) { + + // Need to resolve children (as they apply context changes) from right to left + if (children.isEmpty()) { + return Stream.of(fromRight); + } + + AbstractAssemblyState rightMost = children.get(children.size() - 1); + return rightMost.resolve(fromRight, errors).flatMap(fromChild -> { + return resolveRemainingChildren(fromChild, errors, + children.subList(0, children.size() - 1)); + }); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructStateGenerator.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructStateGenerator.java new file mode 100644 index 0000000000..879c363f8a --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructStateGenerator.java @@ -0,0 +1,212 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.*; +import java.util.stream.Stream; + +import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction; +import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol; +import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseBranch; +import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseTreeNode; +import ghidra.app.plugin.assembler.sleigh.util.AsmUtil; +import ghidra.app.plugin.processors.sleigh.Constructor; +import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; + +/** + * The generator of {@link AssemblyConstructState} from {@link AssemblyParseBranch} + * + *

+ * In short, this handles the selection of each possible constructor for the production recorded by + * a given parse branch. + */ +public class AssemblyConstructStateGenerator + extends AbstractAssemblyStateGenerator { + + /** + * Construct the instruction state generator or a sub-table operand state generator + * + * @param resolver the resolver + * @param node the node from which to generate states + * @param fromLeft the accumulated patterns from the left sibling or the parent + */ + public AssemblyConstructStateGenerator(AssemblyTreeResolver resolver, AssemblyParseBranch node, + AssemblyResolvedPatterns fromLeft) { + super(resolver, node, fromLeft); + } + + @Override + public Stream generate(GeneratorContext gc) { + AssemblyProduction production = node.getProduction(); + return resolver.grammar.getSemantics(production) + .stream() + .flatMap(sem -> applyConstructor(gc, sem)); + } + + /** + * Arrange the branch's (mnemonic) children according to the machine-code production + * + *

+ * This orders the parsed children so that each is readily paired to its operand as given by + * {@link Constructor#getOperand(int)}. + * + * @param sem the SLEIGH constructor whose machine-code production to consider + * @return the children arranged in constructor operand order + */ + protected List orderOpNodes(AssemblyConstructorSemantic sem) { + Constructor cons = sem.getConstructor(); + List result = + Arrays.asList(new AssemblyParseTreeNode[cons.getNumOperands()]); + int index = 0; + AssemblyProduction production = node.getProduction(); + List substitutions = node.getSubstitutions(); + for (int i = 0; i < production.getRHS().size(); i++) { + AssemblySymbol sym = production.getRHS().getSymbol(i); + if (!sym.takesOperandIndex()) { + continue; + } + result.set(sem.getOperandIndex(index), substitutions.get(i)); + index++; + } + return result; + } + + /** + * Generate prototypes, considering the given SLEIGH constructor + * + *

+ * This comprises three steps: apply patterns, apply context changes, apply operands + * left-to-right. + * + * @param gc the generator context for this node + * @param sem the SLEIGH constructor to apply + * @return the stream of generated (sub) prototypes + */ + protected Stream applyConstructor(GeneratorContext gc, + AssemblyConstructorSemantic sem) { + Stream applied = sem.applyPatternsForward(gc.shift, fromLeft) + .filter(pat -> { + if (pat == null) { + gc.dbg("Conflicting pattern. fromLeft=" + fromLeft + ",sem=" + + sem.getLocation()); + return false; + } + return true; + }) + .map(pat -> sem.applyContextChangesForward(resolver.vals, pat)); + List opOrdered = orderOpNodes(sem); + return applied.flatMap( + patterned -> applyOperands(gc, patterned, sem, opOrdered)); + } + + /** + * Generate prototypes by considering all the operands of the given SLEIGH constructor + * + *

+ * This is the last step of applying a constructor. + * + * @param gc the generator context for this node + * @param fromMutations the patterns as accumulated after context changes + * @param sem the selected SLEIGH constructor + * @param opOrdered the parsed children ordered as the constructor's operands + * @return the stream of generated (sub) prototypes + */ + protected Stream applyOperands(GeneratorContext gc, + AssemblyResolvedPatterns fromMutations, AssemblyConstructorSemantic sem, + List opOrdered) { + Constructor cons = sem.getConstructor(); + List siblingGcs = + Arrays.asList(new GeneratorContext[cons.getNumOperands()]); + return applyRemainingOperands(gc, siblingGcs, fromMutations, sem, opOrdered, List.of()); + } + + /** + * A recursive function for generating child operand prototypes and constructing the parent(s) + * + *

+ * The implementation generates states for the left-most node not yet considered. It knows which + * is next by examining the length of {@code children}, which records the generated state for + * each child already considered. It then appends the result to {@code children} and recurses, + * using the resulting patterns as {@code fromLeft}. Given that multiple prototypes can be + * generated, {@link Stream#flatMap(java.util.function.Function)} makes the recursive invocation + * somewhat fluent. The base case occurs when all children have states generated. It constructs + * the state for this node, storing the generated children with it. + * + *

+ * This routine is also operative in computing shifts, since the offset of each operand is + * incorporated here. Two accessors are needed to compute the offset: + * {@link OperandSymbol#getOffsetBase()} and {@link OperandSymbol#getRelativeOffset()}. The + * former identifies which operand's end (exclusive) byte is the base of the offset. The latter + * specifies an additional number of bytes to the right. Consider an operand consisting of three + * operands, each consuming a 1-byte token. + * + *

+	 * +-----+-----+-----+
+	 * | op0 | op1 | op2 |
+	 * +-----+-----+-----+
+	 *  ^-1   ^0    ^1    ^2
+	 * 
+ * + *

+ * A base offset of 0 would indicate that the overall offset is the end of op0 (relative to the + * parent op) plus the relative offset. A base offset of -1 is special, but is easy to + * conceptualize from the diagram. It indicates the beginning byte of the parent op. Thus every + * child operand boundary is numbered. The offset base must always refer to an operand to the + * left. + * + * @param parentGc the generator context for this node + * @param childGcs a list to collect the generator context for each child operand. The root + * invocation should pass a fixed-length mutable list of nulls, one for each child. + * @param fromLeft the accumulated patterns from the left sibling. The root invocation should + * pass the patterns accumulated after context changes. + * @param sem the selected SLEIGH constructor, whose operands to generate + * @param opOrdered the parsed children ordered as the constructor's operands + * @param children the list of children generated so far. The root invocation should pass the + * empty list. + * @return the stream of generated (sub) prototypes + */ + protected Stream applyRemainingOperands(GeneratorContext parentGc, + List childGcs, AssemblyResolvedPatterns fromLeft, + AssemblyConstructorSemantic sem, List opOrdered, + List children) { + Constructor cons = sem.getConstructor(); + int opIdx = children.size(); + if (opIdx == cons.getNumOperands()) { + // We're done! 
+ return Stream.of(new AssemblyGeneratedPrototype( + new AssemblyConstructState(resolver, parentGc.path, parentGc.shift, sem, children), + fromLeft)); + } + AssemblyParseTreeNode opNode = opOrdered.get(opIdx); + OperandSymbol opSym = cons.getOperand(opIdx); + int offset = opSym.getRelativeOffset(); + int offsetBase = opSym.getOffsetBase(); + if (-1 != offsetBase) { + int baseShift = childGcs.get(offsetBase).shift; + int baseLength = children.get(offsetBase).getLength(); + offset += baseShift - parentGc.shift + baseLength; + } + + AbstractAssemblyStateGenerator opGen = + resolver.getStateGenerator(opSym, opNode, fromLeft); + GeneratorContext opGc = parentGc.push(sem, offset); + childGcs.set(opIdx, opGc); + return opGen.generate(opGc).flatMap(prot -> { + return applyRemainingOperands(parentGc, new ArrayList<>(childGcs), prot.patterns, sem, + opOrdered, AsmUtil.extendList(children, prot.state)); + }); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructorSemantic.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructorSemantic.java index 38f4c5be69..a19a171fce 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructorSemantic.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyConstructorSemantic.java @@ -16,6 +16,7 @@ package ghidra.app.plugin.assembler.sleigh.sem; import java.util.*; +import java.util.stream.Stream; import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong; import ghidra.app.plugin.assembler.sleigh.expr.RecursiveDescentSolver; @@ -30,19 +31,22 @@ import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol; /** * Describes a SLEIGH constructor semantic * + *

* These are collected and associated with productions in the grammar based on the given * constructor's print pieces. */ public class AssemblyConstructorSemantic implements Comparable { - protected static final RecursiveDescentSolver solver = RecursiveDescentSolver.getSolver(); - protected static final DbgTimer dbg = AssemblyTreeResolver.dbg; + protected static final RecursiveDescentSolver SOLVER = RecursiveDescentSolver.getSolver(); + protected static final DbgTimer DBG = AssemblyTreeResolver.DBG; - protected final Set patterns = new HashSet<>(); + protected final Set patterns = new HashSet<>(); protected final Constructor cons; protected final List indices; + protected final List contextChanges; + protected final List reversedChanges; // A set initialized on first access with forbidden patterns added - protected Set upatterns; + protected Set upatterns; /** * Build a new SLEIGH constructor semantic @@ -54,13 +58,28 @@ public class AssemblyConstructorSemantic implements Comparable indices) { this.cons = cons; this.indices = Collections.unmodifiableList(indices); + List changes = new ArrayList<>(cons.getContextChanges()); + this.contextChanges = List.copyOf(changes); + Collections.reverse(changes); + this.reversedChanges = List.copyOf(changes); } + /** + * Record a pattern that would select the constructor + * + * @param pat the pattern + */ public void addPattern(DisjointPattern pat) { - addPattern(AssemblyResolution.fromPattern(pat, cons.getMinimumLength(), cons.toString())); + addPattern(AssemblyResolution.fromPattern(pat, cons.getMinimumLength(), + "Generated constructor pattern " + getLocation(), cons)); } - public void addPattern(AssemblyResolvedConstructor pat) { + /** + * Record a pattern that would select the constructor + * + * @param pat the pattern + */ + public void addPattern(AssemblyResolvedPatterns pat) { if (upatterns != null) { throw new IllegalStateException("Cannot add patterns after a call to getPatterns()"); } @@ -69,7 +88,26 @@ public class 
AssemblyConstructorSemantic implements Comparable getPatterns() { + public Collection getPatterns() { if (upatterns == null) { computeAllForbids(); } @@ -115,6 +153,7 @@ public class AssemblyConstructorSemantic implements Comparable * If this iterator is advanced for each non-terminal, while simultaneously iterating over the * RHS of the associated production, then this will identify the corresponding operand index for * each non-terminal @@ -133,9 +172,9 @@ public class AssemblyConstructorSemantic implements Comparable result = new HashSet<>(); - for (AssemblyResolvedConstructor pat : patterns) { - AssemblyResolvedConstructor fpat = withComputedForbids(pat); + Set result = new HashSet<>(); + for (AssemblyResolvedPatterns pat : patterns) { + AssemblyResolvedPatterns fpat = withComputedForbids(pat); result.add(fpat); } upatterns = Collections.unmodifiableSet(result); @@ -144,6 +183,7 @@ public class AssemblyConstructorSemantic implements Comparable * SLEIGH disambiguates multiple matching pattern by two rules. First, if one is more specific * than ("specializes") another, i.e., it matches on more bits than another pattern, the more * specific pattern is chosen. Second, if the two are equally special, then the one that occurs @@ -151,18 +191,20 @@ public class AssemblyConstructorSemantic implements Comparable * Essentially, this states, "you may choose any value matching my pattern, except those that * match these forbidden patterns." * + *

* This takes a given pattern, and searches the rest of the language for any patterns that would * take precedence, and combines them as forbidden patterns with the given pattern. * * @param pat the given pattern * @return the same pattern with forbidden records added */ - protected AssemblyResolvedConstructor withComputedForbids(AssemblyResolvedConstructor pat) { + protected AssemblyResolvedPatterns withComputedForbids(AssemblyResolvedPatterns pat) { // Forbid anything more specific (or otherwise takes precedence) over me. - Set forbids = new HashSet<>(); + Set forbids = new HashSet<>(); SubtableSymbol parent = cons.getParent(); SleighLanguages.traverseConstructors(parent, new SubtableEntryVisitor() { @@ -173,45 +215,58 @@ public class AssemblyConstructorSemantic implements Comparable + * Each value in {@code opvals} must either be a numeric value, e.g., an index from a varnode + * list, or another {@link AssemblyResolvedPatterns} for a subconstructor operand. + * + *

+ * It's helpful to think of the SLEIGH disassembly process here. Normally, once the appropriate + * constructor has been identified (by matching patterns), its context changes are applied, and + * then its operands parsed (possibly parsing subconstructor operands). Thus, {@code res} can be + * thought of as the intermediate result between applying context changes and parsing operands, + * except in reverse. The output of this method corresponds to the state before context changes + * were applied, i.e., immediately after selecting the constructor. Thus, in reverse, the + * context is solved immediately before applying the selected constructor patterns. + * * @param res the combined resolution requirements derived from the subconstructors * @param vals any defined symbols (usually {@code inst_start}, and {@code inst_next}) * @param opvals a map from operand index to operand value * @return the resolution with context changes applied in reverse, or an error * - * Each value in {@code opvals} must either be a numeric value, e.g., an index from a - * varnode list, or another {@link AssemblyResolvedConstructor} for a subconstructor - * operand. - * - * It's helpful to think of the SLEIGH disassembly process here. Normally, once the - * appropriate constructor has been identified (by matching patterns), its context - * changes are applied, and then its operands parsed (possibly parsing subconstructor - * operands). Thus, {@code res} can be thought of as the intermediate result between - * applying context changes and parsing operands, except in reverse. The output of this - * method corresponds to the state before context changes were applied, i.e., - * immediately after selecting the constructor. Thus, in reverse, the context is solved - * immediately before applying the selected constructor patterns. 
- * * @see AssemblyTreeResolver#resolveSelectedChildren(AssemblyProduction, List, List, Collection) */ - public AssemblyResolution solveContextChanges(AssemblyResolvedConstructor res, - Map vals, Map opvals) { - List contextChanges = cons.getContextChanges(); - List reversed = new LinkedList<>(); - for (ContextChange chg : contextChanges) { - reversed.add(0, chg); - } - for (ContextChange chg : reversed) { + public AssemblyResolution solveContextChanges(AssemblyResolvedPatterns res, + Map vals) { + for (ContextChange chg : reversedChanges) { if (chg instanceof ContextOp) { - dbg.println("Current: " + res.lineToString()); + DBG.println("Current: " + res.lineToString()); // This seems backwards. That's because we're going backwards. // This is the "write" location for disassembly. ContextOp cop = (ContextOp) chg; - dbg.println("Handling context change: " + cop); + DBG.println("Handling context change: " + cop); // TODO: Is this res or subres? MaskedLong reqval = res.readContextOp(cop); if (reqval.equals(MaskedLong.UNKS)) { - dbg.println("Doesn't affect a current requirement"); + DBG.println("Doesn't affect a current requirement"); continue; // this context change does not satisfy any requirement } - dbg.println("'read' " + reqval); + DBG.println("'read' " + reqval); // Remove the requirement that we just read before trying to solve res = res.maskOut(cop); - dbg.println("Masked out: " + res.lineToString()); + DBG.println("Masked out: " + res.lineToString()); // Now, solve AssemblyResolution sol = AssemblyTreeResolver.solveOrBackfill( - cop.getPatternExpression(), reqval, vals, opvals, res, "Solution to " + cop); - dbg.println("Solution: " + sol.lineToString()); + cop.getPatternExpression(), reqval, vals, res, "Solution to " + cop); + DBG.println("Solution: " + sol.lineToString()); if (sol.isError()) { AssemblyResolvedError err = (AssemblyResolvedError) sol; return AssemblyResolution.error(err.getError(), res); } // Now, forward the new requirements to my parents. 
- if (sol instanceof AssemblyResolvedConstructor) { - AssemblyResolvedConstructor solcon = (AssemblyResolvedConstructor) sol; - AssemblyResolvedConstructor check = res.combine(solcon); + if (sol instanceof AssemblyResolvedPatterns) { + AssemblyResolvedPatterns solcon = (AssemblyResolvedPatterns) sol; + AssemblyResolvedPatterns check = res.combine(solcon); if (null == check) { return AssemblyResolution.error( "A context change caused a conflict: " + sol, res); @@ -296,7 +346,7 @@ public class AssemblyConstructorSemantic implements Comparable + * Unlike the usual disassembly process, this method does not take into account any information + * from the instruction encoding. Any context bits that depend on it are set to unknown + * ({@code x}) in the output. This method is used to pre-compute a context transition graph in + * order to quickly resolve purely-recursive semantics on the root constructor table. * - * Unlike the usual disassembly process, this method does not take into account any - * information from the instruction encoding. Any context bits that depend on it are set - * to unknown ({@code x}) in the output. This method is used to pre-compute a context - * transition graph in order to quickly resolve purely-recursive semantics on the root - * constructor table. + * @param fromLeft the state before context changes + * @return the state after context changes */ - public AssemblyResolvedConstructor applyForward(AssemblyResolvedConstructor outer) { - AssemblyResolvedConstructor res = outer; + public AssemblyResolvedPatterns applyContextChangesForward(Map vals, + AssemblyResolvedPatterns fromLeft) { + AssemblyResolvedPatterns res = fromLeft; // TODO: Figure out semantics of ContextCommit. Not sure it matters here. 
- for (ContextChange chg : cons.getContextChanges()) { + for (ContextChange chg : contextChanges) { if (chg instanceof ContextOp) { ContextOp cop = (ContextOp) chg; - MaskedLong val = solver.valueForResolution(cop.getPatternExpression(), res); + MaskedLong val = SOLVER.valueForResolution(cop.getPatternExpression(), vals, res); res = res.writeContextOp(cop, val); } } return res; } + /** + * Apply just the instruction patterns in the forward (disassembly) direction + * + * @param shift the (right) shift in bytes to apply to the patterns before combining + * @param fromLeft the accumulated patterns from the left sibling or parent + * @return a stream of {@code fromLeft} combined with each recorded pattern, shifted by {@code shift} + */ + public Stream applyPatternsForward(int shift, + AssemblyResolvedPatterns fromLeft) { + if (patterns.isEmpty()) { + DBG.println("No patterns for " + getLocation() + "?" + "(hash=" + + System.identityHashCode(this) + ")"); + } + return patterns.stream().map(pat -> fromLeft.combine(pat.shift(shift))); + } + @Override public int compareTo(AssemblyConstructorSemantic that) { // TODO: This could be better diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyContextGraph.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyContextGraph.java index 7a3ee47be5..638eeae7da 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyContextGraph.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyContextGraph.java @@ -33,11 +33,13 @@ import ghidra.graph.algo.DijkstraShortestPathsAlgorithm; /** * A graph of possible context changes via the application of various constructors * + *

* This is used primarily to find optimal paths for the application of recursive rules, i.e., those * of the form I => I. These cannot be resolved without some form of semantic analysis. The most - * notable disadvantage to all of this is that you no longer get all of the possible assemblies, - * but only those with the fewest rule applications. + * notable disadvantage to all of this is that you no longer get all of the possible assemblies, but + * only those with the fewest rule applications. * + *

* Conceivably, this may also be used to prune some possibilities during semantic resolution of a * parse tree. Even better, it may be possible to derive a grammar which accounts for the context * changes already; however, it's unclear how many rules this will generate, and consequently, how @@ -58,12 +60,15 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph * The grammar must have been constructed from the given language. The language is used just to * obtain the most common default context. * + *

* At the moment, this graph only expands the recursive rules at the root constructor table, * i.e., "instruction". Thus, the assembler will not be able to process any language that has - * purely-recursive rules at subconstructors. + * purely-recursive rules at sub-constructors. + * * @param lang the language * @param grammar the grammar derived from the given language */ @@ -89,17 +94,16 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph> computeOptimalApplications( AssemblyPatternBlock src, String srcTable, AssemblyPatternBlock dst, String dstTable) { @@ -140,6 +144,7 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph * Currently, only semantics from {@code :^instruction} constructors are taken. */ protected void gatherSemantics() { @@ -156,14 +161,16 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph + * Each vertex consists of a context block and a (sub-)table name */ protected static class Vertex implements Comparable { protected final AssemblyPatternBlock context; protected final String subtable; /** - * Construct a new vertex with the given block and subtable name + * Construct a new vertex with the given block and sub-table name + * * @param context the context * @param subtable the name */ @@ -175,10 +182,12 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph + * This does not mean they are equal, but that they share a sub-table, and the defined bits + * of their context blocks agree. + * * @param that the other vertex - * @return true iff they share subtables and defined bits + * @return true iff they share sub-tables and defined bits */ public boolean matches(Vertex that) { if (!this.subtable.equals(that.subtable)) { @@ -233,10 +242,10 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph * A transition consists of the constructor whose context changes were applied. The operand - * index is included for reference and debugging. 
If we ever need to process rules with - * multiple subconstructors, the operand index explains the subtable name of the destination - * vertex. + * index is included for reference and debugging. If we ever need to process rules with multiple + * sub-constructors, the operand index explains the sub-table name of the destination vertex. */ protected static class Edge implements GEdge, Comparable { protected final AssemblyConstructorSemantic sem; @@ -247,6 +256,7 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph result = new HashSet<>(); for (AssemblyConstructorSemantic sem : semantics.get(from.subtable)) { - for (AssemblyResolvedConstructor rc : sem.patterns) { + for (AssemblyResolvedPatterns rc : sem.patterns) { AssemblyPatternBlock pattern = rc.ctx; AssemblyPatternBlock outer = from.context.combine(pattern); if (outer == null) { @@ -342,9 +352,9 @@ public class AssemblyContextGraph implements GImplicitDirectedGraph * I could implement this using the cached edges, but that may not be semantically, what a path * computation algorithm actually requires. Instead, I will assume the algorithm only explores * the graph in the same direction as its edges. If not, I will hear about it quickly. diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyDefaultContext.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyDefaultContext.java index 2fac8340dc..f7613850f8 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyDefaultContext.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyDefaultContext.java @@ -28,6 +28,7 @@ import ghidra.program.model.listing.DefaultProgramContext; /** * A class that computes the default context for a language, and acts as a pseudo context * + *

* This class helps maintain context consistency when performing both assembly and disassembly. */ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgramContext { @@ -41,6 +42,7 @@ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgr /** * Compute the default context at most addresses for the given language + * * @param lang the language */ public AssemblyDefaultContext(SleighLanguage lang) { @@ -49,6 +51,7 @@ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgr /** * Compute the default context at the given address for the given language + * * @param lang the language * @param at the address */ @@ -72,16 +75,23 @@ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgr /** * Set the value of the pseudo context register * + *

* If the provided value has length less than the register, it will be left aligned, and the * remaining bytes will be set to unknown (masked out). + * * @param val the value of the register */ public void setContextRegister(byte[] val) { curctx = AssemblyPatternBlock.fromBytes(0, val); } + public void setContextRegister(AssemblyPatternBlock ctx) { + curctx = curctx.combine(ctx); + } + /** * Get the default value of the context register + * * @return the value as a pattern block for assembly */ public AssemblyPatternBlock getDefault() { @@ -90,6 +100,7 @@ public class AssemblyDefaultContext implements DisassemblerContext, DefaultProgr /** * Compute the default value of the context register at the given address + * * @param addr the addres * @return the value as a pattern block for assembly */ diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyGeneratedPrototype.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyGeneratedPrototype.java new file mode 100644 index 0000000000..a1b4fcd2f7 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyGeneratedPrototype.java @@ -0,0 +1,40 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.sem; + +/** + * A tree of generated assembly node states, paired with the resulting patterns + * + *

+ * This is used as the intermediate result when generating states, since the patterns must be + * propagated to each operand as generation proceeds. Usually, the patterns in the final output are + * discarded, and machine code generation proceeds using only the state tree. + */ +public class AssemblyGeneratedPrototype { + protected final AbstractAssemblyState state; + protected final AssemblyResolvedPatterns patterns; + + public AssemblyGeneratedPrototype(AbstractAssemblyState state, + AssemblyResolvedPatterns patterns) { + this.state = state; + this.patterns = patterns; + } + + @Override + public String toString() { + return state + " [" + patterns + "]"; + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyHiddenConstructStateGenerator.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyHiddenConstructStateGenerator.java new file mode 100644 index 0000000000..cd13c5a302 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyHiddenConstructStateGenerator.java @@ -0,0 +1,65 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseTreeNode; +import ghidra.app.plugin.processors.sleigh.Constructor; +import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol; + +/** + * The generator of {@link AssemblyConstructState} for a hidden sub-table operand + * + *

+ * In short, this exhausts all possible constructors in the given sub-table. For well-designed + * languages, such exhaustion produces a very small set of possibilities. In general, hidden + * sub-table operands are a bad idea. + */ +public class AssemblyHiddenConstructStateGenerator extends AssemblyConstructStateGenerator { + protected final SubtableSymbol subtableSym; + + /** + * Construct the hidden sub-table operand state generator + * + * @param resolver the resolver + * @param subtableSym the sub-table symbol whose constructors are exhausted + * @param fromLeft the accumulated patterns from the left sibling or the parent + */ + public AssemblyHiddenConstructStateGenerator(AssemblyTreeResolver resolver, + SubtableSymbol subtableSym, AssemblyResolvedPatterns fromLeft) { + super(resolver, null, fromLeft); + this.subtableSym = subtableSym; + } + + @Override + public Stream generate(GeneratorContext gc) { + return IntStream.range(0, subtableSym.getNumConstructors()) + .mapToObj(subtableSym::getConstructor) + .map(resolver.grammar::getSemantic) + .flatMap(sem -> applyConstructor(gc, sem)); + } + + @Override + protected List orderOpNodes(AssemblyConstructorSemantic sem) { + // Just provide null operands, since they're hidden, too. + Constructor cons = sem.getConstructor(); + return Arrays.asList(new AssemblyParseTreeNode[cons.getNumOperands()]); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyNopState.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyNopState.java new file mode 100644 index 0000000000..32806079ca --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyNopState.java @@ -0,0 +1,63 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.Collection; +import java.util.List; +import java.util.stream.Stream; + +import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; + +public class AssemblyNopState extends AbstractAssemblyState { + public AssemblyNopState(AssemblyTreeResolver resolver, List path, + int shift, OperandSymbol opSym) { + super(resolver, path, shift, opSym.getMinimumLength()); + } + + @Override + public int computeHash() { + return "NOP".hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof AssemblyNopState)) { + return false; + } + AssemblyNopState that = (AssemblyNopState) obj; + if (this.resolver != that.resolver) { + return false; + } + if (this.shift != that.shift) { + return false; + } + return true; + } + + @Override + public String toString() { + return "NOP"; + } + + @Override + protected Stream resolve(AssemblyResolvedPatterns fromRight, + Collection errors) { + return Stream.of(fromRight.nopLeftSibling()); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyNopStateGenerator.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyNopStateGenerator.java new file mode 100644 index 0000000000..59a3a34d4c --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyNopStateGenerator.java @@ -0,0 +1,55 @@ +/* ### + * IP: GHIDRA + * + * Licensed under 
the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.stream.Stream; + +import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken; +import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; + +/** + * The generator of {@link AssemblyNopState} for a hidden value operand + * + *

+ * In short, this does nothing, except to hold the place of the operand for diagnostics. Likely, the + * "hidden" operand appears in the defining expression of a temporary symbol used in the print + * pieces. + */ +public class AssemblyNopStateGenerator + extends AbstractAssemblyStateGenerator { + protected final OperandSymbol opSym; + + /** + * Construct the hidden value operand state generator + * + * @param resolver the resolver + * @param opSym the operand symbol + * @param fromLeft the accumulated patterns from the left sibling or parent + */ + public AssemblyNopStateGenerator(AssemblyTreeResolver resolver, OperandSymbol opSym, + AssemblyResolvedPatterns fromLeft) { + super(resolver, null, fromLeft); + this.opSym = opSym; + } + + @Override + public Stream generate(GeneratorContext gc) { + gc.dbg("Generating NOP for " + opSym); + return Stream.of( + new AssemblyGeneratedPrototype(new AssemblyNopState(resolver, gc.path, gc.shift, opSym), + fromLeft)); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyOperandState.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyOperandState.java new file mode 100644 index 0000000000..c96ff4cacc --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyOperandState.java @@ -0,0 +1,155 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.*; +import java.util.stream.Stream; + +import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNumericTerminal; +import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyTerminal; +import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx; +import ghidra.app.plugin.processors.sleigh.ConstructState; +import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; +import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; + +/** + * The state corresponding to a non-sub-table operand + * + *

+ * This is roughly analogous to {@link ConstructState}, but for assembly. However, it also records + * the value of the operand and the actual operand symbol whose value it specifies. + */ +public class AssemblyOperandState extends AbstractAssemblyState { + protected final AssemblyTerminal terminal; + protected final long value; + protected final OperandSymbol opSym; + + /** + * Construct the state for a given operand and selected value + * + * @param resolver the resolver + * @param path the path for diagnostics + * @param shift the (right) shift of this operand + * @param terminal the terminal that generated this state + * @param value the value of the operand + * @param opSym the operand symbol + */ + public AssemblyOperandState(AssemblyTreeResolver resolver, + List path, int shift, AssemblyTerminal terminal, + long value, OperandSymbol opSym) { + super(resolver, path, shift, opSym.getMinimumLength()); + this.terminal = terminal; + this.value = value; + this.opSym = opSym; + } + + @Override + public int computeHash() { + return Objects.hash(getClass(), shift, value, opSym); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof AssemblyOperandState)) { + return false; + } + AssemblyOperandState that = (AssemblyOperandState) obj; + if (this.resolver != that.resolver) { + return false; + } + if (this.shift != that.shift) { + return false; + } + if (this.value != that.value) { + return false; + } + if (!Objects.equals(this.opSym, that.opSym)) { + return false; + } + return true; + } + + @Override + public String toString() { + return terminal + "=" + value + "(0x" + Long.toHexString(value) + ")"; + } + + /** + * Compute the size in bits of this operand's value + * + *

+ * If this operand does not have a strict size, 0 is returned. + * + * @return the size + */ + protected int computeBitsize() { + if (!(terminal instanceof AssemblyNumericTerminal)) { + return 0; + } + AssemblyNumericTerminal numeric = (AssemblyNumericTerminal) terminal; + return numeric.getBitSize(); + } + + /** + * Solve the operand's defining expression set equal to the desired value + * + * @return the resolved patterns, an error, or a backfill + */ + protected AssemblyResolution solveNumeric() { + int bitsize = computeBitsize(); + PatternExpression symExp = opSym.getDefiningExpression(); + if (symExp == null) { + symExp = opSym.getDefiningSymbol().getPatternExpression(); + } + DBG.println("Equation: " + symExp + " = " + Long.toHexString(value)); + String desc = "Solution to " + opSym + " in " + Long.toHexString(value) + " = " + symExp; + AssemblyResolution sol = + AssemblyTreeResolver.solveOrBackfill(symExp, value, bitsize, resolver.vals, null, desc); + DBG.println("Solution: " + sol); + AssemblyResolution shifted = sol.shift(shift); + DBG.println("Shifted: " + shifted); + return shifted; + } + + @Override + protected Stream resolve(AssemblyResolvedPatterns fromRight, + Collection errors) { + try (DbgCtx dc = DBG.start("Resolving " + terminal)) { + AssemblyResolution sol = solveNumeric(); + if (sol.isError()) { + errors.add((AssemblyResolvedError) sol); + return Stream.of(); + } + if (sol.isBackfill()) { + AssemblyResolvedPatterns combined = + fromRight.combine((AssemblyResolvedBackfill) sol); + return Stream.of(combined.withRight(fromRight)); + } + AssemblyResolution combined = fromRight.combine((AssemblyResolvedPatterns) sol); + if (combined == null) { + errors.add( + AssemblyResolution.error("Pattern/operand conflict", "Resolving " + terminal)); + return Stream.of(); + } + AssemblyResolvedPatterns pats = (AssemblyResolvedPatterns) combined; + // Do not take constructor from right + return Stream.of(pats.withRight(fromRight).withConstructor(null)); + } + } 
+} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyOperandStateGenerator.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyOperandStateGenerator.java new file mode 100644 index 0000000000..477e4f8cca --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyOperandStateGenerator.java @@ -0,0 +1,55 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.sem; + +import java.util.stream.Stream; + +import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken; +import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; + +/** + * The generator of {@link AssemblyOperandState} from {@link AssemblyParseNumericToken} + * + *

+ * In short, this handles generation of a single operand state for the operand and value recorded by + * the given parse token. + */ +public class AssemblyOperandStateGenerator + extends AbstractAssemblyStateGenerator { + protected final OperandSymbol opSym; + + /** + * Construct the operand state generator + * + * @param resolver the resolver + * @param node the node from which to generate the state + * @param fromLeft the accumulated patterns from the left sibling or parent + * @param opSym the operand symbol + */ + public AssemblyOperandStateGenerator(AssemblyTreeResolver resolver, + AssemblyParseNumericToken node, OperandSymbol opSym, + AssemblyResolvedPatterns fromLeft) { + super(resolver, node, fromLeft); + this.opSym = opSym; + } + + @Override + public Stream generate(GeneratorContext gc) { + return Stream.of( + new AssemblyGeneratedPrototype(new AssemblyOperandState(resolver, gc.path, gc.shift, + node.getSym(), node.getNumericValue(), opSym), fromLeft)); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlock.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlock.java index 50006cb542..afd36e2c67 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlock.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlock.java @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicLong; import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong; import ghidra.app.plugin.assembler.sleigh.expr.SolverException; -import ghidra.app.plugin.assembler.sleigh.util.SleighUtil; +import ghidra.app.plugin.assembler.sleigh.util.AsmUtil; import ghidra.app.plugin.processors.sleigh.ContextOp; import ghidra.app.plugin.processors.sleigh.expression.ContextField; import ghidra.app.plugin.processors.sleigh.expression.TokenField; @@ 
-31,15 +31,16 @@ import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern; import ghidra.app.plugin.processors.sleigh.pattern.PatternBlock; import ghidra.program.model.lang.RegisterValue; import ghidra.util.NumericUtilities; -import ghidra.util.StringUtilities; /** * The analog of {@link PatternBlock}, designed for use by the assembler * + *

* It is suitable for the assembler because it is represented byte-by-byte, and it offers a number * of useful conversions and operations. * - * TODO A lot of this could probably be factored into the {@link PatternBlock} class, but it was + *

+ * TODO: A lot of this could probably be factored into the {@link PatternBlock} class, but it was * best to experiment in another class altogether to avoid breaking things. */ public class AssemblyPatternBlock implements Comparable { @@ -53,9 +54,10 @@ public class AssemblyPatternBlock implements Comparable { /** * Construct a new pattern block with the given mask, values, and offset + * * @param offset an offset (0-up, left-to-right) where the pattern actually starts - * @param mask a mask: only {@code 1} bits are included in the pattern - * @param vals the value, excluding corresponding {@code 0} bits in the mask + * @param mask a mask: only 1 bits are included in the pattern + * @param vals the value, excluding corresponding 0 bits in the mask */ protected AssemblyPatternBlock(int offset, byte[] mask, byte[] vals) { assert mask.length == vals.length; @@ -66,6 +68,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Construct a new empty pattern block at the given offset, prepared with the given capacity + * * @param offset an offset (0-up, left-to-right) where the pattern will start * @param capacity the space to allocate for the mask and values */ @@ -77,6 +80,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Get an empty pattern block + * * @return the pattern block */ public static AssemblyPatternBlock nop() { @@ -85,6 +89,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Get a pattern block with the given (fully-included) values at the given offset + * * @param offset the offset (0-up, left-to-right) * @param vals the values * @return a pattern block (having a full mask) @@ -100,7 +105,10 @@ public class AssemblyPatternBlock implements Comparable { /** * Convert the given long to a pattern block (having offset 0 and a full mask) - * NOTE: The result will be 8 bytes in length + * + *

+ * NOTE: The result will be 8 bytes in length + * * @param value the value to convert * @return the pattern block containing the big-endian representation of the value */ @@ -118,7 +126,10 @@ public class AssemblyPatternBlock implements Comparable { /** * Convert the given masked long to a pattern block (having offset 0) - * NOTE: The result will be 8 bytes in length + * + *

+ * NOTE: The result will be 8 bytes in length + * * @param ml the masked long, whose values and mask to convert * @return the pattern block containing the big-endian representation of the value */ @@ -139,7 +150,9 @@ public class AssemblyPatternBlock implements Comparable { /** * Convert a string representation to a pattern block - * @see NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, String) + * + * @see NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, + * String) * @param str the string to convert * @return the resulting pattern block */ @@ -187,7 +200,8 @@ public class AssemblyPatternBlock implements Comparable { } /** - * Convert a block from a disjoint pattern into an assembly pattern block + * Convert a block from a disjoint pattern into an assembly pattern block + * * @param pat the pattern to convert * @param context true to select the context block, false to select the instruction block * @return the converted pattern block @@ -229,6 +243,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Encode the given masked long into a pattern block as specified by a given token field + * * @param tf the token field specifying the location of the value to encode * @param val the value to encode * @return the pattern block with the encoded value @@ -262,6 +277,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Encode the given masked long into a pattern block as specified by a given context field + * * @param cf the context field specifying the location of the value to encode * @param val the value to encode * @return the pattern block with the encoded value @@ -293,11 +309,12 @@ public class AssemblyPatternBlock implements Comparable { /** * Convert a register value into a pattern block - * @param rv the register value - * @return the pattern block * * This is used primarily to compute default context register values, and pass them into an * 
assembler. + * + * @param rv the register value + * @return the pattern block */ public static AssemblyPatternBlock fromRegisterValue(RegisterValue rv) { byte[] mb = rv.toBytes(); @@ -310,6 +327,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Allocate a fully-undefined pattern block of the given length + * * @param length the length in bytes * @return the block of all unknown bits */ @@ -321,6 +339,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Duplicate this pattern block + * * @return the duplicate */ public AssemblyPatternBlock copy() { @@ -330,6 +349,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Get the length (plus the offset) of this pattern block + * * @return the total length */ public int length() { @@ -338,6 +358,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Shift, i.e., increase the offset of, this pattern block + * * @param amt the amount to shift right * @return the shifted pattern block */ @@ -350,6 +371,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Truncate (unshift) this pattern block by removing bytes from the left + * * @param amt the amount to truncate or shift left * @return the truncated pattern block */ @@ -373,12 +395,13 @@ public class AssemblyPatternBlock implements Comparable { /** * Combine this pattern block with another given block * + *

* Two blocks can be combined in their corresponding defined bits agree. When blocks are * combined, their bytes are aligned according to their shifts, and the defined bits are taken - * from either block. If neither block defines a bit (i.e., the mask bit at that position is - * {@code 0} for both input blocks, then the output has an undefined bit in the corresponding - * position. If both blocks define the bit, but they have opposite values, then the result is - * an error. + * from either block. If neither block defines a bit (i.e., the mask bit at that position is 0 + * for both input blocks, then the output has an undefined bit in the corresponding position. If + * both blocks define the bit, but they have opposite values, then the result is an error. + * * @param that the other block * @return the new combined block, or null if the blocks disagree for any bit */ @@ -487,12 +510,12 @@ public class AssemblyPatternBlock implements Comparable { return result; } - result = SleighUtil.compareArrays(this.mask, that.mask); + result = AsmUtil.compareArrays(this.mask, that.mask); if (result != 0) { return result; } - result = SleighUtil.compareArrays(this.vals, that.vals); + result = AsmUtil.compareArrays(this.vals, that.vals); if (result != 0) { return result; } @@ -501,6 +524,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Read an array, returning a default if the index is out of bounds + * * @param arr the array to read * @param idx the index * @param def the default value @@ -517,11 +541,12 @@ public class AssemblyPatternBlock implements Comparable { /** * Encode the given value into a copy of this pattern block as specified by a context operation * - * NOTE: this method is given as a special operation, instead of a conversion factory method, - * because this is a write operation, not a combine operation. As such, the bits (including - * undefined bits) replace the bits in the existing pattern block. 
Were this a conversion - * method, we would lose the distinction between unknown bits being written, and bits whose - * values are simply not included in the write. + *

+ * NOTE: this method is given as a special operation, instead of a conversion factory + * method, because this is a write operation, not a combine operation. As such, the bits + * (including undefined bits) replace the bits in the existing pattern block. Were this a + * conversion method, we would lose the distinction between unknown bits being written, and bits + * whose values are simply not included in the write. * * @param cop the context operation specifying the location of the value to encode * @param val the value to encode @@ -565,6 +590,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Read the input of a context operation from this pattern block + * * @param cop the context operation * @return the decoded input, as a masked value */ @@ -595,12 +621,14 @@ public class AssemblyPatternBlock implements Comparable { /** * Set all bits read by a given context operation to unknown + * + *

+ * This is used during resolution to remove a context requirement passed upward by a child. When + * a parent constructor writes the required value to the context register, that requirement need + * not be passed further upward, since the write satisfies the requirement. + * * @param cop the context operation * @return the result - * - * This is used during resolution to remove a context requirement passed upward by a child. - * When a parent constructor writes the required value to the context register, that - * requirement need not be passed further upward, since the write satisfies the requirement. */ public AssemblyPatternBlock maskOut(ContextOp cop) { byte[] newMask = Arrays.copyOf(this.mask, this.mask.length); @@ -623,6 +651,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Get the values array + * * @return the array */ public byte[] getVals() { @@ -631,6 +660,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Get the mask array + * * @return the array */ public byte[] getMask() { @@ -639,6 +669,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Get the number of undefined bytes preceding the mask and values arrays + * * @return the offset */ public int getOffset() { @@ -646,7 +677,8 @@ public class AssemblyPatternBlock implements Comparable { } /** - * Decode {@code} len value bytes in big-endian format, beginning at {@code start} + * Decode {@code len} value bytes in big-endian format, beginning at {@code start} + * * @param start the first byte to decode * @param len the number of bytes to decode * @return the decoded long @@ -664,7 +696,8 @@ public class AssemblyPatternBlock implements Comparable { } /** - * Decode {@code} len mask bytes in big-endian format, beginning at {@code start} + * Decode {@code len} mask bytes in big-endian format, beginning at {@code start} + * * @param start the first byte to decode * @param len the number of bytes to decode * @return the decoded long @@ -682,7 +715,8 @@ 
public class AssemblyPatternBlock implements Comparable { } /** - * Decode {@code} len bytes (values and mask) in big-endian format, beginning at {@code start} + * Decode {@code len} bytes (values and mask) in big-endian format, beginning at {@code start} + * * @param start the first byte to decode * @param len the number of bytes to decode * @return the decoded masked long @@ -692,7 +726,8 @@ public class AssemblyPatternBlock implements Comparable { } /** - * Fill all unknown bits with {@code 0} bits + * Fill all unknown bits with 0 bits + * * @return the result */ public AssemblyPatternBlock fillMask() { @@ -705,6 +740,7 @@ public class AssemblyPatternBlock implements Comparable { /** * Check if there are any unknown bits + * * @return true if no unknown bits are present, false otherwise */ public boolean isFullMask() { @@ -720,8 +756,9 @@ public class AssemblyPatternBlock implements Comparable { } /** - * Check if all bits are {@code 0} bits - * @return true if all are {@code 0}, false otherwise + * Check if all bits are 0 bits + * + * @return true if all are 0, false otherwise */ public boolean isZero() { if (!isFullMask()) { @@ -738,8 +775,10 @@ public class AssemblyPatternBlock implements Comparable { /** * Decode the values array into a {@link BigInteger} of length {@code n} bytes * + *

* The array is either truncated or zero-extended on the right to match the requested * number of bytes, then decoded in big-endian format as an unsigned value. + * * @param n the number of bytes (left-to-right) to decode * @return the decoded big integer */ @@ -757,7 +796,9 @@ public class AssemblyPatternBlock implements Comparable { /** * Counts the total number of known bits in the pattern * + *

* At a slightly lower level, counts the number of 1-bits in the mask. + * * @return the count */ public int getSpecificity() { @@ -785,19 +826,21 @@ public class AssemblyPatternBlock implements Comparable { /** * Get an iterable over all the possible fillings of the value, given a partial mask * + *

* This is meant to be used idiomatically, as in an enhanced for loop: * *

-	 * {@code
 	 * for (byte[] val : pattern.possibleVals()) {
-	 *     System.out.println(format(val));
-	 * }
+	 * 	System.out.println(format(val));
 	 * }
 	 * 
* - * NOTE: A single byte array is instantiated with the call to {@link Iterable#iterator()}. Each - * call to {@link Iterator#next()} modifies the one byte array and returns it. As such, if you - * intend to preserve the value in the array for later use, you must make a copy. + *

+ * NOTE: A single byte array is instantiated with the call to + * {@link Iterable#iterator()}. Each call to {@link Iterator#next()} modifies the one byte array + * and returns it. As such, if you intend to preserve the value in the array for later use, you + * must make a copy. + * * @return the iterable. */ public Iterable possibleVals() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolution.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolution.java index 19464d47b3..bd5d485fdf 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolution.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolution.java @@ -15,24 +15,27 @@ */ package ghidra.app.plugin.assembler.sleigh.sem; -import java.util.List; -import java.util.Map; +import java.util.*; import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong; +import ghidra.app.plugin.processors.sleigh.Constructor; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern; /** * The (often intermediate) result of assembly * - * These may represent a successful construction ({@link AssemblyResolvedConstructor}, a future - * field ({@link AssemblyResolvedBackfill}), or an error ({@link AssemblyResolvedError}). + *

+ * These may represent a successful construction ({@link AssemblyResolvedPatterns}), a future field + * ({@link AssemblyResolvedBackfill}), or an error ({@link AssemblyResolvedError}). * + *

* This class also provides the static factory methods for constructing any of its subclasses. */ public abstract class AssemblyResolution implements Comparable { protected final String description; - protected final List children; + protected final List children; + protected final AssemblyResolution right; private boolean hashed = false; private int hash; @@ -50,12 +53,15 @@ public abstract class AssemblyResolution implements Comparable children) { + AssemblyResolution(String description, List children, + AssemblyResolution right) { this.description = description; - this.children = children == null ? List.of() : children; + this.children = children == null ? List.of() : Collections.unmodifiableList(children); + this.right = right; } /* ******************************************************************************************** @@ -65,61 +71,69 @@ public abstract class AssemblyResolution implements Comparable + * NOTE: This is not used strictly for resolved SLEIGH constructors. It may also be used + * to store intermediates, e.g., encoded operands, during constructor resolution. 
+ * * @param ins the instruction pattern block * @param ctx the context pattern block * @param description a description of the resolution - * @param sel the children selected to resolve this constructor, or null + * @param cons the constructor, or null + * @param children the children of this constructor, or null * @return the new resolution */ - public static AssemblyResolvedConstructor resolved(AssemblyPatternBlock ins, - AssemblyPatternBlock ctx, String description, - List sel) { - return new AssemblyResolvedConstructor(description, sel, ins, ctx, null, null); + public static AssemblyResolvedPatterns resolved(AssemblyPatternBlock ins, + AssemblyPatternBlock ctx, String description, Constructor cons, + List children, AssemblyResolution right) { + return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx, null, + null); } /** * Build an instruction-only successful resolution result + * * @see #resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, List) * @param ins the instruction pattern block * @param description a description of the resolution * @param children the children selected to resolve this constructor, or null * @return the new resolution */ - public static AssemblyResolvedConstructor instrOnly(AssemblyPatternBlock ins, - String description, List children) { - return resolved(ins, AssemblyPatternBlock.nop(), description, children); + public static AssemblyResolvedPatterns instrOnly(AssemblyPatternBlock ins, + String description) { + return resolved(ins, AssemblyPatternBlock.nop(), description, null, null, null); } /** * Build a context-only successful resolution result + * * @see #resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, List) * @param ctx the context pattern block * @param description a description of the resolution * @param children the children selected to resolve this constructor, or null * @return the new resolution */ - public static AssemblyResolvedConstructor contextOnly(AssemblyPatternBlock 
ctx, - String description, List children) { - return resolved(AssemblyPatternBlock.nop(), ctx, description, children); + public static AssemblyResolvedPatterns contextOnly(AssemblyPatternBlock ctx, + String description) { + return resolved(AssemblyPatternBlock.nop(), ctx, description, null, null, null); } /** * Build a successful resolution result from a SLEIGH constructor's patterns + * * @param pat the constructor's pattern * @param description a description of the resolution * @return the new resolution */ - public static AssemblyResolvedConstructor fromPattern(DisjointPattern pat, int minLen, - String description) { + public static AssemblyResolvedPatterns fromPattern(DisjointPattern pat, int minLen, + String description, Constructor cons) { AssemblyPatternBlock ins = AssemblyPatternBlock.fromPattern(pat, minLen, false); AssemblyPatternBlock ctx = AssemblyPatternBlock.fromPattern(pat, 0, true); - return resolved(ins, ctx, description, null); + return resolved(ins, ctx, description, cons, null, null); } /** * Build a backfill record to attach to a successful resolution result + * * @param exp the expression depending on a missing symbol * @param goal the desired value of the expression * @param res the resolution result for child constructors @@ -128,41 +142,69 @@ public abstract class AssemblyResolution implements Comparable res, int inslen, String description) { - return new AssemblyResolvedBackfill(description, exp, goal, res, inslen, 0); + int inslen, String description) { + return new AssemblyResolvedBackfill(description, exp, goal, inslen, 0); } /** * Obtain a new "blank" resolved SLEIGH constructor record + * * @param description a description of the resolution - * @param sel any children that will be involved in populating this record + * @param children any children that will be involved in populating this record * @return the new resolution */ - public static AssemblyResolvedConstructor nop(String description, - List sel) { - return 
resolved(AssemblyPatternBlock.nop(), AssemblyPatternBlock.nop(), description, sel); + public static AssemblyResolvedPatterns nop(String description, + List children, AssemblyResolution right) { + return resolved(AssemblyPatternBlock.nop(), AssemblyPatternBlock.nop(), description, null, + children, right); + } + + /** + * Obtain a new "blank" resolved SLEIGH constructor record + * + * @param description a description of the resolution + * @param chilren any children that will be involved in populating this record + * @return the new resolution + */ + public static AssemblyResolvedPatterns nop(String description) { + return resolved(AssemblyPatternBlock.nop(), AssemblyPatternBlock.nop(), description, null, + null, null); } /** * Build an error resolution record + * * @param error a description of the error * @param description a description of what the resolver was doing when the error ocurred * @param children any children involved in generating the error * @return the new resolution */ public static AssemblyResolvedError error(String error, String description, - List children) { - return new AssemblyResolvedError(description, children, error); + List children, AssemblyResolution right) { + return new AssemblyResolvedError(description, children, right, error); + } + + /** + * Build an error resolution record + * + * @param error a description of the error + * @param description a description of what the resolver was doing when the error occurred + * @param children any children involved in generating the error + * @return the new resolution + */ + public static AssemblyResolvedError error(String error, String description) { + return new AssemblyResolvedError(description, null, null, error); } /** * Build an error resolution record, based on an intermediate SLEIGH constructor record + * * @param error a description of the error * @param res the constructor record that was being populated when the error ocurred * @return the new error resolution */ - public static 
AssemblyResolution error(String error, AssemblyResolvedConstructor res) { - return error(error, res.description, res.children); + public static AssemblyResolution error(String error, AssemblyResolvedPatterns res) { + return error(error, res.description, res.children, res.right); } /* ******************************************************************************************** @@ -171,18 +213,21 @@ public abstract class AssemblyResolution implements Comparable getAllRight() { + List result = new ArrayList<>(); + collectAllRight(result); + return result; + } + + protected void collectAllRight(Collection into) { + into.add(this); + if (right == null) { + return; + } + right.collectAllRight(into); + } + /** * Get the child portion of {@link #toString()} * + *

* If a subclass has another, possible additional, notion of children that it would like to * include in {@link #toString()}, it must override this method. + * * @see #hasChildren() * @param indent the current indentation * @return the indented description for each child on its own line @@ -210,6 +271,7 @@ public abstract class AssemblyResolution implements Comparable * If a subclass has another, possibly additional, notion of children that it would like to * include in {@link #toString()}, it must override this method to return true when such * children are present. + * * @see #childrenToString(String) * @return true if this record has children */ @@ -256,4 +320,36 @@ public abstract class AssemblyResolution implements Comparable + * This also shifts any backfill and forbidden pattern records. + * + * @param amt the number of bytes to shift. + * @return the result + */ + public abstract AssemblyResolution shift(int amt); + + /** + * Get this same resolution, but without any right siblings + * + * @return the resolution + */ + public AssemblyResolution withoutRight() { + return withRight(null); + } + + /** + * Get this same resolution, but with the given right sibling + * + * @return the resolution + */ + public abstract AssemblyResolution withRight(AssemblyResolution right); + + /** + * Get this same resolution, pushing its right siblings down to its children + */ + public abstract AssemblyResolution parent(String description, int opCount); } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolutionResults.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolutionResults.java index 9228b785d8..c366ffab84 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolutionResults.java +++ 
b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolutionResults.java @@ -16,12 +16,17 @@ package ghidra.app.plugin.assembler.sleigh.sem; import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; import org.apache.commons.collections4.set.AbstractSetDecorator; +import ghidra.app.plugin.assembler.sleigh.util.DbgTimer; + /** * A set of possible assembly resolutions for a single SLEIGH constructor * + *

* Since the assembler works from the leaves up, it unclear in what context a given token appears. * Thus, every possible encoding is collected and passed upward. As resolution continues, many of * the possible encodings are pruned out. When the resolver reaches the root, we end up with every @@ -29,6 +34,55 @@ import org.apache.commons.collections4.set.AbstractSetDecorator; * encodings, including error records describing the pruned intermediate results. */ public class AssemblyResolutionResults extends AbstractSetDecorator { + protected static final DbgTimer DBG = AssemblyTreeResolver.DBG; + + public interface Applicator { + Iterable getPatterns(AssemblyResolvedPatterns cur); + + default AssemblyResolvedPatterns setDescription( + AssemblyResolvedPatterns res, AssemblyResolution from) { + AssemblyResolvedPatterns temp = res.withDescription(from.description); + return temp; + } + + default AssemblyResolvedPatterns setRight(AssemblyResolvedPatterns res, + AssemblyResolvedPatterns cur) { + return res.withRight(cur); + } + + default AssemblyResolvedPatterns combineConstructor(AssemblyResolvedPatterns cur, + AssemblyResolvedPatterns pat) { + AssemblyResolvedPatterns combined = cur.combine(pat); + if (combined == null) { + return null; + } + return setRight(setDescription(combined, pat), cur); + } + + default AssemblyResolvedPatterns combineBackfill(AssemblyResolvedPatterns cur, + AssemblyResolvedBackfill bf) { + AssemblyResolvedPatterns combined = cur.combine(bf); + return setRight(setDescription(combined, bf), cur); + } + + default AssemblyResolvedPatterns combine(AssemblyResolvedPatterns cur, + AssemblyResolution pat) { + if (pat.isError()) { + throw new AssertionError(); + } + if (pat.isBackfill()) { + return combineBackfill(cur, (AssemblyResolvedBackfill) pat); + } + return combineConstructor(cur, (AssemblyResolvedPatterns) pat); + } + + String describeError(AssemblyResolvedPatterns rc, AssemblyResolution pat); + + default AssemblyResolution 
finish(AssemblyResolvedPatterns resolved) { + return resolved; + } + } + protected final Set resolutions; /** @@ -48,7 +102,7 @@ public class AssemblyResolutionResults extends AbstractSetDecorator function) { + return stream().map(res -> { + assert !(res instanceof AssemblyResolvedBackfill); + if (res.isError()) { + return res; + } + return function.apply((AssemblyResolvedPatterns) res); + }).collect(Collectors.toCollection(AssemblyResolutionResults::new)); + } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedBackfill.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedBackfill.java index c3dee97089..47fe59b02d 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedBackfill.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedBackfill.java @@ -23,16 +23,17 @@ import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; /** * A {@link AssemblyResolution} indicating the need to solve an expression in the future * - * Such records are collected within a {@link AssemblyResolvedConstructor} and then solved just - * before the final result(s) are assembled. This is typically required by instructions that refer - * to the {@code inst_next} symbol. + *

+ * Such records are collected within a {@link AssemblyResolvedPatterns} and then solved just before + * the final result(s) are assembled. This is typically required by instructions that refer to the + * {@code inst_next} symbol. * - * NOTE: These are used internally. The user ought never to see these from the assembly API. + *

+ * NOTE: These are used internally. The user ought never to see these from the assembly API. */ public class AssemblyResolvedBackfill extends AssemblyResolution { protected final PatternExpression exp; protected final MaskedLong goal; - protected final Map res; protected final int inslen; protected final int offset; @@ -52,31 +53,37 @@ public class AssemblyResolvedBackfill extends AssemblyResolution { /** * @see {@link AssemblyResolution#backfill(PatternExpression, MaskedLong, Map, int, String)} */ - AssemblyResolvedBackfill(String description, PatternExpression exp, MaskedLong goal, - Map res, int inslen, int offset) { - super(description, null); + AssemblyResolvedBackfill(String description, PatternExpression exp, MaskedLong goal, int inslen, + int offset) { + super(description, null, null); this.exp = exp; this.goal = goal; - this.res = res; this.inslen = inslen; this.offset = offset; } /** * Duplicate this record + * * @return the duplicate */ AssemblyResolvedBackfill copy() { AssemblyResolvedBackfill cp = - new AssemblyResolvedBackfill(description, exp, goal, res, inslen, offset); + new AssemblyResolvedBackfill(description, exp, goal, inslen, offset); return cp; } + @Override + public AssemblyResolvedBackfill withRight(AssemblyResolution right) { + throw new AssertionError(); + } + /** * Get the expected length of the instruction portion of the future encoding * * This is used to make sure that operands following a to-be-determined encoding are placed * properly. Even though the actual encoding cannot yet be determined, its length can. + * * @return the total expected length (including the offset) */ public int getInstructionLength() { @@ -99,13 +106,14 @@ public class AssemblyResolvedBackfill extends AssemblyResolution { description + ")"; } - /** - * Shift the back-fill record's "instruction" pattern to the right. - * @param amt the number of bytes to shift the result when solved. 
- * @return the result - */ + @Override public AssemblyResolvedBackfill shift(int amt) { - return new AssemblyResolvedBackfill(description, exp, goal, res, inslen, offset + amt); + return new AssemblyResolvedBackfill(description, exp, goal, inslen, offset + amt); + } + + @Override + public AssemblyResolution parent(String description, int opCount) { + throw new AssertionError(); } /** @@ -117,26 +125,27 @@ public class AssemblyResolvedBackfill extends AssemblyResolution { * {@link NeedsBackfillException}, since that would imply the missing symbol(s) from the * original attempt are still missing. Instead, the method returns an instance of * {@link AssemblyResolvedError}. + * * @param solver a solver, usually the same as the one from the original attempt. * @param vals the defined symbols, usually the same, but with the missing symbol(s). * @return the solution result */ public AssemblyResolution solve(RecursiveDescentSolver solver, Map vals, - AssemblyResolvedConstructor cur) { + AssemblyResolvedPatterns cur) { try { AssemblyResolution ar = - solver.solve(exp, goal, vals, res, cur.truncate(offset), description); + solver.solve(exp, goal, vals, cur.truncate(offset), description); if (ar.isError()) { return ar; } - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar; return rc.shift(offset); } catch (NeedsBackfillException e) { - return AssemblyResolution.error("Solution still requires backfill", description, null); + return AssemblyResolution.error("Solution still requires backfill", description); } catch (UnsupportedOperationException e) { - return AssemblyResolution.error("Unsupported: " + e.getMessage(), description, null); + return AssemblyResolution.error("Unsupported: " + e.getMessage(), description); } } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedError.java 
b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedError.java index 24517fc328..4bee9da6e4 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedError.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedError.java @@ -20,6 +20,7 @@ import java.util.List; /** * A {@link AssemblyResolution} indicating the occurrence of a (usually semantic) error * + *

* The description should indicate where the error occurred. The error message should explain the * actual error. To help the user diagnose the nature of the error, errors in sub-constructors * should be placed as children of an error given by the parent constructor. @@ -48,9 +49,9 @@ public class AssemblyResolvedError extends AssemblyResolution { * @see AssemblyResolution#error(String, String, List) */ AssemblyResolvedError(String description, List children, - String error) { - super(description, children); - AssemblyTreeResolver.dbg.println(error); + AssemblyResolution right, String error) { + super(description, children, right); + AssemblyTreeResolver.DBG.println(error); this.error = error; } @@ -66,6 +67,7 @@ public class AssemblyResolvedError extends AssemblyResolution { /** * Get a description of the error + * * @return the description */ public String getError() { @@ -76,4 +78,20 @@ public class AssemblyResolvedError extends AssemblyResolution { public String lineToString() { return error + " (" + description + ")"; } + + @Override + public AssemblyResolution shift(int amt) { + return this; + } + + @Override + public AssemblyResolution withRight(AssemblyResolution right) { + return new AssemblyResolvedError(description, null, right, error); + } + + @Override + public AssemblyResolution parent(String description, int opCount) { + List allRight = getAllRight(); + return new AssemblyResolvedError(description, allRight, null, error); + } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedConstructor.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedPatterns.java similarity index 61% rename from Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedConstructor.java rename to Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedPatterns.java 
index f7ca45bb80..83a09f81a2 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedConstructor.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyResolvedPatterns.java @@ -17,8 +17,8 @@ package ghidra.app.plugin.assembler.sleigh.sem; import java.util.*; import java.util.concurrent.atomic.AtomicLong; -import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.apache.commons.collections4.IteratorUtils; import org.apache.commons.collections4.Predicate; @@ -27,17 +27,20 @@ import org.apache.commons.lang3.StringUtils; import ghidra.app.plugin.assembler.AssemblySelector; import ghidra.app.plugin.assembler.sleigh.expr.MaskedLong; import ghidra.app.plugin.assembler.sleigh.expr.RecursiveDescentSolver; -import ghidra.app.plugin.processors.sleigh.ConstructState; -import ghidra.app.plugin.processors.sleigh.ContextOp; +import ghidra.app.plugin.processors.sleigh.*; +import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; +import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol; /** * A {@link AssemblyResolution} indicating successful application of a constructor * + *

* This is almost analogous to {@link ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern - * DisjointPattern}, in that is joins an instruction {@link AssemblyPatternBlock} with a corresponding - * context {@link AssemblyPatternBlock}. However, this object is mutable, and it collects backfill records, - * as well as forbidden patterns. + * DisjointPattern}, in that it joins an instruction {@link AssemblyPatternBlock} with a + * corresponding context {@link AssemblyPatternBlock}. However, this object is mutable, and it + * collects backfill records, as well as forbidden patterns. * + *

* When the applied constructor is from the "instruction" subtable, this represents a fully- * constructed instruction with required context. All backfill records ought to be resolved and * applied before the final result is given to the user, i.e., passed into the @@ -45,16 +48,17 @@ import ghidra.app.plugin.processors.sleigh.ContextOp; * becomes confined to one of the forbidden patterns, it must be dropped, since the encoding will * actually invoke a more specific SLEIGH constructor. */ -public class AssemblyResolvedConstructor extends AssemblyResolution { +public class AssemblyResolvedPatterns extends AssemblyResolution { protected static final String INS = "ins:"; protected static final String CTX = "ctx:"; protected static final String SEP = ","; + protected final Constructor cons; protected final AssemblyPatternBlock ins; protected final AssemblyPatternBlock ctx; protected final Set backfills; - protected final Set forbids; + protected final Set forbids; @Override protected int computeHash() { @@ -71,10 +75,10 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { @Override public boolean equals(Object obj) { - if (!(obj instanceof AssemblyResolvedConstructor)) { + if (!(obj instanceof AssemblyResolvedPatterns)) { return false; } - AssemblyResolvedConstructor that = (AssemblyResolvedConstructor) obj; + AssemblyResolvedPatterns that = (AssemblyResolvedPatterns) obj; if (!this.ins.equals(that.ins)) { return false; } @@ -93,11 +97,12 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * @see AssemblyResolution#resolved(AssemblyPatternBlock, AssemblyPatternBlock, String, List) */ - AssemblyResolvedConstructor(String description, - List children, AssemblyPatternBlock ins, - AssemblyPatternBlock ctx, Set backfills, - Set forbids) { - super(description, children); + AssemblyResolvedPatterns(String description, Constructor cons, + List children, AssemblyResolution right, + AssemblyPatternBlock ins, AssemblyPatternBlock ctx, + 
Set backfills, Set forbids) { + super(description, children, right); + this.cons = cons; this.ins = ins; this.ctx = ctx; this.backfills = backfills == null ? Set.of() : backfills; @@ -107,15 +112,18 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Build a new successful SLEIGH constructor resolution from a string representation * + *

* This was used primarily in testing, to specify expected results. + * * @param str the string representation: "{@code ins:[pattern],ctx:[pattern]}" - * @see ghidra.util.NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, String) - * NumericUtilities.convertHexStringToMaskedValue(AtomicLong, AtomicLong, String, int, int, String) + * @see ghidra.util.NumericUtilities#convertHexStringToMaskedValue(AtomicLong, AtomicLong, + * String, int, int, String) NumericUtilities.convertHexStringToMaskedValue(AtomicLong, + * AtomicLong, String, int, int, String) * @param description a description of the resolution * @param children any children involved in the resolution * @return the decoded resolution */ - public static AssemblyResolvedConstructor fromString(String str, String description, + public static AssemblyResolvedPatterns fromString(String str, String description, List children) { AssemblyPatternBlock ins = null; if (str.startsWith(INS)) { @@ -141,17 +149,11 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { return AssemblyResolution.resolved(// ins == null ? AssemblyPatternBlock.nop() : ins,// ctx == null ? AssemblyPatternBlock.nop() : ctx,// - description, children); + description, null, children, null); } - /** - * Shift the resolved instruction pattern to the right - * - * This also shifts any backfill and forbidden pattern records. - * @param amt the number of bytes to shift. 
- * @return the result - */ - public AssemblyResolvedConstructor shift(int amt) { + @Override + public AssemblyResolvedPatterns shift(int amt) { if (amt == 0) { return this; } @@ -163,43 +165,47 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { newBackfills.add(bf.shift(amt)); } - Set newForbids = new HashSet<>(); - for (AssemblyResolvedConstructor f : this.forbids) { + Set newForbids = new HashSet<>(); + for (AssemblyResolvedPatterns f : this.forbids) { newForbids.add(f.shift(amt)); } - return new AssemblyResolvedConstructor(description, children, newIns, ctx, + return new AssemblyResolvedPatterns(description, cons, children, right, newIns, ctx, Collections.unmodifiableSet(newBackfills), Collections.unmodifiableSet(newForbids)); } /** * Truncate (unshift) the resolved instruction pattern from the left * - * NOTE: This drops all backfill and forbidden pattern records, since this method is typically - * used to read token fields rather than passed around for resolution. + * NOTE: This drops all backfill and forbidden pattern records, since this method is + * typically used to read token fields rather than passed around for resolution. + * * @param amt the number of bytes to remove from the left * @return the result */ - public AssemblyResolvedConstructor truncate(int amt) { + public AssemblyResolvedPatterns truncate(int amt) { if (amt == 0) { return this; } AssemblyPatternBlock newIns = this.ins.truncate(amt); - return new AssemblyResolvedConstructor("Truncated: " + description, null, newIns, ctx, null, - null); + return new AssemblyResolvedPatterns("Truncated: " + description, cons, null, right, + newIns, ctx, + null, null); } /** * Check if the current encoding is forbidden by one of the attached patterns * - * The pattern become forbidden if this encoding's known bits are an overset of any forbidden + *

+ * The pattern becomes forbidden if this encoding's known bits are an overset of any forbidden * pattern's known bits. + * * @return false if the pattern is forbidden (and thus in error), true if permitted */ public AssemblyResolution checkNotForbidden() { - Set newForbids = new HashSet<>(); - for (AssemblyResolvedConstructor f : this.forbids) { - AssemblyResolvedConstructor check = this.combine(f); + Set newForbids = new HashSet<>(); + for (AssemblyResolvedPatterns f : this.forbids) { + AssemblyResolvedPatterns check = this.combine(f); if (null == check) { continue; } @@ -209,46 +215,51 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { return AssemblyResolution.error("The result is forbidden by " + f, this); } } - return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills, - Collections.unmodifiableSet(newForbids)); + return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx, + backfills, Collections.unmodifiableSet(newForbids)); } /** * Check if this and another resolution have equal encodings * - * This is like {@link #equals(Object)}, but it ignores backfills records and forbidden - * patterns. + *

+ * This is like {@link #equals(Object)}, but it ignores backfill records and forbidden patterns. + * * @param that the other resolution * @return true if both have equal encodings */ - protected boolean bitsEqual(AssemblyResolvedConstructor that) { + protected boolean bitsEqual(AssemblyResolvedPatterns that) { return this.ins.equals(that.ins) && this.ctx.equals(that.ctx); } /** * Combine the encodings and backfills of the given resolution into this one * - * This combines corresponding pattern blocks (assuming they agree), collects backfill - * records, and collects forbidden patterns. + *

+ * This combines corresponding pattern blocks (assuming they agree), collects backfill records, + * and collects forbidden patterns. + * * @param that the other resolution * @return the result if successful, or null */ - public AssemblyResolvedConstructor combine(AssemblyResolvedConstructor that) { + public AssemblyResolvedPatterns combine(AssemblyResolvedPatterns that) { // Not really a backfill, but I would like to re-use code return combineLessBackfill(that, null); } /** * Combine a backfill result - * @param that the result from backfilling - * @param bf the resolved backfilled record - * @return the result if successful, or null * + *

* When a backfill is successful, the result should be combined with the owning resolution. In * addition, for bookkeeping's sake, the resolved record should be removed from the list of * backfills. + * + * @param that the result from backfilling + * @param bf the resolved backfilled record + * @return the result if successful, or null */ - protected AssemblyResolvedConstructor combineLessBackfill(AssemblyResolvedConstructor that, + protected AssemblyResolvedPatterns combineLessBackfill(AssemblyResolvedPatterns that, AssemblyResolvedBackfill bf) { AssemblyPatternBlock newIns = this.ins.combine(that.ins); if (newIns == null) { @@ -263,68 +274,87 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { if (bf != null) { newBackfills.remove(bf); } - Set newForbids = new HashSet<>(this.forbids); + Set newForbids = new HashSet<>(this.forbids); newForbids.addAll(that.forbids); - return new AssemblyResolvedConstructor(description, children, newIns, newCtx, + return new AssemblyResolvedPatterns(description, cons, children, right, newIns, newCtx, Collections.unmodifiableSet(newBackfills), Collections.unmodifiableSet(newForbids)); } /** * Combine the given backfill record into this resolution + * * @param bf the backfill record * @return the result */ - public AssemblyResolvedConstructor combine(AssemblyResolvedBackfill bf) { + public AssemblyResolvedPatterns combine(AssemblyResolvedBackfill bf) { Set newBackfills = new HashSet<>(this.backfills); newBackfills.add(bf); - return new AssemblyResolvedConstructor(description, children, ins, ctx, + return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx, Collections.unmodifiableSet(newBackfills), forbids); } /** * Create a new resolution from this one with the given forbidden patterns recorded + * * @param more the additional forbidden patterns to record * @return the new resolution */ - public AssemblyResolvedConstructor withForbids(Set more) { - Set combForbids = new 
HashSet<>(this.forbids); + public AssemblyResolvedPatterns withForbids(Set more) { + Set combForbids = new HashSet<>(this.forbids); combForbids.addAll(more); - return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills, - Collections.unmodifiableSet(more)); + return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx, + backfills, Collections.unmodifiableSet(more)); } /** * Create a copy of this resolution with a new description + * * @param desc the new description * @return the copy */ - public AssemblyResolvedConstructor withDescription(String desc) { - return new AssemblyResolvedConstructor(desc, children, ins, ctx, backfills, forbids); + public AssemblyResolvedPatterns withDescription(String desc) { + return new AssemblyResolvedPatterns(desc, cons, children, right, ins, ctx, backfills, + forbids); + } + + /** + * Create a copy of this resolution with a replaced constructor + * + * @param cons the new constructor + * @return the copy + */ + public AssemblyResolvedPatterns withConstructor(Constructor cons) { + return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx, + backfills, + forbids); } /** * Encode the given value into the context block as specified by an operation + * * @param cop the context operation specifying the location of the value to encode * @param val the masked value to encode * @return the result * - * This is the forward (as in disassembly) direction of applying context operations. The - * pattern expression is evaluated, and the result is written as specified. + * This is the forward (as in disassembly) direction of applying context operations. The + * pattern expression is evaluated, and the result is written as specified. 
*/ - public AssemblyResolvedConstructor writeContextOp(ContextOp cop, MaskedLong val) { + public AssemblyResolvedPatterns writeContextOp(ContextOp cop, MaskedLong val) { AssemblyPatternBlock newCtx = this.ctx.writeContextOp(cop, val); - return new AssemblyResolvedConstructor(description, children, ins, newCtx, backfills, - forbids); + return new AssemblyResolvedPatterns(description, cons, children, right, ins, newCtx, + backfills, forbids); } /** * Decode the value from the context located where the given context operation would write * - * This is used to read the value from the left-hand-side "variable" of a context operation. - * It seems backward, because it is. When assembling, the right-hand-side expression of a - * context operation must be solved. This means the "variable" is known from the context(s) of - * the resolved children constructors. The value read is then used as the goal in solving the + *

+ * This is used to read the value from the left-hand-side "variable" of a context operation. It + * seems backward, because it is. When assembling, the right-hand-side expression of a context + * operation must be solved. This means the "variable" is known from the context(s) of the + * resolved children constructors. The value read is then used as the goal in solving the * expression. + * * @param cop the context operation whose "variable" to read. * @return the masked result. */ @@ -334,36 +364,60 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Duplicate this resolution, with additional description text appended + * * @param append the text to append - * @return the duplicate - * NOTE: An additional separator {@code ": "} is inserted + * @return the duplicate NOTE: An additional separator {@code ": "} is inserted */ - public AssemblyResolvedConstructor copyAppendDescription(String append) { - AssemblyResolvedConstructor cp = new AssemblyResolvedConstructor( - description + ": " + append, children, ins.copy(), ctx.copy(), backfills, forbids); + public AssemblyResolvedPatterns copyAppendDescription(String append) { + AssemblyResolvedPatterns cp = new AssemblyResolvedPatterns( + description + ": " + append, cons, children, right, ins.copy(), ctx.copy(), backfills, + forbids); + return cp; + } + + @Override + public AssemblyResolvedPatterns withRight(AssemblyResolution right) { + AssemblyResolvedPatterns cp = new AssemblyResolvedPatterns(description, cons, + children, right, ins.copy(), ctx.copy(), backfills, forbids); + return cp; + } + + public AssemblyResolvedPatterns nopLeftSibling() { + return new AssemblyResolvedPatterns("nop-left", null, null, this, ins.copy(), + ctx.copy(), backfills, forbids); + } + + @Override + public AssemblyResolvedPatterns parent(String description, int opCount) { + List allRight = getAllRight(); + AssemblyResolvedPatterns cp = new AssemblyResolvedPatterns(description, cons, + allRight.subList(0, 
opCount), allRight.get(opCount), ins, ctx, backfills, forbids); return cp; } /** * Set all bits read by a given context operation to unknown + * * @param cop the context operation * @return the result * @see AssemblyPatternBlock#maskOut(ContextOp) */ - public AssemblyResolvedConstructor maskOut(ContextOp cop) { + public AssemblyResolvedPatterns maskOut(ContextOp cop) { AssemblyPatternBlock newCtx = this.ctx.maskOut(cop); - return new AssemblyResolvedConstructor(description, children, ins, newCtx, backfills, - forbids); + return new AssemblyResolvedPatterns(description, cons, children, right, ins, newCtx, + backfills, forbids); } /** * Apply as many backfill records as possible * + *

* Each backfill record is resolved in turn, if the record cannot be resolved, it remains * listed. If the record can be resolved, but it conflicts, an error record is returned. Each * time a record is resolved and combined successfully, all remaining records are tried again. * The result is the combined resolved backfills, with only the unresolved backfill records * listed. + * * @param solver the solver, usually the same as the original attempt to solve. * @param vals the values. * @return the result, or an error. @@ -373,15 +427,15 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { return this; } - AssemblyResolvedConstructor res = this; + AssemblyResolvedPatterns res = this; loop: while (true) { for (AssemblyResolvedBackfill bf : res.backfills) { AssemblyResolution ar = bf.solve(solver, vals, this); if (ar.isError()) { continue; } - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar; - AssemblyResolvedConstructor check = res.combineLessBackfill(rc, bf); + AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar; + AssemblyResolvedPatterns check = res.combineLessBackfill(rc, bf); if (check == null) { return AssemblyResolution.error("Conflict: Backfill " + bf.description, res); } @@ -399,6 +453,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Check if this resolution has pending backfills to apply + * * @return true if there are backfills */ public boolean hasBackfills() { @@ -407,6 +462,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Check if this resolution includes forbidden patterns + * * @return true if there are forbidden patterns */ private boolean hasForbids() { @@ -416,43 +472,48 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Solve and apply context changes in reverse to forbidden patterns * - * To avoid circumstances where a context change during disassembly would invoke a more - * specific subconstructor than was used 
to assembly the instruction, we must solve the - * forbidden patterns in tandem with the overall resolution. If the context of any forbidden - * pattern cannot be solved, we simply drop the forbidden pattern -- the lack of a solution - * implies there is no way the context change could produce the forbidden pattern. + *

+ * To avoid circumstances where a context change during disassembly would invoke a more specific + * sub-constructor than was used to assemble the instruction, we must solve the forbidden + * patterns in tandem with the overall resolution. If the context of any forbidden pattern + * cannot be solved, we simply drop the forbidden pattern -- the lack of a solution implies + * there is no way the context change could produce the forbidden pattern. + * * @param sem the constructor whose context changes to solve * @param vals any defined symbols * @param opvals the operand values * @return the result - * @see AssemblyConstructorSemantic#solveContextChanges(AssemblyResolvedConstructor, Map, Map) + * @see AssemblyConstructorSemantic#solveContextChanges(AssemblyResolvedPatterns, Map, Map) */ - public AssemblyResolvedConstructor solveContextChangesForForbids( - AssemblyConstructorSemantic sem, Map vals, Map opvals) { + public AssemblyResolvedPatterns solveContextChangesForForbids( + AssemblyConstructorSemantic sem, Map vals) { if (!hasForbids()) { return this; } - Set newForbids = new HashSet<>(); - for (AssemblyResolvedConstructor f : this.forbids) { - AssemblyResolution t = sem.solveContextChanges(f, vals, opvals); - if (!(t instanceof AssemblyResolvedConstructor)) { + Set newForbids = new HashSet<>(); + for (AssemblyResolvedPatterns f : this.forbids) { + AssemblyResolution t = sem.solveContextChanges(f, vals); + if (!(t instanceof AssemblyResolvedPatterns)) { // Can't be solved, so it can be dropped continue; } - newForbids.add((AssemblyResolvedConstructor) t); + newForbids.add((AssemblyResolvedPatterns) t); } - return new AssemblyResolvedConstructor(description, children, ins, ctx, backfills, - Collections.unmodifiableSet(newForbids)); + return new AssemblyResolvedPatterns(description, cons, children, right, ins, ctx, + backfills, Collections.unmodifiableSet(newForbids)); } /** * Get the length of the instruction encoding * + *

* This is used to ensure each operand is encoded at the correct offset - * @return the length of the instruction block * - * NOTE: this DOES include the offset - * NOTE: this DOES include pending backfills + *

+ * NOTE: this DOES include the offset
+ * NOTE: this DOES include pending backfills + * + * @return the length of the instruction block */ public int getInstructionLength() { int inslen = ins.length(); @@ -464,10 +525,12 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Get the length of the instruction encoding, excluding trailing undefined bytes - * @return the length of the defined bytes in the instruction block * - * NOTE: this DOES include the offset - * NOTE: this DOES NOT include pending backfills + *

+ * NOTE: this DOES include the offset
+ * NOTE: this DOES NOT include pending backfills + * + * @return the length of the defined bytes in the instruction block */ public int getDefinedInstructionLength() { byte[] imsk = ins.getMask(); @@ -482,6 +545,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Get the instruction block + * * @return the instruction block */ public AssemblyPatternBlock getInstruction() { @@ -490,6 +554,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Get the context block + * * @return the context block */ public AssemblyPatternBlock getContext() { @@ -498,6 +563,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Decode a portion of the instruction block + * * @param start the first byte to decode * @param len the number of bytes to decode * @return the read masked value @@ -509,6 +575,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Decode a portion of the context block + * * @param start the first byte to decode * @param len the number of bytes to decode * @return the read masked value @@ -543,7 +610,7 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { sb.append(indent); sb.append("backfill: " + bf + "\n"); } - for (AssemblyResolvedConstructor f : forbids) { + for (AssemblyResolvedPatterns f : forbids) { sb.append(indent); sb.append("forbidden: " + f + "\n"); } @@ -556,20 +623,18 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { * Used for testing and diagnostics: list the constructor line numbers used to resolve this * encoding * + *

* This includes braces to describe the tree structure + * * @see ConstructState#dumpConstructorTree() * @return the constructor tree */ public String dumpConstructorTree() { StringBuilder sb = new StringBuilder(); - // TODO: HACK, but diagnostic - Matcher mat = pat.matcher(description); - if (mat.find()) { - sb.append(mat.group(1)); - } - else { + if (cons == null) { return null; } + sb.append(cons.getSourceFile() + ":" + cons.getLineno()); if (children == null) { return sb.toString(); @@ -577,8 +642,8 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { List subs = new ArrayList<>(); for (AssemblyResolution c : children) { - if (c instanceof AssemblyResolvedConstructor) { - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) c; + if (c instanceof AssemblyResolvedPatterns) { + AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) c; String s = rc.dumpConstructorTree(); if (s != null) { subs.add(s); @@ -598,7 +663,9 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Count the number of bits specified in the resolution patterns * + *

* Totals the specificity of the instruction and context pattern blocks. + * * @return the number of bits in the resulting patterns * @see AssemblyPatternBlock#getSpecificity() */ @@ -609,33 +676,34 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { /** * Get an iterable over all the possible fillings of the instruction pattern given a context * + *

* This is meant to be used idiomatically, as in an enhanced for loop: * *

-	 * {@code
 	 * for (byte[] ins : rcon.possibleInsVals(ctx)) {
-	 *     System.out.println(format(ins));
-	 * }
+	 * 	System.out.println(format(ins));
 	 * }
 	 * 
* + *

* This is similar to calling * {@link #getInstruction()}.{@link AssemblyPatternBlock#possibleVals()}, but with * forbidden patterns removed. A context is required so that only those forbidden patterns * matching the given context are actually removed. This method should always be preferred to * the sequence mentioned above, since {@link AssemblyPatternBlock#possibleVals()} on its own - * may yield bytes that do not produce the desired instruction. + * may yield bytes that do not produce the desired instruction. * - * NOTE: The implementation is based on {@link AssemblyPatternBlock#possibleVals()}, so be - * aware that a single array is reused for each iterate. You should not retain a pointer to the - * array, but rather make a copy. + *

+ * NOTE: The implementation is based on {@link AssemblyPatternBlock#possibleVals()}, so + * be aware that a single array is reused for each iterate. You should not retain a pointer to + * the array, but rather make a copy. * * @param forCtx the context at the assembly address * @return the iterable */ public Iterable possibleInsVals(AssemblyPatternBlock forCtx) { Predicate removeForbidden = (byte[] val) -> { - for (AssemblyResolvedConstructor f : forbids) { + for (AssemblyResolvedPatterns f : forbids) { // If the forbidden length is larger than us, we can ignore it if (f.getDefinedInstructionLength() > val.length) { continue; @@ -663,4 +731,73 @@ public class AssemblyResolvedConstructor extends AssemblyResolution { } }; } + + protected static int getOpIndex(String piece) { + if (piece.charAt(0) != '\n') { + return -1; + } + return piece.charAt(1) - 'A'; + } + + /** + * If the construct state is a {@code ^instruction} or other purely-recursive constructor, get + * its single child. + * + * @param state the parent state + * @return the child state if recursive, or null + */ + protected static ConstructState getPureRecursion(ConstructState state) { + // NB. 
There can be other operands, but only one can be printed + // Furthermore, nothing else can be printed, whether an operand or not + List pieces = state.getConstructor().getPrintPieces(); + if (pieces.size() != 1) { + return null; + } + int opIdx = getOpIndex(pieces.get(0)); + if (opIdx < 0) { + return null; + } + ConstructState sub = state.getSubState(opIdx); + if (sub == null || sub.getConstructor() == null || + sub.getConstructor().getParent() != state.getConstructor().getParent()) { + // not recursive + return null; + } + return sub; + } + + public boolean equivalentConstructState(ConstructState state) { + ConstructState rec = getPureRecursion(state); + if (rec != null) { + if (state.getConstructor() == cons) { + assert children.size() == 1; + AssemblyResolvedPatterns recRes = (AssemblyResolvedPatterns) children.get(0); + return recRes.equivalentConstructState(rec); + } + return equivalentConstructState(rec); + } + if (state.getConstructor() != cons) { + return false; + } + int opCount = cons.getNumOperands(); + for (int opIdx = 0; opIdx < opCount; opIdx++) { + OperandSymbol opSym = cons.getOperand(opIdx); + Set printed = + Arrays.stream(cons.getOpsPrintOrder()).boxed().collect(Collectors.toSet()); + if (!(opSym.getDefiningSymbol() instanceof SubtableSymbol)) { + AssemblyTreeResolver.DBG.println("Operand " + opSym + " is not a sub-table"); + continue; + } + if (!printed.contains(opIdx)) { + AssemblyTreeResolver.DBG.println("Operand " + opSym + " is hidden"); + continue; + } + AssemblyResolvedPatterns child = (AssemblyResolvedPatterns) children.get(opIdx); + ConstructState subState = state.getSubState(opIdx); + if (!child.equivalentConstructState(subState)) { + return false; + } + } + return true; + } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyTreeResolver.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyTreeResolver.java index 
7498971747..6e7e11afd8 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyTreeResolver.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyTreeResolver.java @@ -16,27 +16,32 @@ package ghidra.app.plugin.assembler.sleigh.sem; import java.util.*; - -import org.apache.commons.collections4.IteratorUtils; - -import com.google.common.collect.Sets; +import java.util.stream.Collectors; +import java.util.stream.Stream; import ghidra.app.plugin.assembler.sleigh.SleighAssemblerBuilder; import ghidra.app.plugin.assembler.sleigh.expr.*; import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar; import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction; -import ghidra.app.plugin.assembler.sleigh.symbol.*; +import ghidra.app.plugin.assembler.sleigh.sem.AbstractAssemblyStateGenerator.GeneratorContext; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolutionResults.Applicator; +import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal; import ghidra.app.plugin.assembler.sleigh.tree.*; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer; import ghidra.app.plugin.assembler.sleigh.util.DbgTimer.DbgCtx; -import ghidra.app.plugin.processors.sleigh.Constructor; -import ghidra.app.plugin.processors.sleigh.SleighLanguage; +import ghidra.app.plugin.processors.sleigh.*; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; -import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; +import ghidra.app.plugin.processors.sleigh.symbol.*; +import ghidra.program.model.address.Address; +import ghidra.program.model.lang.InsufficientBytesException; +import ghidra.program.model.lang.UnknownInstructionException; +import ghidra.program.model.mem.ByteMemBufferImpl; +import ghidra.program.model.mem.MemBuffer; /** * The workhorse of semantic resolution for the assembler * + *

* This class takes a parse tree and some additional information (start address, context, etc.) and * attempts to determine possible encodings using the semantics associated with each branch of the * given parse tree. Details of this process are described in {@link SleighAssemblerBuilder}. @@ -44,34 +49,34 @@ import ghidra.app.plugin.processors.sleigh.symbol.OperandSymbol; * @see SleighAssemblerBuilder */ public class AssemblyTreeResolver { - protected static final RecursiveDescentSolver solver = RecursiveDescentSolver.getSolver(); - protected static final DbgTimer dbg = DbgTimer.INACTIVE; + protected static final RecursiveDescentSolver SOLVER = RecursiveDescentSolver.getSolver(); + protected static final DbgTimer DBG = DbgTimer.INACTIVE; + + public static final String INST_START = "inst_start"; + public static final String INST_NEXT = "inst_next"; protected final SleighLanguage lang; - protected final long instStart; + protected final Address at; protected final Map vals = new HashMap<>(); protected final AssemblyParseBranch tree; protected final AssemblyGrammar grammar; protected final AssemblyPatternBlock context; protected final AssemblyContextGraph ctxGraph; - public static final String INST_START = "inst_start"; - public static final String INST_NEXT = "inst_next"; - /** * Construct a resolver for the given parse tree * * @param lang - * @param instStart the byte offset where the instruction will start + * @param at the address where the instruction will start * @param tree the parse tree * @param context the context expected at {@code instStart} * @param ctxGraph the context transition graph used to resolve purely-recursive productions */ - public AssemblyTreeResolver(SleighLanguage lang, long instStart, AssemblyParseBranch tree, + public AssemblyTreeResolver(SleighLanguage lang, Address at, AssemblyParseBranch tree, AssemblyPatternBlock context, AssemblyContextGraph ctxGraph) { this.lang = lang; - this.instStart = instStart; - this.vals.put(INST_START, 
lang.getDefaultSpace().getAddressableWordOffset(instStart)); + this.at = at; + this.vals.put(INST_START, at.getAddressableWordOffset()); this.tree = tree; this.grammar = tree.getGrammar(); this.context = context.fillMask(); @@ -84,77 +89,324 @@ public class AssemblyTreeResolver { * @return a set of resolutions (encodings and errors) */ public AssemblyResolutionResults resolve() { - AssemblyResolutionResults results = resolveBranch(tree); - AssemblyResolutionResults ret = new AssemblyResolutionResults(); - for (AssemblyResolution ar : results) { - assert !(ar instanceof AssemblyResolvedBackfill); - if (ar.isError()) { - ret.add(ar); - continue; - } - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar; - vals.put(INST_NEXT, lang.getDefaultSpace().getAddressableWordOffset( - instStart + rc.getInstructionLength())); - if (rc.hasBackfills()) { - dbg.println("Backfilling: " + rc); - } - ar = rc.backfill(solver, vals); - dbg.println("Backfilled final: " + ar); - if (ar.isError()) { - ret.add(ar); - continue; - } - rc = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns empty = AssemblyResolution.nop("Empty"); + AssemblyConstructStateGenerator rootGen = + new AssemblyConstructStateGenerator(this, tree, empty); - if (rc.hasBackfills()) { - ret.add(AssemblyResolution.error("Solution is incomplete", "failed backfill", - List.of(rc))); - continue; - } - AssemblyResolvedConstructor ctx = - AssemblyResolution.contextOnly(context, "Selecting context", null); - AssemblyResolvedConstructor check = rc.combine(ctx); - if (null == check) { - ret.add(AssemblyResolution.error("Incompatible context", "resolving", - List.of(rc))); - continue; - } - rc = check; + Collection errors = new ArrayList<>(); + Stream protStream = + rootGen.generate(new GeneratorContext(List.of(), 0)); - AssemblyResolution fcheck = rc.checkNotForbidden(); - if (fcheck.isError()) { - ret.add(fcheck); - continue; + if (DBG == DbgTimer.ACTIVE) { + try (DbgCtx dc = DBG.start("Prototypes:")) { 
+ protStream = protStream.map(prot -> { + DBG.println(prot); + return prot; + }).collect(Collectors.toList()).stream(); } - rc = (AssemblyResolvedConstructor) fcheck; - - ret.add(rc); } - return ret; + + Stream patStream = + protStream.map(p -> p.state).distinct().flatMap(s -> s.resolve(empty, errors)); + + AssemblyResolutionResults results = new AssemblyResolutionResults(); + patStream.forEach(results::add); + + results = resolveRootRecursion(results); + results = resolvePendingBackfills(results); + results = selectContext(results); + // TODO: Remove this? It's subsumed by filterByDisassembly, and more accurately.... + results = filterForbidden(results); + results = filterByDisassembly(results); + results.addAll(errors); + return results; } /** - * Resolve a branch of the parse tree + * If applicable, get the {@code I => I} production of the grammar * - * @param branch the branch - * @return the intermediate results + * @return the production */ - protected AssemblyResolutionResults resolveBranch(AssemblyParseBranch branch) { - AssemblyProduction prod = branch.getProduction(); - AssemblyNonTerminal lhs = prod.getLHS(); - AssemblyProduction rec = grammar.getPureRecursion(lhs); - // Currently, the assembler only allows recursion at the root. - // Otherwise, the input context cannot be known. - if (rec != null && branch.getParent() == null) { - return resolveBranchRecursive(branch, rec); + protected AssemblyProduction getRootRecursion() { + assert tree.getParent() == null; + AssemblyProduction rootProd = tree.getProduction(); + AssemblyNonTerminal start = rootProd.getLHS(); + AssemblyProduction rec = grammar.getPureRecursion(start); + return rec; + } + + /** + * If necessary, resolve recursive constructors at the root, usually for prefixes + * + *

+ * If there are no pure recursive constructors at the root, then this simply returns + * {@code temp} unmodified. + * + * @param temp the resolved root results + * @return the results with pure recursive constructors applied to obtain a compatible context + */ + // Ugh, public so I can refer to it in javadocs... + public AssemblyResolutionResults resolveRootRecursion(AssemblyResolutionResults temp) { + AssemblyProduction rootRec = getRootRecursion(); + if (rootRec == null) { + return temp; } - return resolveBranchNonRecursive(branch); + try (DbgCtx dc = DBG.start("Resolving root recursion:")) { + AssemblyResolutionResults result = new AssemblyResolutionResults(); + + for (AssemblyResolution ar : temp) { + if (ar.isError()) { + result.add(ar); + continue; + } + AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar; + AssemblyPatternBlock dst = rc.getContext(); + // TODO: The desired context may need to be passed in. For now, just take start. + AssemblyPatternBlock src = context; // NOTE: This is only correct for "instruction" + String table = "instruction"; + + DBG.println("Finding paths from " + src + " to " + ar.lineToString()); + Collection> paths = + ctxGraph.computeOptimalApplications(src, table, dst, table); + DBG.println("Found " + paths.size()); + for (Deque path : paths) { + DBG.println(" " + path); + result.absorb(applyRecursionPath(path, tree, rootRec, rc)); + } + } + + return result; + } + } + + /** + * Attempt a second time to solve operands and context changes + * + *

+ * Backfills that depended on {@code inst_next} should now easily be solved, since the + * instruction length is now known. + * + * @param temp the resolved results, with backfill pending + * @return the results without backfill, possibly with new errors + */ + protected AssemblyResolutionResults resolvePendingBackfills(AssemblyResolutionResults temp) { + return temp.apply(rc -> { + if (!rc.hasBackfills()) { + return rc; + } + vals.put(INST_NEXT, at.add(rc.getInstructionLength()).getAddressableWordOffset()); + DBG.println("Backfilling: " + rc); + AssemblyResolution ar = rc.backfill(SOLVER, vals); + DBG.println("Backfilled final: " + ar); + return ar; + }).apply(rc -> { + if (rc.hasBackfills()) { + return AssemblyResolution.error("Solution is incomplete", "failed backfill", + List.of(rc), null); + } + return rc; + }); + } + + /** + * Filter out results whose contexts do not match the one requested + * + * @param temp the results whose contexts have not yet been checked + * @return the results that pass. Those that do not are replaced with errors. + */ + protected AssemblyResolutionResults selectContext(AssemblyResolutionResults temp) { + AssemblyResolvedPatterns ctx = + AssemblyResolution.contextOnly(context, "Selecting context"); + return temp.apply(rc -> { + AssemblyResolvedPatterns check = rc.combine(ctx); + if (null == check) { + return AssemblyResolution.error("Incompatible context", "resolving", List.of(rc), + null); + } + return check; + }); + } + + /** + * Filter out results that would certainly be disassembled differently than assembled + * + *

+ * Because of constructor precedence rules, it is possible to assemble a pattern from a + * prototype that would not result in equivalent disassembly. This can be detected in some cases + * via the "forbids" mechanism, where more specific constructors are recorded with the result. + * If the generated pattern matches one of those more-specific constructors, it is forbidden. + * + * @param temp the results whose forbids have not yet been checked + * @return the results that pass. Those that do not are replaced with errors. + */ + protected AssemblyResolutionResults filterForbidden(AssemblyResolutionResults temp) { + return temp.apply(rc -> rc.checkNotForbidden()); + } + + /** + * Filter out results that get disassembled differently than assembled + * + *

+ * The forbids mechanism is not perfect, so as a final fail safe, we disassemble the result and + * compare the prototypes. + * + * @param temp the results whose disassemblies have not yet been checked + * @return the results that pass. Those that do not are replaced with errors. + */ + protected AssemblyResolutionResults filterByDisassembly(AssemblyResolutionResults temp) { + AssemblyDefaultContext asmCtx = new AssemblyDefaultContext(lang); + asmCtx.setContextRegister(context); + return temp.apply(rc -> { + MemBuffer buf = + new ByteMemBufferImpl(at, rc.getInstruction().getVals(), lang.isBigEndian()); + try { + SleighInstructionPrototype ip = + (SleighInstructionPrototype) lang.parse(buf, asmCtx, false); + if (!rc.equivalentConstructState(ip.getRootState())) { + return AssemblyResolution.error("Disassembly prototype mismatch", rc); + } + return rc; + } + catch (InsufficientBytesException | UnknownInstructionException e) { + return AssemblyResolution.error("Disassembly failed: " + e.getMessage(), rc); + } + }); + } + + /** + * Get the state generator for a given operand and parse tree node + * + * @param opSym the operand symbol + * @param node the corresponding parse tree node, possibly null indicating a hidden operand + * @param fromLeft the accumulated patterns from the left sibling or parent + * @return the generator + */ + protected AbstractAssemblyStateGenerator getStateGenerator(OperandSymbol opSym, + AssemblyParseTreeNode node, AssemblyResolvedPatterns fromLeft) { + if (node == null) { + return getHiddenStateGenerator(opSym, fromLeft); + } + if (node.isNumeric()) { + return new AssemblyOperandStateGenerator(this, (AssemblyParseNumericToken) node, opSym, + fromLeft); + } + if (node.isConstructor()) { + return new AssemblyConstructStateGenerator(this, (AssemblyParseBranch) node, fromLeft); + } + throw new AssertionError(); + } + + /** + * Get the state generator for a hidden operand + * + * @param opSym the operand symbol + * @param fromLeft the accumulated 
patterns from the left sibling or parent + * @return the generator + */ + protected AbstractAssemblyStateGenerator getHiddenStateGenerator(OperandSymbol opSym, + AssemblyResolvedPatterns fromLeft) { + TripleSymbol defSym = opSym.getDefiningSymbol(); + if (defSym instanceof SubtableSymbol) { + return new AssemblyHiddenConstructStateGenerator(this, (SubtableSymbol) defSym, + fromLeft); + } + return new AssemblyNopStateGenerator(this, opSym, fromLeft); + } + + /** + * Apply a constructor pattern + * + *

+ * TODO: This is currently used only for resolving recursion. Could this be factored with + * {@link AssemblyConstructState#resolve(AssemblyResolvedPatterns, Collection)}? + * + * @param sem the SLEIGH constructor + * @param shift the shift + * @param fromChildren the results from the single resolved child + * @return the results + */ + protected AssemblyResolutionResults resolvePatterns(AssemblyConstructorSemantic sem, int shift, + AssemblyResolutionResults fromChildren) { + AssemblyResolutionResults results = fromChildren; + results = applyMutations(sem, results); + results = applyPatterns(sem, shift, results); + results = tryResolveBackfills(results); + return results; + } + + /** + * TODO: Can this be factored? + */ + protected AssemblyResolutionResults parent(String description, AssemblyResolutionResults temp, + int opCount) { + return temp.stream() + .map(r -> r.parent(description, opCount)) + .collect(Collectors.toCollection(AssemblyResolutionResults::new)); + } + + /** + * TODO: This is currently used only for resolving recursion. Could this be factored with + * {@link AssemblyConstructState#resolveMutations(AssemblyResolvedPatterns, Collection)}? + */ + protected AssemblyResolutionResults applyMutations(AssemblyConstructorSemantic sem, + AssemblyResolutionResults temp) { + DBG.println("Applying context mutations:"); + return temp.apply(rc -> { + DBG.println("Current: " + rc.lineToString()); + AssemblyResolution backctx = sem.solveContextChanges(rc, vals); + DBG.println("Mutated: " + backctx.lineToString()); + return backctx; + }).apply(rc -> { + return rc.solveContextChangesForForbids(sem, vals); + }); + } + + /** + * TODO: This is currently used only for resolving recursion. Could this be factored with + * {@link AssemblyConstructState#resolvePatterns(AssemblyResolvedPatterns, Collection)}? 
+ */ + protected AssemblyResolutionResults applyPatterns(AssemblyConstructorSemantic sem, int shift, + AssemblyResolutionResults temp) { + DBG.println("Applying patterns:"); + Collection patterns = + sem.getPatterns().stream().map(p -> p.shift(shift)).collect(Collectors.toList()); + return temp.apply(new Applicator() { + @Override + public Iterable getPatterns( + AssemblyResolvedPatterns cur) { + return patterns; + } + + @Override + public AssemblyResolvedPatterns setRight(AssemblyResolvedPatterns res, + AssemblyResolvedPatterns cur) { + // This is typically applied by parent, so don't insert sibling + return res; + } + + @Override + public String describeError(AssemblyResolvedPatterns rc, AssemblyResolution pat) { + return "The patterns conflict " + pat.lineToString(); + } + + @Override + public AssemblyResolvedPatterns combineBackfill(AssemblyResolvedPatterns cur, + AssemblyResolvedBackfill bf) { + throw new AssertionError(); + } + + @Override + public AssemblyResolution finish(AssemblyResolvedPatterns resolved) { + return resolved.checkNotForbidden(); + } + }); } /** * Apply constructors as indicated by a path returned by the context resolution graph * - * Please note: The path given will be emptied during processing. + *

+ * NOTE: The given path will be emptied during processing. * * @param path the path to apply * @param branch the branch corresponding to the production whose LHS has a purely-recursive @@ -164,265 +416,37 @@ public class AssemblyTreeResolver { * @return the results */ protected AssemblyResolutionResults applyRecursionPath(Deque path, - AssemblyParseBranch branch, AssemblyProduction rec, AssemblyResolvedConstructor child) { + AssemblyParseBranch branch, AssemblyProduction rec, AssemblyResolvedPatterns child) { /* * A constructor may have multiple patterns, so I cannot assume I will get at most one * output at each constructor in the path. Start (1) collecting all the results, then (2) * filter out and report the errors, then (3) feed successful resolutions into the next * constructor in the path (or finish). */ - AssemblyResolutionResults result = new AssemblyResolutionResults(); - AssemblyResolutionResults collected = new AssemblyResolutionResults(); - Set intoNext = new LinkedHashSet<>(); - intoNext.add(child); + AssemblyResolutionResults results = new AssemblyResolutionResults(); + results.add(child); while (!path.isEmpty()) { AssemblyConstructorSemantic sem = path.pollLast(); - List substs = List.of((AssemblyParseTreeNode) branch); - // 1 - for (final AssemblyResolvedConstructor res : intoNext) { - List sel = List.of(res); - collected.absorb(resolveSelectedChildren(rec, substs, sel, List.of(sem))); - } - intoNext.clear(); - // 2 - for (AssemblyResolution res : collected) { - if (res.isError()) { - result.add(res); - } - else { // 3 - intoNext.add((AssemblyResolvedConstructor) res); - } + + int opIdx = sem.getOperandIndex(0); + Constructor cons = sem.getConstructor(); + OperandSymbol opSym = cons.getOperand(opIdx); + if (-1 != opSym.getOffsetBase()) { + throw new AssertionError("TODO"); } + int offset = opSym.getRelativeOffset(); + results = parent("Resolving recursive constructor: " + cons.getSourceFile() + ":" + + cons.getLineno(), results, 1); + results = 
results.apply(rc -> rc.shift(offset)); + results = resolvePatterns(sem, 0, results).apply(rc -> rc.withConstructor(cons)); } - result.addAll(intoNext); - return result; + return results; } /** - * Resolve a branch where the production's LHS has a purely-recursive definition - * - * @param branch the branch - * @param rec the purely-recursive definition - * @return the results + * TODO: This is currently used only for resolving recursion. It seems it's missing from the + * refactor? */ - protected AssemblyResolutionResults resolveBranchRecursive(AssemblyParseBranch branch, - AssemblyProduction rec) { - // TODO: There's probably a clever trick regarding since-constructor productions - // And short-circuiting once a compatible recursive rule is found. - try (DbgCtx dc = dbg.start("Resolving (recursive) branch: " + branch.getProduction())) { - AssemblyResolutionResults result = new AssemblyResolutionResults(); - - for (AssemblyResolution ar : resolveBranchNonRecursive(branch)) { - if (ar.isError()) { - result.add(ar); - continue; - } - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar; - AssemblyPatternBlock dst = rc.getContext(); - // TODO: The desired context may need to be passed in. For now, just take start.
- AssemblyPatternBlock src = context; // TODO: This is only correct for "instruction" - String table = branch.getProduction().getName(); - - dbg.println("Finding paths from " + context + " to " + ar.lineToString()); - Collection> paths = - ctxGraph.computeOptimalApplications(src, table, dst, table); - dbg.println("Found " + paths.size()); - for (Deque path : paths) { - dbg.println(" " + path); - result.absorb(applyRecursionPath(path, branch, rec, rc)); - } - } - - return result; - } - } - - /** - * Resolve the given branch, having selected a particular combination of subconstructor results - * - * @param prod the production - * @param substs the braches and tokens corrresponding to the symbols of the production's RHS - * @param sel the selected subconstructor results - * @param semantics the collection of possible constructors for this production - * @return the results - */ - protected AssemblyResolutionResults resolveSelectedChildren(AssemblyProduction prod, - List substs, List sel, - Collection semantics) { - - try (DbgCtx dc = dbg.start("Selecting: " + IteratorUtils.toString(sel.iterator(), - (AssemblyResolvedConstructor rc) -> rc.lineToString()))) { - AssemblyResolutionResults results = new AssemblyResolutionResults(); - - // Pre-check the combined contexts - AssemblyPatternBlock combCtx = AssemblyPatternBlock.nop(); - for (AssemblyResolvedConstructor child : sel) { - AssemblyPatternBlock check = combCtx.combine(child.getContext()); - if (null == check) { - results.add(AssemblyResolution.error( - "Incompatible context requirements among selected children", - "Resolving " + prod, sel)); - return results; - } - combCtx = check; - } - dbg.println("Combined context: " + combCtx); - - AssemblyResolvedConstructor res = AssemblyResolution.nop("Resolving " + prod, sel); - - // OK, now that we have a requirement, seek constructors that are compatible. 
- nextSem: for (AssemblyConstructorSemantic sem : semantics) { - try (DbgCtx dc2 = dbg.start("Trying: " + sem)) { - Constructor cons = sem.getConstructor(); - - // Gather the operand values (from non-constructor semantics) - AssemblyResolvedConstructor subres = - res.copyAppendDescription("Applying constructor: " + sem); - - Map opvals = new HashMap<>(); - Iterator opidxit = sem.getOperandIndexIterator(); - Iterator selit = sel.iterator(); - for (int i = 0; i < prod.size(); i++) { - AssemblyParseTreeNode child = substs.get(i); - AssemblySymbol sym = prod.get(i); - if (sym.takesOperandIndex()) { - int opidx = opidxit.next(); - if (child.isNumeric()) { - AssemblyParseNumericToken num = (AssemblyParseNumericToken) child; - opvals.put(opidx, num.getNumericValue()); - } - else if (child.isConstructor()) { - opvals.put(opidx, selit.next()); - } - } - } - - // Now, work out how to write the operand values in - opidxit = sem.getOperandIndexIterator(); - Iterator subit = sel.iterator(); - for (int i = 0; i < prod.size(); i++) { - AssemblyParseTreeNode child = substs.get(i); - AssemblySymbol sym = prod.get(i); - if (!sym.takesOperandIndex()) { - continue; - } - dbg.println("Current: " + subres.lineToString()); - int opidx = opidxit.next(); - OperandSymbol subsym = cons.getOperand(opidx); - int shift = computeOffset(subsym, cons, opvals); - String symname = subsym.getName(); - dbg.println("Processing symbol: " + symname); - if (child.isNumeric()) { - int bitsize = 0; - if (sym instanceof AssemblyNumericTerminal) { - AssemblyNumericTerminal numeric = (AssemblyNumericTerminal) sym; - bitsize = numeric.getBitSize(); - } - Long opval = (Long) opvals.get(opidx); // delay unboxing until solving - PatternExpression symexp = subsym.getDefiningExpression(); - if (symexp == null) { - symexp = subsym.getDefiningSymbol().getPatternExpression(); - } - String desc = - "Solution to " + sym + " := " + Long.toHexString(opval) + " = " + - symexp + " (immediate op:" + opidx + ",shift:" + shift 
+ ")"; - dbg.println("Writing: " + desc); - AssemblyResolution sol = - solveOrBackfill(symexp, opval, bitsize, vals, opvals, null, desc); - dbg.println("Solution: " + sol); - if (null == sol) { - throw new AssertionError("Who returned a null solution!? " + - "Throw an exception or return an error result, please!"); - } - if (sol.isError()) { - AssemblyResolvedError err = (AssemblyResolvedError) sol; - results.add(AssemblyResolution.error(err.getError(), subres)); - continue nextSem; - } - if (sol instanceof AssemblyResolvedConstructor) { - AssemblyResolvedConstructor solcon = - (AssemblyResolvedConstructor) sol; - AssemblyResolvedConstructor check = - subres.combine(solcon.shift(shift)); - if (null == check) { - results.add(AssemblyResolution.error( - "Conflict: Immediate operand (token " + i + ") " + sol, - subres)); - continue nextSem; - } - subres = check; - } - else { - AssemblyResolvedBackfill solbf = (AssemblyResolvedBackfill) sol; - subres = subres.combine(solbf.shift(shift)); - } - } - else if (child.isConstructor()) { - // Write the instruction pattern in, shifted - AssemblyResolvedConstructor childrc = subit.next(); - dbg.println("Writing subtable(opidx:" + opidx + "): " + symname + ": " + - childrc.lineToString() + " (shift:" + shift + ")"); - // I've already combined the contexts - AssemblyResolvedConstructor check = - subres.combine(childrc.shift(shift)); - if (null == check) { - results.add(AssemblyResolution.error( - "Conflict: Subtable operand (token " + i + ")", subres)); - continue nextSem; - } - subres = check; - } - else { - dbg.println("Probably encountered a varnode production: " + child); - } - } - - // Now, write out the proper requirements based on context mutations - AssemblyResolution backctx = sem.solveContextChanges(subres, vals, opvals); - if (!(backctx instanceof AssemblyResolvedConstructor)) { - results.add(backctx); - continue; - } - subres = (AssemblyResolvedConstructor) backctx; - subres = subres.solveContextChangesForForbids(sem, 
vals, opvals); - - // Now, write the actual instruction and context requirements from the constructor - // patterns - dbg.println("Writing patterns:"); - for (AssemblyResolvedConstructor pat : sem.getPatterns()) { // use the accessor - AssemblyResolvedConstructor temp = subres; - dbg.println(" Pattern: " + pat.lineToString()); - dbg.println(" Current: " + temp.lineToString()); - AssemblyResolvedConstructor check = temp.combine(pat); - if (null == check) { - results.add( - AssemblyResolution.error("The patterns conflict " + subres, temp)); - continue; - } - temp = check; - - dbg.println(" Final: " + temp.lineToString()); - - AssemblyResolution fcheck = temp.checkNotForbidden(); - if (fcheck.isError()) { - results.add(fcheck); - continue; - } - temp = (AssemblyResolvedConstructor) fcheck; - - results.add(temp); - } - } - catch (Exception e) { - dbg.println("While processing: " + sem); - throw e; - } - } - results = tryResolveBackfills(results); - return results; - } - } - protected AssemblyResolutionResults tryResolveBackfills(AssemblyResolutionResults results) { AssemblyResolutionResults res = new AssemblyResolutionResults(); next_ar: for (AssemblyResolution ar : results) { @@ -431,13 +455,13 @@ public class AssemblyTreeResolver { continue; } while (true) { - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) ar; if (!rc.hasBackfills()) { // finish: The complete solution is known res.add(rc); continue next_ar; } - ar = rc.backfill(solver, vals); + ar = rc.backfill(SOLVER, vals); if (ar.isError() || ar.isBackfill()) { // fail: It is now known that the solution doesn't exist res.add(ar); @@ -454,87 +478,27 @@ public class AssemblyTreeResolver { return res; } - /** - * Resolve a branch without considering any purely-recursive productions - * - * This method is used either when the LHS has no purely-recursive definition, or before - * considering the purely-recursive definition when it is 
present. - * - * @param branch the branch - * @return the results - */ - protected AssemblyResolutionResults resolveBranchNonRecursive(AssemblyParseBranch branch) { - try (DbgCtx dc = dbg.start("Resolving (non-recursive) branch: " + branch.getProduction())) { - // Resolve children first - AssemblyResolutionResults results = new AssemblyResolutionResults(); - AssemblyProduction prod = branch.getProduction(); - List substs = branch.getSubstitutions(); - assert prod.size() == substs.size(); - - // Sort the wheat and chaff - // The resolved ones need to stay in order for the cross product - List> childRes = new ArrayList<>(); - List childErr = new ArrayList<>(); - for (int i = 0; i < prod.size(); i++) { - AssemblySymbol sym = prod.get(i); - if (!sym.takesOperandIndex()) { - continue; - } - AssemblyParseTreeNode child = substs.get(i); - if (child.isConstructor()) { - AssemblyResolutionResults rr = resolveBranch((AssemblyParseBranch) child); - HashSet childResElem = new HashSet<>(); - for (AssemblyResolution ar : rr) { - if (ar.isError()) { - childErr.add((AssemblyResolvedError) ar); - } - else { - childResElem.add((AssemblyResolvedConstructor) ar); - } - } - childRes.add(childResElem); - } - } - - // Now, search for constructors that are compatible, and resolve them wrt. the - // selected resolved children: - // This is also where the shifting will happen. - Collection semantics = grammar.getSemantics(prod); - for (List sel : Sets.cartesianProduct(childRes)) { - results.absorb(resolveSelectedChildren(prod, substs, - Collections.unmodifiableList(sel), semantics)); - } - if (!childErr.isEmpty()) { - results.add(AssemblyResolution.error("Child errors", "Resolving " + prod, - Collections.unmodifiableList(childErr))); - } - return results; - } - } - /** * Compute the offset of an operand encoded in the instruction block * + *

+ * TODO: Currently, there are duplicate mechanisms for resolving a constructor: 1) The newer + * mechanism implemented in {@link AssemblyConstructState}, and 2) the older one implemented in + * {@link #applyPatterns(AssemblyConstructorSemantic, int, AssemblyResolutionResults)}. The + * latter seems to require this method, since it does not have pre-computed shifts as in the + * former. We should probably remove the latter in favor of the former.... + * * @param opsym the operand symbol * @param cons the constructor containing the operand - * @param res the selected subconstructor encodings * @return the offset (right shift) to apply to the encoded operand */ - public static int computeOffset(OperandSymbol opsym, Constructor cons, - Map res) { + public static int computeOffset(OperandSymbol opsym, Constructor cons) { int offset = opsym.getRelativeOffset(); int baseidx = opsym.getOffsetBase(); if (baseidx != -1) { OperandSymbol baseop = cons.getOperand(baseidx); - Object r = res.get(baseidx); - if (r instanceof AssemblyResolvedConstructor) { - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) r; - offset += rc.getInstructionLength(); - } - else { - offset += baseop.getMinimumLength(); - } - offset += computeOffset(baseop, cons, res); + offset += baseop.getMinimumLength(); + offset += computeOffset(baseop, cons); } return offset; } @@ -545,51 +509,46 @@ public class AssemblyTreeResolver { * @param exp the expression to solve * @param goal the desired value of the expression * @param vals any defined symbols - * @param res the selected subconstructor encodings * @param cur the resolved constructor so far * @param description a description of the result * @return the encoded solution, or a backfill record */ protected static AssemblyResolution solveOrBackfill(PatternExpression exp, MaskedLong goal, - Map vals, Map res, AssemblyResolvedConstructor cur, - String description) { + Map vals, AssemblyResolvedPatterns cur, String description) { try { - return 
solver.solve(exp, goal, vals, res, cur, description); + return SOLVER.solve(exp, goal, vals, cur, description); } catch (NeedsBackfillException bf) { - int fieldLength = solver.getInstructionLength(exp, res); - return AssemblyResolution.backfill(exp, goal, res, fieldLength, description); + int fieldLength = SOLVER.getInstructionLength(exp); + return AssemblyResolution.backfill(exp, goal, fieldLength, description); } } /** * Attempt to solve an expression * + *

* Converts the given goal to a fully-defined {@link MaskedLong} and then solves as before. * - * @see #solveOrBackfill(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, - * String) + * @see #solveOrBackfill(PatternExpression, MaskedLong, Map, AssemblyResolvedPatterns, String) */ protected static AssemblyResolution solveOrBackfill(PatternExpression exp, long goal, - Map vals, Map res, AssemblyResolvedConstructor cur, - String description) { - return solveOrBackfill(exp, MaskedLong.fromLong(goal), vals, res, cur, description); + Map vals, AssemblyResolvedPatterns cur, String description) { + return solveOrBackfill(exp, MaskedLong.fromLong(goal), vals, cur, description); } /** * Attempt to solve an expression * + *

* Converts the given goal and bits count to a {@link MaskedLong} and then solves as before. As * a special case, if {@code bits == 0}, the goal is considered fully-defined (as if * {@code bits == 64}). * - * @see #solveOrBackfill(PatternExpression, MaskedLong, Map, Map, AssemblyResolvedConstructor, - * String) - * + * @see #solveOrBackfill(PatternExpression, MaskedLong, Map, AssemblyResolvedPatterns, String) */ protected static AssemblyResolution solveOrBackfill(PatternExpression exp, long goal, int bits, - Map vals, Map res, AssemblyResolvedConstructor cur, - String description) { + Map vals, AssemblyResolvedPatterns cur, String description) { long msk; if (bits == 0 || bits >= 64) { msk = -1L; @@ -597,7 +556,6 @@ public class AssemblyTreeResolver { else { msk = ~(-1L << bits); } - return solveOrBackfill(exp, MaskedLong.fromMaskAndValue(msk, goal), vals, res, cur, - description); + return solveOrBackfill(exp, MaskedLong.fromMaskAndValue(msk, goal), vals, cur, description); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyEOI.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyEOI.java index 6c431ea2fa..de8b400eaa 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyEOI.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyEOI.java @@ -38,7 +38,7 @@ public class AssemblyEOI extends AssemblyTerminal { @Override public Collection match(String buffer, int pos, AssemblyGrammar grammar, - Map labels) { + AssemblyNumericSymbols symbols) { if (pos == buffer.length()) { return Collections.singleton(new AssemblyParseToken(grammar, this, "")); } @@ -46,7 +46,7 @@ public class AssemblyEOI extends AssemblyTerminal { } @Override - public Collection getSuggestions(String got, Map labels) { + public Collection getSuggestions(String got, 
AssemblyNumericSymbols symbols) { return Collections.singleton(""); } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyExtendedNonTerminal.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyExtendedNonTerminal.java index e65e7e7f23..bf0fd0de0d 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyExtendedNonTerminal.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyExtendedNonTerminal.java @@ -19,6 +19,7 @@ import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyExtendedGrammar; /** * The type of non-terminal for an "extended grammar" + * * @see AssemblyExtendedGrammar */ public class AssemblyExtendedNonTerminal extends AssemblyNonTerminal { @@ -28,6 +29,7 @@ public class AssemblyExtendedNonTerminal extends AssemblyNonTerminal { /** * Construct a new extended non terminal, derived from the given non-terminal + * * @param start the start state for the extended non-terminal * @param nt the non-terminal from which the extended non-terminal is derived * @param end the end state for the extended non-terminal diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyFixedNumericTerminal.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyFixedNumericTerminal.java index 3254f03291..113e29bbf9 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyFixedNumericTerminal.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyFixedNumericTerminal.java @@ -23,6 +23,7 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken; /** * A terminal that accepts only a particular numeric value * + *

* This is different from a fixed string, because it will accept any encoding of the given numeric * value. */ @@ -31,10 +32,11 @@ public class AssemblyFixedNumericTerminal extends AssemblyNumericTerminal { /** * Construct a terminal that accepts only the given numeric value + * * @param val the value to accept */ public AssemblyFixedNumericTerminal(long val) { - super("" + val, 0); + super("" + val, 0, null); this.val = val; } @@ -44,16 +46,16 @@ public class AssemblyFixedNumericTerminal extends AssemblyNumericTerminal { } @Override - public Collection getSuggestions(String got, Map labels) { + public Collection getSuggestions(String got, AssemblyNumericSymbols symbols) { return Collections.singleton("" + val); } @Override public Collection match(String buffer, int pos, - AssemblyGrammar grammar, Map labels) { + AssemblyGrammar grammar, AssemblyNumericSymbols symbols) { // TODO: Allow label substitution here? For now, no. Collection toks = - new HashSet<>(super.match(buffer, pos, grammar, new HashMap())); + new HashSet<>(super.match(buffer, pos, grammar, AssemblyNumericSymbols.EMPTY)); Iterator tokit = toks.iterator(); while (tokit.hasNext()) { AssemblyParseNumericToken tok = tokit.next(); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNonTerminal.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNonTerminal.java index eba85acf32..7b4438317c 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNonTerminal.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNonTerminal.java @@ -19,11 +19,13 @@ import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar; /** * The type of non-terminal for an assembly grammar + * * @see AssemblyGrammar */ public class AssemblyNonTerminal extends AssemblySymbol { /** * Construct a non-terminal 
having the given name + * * @param name the name */ public AssemblyNonTerminal(String name) { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericMapTerminal.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericMapTerminal.java index 52aa21e30b..b0469bc373 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericMapTerminal.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericMapTerminal.java @@ -24,7 +24,9 @@ import ghidra.app.plugin.processors.sleigh.symbol.ValueMapSymbol; /** * A terminal that accepts only a particular set of numeric values, mapping each to another value * + *

* This often used for non-conventional numeric encodings. + * * @see ValueMapSymbol */ public class AssemblyNumericMapTerminal extends AssemblyNumericTerminal { @@ -32,20 +34,21 @@ public class AssemblyNumericMapTerminal extends AssemblyNumericTerminal { /** * Construct a terminal with the given name, accepting only the keys of a given map + * * @param name the name * @param map the map from display value to token value */ public AssemblyNumericMapTerminal(String name, Map map) { - super(name, 0); + super(name, 0, null); this.map = map; } @Override public Collection match(String buffer, int pos, - AssemblyGrammar grammar, Map labels) { + AssemblyGrammar grammar, AssemblyNumericSymbols symbols) { // NOTE: No label substitution Collection toks = - new HashSet<>(super.match(buffer, pos, grammar, new HashMap())); + new HashSet<>(super.match(buffer, pos, grammar, AssemblyNumericSymbols.EMPTY)); Collection results = new LinkedHashSet<>(); for (AssemblyParseNumericToken tok : toks) { Integer mapped = map.get(tok.getNumericValue()); @@ -58,7 +61,7 @@ public class AssemblyNumericMapTerminal extends AssemblyNumericTerminal { } @Override - public Collection getSuggestions(String got, Map labels) { + public Collection getSuggestions(String got, AssemblyNumericSymbols symbols) { Set result = new HashSet<>(); for (long k : map.keySet()) { result.add(Long.toString(k)); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericSymbols.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericSymbols.java new file mode 100644 index 0000000000..82d130df95 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericSymbols.java @@ -0,0 +1,285 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh.symbol; + +import java.util.*; +import java.util.Map.Entry; +import java.util.stream.Collectors; + +import ghidra.program.model.address.Address; +import ghidra.program.model.address.AddressSpace; +import ghidra.program.model.lang.Language; +import ghidra.program.model.lang.Register; +import ghidra.program.model.listing.Program; +import ghidra.program.model.symbol.*; + +/** + * A context to hold various symbols offered to the assembler, usable where numbers are expected. + */ +public final class AssemblyNumericSymbols { + public static final AssemblyNumericSymbols EMPTY = + new AssemblyNumericSymbols(Map.of(), Map.of(), Map.of()); + + /** + * Collect labels derived from memory-mapped registers in a language + * + *

+ * TODO: Use of registers should be limited to operands whose size matches the register size. + * + * @param labels the destination map + * @param language the language + */ + private static void collectLanguageLabels(Map labels, Language language) { + for (Register reg : language.getRegisters()) { + // TODO/HACK: There ought to be a better mechanism describing suitable symbolic + // substitutions for a given operand. + if (!reg.getAddressSpace().isRegisterSpace()) { + labels.put(reg.getName(), reg.getAddress()); + } + } + } + + /** + * Collect labels from the program's database + * + * @param labels the destination map + * @param program the source program + */ + private static void collectProgramLabels(Map labels, Program program) { + final SymbolIterator it = program.getSymbolTable().getAllSymbols(false); + while (it.hasNext()) { + Symbol sym = it.next(); + if (sym.isExternal()) { + continue; // skip externals - will generally be referenced indirectly not directly + } + SymbolType symbolType = sym.getSymbolType(); + if (symbolType != SymbolType.LABEL && symbolType != SymbolType.FUNCTION) { + continue; + } + labels.put(sym.getName(), sym.getAddress()); + } + } + + /** + * Collect equates from the program's database + * + * @param equates the destination map + * @param program the source program + */ + private static void collectProgramEquates(Map equates, Program program) { + final Iterator it = program.getEquateTable().getEquates(); + while (it.hasNext()) { + Equate eq = it.next(); + // Thought is: If that's what the user sees, then that's what the user will type! 
+ equates.put(eq.getDisplayName(), eq.getValue()); + } + } + + /** + * Get symbols from a language, when no program is available + * + * @param language the language + * @return the symbols + */ + public static AssemblyNumericSymbols fromLanguage(Language language) { + Map labels = new HashMap<>(); + collectLanguageLabels(labels, language); + return forMaps(Map.of(), labels); + } + + /** + * Get symbols from a program (and its language) + * + *

+ * TODO: It might be nice to cache these and use a listener to keep the maps up to date. Will + * depend on interactive performance. + * + * @param program the program + * @return the symbols + */ + public static AssemblyNumericSymbols fromProgram(Program program) { + Map equates = new HashMap<>(); + Map labels = new HashMap<>(); + collectLanguageLabels(labels, program.getLanguage()); + collectProgramLabels(labels, program); + collectProgramEquates(equates, program); + return forMaps(equates, labels); + } + + /** + * Get symbols for the given equate and label maps + * + * @param equates the equates + * @param labels the labels + * @return the symbols + */ + public static AssemblyNumericSymbols forMaps(Map equates, + Map labels) { + return new AssemblyNumericSymbols(Map.copyOf(equates), Map.copyOf(labels), + groupBySpace(labels)); + } + + private static Map> groupBySpace( + Map labels) { + return Collections.unmodifiableMap(labels.entrySet() + .stream() + .collect(Collectors.groupingBy(ent -> ent.getValue().getAddressSpace(), + Collectors.toUnmodifiableMap(Entry::getKey, Entry::getValue)))); + } + + private final NavigableSet all = new TreeSet<>(); + public final Map equates; + public final Map labels; + public final Map> labelsBySpace; + + private AssemblyNumericSymbols(Map equates, Map labels, + Map> labelsBySpace) { + this.equates = equates; + this.labels = labels; + this.labelsBySpace = labelsBySpace; + all.addAll(equates.keySet()); + all.addAll(labels.keySet()); + } + + /** + * Choose any symbol with the given name + * + *

+ * This will check equates first, then labels. If an equate is found, its value is returned. If + * a label is found, its addressable word offset is returned. + * + * @param name the name + * @return the value, or null + */ + public Long chooseAny(String name) { + Long eq = equates.get(name); + if (eq != null) { + return eq; + } + Address addr = labels.get(name); + if (addr != null) { + return addr.getAddressableWordOffset(); + } + return null; + } + + /** + * Choose a label with the given name in the given space + * + * @param name the name + * @param space the address space + * @return the addressable word offset of the found label, or null + */ + public Long chooseBySpace(String name, AddressSpace space) { + Map forSpace = labelsBySpace.get(space); + if (forSpace == null) { + return null; + } + Address addr = forSpace.get(name); + if (addr == null) { + return null; + } + return addr.getAddressableWordOffset(); + } + + /** + * Choose a symbol with the given name, using the space as a hint + * + *

+ * If a space is not given, or if that space is the constant space, then this will choose from + * all symbols, via {@link #chooseAny(String)}. If a space is given, and it is not the constant + * space, then this will choose from symbols in the given space, via + * {@link #chooseBySpace(String, AddressSpace)}. + * + * @param name the name + * @param space the address space, or null + * @return the equate value, or label addressable word offset, or null + */ + public Long choose(String name, AddressSpace space) { + if (space == null || space.isConstantSpace()) { + return chooseAny(name); + } + return chooseBySpace(name, space); + } + + private Collection suggestFrom(String got, Collection keys, int max, + boolean sorted) { + Set result = new HashSet<>(); + int count = 0; + for (String label : keys) { + if (count >= max) { + break; + } + if (label.startsWith(got)) { + result.add(label); + count++; + } + else if (sorted) { + break; + } + } + return result; + } + + /** + * Suggest up to max symbols having the given prefix + * + * @param got the prefix + * @param max the maximum number of symbols to suggest + * @return the collection of symbol names + */ + public Collection suggestAny(String got, int max) { + return suggestFrom(got, all.tailSet(got), max, true); + } + + /** + * Suggest up to max symbols from the given space having the given prefix + * + * @param got the prefix + * @param space the address space + * @param max the maximum number of symbols to suggest + * @return the collection of symbol names + */ + public Collection suggestBySpace(String got, AddressSpace space, int max) { + Map forSpace = labelsBySpace.get(space); + if (forSpace == null) { + return Set.of(); + } + // TODO: Should I sort these, perhaps lazily, to speed search? + return suggestFrom(got, forSpace.keySet(), max, false); + } + + /** + * Suggest up to max symbols having the given prefix, using space as a hint + * + *

+ * As in {@link #choose(String, AddressSpace)}, if space is null or the constant space, then this + * will suggest from all symbols, via {@link #suggestAny(String, int)}. If space is given, and it + * is not the constant space, then this will suggest from symbols in the given space, via + * {@link #suggestBySpace(String, AddressSpace, int)}. + * + * @param got the prefix + * @param space the space, or null + * @param max the maximum number of symbols to suggest + * @return the collection of symbol names + */ + public Collection getSuggestions(String got, AddressSpace space, int max) { + if (space == null || space.isConstantSpace()) { + return suggestAny(got, max); + } + return suggestBySpace(got, space, max); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericTerminal.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericTerminal.java index 205a5bb78f..a87bcaed37 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericTerminal.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyNumericTerminal.java @@ -18,38 +18,46 @@ package ghidra.app.plugin.assembler.sleigh.symbol; import java.util.*; import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar; -import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParser; import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken; +import ghidra.program.model.address.AddressSpace; /** - * A terminal that accepts any numeric value or program label + * A terminal that accepts any numeric value or program symbol (label, equate) * + *

* The literal may take any form accepted by UNIX strtol() with base=0. By default, the literal is - * interpreted in base 10, but it may be prefixed such that it's interpreted in an alternative - * base. With the prefix '0x', it is interpreted in hexadecimal. With the prefix '0', it is - * interpreted in octal. + * interpreted in base 10, but it may be prefixed such that it's interpreted in an alternative base. + * With the prefix '0x', it is interpreted in hexadecimal. With the prefix '0', it is interpreted in + * octal. + * + *

+ * It may also take the value of a label. If this operand is an address operand, the acceptable + * labels are restricted to those in the expected address space. */ public class AssemblyNumericTerminal extends AssemblyTerminal { public static final String PREFIX_HEX = "0x"; public static final String PREFIX_OCT = "0"; /** Some suggestions, other than labels, to provide */ - protected static final Collection suggestions = Arrays.asList(new String[] { // - "0", "1", "0x0", "+0x0", "-0x0", "01" // - }); + protected static final Collection SUGGESTIONS = + List.of("0", "1", "0x0", "+0x0", "-0x0", "01"); /** The maximum number of labels to suggest */ protected static final int MAX_LABEL_SUGGESTIONS = 10; protected final int bitsize; + protected final AddressSpace space; - // TODO: Not all numeric literals can be substituted for a label /** * Construct a terminal with the given name, accepting any numeric value or program label + * * @param name the name + * @param bitsize the maximum size of the value in bits + * @param space the address space if this terminal represents an address operand */ - public AssemblyNumericTerminal(String name, int bitsize) { + public AssemblyNumericTerminal(String name, int bitsize, AddressSpace space) { super(name); this.bitsize = bitsize; + this.space = space; } @Override @@ -63,13 +71,16 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { /** * This is only a convenience for testing * - * Please use {@link #match(String, int, AssemblyGrammar, Map) match(String, int, AssemblyGrammar, Map<String, Long>)}. + *

+ * Please use + * {@link #match(String, int, AssemblyGrammar, AssemblyNumericSymbols)}. + * * @param buffer the input buffer * @return the parsed token */ public AssemblyParseNumericToken match(String buffer) { Collection col = - match(buffer, 0, null, AssemblyParser.EMPTY_LABELS); + match(buffer, 0, null, AssemblyNumericSymbols.EMPTY); if (col.isEmpty()) { return null; } @@ -83,7 +94,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { @Override public Collection match(String buffer, int pos, - AssemblyGrammar grammar, Map labels) { + AssemblyGrammar grammar, AssemblyNumericSymbols symbols) { if (pos >= buffer.length()) { return Collections.emptySet(); } @@ -94,20 +105,21 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { return matchLiteral(pos + 1, buffer, pos, true, grammar); } else { - return match(pos, buffer, grammar, labels); + return match(pos, buffer, grammar, symbols); } } /** * Try to match a sign-less numeric literal, or a program label + * * @param s the buffer cursor where the literal or label is expected * @param buffer the input buffer * @param grammar the grammar containing this terminal - * @param labels the program labels, mapped to their values + * @param symbols the program symbols * @return the parsed token, or null */ protected Collection match(int s, String buffer, - AssemblyGrammar grammar, Map labels) { + AssemblyGrammar grammar, AssemblyNumericSymbols symbols) { if (s >= buffer.length()) { return Collections.emptySet(); } @@ -126,7 +138,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { break; } String lab = buffer.substring(s, b); - Long val = labels.get(lab); + Long val = symbols.choose(lab, space); if (val == null) { return Collections.emptySet(); } @@ -135,6 +147,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { /** * Try to match a numeric literal, after the optional sign, encoded in hex, decimal, or octal + * * @param s buffer 
cursor where the literal is expected * @param buffer the input buffer * @param pos the start offset of the token parsed so far @@ -157,6 +170,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { /** * Construct a numeric token + * * @param str the string value of the token taken verbatim from the buffer * @param num portion of the token following the optional sign and prefix * @param radix the radix of {@code num} @@ -192,6 +206,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { /** * Try to match a hexadecimal literal, following the optional sign and prefix + * * @param s the buffer cursor where the hex portion starts * @param buffer the input buffer * @param pos the start offset of the token parsed so far @@ -215,6 +230,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { /** * Try to match a decimal literal, following the optional sign and optional prefix + * * @param s the buffer cursor where the hex portion starts * @param buffer the input buffer * @param pos the start offset of the token parsed so far @@ -238,6 +254,7 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { /** * Try to match an octal literal, following the optional sign and prefix + * * @param s the buffer cursor where the hex portion starts * @param buffer the input buffer * @param pos the start offset of the token parsed so far @@ -264,18 +281,9 @@ public class AssemblyNumericTerminal extends AssemblyTerminal { } @Override - public Collection getSuggestions(String got, Map labels) { - Set s = new TreeSet<>(suggestions); - int labelcount = 0; - for (String label : labels.keySet()) { - if (labelcount >= MAX_LABEL_SUGGESTIONS) { - break; - } - if (label.startsWith(got)) { - s.add(label); - labelcount++; - } - } + public Collection getSuggestions(String got, AssemblyNumericSymbols symbols) { + Set s = new TreeSet<>(SUGGESTIONS); + s.addAll(symbols.getSuggestions(got, space, MAX_LABEL_SUGGESTIONS)); return s; } diff --git 
a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyStringMapTerminal.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyStringMapTerminal.java index bf06ce98f7..a1f3a89bc8 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyStringMapTerminal.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyStringMapTerminal.java @@ -35,6 +35,7 @@ public class AssemblyStringMapTerminal extends AssemblyTerminal { /** * Construct a terminal with the given name, accepting only the keys of a given map + * * @param name the name * @param map the map from display text to token value */ @@ -45,7 +46,7 @@ public class AssemblyStringMapTerminal extends AssemblyTerminal { @Override public Collection match(String buffer, int pos, - AssemblyGrammar grammar, Map labels) { + AssemblyGrammar grammar, AssemblyNumericSymbols symbols) { Collection result = new LinkedHashSet<>(); for (Entry ent : map.entries()) { String str = ent.getKey(); @@ -57,7 +58,7 @@ public class AssemblyStringMapTerminal extends AssemblyTerminal { } @Override - public Collection getSuggestions(String string, Map labels) { + public Collection getSuggestions(String string, AssemblyNumericSymbols symbols) { return map.keySet(); } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyStringTerminal.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyStringTerminal.java index f63ad908b7..25fdaf825c 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyStringTerminal.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyStringTerminal.java @@ -28,6 +28,7 @@ public class AssemblyStringTerminal 
extends AssemblyTerminal { /** * Construct a terminal that accepts only the given string + * * @param str the string to accept */ public AssemblyStringTerminal(String str) { @@ -42,7 +43,7 @@ public class AssemblyStringTerminal extends AssemblyTerminal { @Override public Collection match(String buffer, int pos, AssemblyGrammar grammar, - Map labels) { + AssemblyNumericSymbols symbols) { if (buffer.regionMatches(pos, str, 0, str.length())) { return Collections.singleton(new AssemblyParseToken(grammar, this, str)); } @@ -50,7 +51,7 @@ public class AssemblyStringTerminal extends AssemblyTerminal { } @Override - public Collection getSuggestions(String got, Map labels) { + public Collection getSuggestions(String got, AssemblyNumericSymbols symbols) { return Collections.singleton(str); } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblySymbol.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblySymbol.java index e18ebac5a7..848d53dbd1 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblySymbol.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblySymbol.java @@ -20,9 +20,11 @@ import ghidra.app.plugin.assembler.sleigh.grammars.AbstractAssemblyGrammar; /** * A symbol in a context-free grammar * + *

* Symbols can be either terminals or non-terminals. Non-terminals must have a defining production, - * i.e., it must appear as the left-hand side of some production in the grammar. + * i.e., it must appear as the left-hand side of some production in the grammar. * + *

* Traditionally, when displayed, non-terminals should be immediately distinguishable from * terminals. In classic CS literature, this usually means non-terminals are in CAPS, and terminals * are in lower-case. Because the assembler doesn't control the names provided by SLEIGH, we @@ -35,6 +37,7 @@ public abstract class AssemblySymbol implements Comparable { /** * Construct a new symbol with the given name + * * @param name the name */ public AssemblySymbol(String name) { @@ -46,6 +49,7 @@ public abstract class AssemblySymbol implements Comparable { /** * Get the name of this symbol + * * @return the name */ public String getName() { @@ -72,6 +76,7 @@ public abstract class AssemblySymbol implements Comparable { /** * Check if this symbol consumes an operand index of its constructor + * * @return true if the symbol represents an operand */ public boolean takesOperandIndex() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyTerminal.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyTerminal.java index 8b07a7285b..d1106545b9 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyTerminal.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/symbol/AssemblyTerminal.java @@ -16,7 +16,6 @@ package ghidra.app.plugin.assembler.sleigh.symbol; import java.util.Collection; -import java.util.Map; import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar; import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken; @@ -24,13 +23,16 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseToken; /** * The type of terminal for an assembly grammar * + *

* Unlike classical parsing, each terminal provides its own tokenizer. If multiple tokenizers yield * a token, the parser branches, possibly creating multiple, ambiguous trees. + * * @see AssemblyGrammar */ public abstract class AssemblyTerminal extends AssemblySymbol { /** * Construct a terminal having the give name + * * @param name */ public AssemblyTerminal(String name) { @@ -39,20 +41,22 @@ public abstract class AssemblyTerminal extends AssemblySymbol { /** * Attempt to match a token from the input buffer starting at a given position + * * @param buffer the input buffer * @param pos the cursor position in the buffer * @param grammar the grammar containing this terminal - * @param labels the program labels, if applicable + * @param symbols symbols from the program, suitable for use as numeric terminals * @return the matched token, or null */ public abstract Collection match(String buffer, int pos, - AssemblyGrammar grammar, Map labels); + AssemblyGrammar grammar, AssemblyNumericSymbols symbols); /** * Provide a collection of strings that this terminal would have accepted + * * @param got the remaining contents of the input buffer * @param labels the program labels, if applicable * @return a, possibly empty, collection of suggestions */ - public abstract Collection getSuggestions(String got, Map labels); + public abstract Collection getSuggestions(String got, AssemblyNumericSymbols symbols); } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseBranch.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseBranch.java index 5befdefe86..1459123b01 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseBranch.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseBranch.java @@ -20,8 +20,7 @@ import java.util.*; import 
org.apache.commons.lang3.StringUtils; -import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyGrammar; -import ghidra.app.plugin.assembler.sleigh.grammars.AssemblyProduction; +import ghidra.app.plugin.assembler.sleigh.grammars.*; import ghidra.app.plugin.assembler.sleigh.sem.AssemblyConstructorSemantic; import ghidra.app.plugin.assembler.sleigh.symbol.AssemblyNonTerminal; import ghidra.app.plugin.assembler.sleigh.symbol.AssemblySymbol; @@ -38,6 +37,7 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode /** * Construct a branch from the given grammar and production + * * @param grammar the grammar containing the production * @param prod the production applied to create this branch */ @@ -70,12 +70,14 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode /** * Prepend a child to this branch - * @param child the child * + *

* Because LR parsers produce rightmost derivations, they necessarily populate the branches * right to left. During reduction, each child is popped from the stack, traversing them in - * reverse order. This method prepends children so that when reduction is complete, the - * children are aligned to the corresponding symbols from the RHS of the production. + * reverse order. This method prepends children so that when reduction is complete, the children + * are aligned to the corresponding symbols from the RHS of the production. + * + * @param child the child */ public void addChild(AssemblyParseTreeNode child) { assert expects().equals(child.getSym()); @@ -86,22 +88,26 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode /** * See what symbol is expected next * + *

* The child added next must be associated with the token expected next. + * * @return the symbol */ protected AssemblySymbol expects() { if (!isComplete()) { - return prod.get(prod.size() - substs.size() - 1); + AssemblySentential rhs = prod.getRHS(); + return rhs.getSymbol(rhs.size() - substs.size() - 1); } return null; } /** * Check if the branch is full - * @return true if every symbol on the RHS has a corresonding child + * + * @return true if every symbol on the RHS has a corresponding child */ protected boolean isComplete() { - return prod.size() == substs.size(); + return prod.getRHS().size() == substs.size(); } @Override @@ -129,6 +135,7 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode /** * Get the production applied to create this branch + * * @return */ public AssemblyProduction getProduction() { @@ -137,6 +144,7 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode /** * Get the list of children, indexed by corresponding symbol from the RHS + * * @return */ public List getSubstitutions() { @@ -150,6 +158,7 @@ public class AssemblyParseBranch extends AssemblyParseTreeNode /** * Get the ith child, corresponding to the ith symbol from the RHS + * * @param i the position * @return the child */ diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseNumericToken.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseNumericToken.java index b65057d26d..39427a0ae1 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseNumericToken.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseNumericToken.java @@ -31,6 +31,7 @@ public class AssemblyParseNumericToken extends AssemblyParseToken { /** * Construct a numeric terminal having the given string and numeric values + * * @param grammar the grammar containing 
the terminal * @param term the terminal that matched this token * @param str the portion of the input comprising this token @@ -77,6 +78,7 @@ public class AssemblyParseNumericToken extends AssemblyParseToken { /** * Get the numeric value of the token + * * @return the value */ public long getNumericValue() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseToken.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseToken.java index cd3f0df4a5..9e81d66739 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseToken.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseToken.java @@ -32,6 +32,7 @@ public class AssemblyParseToken extends AssemblyParseTreeNode { /** * Construct a new token having the given string value + * * @param grammar the grammar containing the terminal * @param term the terminal that matched this token * @param str the portion of the input comprising this token @@ -67,6 +68,7 @@ public class AssemblyParseToken extends AssemblyParseTreeNode { /** * Get the portion of the input comprising the token + * * @return the string value */ public String getString() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseTreeNode.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseTreeNode.java index f8485a4baa..ca80792d60 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseTreeNode.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseTreeNode.java @@ -29,6 +29,7 @@ public abstract class AssemblyParseTreeNode { /** * Construct a node for a tree parsed by the given grammar + * * @param grammar 
the grammar */ public AssemblyParseTreeNode(AssemblyGrammar grammar) { @@ -38,14 +39,17 @@ public abstract class AssemblyParseTreeNode { /** * Get the symbol for which this node is substituted * + * <p>
* For a branch, this is the LHS of the corresponding production. For a token, this is the * terminal whose tokenizer matched it. + * * @return the symbol */ public abstract AssemblySymbol getSym(); /** * Get the branch which contains this node + * * @return */ public AssemblyParseBranch getParent() { @@ -54,6 +58,7 @@ public abstract class AssemblyParseTreeNode { /** * Set the branch which contains this node + * * @param parent */ protected void setParent(AssemblyParseBranch parent) { @@ -63,6 +68,7 @@ public abstract class AssemblyParseTreeNode { /** * For debugging: Display this parse tree via the given stream + * * @param out the stream */ public void print(PrintStream out) { @@ -71,13 +77,15 @@ public abstract class AssemblyParseTreeNode { /** * For debugging: Display the tree with the given indent + * * @param out the stream * @param indent the indent */ protected abstract void print(PrintStream out, String indent); /** - * Check if this node yields a subconstructor resolution + * Check if this node yields a subconstructor resolution + * * @return true if this node yields a subconstructor resolution */ public boolean isConstructor() { @@ -86,6 +94,7 @@ public abstract class AssemblyParseTreeNode { /** * Check if this node yields a numeric value + * * @return true if this node yields a numeric value */ public boolean isNumeric() { @@ -94,6 +103,7 @@ public abstract class AssemblyParseTreeNode { /** * Get the grammar used to parse the tree + * * @return the grammar */ public AssemblyGrammar getGrammar() { @@ -102,6 +112,7 @@ public abstract class AssemblyParseTreeNode { /** * Generate the string that this node parsed + * * @return the string */ public abstract String generateString(); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/SleighUtil.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/AsmUtil.java similarity index 69% rename from 
Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/SleighUtil.java rename to Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/AsmUtil.java index 835f2e559c..0572f81b27 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/SleighUtil.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/AsmUtil.java @@ -15,20 +15,21 @@ */ package ghidra.app.plugin.assembler.sleigh.util; -import java.util.Collection; -import java.util.Iterator; +import java.util.*; /** - * Utilities for {@link Collection}s + * Utilities for the Assembler */ -public class SleighUtil { +public class AsmUtil { /** * Compare two collections by their corresponding elements in order * - * If the collections have differing sizes, the ordering does not matter. The smaller - * collection precedes the larger. Otherwise, each corresponding pair of elements are compared. - * Once an unequal pair is found, the collections are ordered by those elements. This is - * analogous to {@link String} comparison. + *

+ * If the collections have differing sizes, the ordering does not matter. The smaller collection + * precedes the larger. Otherwise, each corresponding pair of elements are compared. Once an + * unequal pair is found, the collections are ordered by those elements. This is analogous to + * {@link String} comparison. + * * @param a the first set * @param b the second set * @return a comparison result as in {@link Comparable#compareTo(Object)} @@ -53,8 +54,10 @@ public class SleighUtil { /** * Compare two byte arrays by their corresponding entries * + *

* If the two arrays have differing lengths, the shorter precedes the longer. Otherwise, they * are compared as in C's {@code memcmp}, except that Java {@code byte}s are signed. + * * @param a the first array * @param b the second array * @return a comparison result as in {@link Comparable#compareTo(Object)} @@ -74,4 +77,22 @@ public class SleighUtil { return 0; } + /** + * Extend a list with the given item + * + * <p>
+ * Used in functional style when the list is immutable. + * + * @param the type of elements + * @param list the list + * @param ext the additional item + * @return an immutable copy of the list with the given item appended + */ + public static List extendList(List list, T ext) { + @SuppressWarnings("unchecked") + T[] arr = (T[]) new Object[list.size() + 1]; + list.toArray(arr); + arr[list.size()] = ext; + return List.of(arr); + } } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/DbgTimer.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/DbgTimer.java index 106c213e57..5e497df6d1 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/DbgTimer.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/DbgTimer.java @@ -21,6 +21,7 @@ import java.util.Stack; /** * A debugging, timing, and diagnostic tool * + *

* TODO: I should probably remove this and rely on the Msg.trace() method, or at the very least, * refactor this to use that. */ @@ -30,6 +31,7 @@ public class DbgTimer extends PrintStream { /** * Create a new debugging timer, wrapping the given output stream + * * @param out the stream */ public DbgTimer(OutputStream out) { @@ -58,6 +60,7 @@ public class DbgTimer extends PrintStream { /** * Create a new stream wrapping another + * * @param out the stream to wrap */ private TabbingOutputStream(OutputStream out) { @@ -66,6 +69,7 @@ public class DbgTimer extends PrintStream { /** * Start a new (indented) line of output + * * @throws IOException */ protected void startln() throws IOException { @@ -78,6 +82,7 @@ public class DbgTimer extends PrintStream { /** * Workaround: Set the time stack reference + * * @param timeStack the stack */ protected void setTimeStack(Stack timeStack) { @@ -172,20 +177,21 @@ public class DbgTimer extends PrintStream { /** * Start a new, possibly long-running, task - * @param message the message to print when the task begins - * @return a context to close when the task ends * * This is meant to be used idiomatically, as in a try-with-resources block: + * *

<pre>
-	 * {@code
 	 * try (DbgCtx dc = dbg.start("Twiddling the frobs:")) {
-	 *     // do some classy twiddling
+	 * 	// do some classy twiddling
 	 * } // this will automatically print done and the time elapsed within the try block
-	 * }
 	 * </pre>
* * This idiom is preferred because the task will be stopped even if an error occurs, if the * method returns from within the block, etc. + * + * @param message the message to print when the task begins + * @return a context to close when the task ends + * */ public DbgCtx start(Object message) { println(message); @@ -197,6 +203,7 @@ public class DbgTimer extends PrintStream { /** * Stop the current task * + * <p>
* This will print done and the elapsed time since the start of the task. The "current task" is * determined from the stack. */ @@ -208,6 +215,7 @@ public class DbgTimer extends PrintStream { /** * Replace the wrapped output stream (usually temporarily) + * * @see #resetOutputStream(TabbingOutputStream) * @param s the replacement stream * @return the original stream, wrapped in a tabbing stream @@ -223,6 +231,7 @@ public class DbgTimer extends PrintStream { /** * Put the original tabbing stream back + * * @see #setOutputStream(OutputStream) * @param s the original wrapped stream * @return the replacement stream, wrapped in a tabbing stream diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/TableEntry.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/TableEntry.java index e6fc5db338..0dc7f3098f 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/TableEntry.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/TableEntry.java @@ -31,6 +31,7 @@ public class TableEntry extends TableEntryKey { /** * Create a new table entry with the given value at the given state and symbol + * * @param state the row * @param sym the column * @param value the value @@ -42,6 +43,7 @@ public class TableEntry extends TableEntryKey { /** * Get the value of the entry + * * @return the value */ public T getValue() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/TableEntryKey.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/TableEntryKey.java index ab047b56ff..00da052b36 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/TableEntryKey.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/util/TableEntryKey.java @@ -31,6 
+31,7 @@ public class TableEntryKey implements Comparable { /** * Create a new key for the given state and symbol + * * @param state the row * @param sym the column */ @@ -79,6 +80,7 @@ public class TableEntryKey implements Comparable { /** * Get the state (row) of the key in the table + * * @return the state */ public int getState() { @@ -87,6 +89,7 @@ public class TableEntryKey implements Comparable { /** * Get the symbol (column) of the entry in the table + * * @return the symbol */ public AssemblySymbol getSym() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/ConstructState.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/ConstructState.java index 1f00f85145..2b5adcc992 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/ConstructState.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/ConstructState.java @@ -21,7 +21,7 @@ import java.util.List; import org.apache.commons.lang3.StringUtils; import generic.hash.SimpleCRC32; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; public class ConstructState { private Constructor ct; @@ -41,7 +41,11 @@ public class ConstructState { return resolvedStates.get(index); } - public void addSubState(ConstructState opState) { + public int getNumSubStates() { + return resolvedStates.size(); + } + + void addSubState(ConstructState opState) { resolvedStates.add(opState); } @@ -100,7 +104,8 @@ public class ConstructState { * encoding * * This includes braces to describe the tree structure - * @see AssemblyResolvedConstructor#dumpConstructorTree() + * + * @see AssemblyResolvedPatterns#dumpConstructorTree() * @return the constructor tree */ public String dumpConstructorTree() { diff --git 
a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/SleighInstructionPrototype.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/SleighInstructionPrototype.java index 6a22a6c1fd..9ab31edf14 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/SleighInstructionPrototype.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/SleighInstructionPrototype.java @@ -21,7 +21,7 @@ package ghidra.app.plugin.processors.sleigh; import java.util.*; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; import ghidra.app.plugin.processors.sleigh.SleighDebugLogger.SleighDebugMode; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; import ghidra.app.plugin.processors.sleigh.symbol.*; @@ -40,10 +40,9 @@ import ghidra.util.exception.NotYetImplementedException; /** * * - * The InstructionPrototype for sleigh languages. - * The prototype is unique up to the tree of Constructors. - * Variations in the bit pattern that none of the Constructor - * mask/values care about get lumped under the same prototype + * The InstructionPrototype for sleigh languages. The prototype is unique up to the tree of + * Constructors. 
Variations in the bit pattern that none of the Constructor mask/values care about + * get lumped under the same prototype */ public class SleighInstructionPrototype implements InstructionPrototype { // Flowflags for resolving flowType @@ -126,9 +125,8 @@ public class SleighInstructionPrototype implements InstructionPrototype { } /** - * Cache the Constructor state which represents the base - * mnemonic, and the operands to that mnemonic - * Cache the operand states for each operand in printing order + * Cache the Constructor state which represents the base mnemonic, and the operands to that + * mnemonic Cache the operand states for each operand in printing order */ private void cacheMnemonicState() { mnemonicState = rootState; @@ -191,8 +189,8 @@ public class SleighInstructionPrototype implements InstructionPrototype { } /** - * Walk the pcode templates in the order they would be emitted. - * Collect flowFlags FlowRecords + * Walk the pcode templates in the order they would be emitted. Collect flowFlags FlowRecords + * * @param walker the pcode template walker */ public static FlowSummary walkTemplates(OpTplWalker walker) { @@ -286,8 +284,8 @@ public class SleighInstructionPrototype implements InstructionPrototype { } /** - * Walk the Constructor tree gathering ConstructStates which are flow destinations (flowStateList) - * flowFlags and delayslot directives + * Walk the Constructor tree gathering ConstructStates which are flow destinations + * (flowStateList) flowFlags and delayslot directives */ private void cacheTreeInfo() { OpTplWalker walker = new OpTplWalker(rootState, -1); @@ -631,7 +629,9 @@ public class SleighInstructionPrototype implements InstructionPrototype { } /** - * Gather all the flow records (perhaps across multiple InstructionPrototypes via crossbuilds) and convert to Addresses + * Gather all the flow records (perhaps across multiple InstructionPrototypes via crossbuilds) + * and convert to Addresses + * * @param res is the resulting flow 
Addresses * @param parsecontext is the parsing context for the current instruction * @param context is the context for the particular address so crossbuilds can be resolved @@ -1458,9 +1458,11 @@ public class SleighInstructionPrototype implements InstructionPrototype { } /** - * Reconstruct the ParserContext's internal packed context array and its list of global ContextSet directives - * by walking a previously resolved ConstructState tree - * @param protoContext is the SleighParserContext containing the tree and holding the context results + * Reconstruct the ParserContext's internal packed context array and its list of global + * ContextSet directives by walking a previously resolved ConstructState tree + * + * @param protoContext is the SleighParserContext containing the tree and holding the context + * results * @param debug * @throws MemoryAccessException */ @@ -1589,7 +1591,7 @@ public class SleighInstructionPrototype implements InstructionPrototype { return newContext; } - ConstructState getRootState() { + public ConstructState getRootState() { return rootState; } @@ -1607,7 +1609,8 @@ public class SleighInstructionPrototype implements InstructionPrototype { * encoding * * This includes braces to describe the tree structure - * @see AssemblyResolvedConstructor#dumpConstructorTree() + * + * @see AssemblyResolvedPatterns#dumpConstructorTree() * @return the constructor tree */ public String dumpConstructorTree() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/template/HandleTpl.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/template/HandleTpl.java index 8e43e9b8f2..30a70e7944 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/template/HandleTpl.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/processors/sleigh/template/HandleTpl.java @@ -23,14 +23,13 @@ import 
ghidra.app.plugin.processors.sleigh.FixedHandle; import ghidra.app.plugin.processors.sleigh.ParserWalker; import ghidra.program.model.address.AddressFactory; import ghidra.program.model.address.AddressSpace; +import ghidra.program.model.lang.InstructionContext; import ghidra.xml.XmlElement; import ghidra.xml.XmlPullParser; /** - * - * - * Placeholder that resolves for a specific InstructionContext into - * a FixedHandle representing the semantic value of a Constructor + * Placeholder that resolves for a specific {@link InstructionContext} into a {@link FixedHandle} + * representing the semantic value of a {@link Constructor} */ public class HandleTpl { @@ -131,6 +130,7 @@ public class HandleTpl { /** * Get the size of the expected value in bits + * * @return the number of bits */ public int getSize() { @@ -144,4 +144,13 @@ public class HandleTpl { return space.getSpaceId().getSize(); } } + + /** + * Get the address space of the value, if applicable + * + * @return the address space, or null if not applicable + */ + public AddressSpace getAddressSpace() { + return space.getSpaceId(); + } } diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AARCH64BEAssemblyTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AARCH64BEAssemblyTest.java index 7881fd60fc..a5680eea98 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AARCH64BEAssemblyTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AARCH64BEAssemblyTest.java @@ -99,6 +99,18 @@ public class AARCH64BEAssemblyTest extends AbstractAssemblyTest { public void testAssemble_mov_x0_0x8() { assertOneCompatRestExact("mov x0,#0x8", "00:01:80:d2"); } + + @Test + public void testAssemble_mov_x2_0x0() { + // NB: 0 is special here because immediates include a shift. 
0 can have any shift + assertOneCompatRestExact("mov x2,#0x0", "02:00:80:d2"); + } + + @Test + public void testAssemble_mov_x1_n0x1() { + // NB: This uses ~(imm16 << (aa_hw * 16)), so -1 becomes 0 when solving the shift + assertOneCompatRestExact("mov x1,#-0x1", "01:00:80:92"); + } @Test public void testAssemble_sbfiz_x1_x2_0x2_0x20() { diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AVR32AssemblyTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AVR32AssemblyTest.java index 338139c945..aa4da73840 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AVR32AssemblyTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AVR32AssemblyTest.java @@ -34,4 +34,14 @@ public class AVR32AssemblyTest extends AbstractAssemblyTest { public void testAssemble_STM_nnSP_R7_LR() { assertOneCompatRestExact("STM --SP,R7,LR", "eb:cd:40:80"); } + + @Test + public void testAssemble_LDDSP_PC_SP_m0x38() { + assertOneCompatRestExact("LDDSP PC,SP[0x38]", "40:ef"); + } + + @Test + public void testAssemble_STDSP_SP_m0xem_R10() { + assertOneCompatRestExact("STDSP SP[0xe],R10", "50:ea"); + } } diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AbstractAssemblyTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AbstractAssemblyTest.java index 77914b0229..6b526504dd 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AbstractAssemblyTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AbstractAssemblyTest.java @@ -164,13 +164,13 @@ public abstract class AbstractAssemblyTest extends AbstractGenericTest { AssemblyPatternBlock ins = AssemblyPatternBlock.fromString(instr); dbg.println("Checking against: " + ins); Set errs = new TreeSet<>(); // Display 
in order, I guess - Set misses = new TreeSet<>(); + Set misses = new TreeSet<>(); for (AssemblyResolution ar : rr) { if (ar.isError()) { errs.add((AssemblyResolvedError) ar); continue; } - AssemblyResolvedConstructor rescon = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns rescon = (AssemblyResolvedPatterns) ar; if (ins.getVals().length == rescon.getInstructionLength() && ins.combine(rescon.getInstruction()) != null) { return; @@ -208,14 +208,14 @@ public abstract class AbstractAssemblyTest extends AbstractGenericTest { boolean failedOne = false; Set errs = new TreeSet<>(); // Display in order, I guess. MultiValuedMap misTxtToCons = new TreeSetValuedTreeMap<>(); - MultiValuedMap misTxtConsToRes = + MultiValuedMap misTxtConsToRes = new TreeSetValuedTreeMap<>(); for (AssemblyResolution ar : rr) { if (ar.isError()) { errs.add((AssemblyResolvedError) ar); continue; } - AssemblyResolvedConstructor rcon = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns rcon = (AssemblyResolvedPatterns) ar; try { dbg.println(" " + rcon.lineToString()); for (byte[] ins : rcon.possibleInsVals(ctx)) { @@ -241,7 +241,7 @@ public abstract class AbstractAssemblyTest extends AbstractGenericTest { for (String dis : misTxtToCons.keySet()) { dbg.println(" " + dis); for (String cons : misTxtToCons.get(dis)) { - for (AssemblyResolvedConstructor rc : misTxtConsToRes.get(dis + cons)) { + for (AssemblyResolvedPatterns rc : misTxtConsToRes.get(dis + cons)) { dbg.println(" d:" + cons); dbg.println(" a:" + rc.dumpConstructorTree()); dbg.println(rc.toString(" ")); @@ -358,7 +358,7 @@ public abstract class AbstractAssemblyTest extends AbstractGenericTest { } @Override - public AssemblyResolvedConstructor select(AssemblyResolutionResults rr, + public AssemblyResolvedPatterns select(AssemblyResolutionResults rr, AssemblyPatternBlock ctx) throws AssemblySemanticException { if (checkOneCompat) { checkOneCompat(instr, rr); diff --git 
a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AssemblyTestCase.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AssemblyTestCase.java index c6a0429dcc..ffd56c9349 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AssemblyTestCase.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/AssemblyTestCase.java @@ -171,13 +171,13 @@ public abstract class AssemblyTestCase extends AbstractGenericTest { AssemblyPatternBlock ins = AssemblyPatternBlock.fromString(instr); dbg.println("Checking against: " + ins); Set errs = new TreeSet<>(); // Display in order, I guess - Set misses = new TreeSet<>(); + Set misses = new TreeSet<>(); for (AssemblyResolution ar : rr) { if (ar.isError()) { errs.add((AssemblyResolvedError) ar); continue; } - AssemblyResolvedConstructor rescon = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns rescon = (AssemblyResolvedPatterns) ar; if (ins.getVals().length == rescon.getInstructionLength() && ins.combine(rescon.getInstruction()) != null) { return; @@ -214,14 +214,14 @@ public abstract class AssemblyTestCase extends AbstractGenericTest { boolean failedOne = false; Set errs = new TreeSet<>(); // Display in order, I guess. 
MultiValuedMap misTxtToCons = new TreeSetValuedTreeMap<>(); - MultiValuedMap misTxtConsToRes = + MultiValuedMap misTxtConsToRes = new TreeSetValuedTreeMap<>(); for (AssemblyResolution ar : rr) { if (ar.isError()) { errs.add((AssemblyResolvedError) ar); continue; } - AssemblyResolvedConstructor rcon = (AssemblyResolvedConstructor) ar; + AssemblyResolvedPatterns rcon = (AssemblyResolvedPatterns) ar; try { dbg.println(" " + rcon.lineToString()); for (byte[] ins : rcon.possibleInsVals(ctx)) { @@ -247,7 +247,7 @@ public abstract class AssemblyTestCase extends AbstractGenericTest { for (String dis : misTxtToCons.keySet()) { dbg.println(" " + dis); for (String cons : misTxtToCons.get(dis)) { - for (AssemblyResolvedConstructor rc : misTxtConsToRes.get(dis + cons)) { + for (AssemblyResolvedPatterns rc : misTxtConsToRes.get(dis + cons)) { dbg.println(" d:" + cons); dbg.println(" a:" + rc.dumpConstructorTree()); dbg.println(rc.toString(" ")); @@ -363,7 +363,7 @@ public abstract class AssemblyTestCase extends AbstractGenericTest { } @Override - public AssemblyResolvedConstructor select(AssemblyResolutionResults rr, + public AssemblyResolvedPatterns select(AssemblyResolutionResults rr, AssemblyPatternBlock ctx) throws AssemblySemanticException { if (checkOneCompat) { checkOneCompat(instr, rr); diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/MIPSMicroAssemblyTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/MIPSMicroAssemblyTest.java index 049af554b1..54269220dd 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/MIPSMicroAssemblyTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/MIPSMicroAssemblyTest.java @@ -37,4 +37,10 @@ public class MIPSMicroAssemblyTest extends AbstractAssemblyTest { assertOneCompatRestExact("movep a1,a2,s1,s2", "84:52", "40:00:00:00", 0x004286a2, "movep a1,a2,s1,s2"); } + + 
@Test + public void testAssemble_bc1t_0x1_0x0040bdec() { + assertOneCompatRestExact("bc1t 0x1, 0x0040bdec", "43:a4:00:04", "40:00:00:00", 0x0040bde0, + "bc1t 0x1, 0x0040bdec"); + } } diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/SolverTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/SolverTest.java index eba1c145cc..8bc8de7bb4 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/SolverTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/SolverTest.java @@ -17,7 +17,6 @@ package ghidra.app.plugin.assembler.sleigh; import static org.junit.Assert.*; -import java.util.ArrayList; import java.util.Collections; import java.util.concurrent.atomic.AtomicReference; @@ -32,13 +31,12 @@ import ghidra.app.plugin.languages.sleigh.SleighLanguages; import ghidra.app.plugin.processors.sleigh.*; import ghidra.app.plugin.processors.sleigh.expression.PatternExpression; import ghidra.app.plugin.processors.sleigh.pattern.DisjointPattern; -import ghidra.app.plugin.processors.sleigh.symbol.*; +import ghidra.app.plugin.processors.sleigh.symbol.SubtableSymbol; import ghidra.app.plugin.processors.sleigh.template.ConstructTpl; import ghidra.app.plugin.processors.sleigh.template.HandleTpl; import ghidra.framework.Application; import ghidra.framework.ApplicationConfiguration; import ghidra.program.model.lang.LanguageID; -import ghidra.program.model.scalar.Scalar; import ghidra.util.Msg; import ghidra.xml.XmlPullParser; import ghidra.xml.XmlPullParserFactory; @@ -180,8 +178,8 @@ public class SolverTest { RecursiveDescentSolver solver = RecursiveDescentSolver.getSolver(); AssemblyResolution res = solver.solve(exp, MaskedLong.fromLong(0x78), Collections.emptyMap(), - Collections.emptyMap(), AssemblyResolution.nop("NOP", null), "Test"); - AssemblyResolution e = AssemblyResolvedConstructor.fromString("ins:X7:X8", 
"Test", null); + AssemblyResolution.nop("NOP"), "Test"); + AssemblyResolution e = AssemblyResolvedPatterns.fromString("ins:X7:X8", "Test", null); assertEquals(e, res); } @@ -259,103 +257,6 @@ public class SolverTest { assertEquals(16, htpl.getSize()); } - public void testExperimentGetOperandExportSize1() throws Exception { - if (!Application.isInitialized()) { - Application.initializeApplication(new GhidraApplicationLayout(), - new ApplicationConfiguration()); - } - SleighLanguageProvider provider = new SleighLanguageProvider(); - SleighLanguage lang = - (SleighLanguage) provider.getLanguage(new LanguageID("AARCH64:BE:64:v8A")); - AtomicReference consref = new AtomicReference<>(); - SleighLanguages.traverseConstructors(lang, new ConstructorEntryVisitor() { - @Override - public int visit(SubtableSymbol subtable, DisjointPattern pattern, Constructor cons) { - if ("Imm_logical_imm32_operand".equals(subtable.getName())) { - if ("ins:SS:C[00xx]:[x0xx]X:XX:XX".equals(pattern.toString())) { - consref.set(cons); - return FINISHED; - } - } - return CONTINUE; - } - }); - Constructor ct = consref.get(); - ConstructState st = new ConstructState(null) { - @Override - public Constructor getConstructor() { - return ct; - } - }; - int num = ct.getNumOperands(); - for (int i = 0; i < num; i++) { - ConstructState sub = new ConstructState(st); - st.addSubState(sub); - } - SleighParserContext ctx = new SleighParserContext(null, null, null, null); - - ParserWalker walker = new ParserWalker(ctx); - - walker.subTreeState(st); - while (walker.isState()) { - assert ct == walker.getConstructor(); - int oper = walker.getOperand(); - int numoper = ct.getNumOperands(); - while (oper < numoper) { - OperandSymbol sym = ct.getOperand(oper); - walker.pushOperand(oper); - TripleSymbol triple = sym.getDefiningSymbol(); - if (triple != null) { - if (triple instanceof SubtableSymbol) { - break; - } - FixedHandle handle = walker.getParentHandle(); - triple.getFixedHandle(handle, walker); - } - else { 
// Must be an expression - //PatternExpression patexp = sym.getDefiningExpression(); - //long res = patexp.getValue(walker); - FixedHandle hand = walker.getParentHandle(); - hand.space = lang.getAddressFactory().getConstantSpace(); - hand.offset_space = null; - hand.offset_offset = 0x1010101010101010L; - hand.size = 0; - } - walker.popOperand(); - oper++; - } - if (oper >= numoper) { - ConstructTpl templ = ct.getTempl(); - if (templ != null) { - HandleTpl res = templ.getResult(); - if (res != null) { - res.fix(walker.getParentHandle(), walker); - } - else { - walker.getParentHandle().setInvalid(); - } - } - walker.popOperand(); - } - } - - walker.subTreeState(st); - - walker.subTreeState(st); - ArrayList list = new ArrayList<>(); - ct.printList(walker, list); - for (Object obj : list) { - if (obj instanceof Character) { - System.out.print(obj); - } - else if (obj instanceof FixedHandle) { - FixedHandle handle = (FixedHandle) obj; - System.out.println( - new Scalar(8 * handle.size, handle.offset_offset) + "(" + handle.size + ")"); - } - } - } - @Test public void testInRange() { // Simple case of zero, signed and unsigned diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/TokensTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/TokensTest.java index 2f9b12d841..b3c0aae89c 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/TokensTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/TokensTest.java @@ -26,7 +26,7 @@ import ghidra.app.plugin.assembler.sleigh.tree.AssemblyParseNumericToken; public class TokensTest { @Test public void testNumeric() { - AssemblyNumericTerminal t = new AssemblyNumericTerminal("test", 0); + AssemblyNumericTerminal t = new AssemblyNumericTerminal("test", 0, null); AssemblyParseNumericToken m; diff --git 
a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/dsPIC30FAssemblyTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/dsPIC30FAssemblyTest.java new file mode 100644 index 0000000000..e3cd612c28 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/dsPIC30FAssemblyTest.java @@ -0,0 +1,40 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.assembler.sleigh; + +import org.junit.Ignore; +import org.junit.Test; + +import ghidra.program.model.lang.LanguageID; + +public class dsPIC30FAssemblyTest extends AbstractAssemblyTest { + + @Override + protected LanguageID getLanguageID() { + return new LanguageID("dsPIC30F:LE:24:default"); + } + + @Test + public void testAssemble_call_W0() { + assertOneCompatRestExact("call W0", "00:00:01:00", 0x000100); + } + + @Test + @Ignore("Fails because W4 is a valid label, but the wrong 'size'") + public void testAssemble_clr_b_W4() { + assertOneCompatRestExact("clr.b W4", "00:42:eb:00", 0x000100); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/parse/ParserTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/parse/ParserTest.java index d3f237f542..c94f354ed7 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/parse/ParserTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/parse/ParserTest.java @@ -27,7 +27,7 @@ import org.junit.Test; import ghidra.app.plugin.assembler.sleigh.grammars.*; import ghidra.app.plugin.assembler.sleigh.symbol.*; import ghidra.app.plugin.assembler.sleigh.tree.*; -import ghidra.app.plugin.assembler.sleigh.util.SleighUtil; +import ghidra.app.plugin.assembler.sleigh.util.AsmUtil; import ghidra.util.NullOutputStream; public class ParserTest { @@ -119,10 +119,10 @@ public class ParserTest { // I don't care the state numbers, but I do want to make sure every state is present Comparator> comp = (Set a, - Set b) -> SleighUtil.compareInOrder(a, b); + Set b) -> AsmUtil.compareInOrder(a, b); TreeSet> states = new TreeSet<>(comp); for (AssemblyParseState pstate : parser.states) { - TreeSet state = new TreeSet<>(pstate); + TreeSet state = new TreeSet<>(pstate.getKernel()); states.add(state); } @@ -558,14 +558,14 @@ public class ParserTest { 
AssemblySentential rhs = new AssemblySentential<>(); for (Object o : objs) { if (o instanceof AssemblySymbol) { - rhs.add((AssemblySymbol) o); + rhs.addSymbol((AssemblySymbol) o); } else if (o instanceof String) { if (" ".equals(o)) { rhs.addWS(); } else { - rhs.add(new AssemblyStringTerminal((String) o)); + rhs.addSymbol(new AssemblyStringTerminal((String) o)); } } else { diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x86AVX2AssemblyTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x64AVX2AssemblyTest.java similarity index 85% rename from Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x86AVX2AssemblyTest.java rename to Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x64AVX2AssemblyTest.java index ac9a0c3a5b..087c12678e 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x86AVX2AssemblyTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x64AVX2AssemblyTest.java @@ -20,7 +20,7 @@ import org.junit.Test; import ghidra.program.model.lang.LanguageID; import ghidra.util.Msg; -public class x86AVX2AssemblyTest extends AbstractAssemblyTest { +public class x64AVX2AssemblyTest extends AbstractAssemblyTest { @Override protected LanguageID getLanguageID() { return new LanguageID("x86:LE:64:default"); @@ -36,4 +36,9 @@ public class x86AVX2AssemblyTest extends AbstractAssemblyTest { assertOneCompatRestExact("VMOVSS dword ptr [-0x4 + RBP],XMM0", "c5:fa:11:45:fc"); } } + + @Test + public void testAssemble_VPSHUFD_YMM0_YMM0_0xd8() { + assertOneCompatRestExact("VPSHUFD YMM0,YMM0,0xd8", "c5:fd:70:c0:d8"); + } } diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x64AssemblyTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x64AssemblyTest.java new 
file mode 100644 index 0000000000..569a820f70 --- /dev/null +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x64AssemblyTest.java @@ -0,0 +1,307 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.assembler.sleigh; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import org.junit.Test; + +import ghidra.app.plugin.assembler.*; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; +import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns; +import ghidra.program.model.address.*; +import ghidra.program.model.lang.*; +import ghidra.program.model.mem.MemoryAccessException; +import ghidra.util.Msg; + +public class x64AssemblyTest extends AbstractAssemblyTest { + @Override + protected LanguageID getLanguageID() { + return new LanguageID("x86:LE:64:default"); + } + + @Test + public void testReasonableErrorMessageLength() throws AssemblySemanticException { + Assembler assembler = Assemblers.getAssembler(lang); + Address addr = lang.getDefaultSpace().getAddress(DEFAULT_ADDR); + try { + assembler.assembleLine(addr, "UNLIKELY qword ptr [RAX],RBX"); + fail(); // The exception must be thrown + } + catch (AssemblySyntaxException e) { + Msg.info(this, "Got expected syntax error: " + e); + assertTrue(e.getMessage().length() < 1000); + } + } + + @Test + public void testAssemble_ADD_m0x12_RAXm_RBX() { + // Again, a 
little odd. Imm8 does not have the I+R form. + try { + assertOneCompatRestExact("ADD qword ptr [RAX + 0x12],RBX", "48:01:98:12:00:00:00"); + } + catch (DisassemblyMismatchException e) { + Msg.warn(this, "Swapping to test case with [I+R] form"); + assertOneCompatRestExact("ADD qword ptr [0x12 + RAX],RBX", "48:01:98:12:00:00:00"); + } + } + + @Test + public void testAssemble_ADD_m0x1234_RAXm_RBX() { + // Once the operand order is changed back, the catch case will not be necessary + try { + assertOneCompatRestExact("ADD qword ptr [RAX + 0x1234],RBX", "48:01:98:34:12:00:00"); + } + catch (DisassemblyMismatchException e) { + Msg.warn(this, "Swapping to test case with [I+R] form"); + assertOneCompatRestExact("ADD qword ptr [0x1234 + RAX],RBX", "48:01:98:34:12:00:00"); + } + } + + //@Test + //@Ignore("Can no longer isolate Imm8 case as [R+I]") + public void testAssemble_ADD_mRAX_0x1234m_RBX() { + // The spec is a little odd: only imm8 has R+I form. Others are I+R. + assertAllSemanticErrors("ADD qword ptr [RAX+0x1234],RBX"); + } + + @Test + public void testAssemble_ADD_mRAX_0x12m_RBX() { + assertOneCompatRestExact("ADD qword ptr [RAX + 0x12],RBX", "48:01:58:12"); + } + + @Test + public void testAssemble_ADD_mRAX_127m_EBX() { + assertOneCompatRestExact("ADD dword ptr [RAX+127], EBX", "01:58:7f", + "ADD dword ptr [RAX + 0x7f],EBX"); + } + + @Test + public void testAssemble_ADD_mRAX_127m_RBX() { + assertOneCompatRestExact("ADD qword ptr [RAX+127], RBX", "48:01:58:7f", + "ADD qword ptr [RAX + 0x7f],RBX"); + } + + //@Test + //@Ignore("Can no longer isolate Imm8 case as [R+I]") + public void testAssemble_ADD_mRAX_128m_RBX() { + assertAllSemanticErrors("ADD qword ptr [RAX+128],RBX"); + } + + @Test + public void testAssemble_ADD_mRAX_n0x12m_RBX() { + assertOneCompatRestExact("ADD qword ptr [RAX + -0x12],RBX", "48:01:58:ee"); + } + + @Test + public void testAssemble_ADD_mRAX_nx0x12m_RBX() { + assertAllSyntaxErrors("ADD [RAX-0x12],RBX"); + } + + @Test + public void 
testAssemble_ADD_mRAXm_RBX() { + assertOneCompatRestExact("ADD qword ptr [RAX],RBX", "48:01:18"); + } + + @Test + public void testAssemble_ADD_mRBXm_BL() { + assertOneCompatRestExact("ADD byte ptr [RBX],BL", "48:00:1b"); + } + + @Test + public void testAssemble_ADD_mRDX_RSI__0x04m_EBX() { + assertOneCompatRestExact("ADD dword ptr [RDX+RSI*4], EBX", "01:1c:b2", + "ADD dword ptr [RDX + RSI*0x4],EBX"); + } + + @Test + public void testAssemble_ADD_RAX_mRDI_RDX__0x08m() { + assertOneCompatRestExact("ADD RAX, qword ptr [RDI+RDX*8]", "48:03:04:d7", + "ADD RAX,qword ptr [RDI + RDX*0x8]"); + } + + @Test + public void testAssemble_ADD_RSP_0x8() { + assertOneCompatRestExact("ADD RSP,0x8", "48:83:c4:08"); + } + + @Test + public void testAssemble_AND_EAX_0x80808080() { + assertOneCompatRestExact("AND EAX,0x80808080", "25:80:80:80:80"); + } + + @Test + public void testAssemble_AND_RSP_n0x10() { + assertOneCompatRestExact("AND RSP,-0x10", "48:83:e4:f0"); + } + + @Test + public void testAssemble_CMP_byte_ptr_m0x006dbeefm_0() { + assertOneCompatRestExact("CMP byte ptr [0x006dbeef],0", "80:3d:e8:be:6d:c0:00", + "CMP byte ptr [0x006dbeef],0x0"); + } + + @Test + public void testAssemble_CMP_byte_ptr_mRBPm_0x0() { + assertOneCompatRestExact("CMP byte ptr [RBP],0x0", "80:7d:00:00"); + } + + @Test + public void testAssemble_JG_0x00400047() { + assertOneCompatRestExact("JG 0x00400047", "7f:45", 0x00400000); + } + + @Test + public void testAssemble_JMP_0x34() { + assertOneCompatRestExact("JMP 0x34", "e9:2f:00:00:c0", "JMP 0x00000034"); + } + + @Test + public void testAssemble_MOV_RAX_FSm0x28m() { + // 1823[1834[3232[1141[970[944,928[845]]],774]]] + assertOneCompatRestExact("MOV RAX,qword ptr FS:[0x28]", "64:48:8b:04:25:28:00:00:00"); + } + + @Test + public void testAssemble_MOV_RBX_mRSP_0x8m() { + assertOneCompatRestExact("MOV RBX,qword ptr [RSP + 0x8]", "48:8b:5c:24:08"); + } + + @Test + public void testAssemble_MOV_RCX_mR12m() { + assertOneCompatRestExact("MOV RCX,qword ptr [R12]", 
"49:8b:0c:24"); + } + + @Test + public void testAssemble_MOV_mRBXm_R14W() { + /* + * Constructor Line #'s: instruction(1825), instruction(1835), MOV(3221), rm16(1128), + * Mem(969), segWide(939), addr64(918), Rmr64(791), Reg16(771) + */ + assertOneCompatRestExact("MOV word ptr [RBX],R14W", "66:44:89:33"); + } + + @Test + public void testAssemble_MOV_mRSP_n0x10m_RBX() { + assertOneCompatRestExact("MOV qword ptr [RSP + -0x10],RBX", "48:89:5c:24:f0"); + } + + @Test + public void testAssemble_NOP() { + assertOneCompatRestExact("NOP", "90"); + } + + @Test + public void testAssemble_NOP_CS_mRAX_RAX__0x1m() { + assertOneCompatRestExact("NOP word ptr CS:[RAX + RAX*0x1]", + "66:2e:0f:1f:84:00:00:00:00:00"); + } + + @Test + public void testAssemble_PUSH_RAX() { + assertOneCompatRestExact("PUSH RAX", "50"); + } + + public void testAssemble_POP_RBX() { + assertOneCompatRestExact("POP RBX", "5b"); + } + + @Test + public void testAssembly_SAR_RBX_1() { + assertOneCompatRestExact("SAR RBX,1", "48:d1:fb", "SAR RBX,1", "SAR RBX,0x1"); + } + + @Test + public void testAssembly_SAR_DL_1() { + assertOneCompatRestExact("SAR DL,1", "d0:fa", "SAR DL,1", "SAR DL,0x1"); + } + + @Test + public void testAssemble_SCASB_RDI() { + assertOneCompatRestExact("SCASB RDI", "ae"); + } + + @Test + public void testAssemble_SCASB_REPE_RDI() { + assertOneCompatRestExact("SCASB.REPE RDI", "f3:ae"); + } + + @Test + public void testAssemble_SCASB_REPNE_RDI() { + assertOneCompatRestExact("SCASB.REPNE RDI", "f2:ae"); + } + + @Test + public void testAssembly_SHR_R13D_1() { + assertOneCompatRestExact("SHR R13D,1", "41:d1:ed", "SHR R13D,1", "SHR R13D,0x1"); + } + + @Test + public void testAssemble_SUB_RSP_0x8() { + assertOneCompatRestExact("SUB RSP,0x8", "48:83:ec:08"); + } + + @Test + public void testAssemble_CVTSI2SD_XMM12_EDX() { + assertOneCompatRestExact("CVTSI2SD XMM12,EDX", "f2:44:0f:2a:e2"); + } + + @Test + public void testAssembly_CALL_0x0041bb80() { + assertOneCompatRestExact("CALL 0x0041bb80", 
"e8:5f:ba:01:00", 0x0040011c); + } + + @Test + public void testAssembly_AND_mRBP_n0x8m_0xffff0000() { + assertOneCompatRestExact("AND qword ptr [RBP + -0x8],-0x10000", + "48:81:65:f8:00:00:ff:ff"); + } + + //@Ignore("This is a demonstration of an issue with signedness and scalar print pieces.") + //@Test + public void testAssembly_AND_mRBP_n0x8m_0x80() + throws AssemblySyntaxException, AddressOutOfBoundsException, InsufficientBytesException, + UnknownInstructionException, AddressOverflowException, MemoryAccessException { + Assembler assembler = Assemblers.getAssembler(lang); + Address at = lang.getDefaultSpace().getAddress(0x00400000); + for (AssemblyResolution rr : assembler.resolveLine(at, "AND [RBP + -0x8],-0x80")) { + if (rr.isError()) { + //AssemblyResolvedError err = (AssemblyResolvedError) rr; + //System.out.println(err.getError()); + } + else { + AssemblyResolvedPatterns rc = (AssemblyResolvedPatterns) rr; + System.out.print(rc.getInstruction().fillMask()); + System.out.print(" "); + System.out.println(disassemble(0x00400000, rc.getInstruction().getVals(), + assembler.getContextAt(at).getVals())); + } + } + } + + @Test + public void testSuggest_ADD() { + assertAllSyntaxErrors("ADD"); + assertAllSyntaxErrors("ADD "); + assertAllSyntaxErrors("ADD ["); + } + + @Test + public void testAssemblyCompat32_DEC_EAX() { + assertOneCompatRestExact("DEC EAX", "48", "09:00:00:00", 0x00400000, "DEC EAX"); + } +} diff --git a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x86AssemblyTest.java b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x86AssemblyTest.java index b080127dd2..9ad07679c1 100644 --- a/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x86AssemblyTest.java +++ b/Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/x86AssemblyTest.java @@ -15,288 +15,27 @@ */ package ghidra.app.plugin.assembler.sleigh; -import static 
org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - +import org.junit.Ignore; import org.junit.Test; -import ghidra.app.plugin.assembler.*; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolution; -import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedConstructor; -import ghidra.program.model.address.*; -import ghidra.program.model.lang.*; -import ghidra.program.model.mem.MemoryAccessException; +import ghidra.program.model.lang.LanguageID; import ghidra.util.Msg; public class x86AssemblyTest extends AbstractAssemblyTest { @Override protected LanguageID getLanguageID() { - return new LanguageID("x86:LE:64:default"); + return new LanguageID("x86:LE:32:default"); } @Test - public void testReasonableErrorMessageLength() throws AssemblySemanticException { - Assembler assembler = Assemblers.getAssembler(lang); - Address addr = lang.getDefaultSpace().getAddress(DEFAULT_ADDR); + @Ignore("Some results are disassembled with + 0xfffffff8 instead. Wrong but harmless here") + public void testAssemble_ADD_ECX_mEBX_n0x8m() { try { - assembler.assembleLine(addr, "UNLIKELY qword ptr [RAX],RBX"); - fail(); // The exception must be thrown - } - catch (AssemblySyntaxException e) { - Msg.info(this, "Got expected syntax error: " + e); - assertTrue(e.getMessage().length() < 1000); - } - } - - @Test - public void testAssemble_ADD_m0x12_RAXm_RBX() { - // Again, a little odd. Imm8 does not have the I+R form. 
- try { - assertOneCompatRestExact("ADD qword ptr [RAX + 0x12],RBX", "48:01:98:12:00:00:00"); + assertOneCompatRestExact("ADD ECX,dword ptr [EDX + -0x8]", "03:4a:f8"); } catch (DisassemblyMismatchException e) { Msg.warn(this, "Swapping to test case with [I+R] form"); - assertOneCompatRestExact("ADD qword ptr [0x12 + RAX],RBX", "48:01:98:12:00:00:00"); + assertOneCompatRestExact("ADD ECX,dword ptr [-0x8 + EDX]", "03:4a:f8"); } } - - @Test - public void testAssemble_ADD_m0x1234_RAXm_RBX() { - // Once the operand order is changed back, the catch case will not be necessary - try { - assertOneCompatRestExact("ADD qword ptr [RAX + 0x1234],RBX", "48:01:98:34:12:00:00"); - } - catch (DisassemblyMismatchException e) { - Msg.warn(this, "Swapping to test case with [I+R] form"); - assertOneCompatRestExact("ADD qword ptr [0x1234 + RAX],RBX", "48:01:98:34:12:00:00"); - } - } - - //@Test - //@Ignore("Can no longer isolate Imm8 case as [R+I]") - public void testAssemble_ADD_mRAX_0x1234m_RBX() { - // The spec is a little odd: only imm8 has R+I form. Others are I+R. 
- assertAllSemanticErrors("ADD qword ptr [RAX+0x1234],RBX"); - } - - @Test - public void testAssemble_ADD_mRAX_0x12m_RBX() { - assertOneCompatRestExact("ADD qword ptr [RAX + 0x12],RBX", "48:01:58:12"); - } - - @Test - public void testAssemble_ADD_mRAX_127m_EBX() { - assertOneCompatRestExact("ADD dword ptr [RAX+127], EBX", "01:58:7f", - "ADD dword ptr [RAX + 0x7f],EBX"); - } - - @Test - public void testAssemble_ADD_mRAX_127m_RBX() { - assertOneCompatRestExact("ADD qword ptr [RAX+127], RBX", "48:01:58:7f", - "ADD qword ptr [RAX + 0x7f],RBX"); - } - - //@Test - //@Ignore("Can no longer isolate Imm8 case as [R+I]") - public void testAssemble_ADD_mRAX_128m_RBX() { - assertAllSemanticErrors("ADD qword ptr [RAX+128],RBX"); - } - - @Test - public void testAssemble_ADD_mRAX_n0x12m_RBX() { - assertOneCompatRestExact("ADD qword ptr [RAX + -0x12],RBX", "48:01:58:ee"); - } - - @Test - public void testAssemble_ADD_mRAX_nx0x12m_RBX() { - assertAllSyntaxErrors("ADD [RAX-0x12],RBX"); - } - - @Test - public void testAssemble_ADD_mRAXm_RBX() { - assertOneCompatRestExact("ADD qword ptr [RAX],RBX", "48:01:18"); - } - - @Test - public void testAssemble_ADD_mRBXm_BL() { - assertOneCompatRestExact("ADD byte ptr [RBX],BL", "48:00:1b"); - } - - @Test - public void testAssemble_ADD_mRDX_RSI__0x04m_EBX() { - assertOneCompatRestExact("ADD dword ptr [RDX+RSI*4], EBX", "01:1c:b2", - "ADD dword ptr [RDX + RSI*0x4],EBX"); - } - - @Test - public void testAssemble_ADD_RAX_mRDI_RDX__0x08m() { - assertOneCompatRestExact("ADD RAX, qword ptr [RDI+RDX*8]", "48:03:04:d7", - "ADD RAX,qword ptr [RDI + RDX*0x8]"); - } - - @Test - public void testAssemble_ADD_RSP_0x8() { - assertOneCompatRestExact("ADD RSP,0x8", "48:83:c4:08"); - } - - @Test - public void testAssemble_AND_EAX_0x80808080() { - assertOneCompatRestExact("AND EAX,0x80808080", "25:80:80:80:80"); - } - - @Test - public void testAssemble_AND_RSP_n0x10() { - assertOneCompatRestExact("AND RSP,-0x10", "48:83:e4:f0"); - } - - @Test - public void 
testAssemble_CMP_byte_ptr_m0x006dbeefm_0() { - assertOneCompatRestExact("CMP byte ptr [0x006dbeef],0", "80:3d:e8:be:6d:c0:00", - "CMP byte ptr [0x006dbeef],0x0"); - } - - @Test - public void testAssemble_CMP_byte_ptr_mRBPm_0x0() { - assertOneCompatRestExact("CMP byte ptr [RBP],0x0", "80:7d:00:00"); - } - - @Test - public void testAssemble_JG_0x00400047() { - assertOneCompatRestExact("JG 0x00400047", "7f:45", 0x00400000); - } - - @Test - public void testAssemble_JMP_0x34() { - assertOneCompatRestExact("JMP 0x34", "e9:2f:00:00:c0", "JMP 0x00000034"); - } - - @Test - public void testAssemble_MOV_RAX_FSm0x28m() { - // 1823[1834[3232[1141[970[944,928[845]]],774]]] - assertOneCompatRestExact("MOV RAX,qword ptr FS:[0x28]", "64:48:8b:04:25:28:00:00:00"); - } - - @Test - public void testAssemble_MOV_RBX_mRSP_0x8m() { - assertOneCompatRestExact("MOV RBX,qword ptr [RSP + 0x8]", "48:8b:5c:24:08"); - } - - @Test - public void testAssemble_MOV_RCX_mR12m() { - assertOneCompatRestExact("MOV RCX,qword ptr [R12]", "49:8b:0c:24"); - } - - @Test - public void testAssemble_MOV_mRBXm_R14W() { - /* - * Constructor Line #'s: instruction(1825), instruction(1835), MOV(3221), rm16(1128), - * Mem(969), segWide(939), addr64(918), Rmr64(791), Reg16(771) - */ - assertOneCompatRestExact("MOV word ptr [RBX],R14W", "66:44:89:33"); - } - - @Test - public void testAssemble_MOV_mRSP_n0x10m_RBX() { - assertOneCompatRestExact("MOV qword ptr [RSP + -0x10],RBX", "48:89:5c:24:f0"); - } - - @Test - public void testAssemble_NOP() { - assertOneCompatRestExact("NOP", "90"); - } - - @Test - public void testAssemble_NOP_CS_mRAX_RAX__0x1m() { - assertOneCompatRestExact("NOP word ptr CS:[RAX + RAX*0x1]", - "66:2e:0f:1f:84:00:00:00:00:00"); - } - - @Test - public void testAssemble_PUSH_RAX() { - assertOneCompatRestExact("PUSH RAX", "50"); - } - - public void testAssemble_POP_RBX() { - assertOneCompatRestExact("POP RBX", "5b"); - } - - @Test - public void testAssembly_SAR_RBX_1() { - assertOneCompatRestExact("SAR 
RBX,1", "48:d1:fb", "SAR RBX,1", "SAR RBX,0x1"); - } - - @Test - public void testAssembly_SAR_DL_1() { - assertOneCompatRestExact("SAR DL,1", "d0:fa", "SAR DL,1", "SAR DL,0x1"); - } - - @Test - public void testAssemble_SCASB_RDI() { - assertOneCompatRestExact("SCASB RDI", "ae"); - } - - @Test - public void testAssemble_SCASB_REPE_RDI() { - assertOneCompatRestExact("SCASB.REPE RDI", "f3:ae"); - } - - @Test - public void testAssemble_SCASB_REPNE_RDI() { - assertOneCompatRestExact("SCASB.REPNE RDI", "f2:ae"); - } - - @Test - public void testAssembly_SHR_R13D_1() { - assertOneCompatRestExact("SHR R13D,1", "41:d1:ed", "SHR R13D,1", "SHR R13D,0x1"); - } - - @Test - public void testAssemble_SUB_RSP_0x8() { - assertOneCompatRestExact("SUB RSP,0x8", "48:83:ec:08"); - } - - @Test - public void testAssemble_CVTSI2SD_XMM12_EDX() { - assertOneCompatRestExact("CVTSI2SD XMM12,EDX", "f2:44:0f:2a:e2"); - } - - @Test - public void testAssembly_CALL_0x0041bb80() { - assertOneCompatRestExact("CALL 0x0041bb80", "e8:5f:ba:01:00", 0x0040011c); - } - - @Test - public void testAssembly_AND_mRBP_n0x8m_0xffff0000() { - assertOneCompatRestExact("AND qword ptr [RBP + -0x8],-0x10000", - "48:81:65:f8:00:00:ff:ff"); - } - - //@Ignore("This is a demonstration of an issue with signedness and scalar print pieces.") - //@Test - public void testAssembly_AND_mRBP_n0x8m_0x80() - throws AssemblySyntaxException, AddressOutOfBoundsException, InsufficientBytesException, - UnknownInstructionException, AddressOverflowException, MemoryAccessException { - Assembler assembler = Assemblers.getAssembler(lang); - Address at = lang.getDefaultSpace().getAddress(0x00400000); - for (AssemblyResolution rr : assembler.resolveLine(at, "AND [RBP + -0x8],-0x80")) { - if (rr.isError()) { - //AssemblyResolvedError err = (AssemblyResolvedError) rr; - //System.out.println(err.getError()); - } - else { - AssemblyResolvedConstructor rc = (AssemblyResolvedConstructor) rr; - System.out.print(rc.getInstruction().fillMask()); - 
System.out.print(" "); - System.out.println(disassemble(0x00400000, rc.getInstruction().getVals(), - assembler.getContextAt(at).getVals())); - } - } - } - - @Test - public void testSuggest_ADD() { - assertAllSyntaxErrors("ADD"); - assertAllSyntaxErrors("ADD "); - assertAllSyntaxErrors("ADD ["); - } } diff --git a/Ghidra/Processors/Atmel/data/languages/avr32a.pspec b/Ghidra/Processors/Atmel/data/languages/avr32a.pspec index 696dbb45d7..52ebccab53 100644 --- a/Ghidra/Processors/Atmel/data/languages/avr32a.pspec +++ b/Ghidra/Processors/Atmel/data/languages/avr32a.pspec @@ -2,7 +2,7 @@ - + diff --git a/Ghidra/Processors/PIC/data/languages/PIC24.pspec b/Ghidra/Processors/PIC/data/languages/PIC24.pspec index 0357e6bb6a..f132054c40 100644 --- a/Ghidra/Processors/PIC/data/languages/PIC24.pspec +++ b/Ghidra/Processors/PIC/data/languages/PIC24.pspec @@ -2,8 +2,7 @@ - - + diff --git a/Ghidra/Processors/x86/data/languages/x86.pspec b/Ghidra/Processors/x86/data/languages/x86.pspec index fbd58ffbf1..595d1e2da4 100644 --- a/Ghidra/Processors/x86/data/languages/x86.pspec +++ b/Ghidra/Processors/x86/data/languages/x86.pspec @@ -3,7 +3,7 @@ - +