From e7458ed08ba8d7b5b230c4a4bec8bfb0c39cee4c Mon Sep 17 00:00:00 2001
From: Dan <46821332+nsadeveloper789@users.noreply.github.com>
Date: Fri, 3 Nov 2023 17:38:07 -0400
Subject: [PATCH] GP-4185: Make Assembler more extensible
---
.../core/debug/stack/StackUnwinderTest.java | 7 +-
.../AssemblyThrasherDevScript.java | 4 +-
.../app/plugin/assembler/Assembler.java | 201 +----
.../plugin/assembler/AssemblerBuilder.java | 34 +-
.../plugin/assembler/AssemblySelector.java | 19 +-
.../plugin/assembler/GenericAssembler.java | 218 +++++
.../assembler/GenericAssemblerBuilder.java | 55 ++
.../sleigh/AbstractSleighAssembler.java | 279 ++++++
.../AbstractSleighAssemblerBuilder.java | 397 ++++++++
.../assembler/sleigh/SleighAssembler.java | 242 +----
.../sleigh/SleighAssemblerBuilder.java | 350 +------
.../expr/AbstractBinaryExpressionSolver.java | 52 +-
.../sleigh/expr/AbstractExpressionSolver.java | 9 +-
.../expr/AbstractUnaryExpressionSolver.java | 13 +-
.../sleigh/expr/ConstantValueSolver.java | 22 +-
.../sleigh/expr/ContextFieldSolver.java | 13 +-
.../expr/EndInstructionValueSolver.java | 18 +-
.../expr/LeftShiftExpressionSolver.java | 24 +-
.../sleigh/expr/MultExpressionSolver.java | 35 +-
.../expr/Next2InstructionValueSolver.java | 7 +-
.../sleigh/expr/OperandValueSolver.java | 28 +-
.../sleigh/expr/OrExpressionSolver.java | 49 +-
.../sleigh/expr/RecursiveDescentSolver.java | 21 +-
.../expr/RightShiftExpressionSolver.java | 20 +-
.../expr/StartInstructionValueSolver.java | 6 +-
.../sleigh/expr/TokenFieldSolver.java | 13 +-
.../sleigh/grammars/AssemblyGrammar.java | 12 +-
.../sleigh/grammars/AssemblyProduction.java | 4 +
.../sleigh/grammars/AssemblySentential.java | 11 +-
.../sem/AbstractAssemblyResolution.java | 170 ++++
.../AbstractAssemblyResolutionFactory.java | 442 +++++++++
.../sleigh/sem/AbstractAssemblyState.java | 29 +-
.../sem/AbstractAssemblyStateGenerator.java | 10 +-
.../sem/AbstractAssemblyTreeResolver.java | 527 +++++++++++
.../sleigh/sem/AssemblyConstructState.java | 10 +-
.../sem/AssemblyConstructStateGenerator.java | 16 +-
.../sem/AssemblyConstructorSemantic.java | 24 +-
.../sleigh/sem/AssemblyContextGraph.java | 10 +-
...AssemblyHiddenConstructStateGenerator.java | 18 +-
.../sleigh/sem/AssemblyNopState.java | 32 +-
.../sleigh/sem/AssemblyNopStateGenerator.java | 9 +-
.../sleigh/sem/AssemblyOperandState.java | 53 +-
.../sem/AssemblyOperandStateGenerator.java | 4 +-
.../sleigh/sem/AssemblyPatternBlock.java | 156 +++-
.../sleigh/sem/AssemblyResolution.java | 343 +------
.../sleigh/sem/AssemblyResolutionResults.java | 52 +-
.../sleigh/sem/AssemblyResolvedBackfill.java | 111 +--
.../sleigh/sem/AssemblyResolvedError.java | 79 +-
.../sleigh/sem/AssemblyResolvedPatterns.java | 853 ++++--------------
.../sem/AssemblyStringStateGenerator.java | 39 +
.../sleigh/sem/AssemblyTreeResolver.java | 550 +----------
.../sem/DefaultAssemblyResolutionFactory.java | 30 +
.../sem/DefaultAssemblyResolvedBackfill.java | 158 ++++
.../sem/DefaultAssemblyResolvedError.java | 111 +++
.../sem/DefaultAssemblyResolvedPatterns.java | 775 ++++++++++++++++
.../symbol/AssemblyFixedNumericTerminal.java | 6 +-
.../symbol/AssemblyNumericMapTerminal.java | 4 +
.../symbol/AssemblyNumericTerminal.java | 4 +
.../symbol/AssemblyStringMapTerminal.java | 7 +-
.../sleigh/symbol/AssemblyStringTerminal.java | 20 +-
.../sleigh/tree/AssemblyParseBranch.java | 3 +-
.../sleigh/tree/AssemblyParseHiddenNode.java | 42 +
.../tree/AssemblyParseNumericToken.java | 5 -
.../sleigh/tree/AssemblyParseTreeNode.java | 18 -
.../assembler/sleigh/ARMAssemblyTest.java | 7 +-
.../sleigh/AbstractAssemblyTest.java | 16 +-
.../assembler/sleigh/AssemblyTestCase.java | 6 +-
.../assembler/sleigh/MIPSAssemblyTest.java | 15 +
.../plugin/assembler/sleigh/SolverTest.java | 123 ++-
.../assembler/sleigh/parse/ParserTest.java | 26 +-
.../sleigh/sem/AssemblyPatternBlockTest.java | 30 +
.../assembler/sleigh/x86AssemblyTest.java | 10 +
72 files changed, 4303 insertions(+), 2813 deletions(-)
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/GenericAssembler.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/GenericAssemblerBuilder.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/AbstractSleighAssembler.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/AbstractSleighAssemblerBuilder.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyResolution.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyResolutionFactory.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AbstractAssemblyTreeResolver.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyStringStateGenerator.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/DefaultAssemblyResolutionFactory.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/DefaultAssemblyResolvedBackfill.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/DefaultAssemblyResolvedError.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/sem/DefaultAssemblyResolvedPatterns.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/sleigh/tree/AssemblyParseHiddenNode.java
create mode 100644 Ghidra/Framework/SoftwareModeling/src/test/java/ghidra/app/plugin/assembler/sleigh/sem/AssemblyPatternBlockTest.java
diff --git a/Ghidra/Debug/Debugger/src/test/java/ghidra/app/plugin/core/debug/stack/StackUnwinderTest.java b/Ghidra/Debug/Debugger/src/test/java/ghidra/app/plugin/core/debug/stack/StackUnwinderTest.java
index ca28ab6fe9..534d1b1a4e 100644
--- a/Ghidra/Debug/Debugger/src/test/java/ghidra/app/plugin/core/debug/stack/StackUnwinderTest.java
+++ b/Ghidra/Debug/Debugger/src/test/java/ghidra/app/plugin/core/debug/stack/StackUnwinderTest.java
@@ -85,8 +85,8 @@ public class StackUnwinderTest extends AbstractGhidraHeadedDebuggerTest {
public static final AssemblySelector NO_16BIT_CALLS = new AssemblySelector() {
@Override
- public AssemblyResolvedPatterns select(AssemblyResolutionResults rr,
- AssemblyPatternBlock ctx) throws AssemblySemanticException {
+ public Selection select(AssemblyResolutionResults rr, AssemblyPatternBlock ctx)
+ throws AssemblySemanticException {
for (AssemblyResolvedPatterns res : filterCompatibleAndSort(rr, ctx)) {
byte[] ins = res.getInstruction().getVals();
// HACK to avoid 16-bit CALL.... TODO: Why does this happen?
@@ -95,8 +95,7 @@ public class StackUnwinderTest extends AbstractGhidraHeadedDebuggerTest {
"Filtered 16-bit call " + NumericUtilities.convertBytesToString(ins));
continue;
}
- return AssemblyResolution.resolved(res.getInstruction().fillMask(),
- res.getContext(), "Selected", null, null, null);
+ return new Selection(res.getInstruction().fillMask(), res.getContext());
}
throw new AssemblySemanticException(semanticErrors);
}
diff --git a/Ghidra/Features/Base/ghidra_scripts/AssemblyThrasherDevScript.java b/Ghidra/Features/Base/ghidra_scripts/AssemblyThrasherDevScript.java
index ea96dff9b4..e5f6806b42 100644
--- a/Ghidra/Features/Base/ghidra_scripts/AssemblyThrasherDevScript.java
+++ b/Ghidra/Features/Base/ghidra_scripts/AssemblyThrasherDevScript.java
@@ -63,8 +63,8 @@ public class AssemblyThrasherDevScript extends GhidraScript {
}
@Override
- public AssemblyResolvedPatterns select(AssemblyResolutionResults rr,
- AssemblyPatternBlock ctx) throws AssemblySemanticException {
+ public Selection select(AssemblyResolutionResults rr, AssemblyPatternBlock ctx)
+ throws AssemblySemanticException {
StringBuilder sb = new StringBuilder();
boolean gotOne = false;
boolean failedOne = false;
diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/Assembler.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/Assembler.java
index 0c0e30d50b..9218d58c9b 100644
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/Assembler.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/Assembler.java
@@ -15,15 +15,7 @@
*/
package ghidra.app.plugin.assembler;
-import java.util.Collection;
-
-import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
-import ghidra.app.plugin.assembler.sleigh.sem.*;
-import ghidra.program.model.address.Address;
-import ghidra.program.model.address.AddressOverflowException;
-import ghidra.program.model.listing.Instruction;
-import ghidra.program.model.listing.InstructionIterator;
-import ghidra.program.model.mem.MemoryAccessException;
+import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
/**
* The primary interface for performing assembly in Ghidra.
@@ -32,194 +24,5 @@ import ghidra.program.model.mem.MemoryAccessException;
* Use the {@link Assemblers} class to obtain a suitable implementation for a given program or
* language.
*/
-public interface Assembler {
- /**
- * Assemble a sequence of instructions and place them at the given address.
- *
- *
- * This method is only valid if the assembler is bound to a program. An instance may optionally
- * implement this method without a program binding. In that case, the returned iterator will
- * refer to pseudo instructions.
- *
- *
- * NOTE: There must be an active transaction on the bound program for this method to
- * succeed.
- *
- * @param at the location where the resulting instructions should be placed
- * @param listing a new-line separated or array sequence of instructions
- * @return an iterator over the resulting instructions
- * @throws AssemblySyntaxException a textual instruction is non well-formed
- * @throws AssemblySemanticException a well-formed instruction cannot be assembled
- * @throws MemoryAccessException there is an issue writing the result to program memory
- * @throws AddressOverflowException the resulting block is beyond the valid address range
- */
- public InstructionIterator assemble(Address at, String... listing)
- throws AssemblySyntaxException,
- AssemblySemanticException, MemoryAccessException, AddressOverflowException;
-
- /**
- * Assemble a line instruction at the given address.
- *
- *
- * This method is valid with or without a bound program. Even if bound, the program is not
- * modified; however, the appropriate context information is taken from the bound program.
- * Without a program, the language's default context is taken at the given location.
- *
- * @param at the location of the start of the instruction
- * @param line the textual assembly code
- * @return the binary machine code, suitable for placement at the given address
- * @throws AssemblySyntaxException the textual instruction is not well-formed
- * @throws AssemblySemanticException the the well-formed instruction cannot be assembled
- */
- public byte[] assembleLine(Address at, String line)
- throws AssemblySyntaxException, AssemblySemanticException;
-
- /**
- * Assemble a line instruction at the given address, assuming the given context.
- *
- *
- * This method works like {@link #assembleLine(Address, String)} except that it allows you to
- * override the assumed context at that location.
- *
- * @param at the location of the start of the instruction
- * @param line the textual assembly code
- * @param ctx the context register value at the start of the instruction
- * @return the results of semantic resolution (from all parse results)
- * @throws AssemblySyntaxException the textual instruction is not well-formed
- * @throws AssemblySemanticException the well-formed instruction cannot be assembled
- */
- public byte[] assembleLine(Address at, String line, AssemblyPatternBlock ctx)
- throws AssemblySemanticException, AssemblySyntaxException;
-
- /**
- * Parse a line instruction.
- *
- *
- * Generally, you should just use {@link #assembleLine(Address, String)}, but if you'd like
- * access to the parse trees outside of an {@link AssemblySelector}, then this may be an
- * acceptable option. Most notably, this is an excellent way to obtain suggestions for
- * auto-completion.
- *
- *
- * Each item in the returned collection is either a complete parse tree, or a syntax error
- * Because all parse paths are attempted, it's possible to get many mixed results. For example,
- * The input line may be a valid instruction; however, there may be suggestions to continue the
- * line toward another valid instruction.
- *
- * @param line the line (or partial line) to parse
- * @return the results of parsing
- */
- public Collection parseLine(String line);
-
- /**
- * Resolve a given parse tree at the given address, assuming the given context
- *
- *
- * Each item in the returned collection is either a completely resolved instruction, or a
- * semantic error. Because all resolutions are attempted, it's possible to get many mixed
- * results.
- *
- *
- * NOTE: The resolved instructions are given as masks and values. Where the mask does not
- * cover, you can choose any value.
- *
- * @param parse a parse result giving a valid tree
- * @param at the location of the start of the instruction
- * @param ctx the context register value at the start of the instruction
- * @return the results of semantic resolution
- */
- public AssemblyResolutionResults resolveTree(AssemblyParseResult parse, Address at,
- AssemblyPatternBlock ctx);
-
- /**
- * Resolve a given parse tree at the given address.
- *
- *
- * Each item in the returned collection is either a completely resolved instruction, or a
- * semantic error. Because all resolutions are attempted, it's possible to get many mixed
- * results.
- *
- *
- * NOTE: The resolved instructions are given as masks and values. Where the mask does not
- * cover, you can choose any value.
- *
- * @param parse a parse result giving a valid tree
- * @param at the location of the start of the instruction
- * @return the results of semantic resolution
- */
- public AssemblyResolutionResults resolveTree(AssemblyParseResult parse, Address at);
-
- /**
- * Assemble a line instruction at the given address.
- *
- *
- * This method works like {@link #resolveLine(Address, String, AssemblyPatternBlock)}, except
- * that it derives the context using {@link #getContextAt(Address)}.
- *
- * @param at the location of the start of the instruction
- * @param line the textual assembly code
- * @return the collection of semantic resolution results
- * @throws AssemblySyntaxException the textual instruction is not well-formed
- */
- public AssemblyResolutionResults resolveLine(Address at, String line)
- throws AssemblySyntaxException;
-
- /**
- * Assemble a line instruction at the given address, assuming the given context.
- *
- *
- * This method works like {@link #assembleLine(Address, String, AssemblyPatternBlock)}, except
- * that it returns all possible resolutions for the parse trees that pass the
- * {@link AssemblySelector}.
- *
- * @param at the location of the start of the instruction
- * @param line the textual assembly code
- * @param ctx the context register value at the start of the instruction
- * @return the collection of semantic resolution results
- * @throws AssemblySyntaxException the textual instruction is not well-formed
- */
- public AssemblyResolutionResults resolveLine(Address at, String line, AssemblyPatternBlock ctx)
- throws AssemblySyntaxException;
-
- /**
- * Place a resolved (and fully-masked) instruction into the bound program.
- *
- *
- * This method is not valid without a program binding. Also, this method must be called during a
- * program database transaction.
- *
- * @param res the resolved and fully-masked instruction
- * @param at the location of the start of the instruction
- * @return the new {@link Instruction} code unit
- * @throws MemoryAccessException there is an issue writing the result to program memory
- */
- public Instruction patchProgram(AssemblyResolvedPatterns res, Address at)
- throws MemoryAccessException;
-
- /**
- * Place instruction bytes into the bound program.
- *
- *
- * This method is not valid without a program binding. Also, this method must be called during a
- * program database transaction.
- *
- * @param insbytes the instruction data
- * @param at the location of the start of the instruction
- * @return an iterator over the disassembled instructions
- * @throws MemoryAccessException there is an issue writing the result to program memory
- */
- public InstructionIterator patchProgram(byte[] insbytes, Address at)
- throws MemoryAccessException;
-
- /**
- * Get the context at a given address
- *
- *
- * If there is a program binding, this will extract the actual context at the given address.
- * Otherwise, it will obtain the default context at the given address for the language.
- *
- * @param addr the address
- * @return the context
- */
- public AssemblyPatternBlock getContextAt(Address addr);
+public interface Assembler extends GenericAssembler {
}
diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblerBuilder.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblerBuilder.java
index c29b8cee42..1f816f17ac 100644
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblerBuilder.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblerBuilder.java
@@ -15,42 +15,18 @@
*/
package ghidra.app.plugin.assembler;
-import ghidra.program.model.lang.Language;
-import ghidra.program.model.lang.LanguageID;
+import ghidra.app.plugin.assembler.sleigh.sem.AssemblyResolvedPatterns;
import ghidra.program.model.listing.Program;
/**
* An interface to build an assembler for a given language
*/
-public interface AssemblerBuilder {
- /**
- * Get the ID of the language for which this instance builds an assembler
- *
- * @return the language ID
- */
- public LanguageID getLanguageID();
+public interface AssemblerBuilder
+ extends GenericAssemblerBuilder {
- /**
- * Get the language for which this instance builds an assembler
- *
- * @return the language
- */
- public Language getLanguage();
-
- /**
- * Build an assembler with the given selector callback
- *
- * @param selector the selector callback
- * @return the built assembler
- */
+ @Override
public Assembler getAssembler(AssemblySelector selector);
- /**
- * Build an assembler with the given selector callback and program binding
- *
- * @param selector the selector callback
- * @param program the bound program
- * @return the built assembler
- */
+ @Override
public Assembler getAssembler(AssemblySelector selector, Program program);
}
diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySelector.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySelector.java
index 5733ba229c..279bc0dce1 100644
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySelector.java
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/AssemblySelector.java
@@ -118,6 +118,16 @@ public class AssemblySelector {
return sorted;
}
+ /**
+ * A resolved selection from the results given to
+ * {@link AssemblySelector#select(AssemblyResolutionResults, AssemblyPatternBlock)}
+ *
+ * @param ins the resolved instruction bytes, ideally with a full mask
+ * @param ctx the resolved context bytes for compatibility checks
+ */
+ public record Selection(AssemblyPatternBlock ins, AssemblyPatternBlock ctx) {
+ }
+
/**
* Select an instruction from the possible results.
*
@@ -134,16 +144,15 @@ public class AssemblySelector {
* @param rr the collection of resolved constructors
* @param ctx the applicable context.
* @return a single resolved constructor with a full instruction mask.
- * @throws AssemblySemanticException
+ * @throws AssemblySemanticException if all the given results are semantic errors
*/
- public AssemblyResolvedPatterns select(AssemblyResolutionResults rr,
- AssemblyPatternBlock ctx) throws AssemblySemanticException {
+ public Selection select(AssemblyResolutionResults rr, AssemblyPatternBlock ctx)
+ throws AssemblySemanticException {
List sorted = filterCompatibleAndSort(rr, ctx);
// Pick just the first
AssemblyResolvedPatterns res = sorted.get(0);
// Just set the mask to ffs (effectively choosing 0 for the omitted bits)
- return AssemblyResolution.resolved(res.getInstruction().fillMask(), res.getContext(),
- "Selected", null, null, null);
+ return new Selection(res.getInstruction().fillMask(), res.getContext());
}
}
diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/GenericAssembler.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/GenericAssembler.java
new file mode 100644
index 0000000000..af059a3c69
--- /dev/null
+++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/app/plugin/assembler/GenericAssembler.java
@@ -0,0 +1,218 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package ghidra.app.plugin.assembler;
+
+import java.util.Collection;
+
+import ghidra.app.plugin.assembler.sleigh.parse.AssemblyParseResult;
+import ghidra.app.plugin.assembler.sleigh.sem.*;
+import ghidra.program.model.address.Address;
+import ghidra.program.model.address.AddressOverflowException;
+import ghidra.program.model.listing.Instruction;
+import ghidra.program.model.listing.InstructionIterator;
+import ghidra.program.model.mem.MemoryAccessException;
+
+public interface GenericAssembler