GP-1208: Implement linux-x86/-amd64 system call simulators

This commit is contained in:
Dan
2022-05-20 11:05:53 -04:00
parent 4b600847eb
commit b33800ecba
144 changed files with 12712 additions and 804 deletions
@@ -0,0 +1,211 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//An example emulation script that integrates well with the Debugger UI.
//It provides the set-up code and then demonstrates some use cases.
//It should work with any x64 program, but some snippets may require specific conditions.
//It should be easily ported to other platforms just by adjusting register names.
//@author
//@category Emulation
//@keybinding
//@menupath
//@toolbar
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.plugin.assembler.Assembler;
import ghidra.app.plugin.assembler.Assemblers;
import ghidra.app.plugin.core.debug.service.emulation.DebuggerTracePcodeEmulator;
import ghidra.app.plugin.core.debug.service.emulation.ProgramEmulationUtils;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.app.services.DebuggerTraceManagerService;
import ghidra.app.services.ProgramManager;
import ghidra.framework.plugintool.PluginTool;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.trace.TraceSleighUtils;
import ghidra.pcode.utils.Utils;
import ghidra.program.database.ProgramDB;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.LanguageID;
import ghidra.program.model.listing.InstructionIterator;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.Memory;
import ghidra.trace.model.Trace;
import ghidra.trace.model.thread.TraceThread;
import ghidra.trace.model.time.TraceSnapshot;
import ghidra.trace.model.time.TraceTimeManager;
import ghidra.util.database.UndoableTransaction;
public class DebuggerEmuExampleScript extends GhidraScript {
private final static Charset UTF8 = Charset.forName("utf8");
@Override
protected void run() throws Exception {
/*
* First, get all the services and stuff:
*/
PluginTool tool = state.getTool();
ProgramManager programManager = tool.getService(ProgramManager.class);
DebuggerTraceManagerService traceManager =
tool.getService(DebuggerTraceManagerService.class);
SleighLanguage language = (SleighLanguage) getLanguage(new LanguageID("x86:LE:64:default"));
/*
* I'll generate a new program, because I don't want to require the user to pick something
* specific.
*/
Address entry;
Address injectHere;
Program program = null;
try {
program =
new ProgramDB("emu_example", language, language.getDefaultCompilerSpec(), this);
// Save the program into the project so it has a URL for the trace's static mapping
tool.getProject()
.getProjectData()
.getRootFolder()
.createFile("emu_example", program, monitor);
try (UndoableTransaction tid = UndoableTransaction.start(program, "Init", true)) {
AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
entry = space.getAddress(0x00400000);
Address dataEntry = space.getAddress(0x00600000);
Memory memory = program.getMemory();
memory.createInitializedBlock(".text", entry, 0x1000, (byte) 0, monitor, false);
Assembler asm = Assemblers.getAssembler(program);
InstructionIterator ii = asm.assemble(entry,
"MOV RCX, 0x" + dataEntry,
"MOV RAX, 1",
"SYSCALL",
"MOV RAX, 2",
"SYSCALL");
ii.next(); // drop MOV RCX
injectHere = ii.next().getAddress();
memory.createInitializedBlock(".data", dataEntry, 0x1000, (byte) 0, monitor, false);
memory.setBytes(dataEntry, "Hello, World!\n".getBytes(UTF8));
}
program.save("Init", monitor);
// Display the program in the UI
programManager.openProgram(program);
}
finally {
if (program != null) {
program.release(this);
}
}
/*
* Now, load the program into a trace. This doesn't copy any bytes, it just sets up a static
* mapping. The emulator will know how to read through to the mapped program. We use a
* utility, which is the same used by the "Emulate Program" action in the UI. It will load
* the program, allocate a stack, and initialize the first thread to the given entry.
*/
Trace trace = null;
try {
trace = ProgramEmulationUtils.launchEmulationTrace(program, entry, this);
// Display the trace in the UI
traceManager.openTrace(trace);
traceManager.activateTrace(trace);
}
finally {
if (trace != null) {
trace.release(this);
}
}
// Get the initial thread
TraceThread traceThread = trace.getThreadManager().getAllThreads().iterator().next();
traceManager.activateThread(traceThread);
/*
* Instead of using the UI's emulator, this script will create its own with a custom
* library. This emulator will still know how to integrate with the UI, reading through to
* open programs and writing state back into the trace.
*/
DebuggerTracePcodeEmulator emulator = new DebuggerTracePcodeEmulator(tool, trace, 0, null) {
@Override
protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
return new DemoPcodeUseropLibrary(language, DebuggerEmuExampleScript.this);
}
};
// Conventionally, emulator threads are named after their trace thread's path.
PcodeThread<byte[]> thread = emulator.getThread(traceThread.getPath(), true);
/*
* Inject a call to our custom print userop. Otherwise, the language itself will never
* invoke it.
*/
emulator.inject(injectHere, List.of(
"print_utf8(RCX);",
"emu_exec_decoded();"));
/*
* Run the experiment: This should interrupt on the second SYSCALL, because any value other
* than 1 calls emu_swi.
*
* For demonstration, we'll record a trace snapshot for every step of emulation. This is not
* ordinarily recommended except for very small experiments. A more reasonable approach in
* practice may be to snapshot on specific breakpoints.
*/
TraceTimeManager time = trace.getTimeManager();
TraceSnapshot snapshot = time.getSnapshot(0, true);
try (UndoableTransaction tid = UndoableTransaction.start(trace, "Emulate", true)) {
for (int i = 0; i < 10; i++) {
println("Executing: " + thread.getCounter());
thread.stepInstruction();
snapshot =
time.createSnapshot("Stepped to " + thread.getCounter());
emulator.writeDown(trace, snapshot.getKey(), 0, false);
}
printerr("We should not have completed 10 steps!");
}
catch (InterruptPcodeExecutionException e) {
println("Terminated via interrupt. Good.");
}
// Display the final snapshot in the UI
traceManager.activateSnap(snapshot.getKey());
/*
* Inspect the machine. You can always do this by accessing the state directly, but for
* anything other than simple variables, you may find compiling an expression more
* convenient.
*
* This works the same as in the stand-alone case.
*/
println("RCX = " +
Utils.bytesToLong(thread.getState().getVar(language.getRegister("RCX")), 8,
language.isBigEndian()));
println("RCX = " + Utils.bytesToLong(
SleighProgramCompiler.compileExpression(language, "RCX").evaluate(thread.getExecutor()),
8, language.isBigEndian()));
println("RCX+4 = " +
Utils.bytesToLong(SleighProgramCompiler.compileExpression(language, "RCX+4")
.evaluate(thread.getExecutor()),
8, language.isBigEndian()));
/*
* To evaluate a Sleigh expression against the trace: The result is the same as evaluating
* directly against the emulator, but these work with any trace, no matter the original data
* source (live target, emulated, imported, etc.) It's also built into utilities, making it
* easier to use.
*/
println("RCX+4 (trace) = " +
TraceSleighUtils.evaluate("RCX+4", trace, snapshot.getKey(), traceThread, 0));
}
}
@@ -0,0 +1,131 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.exec.*;
import ghidra.pcode.struct.StructuredSleigh;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.CompilerSpec;
import ghidra.program.model.pcode.Varnode;
/**
* A userop library for the emulator
*
* <p>
* If you do not have need of a custom userop library, use {@link PcodeUseropLibrary#NIL}. These
* libraries allow you to implement userops, including those declared by the language. Without these,
* the emulator must interrupt whenever a userop ({@code CALLOTHER}) is encountered. You can also
* define new userops, which can be invoked from Sleigh code injected into the emulator.
*
* <p>
* These libraries can have both Java-callback and p-code implementations of userops. If only using
* p-code implementations, the library can be parameterized with type {@code <T>} and just pass that
* over to {@link AnnotatedPcodeUseropLibrary}. Because this will demo a Java callback that assumes
* concrete bytes, we will fix the library's type to {@code byte[]}.
*
* <p>
* Methods in this class (not including those in its nested classes) are implemented as Java
* callbacks.
*/
public class DemoPcodeUseropLibrary extends AnnotatedPcodeUseropLibrary<byte[]> {
    private final static Charset UTF8 = Charset.forName("utf8");

    private final SleighLanguage language;
    private final GhidraScript script;
    private final AddressSpace space;

    /**
     * Construct the demo library
     *
     * <p>
     * Besides recording its collaborators, this generates the userops of
     * {@link DemoStructuredPart} into this library's userop map.
     *
     * @param language the language of the emulated machine; also supplies the default address
     *            space and endianness used by the Java callbacks
     * @param script the script whose console receives printed output
     */
    public DemoPcodeUseropLibrary(SleighLanguage language, GhidraScript script) {
        this.language = language;
        this.script = script;
        this.space = language.getDefaultSpace();

        new DemoStructuredPart(language.getDefaultCompilerSpec()).generate(ops);
    }

    /**
     * Treats the input as an offset to a C-style string and prints it to the console
     *
     * <p>
     * Because we want to dereference start, we will need access to the emulator's state, so we
     * employ the {@link OpState} annotation. {@code start} takes the one input we expect. Because
     * its type is the value type rather than {@link Varnode}, we will get the input's value.
     * Similarly, we can just return the resulting value, and the emulator will place that into the
     * output variable for us.
     *
     * @param state the calling thread's state
     * @param start the offset of the first character
     * @return the length of the string in bytes
     */
    @PcodeUserop
    public byte[] print_utf8(@OpState PcodeExecutorStatePiece<byte[], byte[]> state,
            byte[] start) {
        long offset = Utils.bytesToLong(start, start.length, language.isBigEndian());
        // Scan forward one byte at a time until the NUL terminator
        long end = offset;
        while (state.getVar(space, end, 1, true)[0] != 0) {
            end++;
        }
        if (end == offset) {
            // Empty string: print a blank line without issuing a zero-length read
            script.println("");
            return Utils.longToBytes(0, Long.BYTES, language.isBigEndian());
        }
        byte[] bytes = state.getVar(space, offset, (int) (end - offset), true);
        String str = new String(bytes, UTF8);
        script.println(str);
        return Utils.longToBytes(end - offset, Long.BYTES, language.isBigEndian());
    }

    /**
     * Methods in this class are implemented using p-code compiled from Structured Sleigh
     */
    public class DemoStructuredPart extends StructuredSleigh {
        final Var RAX = lang("RAX", type("long"));
        // Must bind the RCX register: it holds the pointer to the string being measured
        final Var RCX = lang("RCX", type("byte *"));
        final UseropDecl emu_swi = userop(type("void"), "emu_swi", List.of());

        /**
         * Construct the structured part
         *
         * @param cs the compiler spec passed on to {@link StructuredSleigh}
         */
        protected DemoStructuredPart(CompilerSpec cs) {
            super(cs);
        }

        /**
         * Not really a syscall dispatcher
         *
         * <p>
         * In cases where the userop expects parameters, you would annotate them with {@link Param}
         * and use them just like other {@link Var}s. See the javadocs.
         *
         * <p>
         * This is just a cheesy demo: If RAX is 1, then this method computes the number of bytes in
         * the C-style string pointed to by RCX and stores the result in RAX. Otherwise, interrupt
         * the emulator. See {@link DemoSyscallLibrary} for actual system call simulation.
         */
        @StructuredUserop
        public void syscall() {
            _if(RAX.eq(1), () -> {
                // Walk a cursor from the start of the string to its NUL terminator
                Var i = local("i", RCX);
                _while(i.deref().neq(0), () -> {
                    i.inc();
                });
                // Length is the distance travelled from the start pointer
                RAX.set(i.subi(RCX));
            })._else(() -> {
                emu_swi.call();
            });
        }
    }
}
@@ -0,0 +1,211 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.nio.charset.Charset;
import java.util.Collection;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.linux.EmuLinuxAmd64SyscallUseropLibrary;
import ghidra.pcode.emu.linux.EmuLinuxX86SyscallUseropLibrary;
import ghidra.pcode.emu.sys.AnnotatedEmuSyscallUseropLibrary;
import ghidra.pcode.emu.sys.EmuSyscallLibrary;
import ghidra.pcode.exec.*;
import ghidra.pcode.struct.StructuredSleigh;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.data.DataTypeManager;
import ghidra.program.model.lang.Register;
import ghidra.program.model.listing.Program;
/**
* A userop library that includes system call simulation
*
* <p>
* Such a library needs to implement {@link EmuSyscallLibrary}. Here we extend
* {@link AnnotatedEmuSyscallUseropLibrary}, which allows us to implement it using annotated
* methods. {@link EmuSyscallLibrary#syscall(PcodeExecutor, PcodeUseropLibrary)} is the system call
* dispatcher, and it requires that each system call implement {@link EmuSyscallDefinition}. System
* call libraries typically implement that interface by annotating p-code userops with
* {@link EmuSyscall}. This allows system calls to be implemented via Java callback or Structured
* Sleigh. Conventionally, the Java method names of system calls should be
* <em>platform</em>_<em>name</em>. This is to prevent name-space pollution of userops.
*
* <p>
* Stock implementations for a limited set of Linux system calls are provided for x86 and amd64 in
* {@link EmuLinuxX86SyscallUseropLibrary} and {@link EmuLinuxAmd64SyscallUseropLibrary},
* respectively. The type hierarchy is designed to facilitate the implementation of related systems
* without (too much) code duplication. Because they derive from the annotation-based
* implementations, you can add missing system calls by extending one and adding annotated methods
* as needed.
*
* <p>
* For demonstration, this will implement one from scratch for no particular operating system, but
* it will borrow many conventions from linux-amd64.
*/
public class DemoSyscallLibrary extends AnnotatedEmuSyscallUseropLibrary<byte[]> {
    private final static Charset UTF8 = Charset.forName("utf8");

    // Required plumbing comes first

    /**
     * A "user error," i.e., one that is reported back to the target program instead of
     * interrupting the emulator. This is deliberately minimal. A real implementation would likely
     * carry more information for internal diagnostics and might use a hierarchy of exceptions.
     */
    static class UserError extends PcodeExecutionException {
        private final int errno;

        public UserError(int errno) {
            super("errno: " + errno);
            this.errno = errno;
        }
    }

    private final Register regRAX;
    private final GhidraScript script;

    /**
     * Construct the demo system call library
     *
     * <p>
     * A program is required because the system call numbering is derived from the "syscall"
     * overlay on OTHER space; the system call analyzer must have been applied to it. The program
     * and its compiler spec are also used to derive what they can of the system call ABI, notably
     * the calling convention of the functions placed in the syscall overlay. What cannot (yet) be
     * derived from the program is supplied by methods of this class, e.g.,
     * {@link #readSyscallNumber(PcodeExecutorStatePiece)} and
     * {@link #handleError(PcodeExecutor, PcodeExecutionException)}.
     *
     * @param machine the emulator
     * @param program the program being emulated
     * @param script the script whose console receives output
     */
    public DemoSyscallLibrary(PcodeMachine<byte[]> machine, Program program, GhidraScript script) {
        super(machine, program);
        this.script = script;

        Register rax = machine.getLanguage().getRegister("RAX");
        if (rax == null) {
            throw new AssertionError("This library only works on x64 targets");
        }
        this.regRAX = rax;
    }

    /**
     * Read the system call number on behalf of the dispatcher, which does not know where it is
     * stored. This assumes an x64 target with the number in RAX.
     */
    @Override
    public long readSyscallNumber(PcodeExecutorStatePiece<byte[], byte[]> state) {
        byte[] raxValue = state.getVar(regRAX);
        int size = regRAX.getNumBytes();
        boolean bigEndian = machine.getLanguage().isBigEndian();
        return Utils.bytesToLong(raxValue, size, bigEndian);
    }

    /**
     * Report a user error to the target by storing the negated errno in RAX. Any other kind of
     * error yields false, telling the dispatcher to interrupt the emulator.
     */
    @Override
    public boolean handleError(PcodeExecutor<byte[]> executor, PcodeExecutionException err) {
        if (!(err instanceof UserError)) {
            return false;
        }
        UserError userError = (UserError) err;
        byte[] negatedErrno =
            executor.getArithmetic().fromConst(-userError.errno, regRAX.getNumBytes());
        executor.getState().setVar(regRAX, negatedErrno);
        return true;
    }

    /**
     * Structured Sleigh support is built in. Overriding this method and instantiating the
     * (usually nested) class enables it.
     */
    @Override
    protected StructuredPart newStructuredPart() {
        return new DemoStructuredPart();
    }

    @Override
    protected Collection<DataTypeManager> getAdditionalArchives() {
        // This is the place to add platform-specific data type archives; none are needed here
        return super.getAdditionalArchives();
    }

    // Now for the system calls themselves

    // A Java callback example

    /**
     * Write a buffer of utf-8 characters to the console
     *
     * <p>
     * The system call name is given by the {@link EmuSyscall} annotation, since the userop name
     * itself should carry a platform prefix to avoid collisions among userops.
     *
     * <p>
     * Exporting this as a system call is for demonstration only.
     * {@link DemoStructuredPart#demo_console(StructuredSleigh.Var)} needs it to be a userop in
     * order to invoke it, but it need not be a syscall.
     *
     * @param str a pointer to the start of the buffer
     * @param end a pointer to the end (exclusive) of the buffer
     */
    @PcodeUserop
    @EmuSyscall("write")
    public void demo_write(byte[] str, byte[] end) {
        /*
         * With concrete byte[] values, Utils.bytesToLong would do, but this shows the approach
         * that also works when extending AnnotatedEmuSyscallUseropLibrary<T>. An exception is
         * thrown if a value cannot be made concrete. For abstract types, consider saving the
         * arithmetic in a field at construction time.
         */
        PcodeArithmetic<byte[]> arithmetic = machine.getArithmetic();
        long startOffset = arithmetic.toConcrete(str).longValue();
        long endOffset = arithmetic.toConcrete(end).longValue();
        int length = (int) (endOffset - startOffset);

        AddressSpace space = machine.getLanguage().getDefaultSpace();
        byte[] buffer = machine.getSharedState().getVar(space, startOffset, length, true);
        script.println(new String(buffer, UTF8));
    }

    // A Structured Sleigh example

    /**
     * The nested class holding system calls implemented in Structured Sleigh. Whatever the
     * implementation type, the Java method carries the {@link EmuSyscall} annotation. The class is
     * public so the annotation processor can access its methods; overriding
     * {@link #getMethodLookup()} would be an alternative.
     */
    public class DemoStructuredPart extends StructuredPart {
        UseropDecl write = userop(type("void"), "demo_write", types("char *", "char *"));

        /**
         * Write a C-style string to the console
         *
         * @param str the null-terminated utf-8 string
         */
        @StructuredUserop
        @EmuSyscall("console")
        public void demo_console(@Param(type = "char *") Var str) {
            // Advance a cursor to the terminator to find the end, then hand off to write
            Var cursor = local("end", type("char *"));
            _for(cursor.set(str), cursor.deref().neq(0), cursor.inc(), () -> {
            });
            write.call(str, cursor);
        }
    }
}
@@ -0,0 +1,179 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//An example emulation script that uses a stand-alone emulator.
//It provides the set-up code and then demonstrates some use cases.
//@author
//@category Emulation
//@keybinding
//@menupath
//@toolbar
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.plugin.assembler.*;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.emu.PcodeEmulator;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.exec.*;
import ghidra.pcode.utils.Utils;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.LanguageID;
public class StandAloneEmuExampleScript extends GhidraScript {
    private final static Charset UTF8 = Charset.forName("utf8");

    private SleighLanguage language;
    private PcodeEmulator emulator;

    /**
     * Build a stand-alone emulator with the demo userop library, load a tiny assembled program
     * into it, run until interrupted, and print some machine state.
     *
     * @throws Exception if any part of the set-up or emulation fails unexpectedly
     */
    @Override
    protected void run() throws Exception {
        /*
         * Set up the emulator and give it one thread
         */
        LanguageID languageId = new LanguageID("x86:LE:64:default");
        language = (SleighLanguage) getLanguage(languageId);
        emulator = new PcodeEmulator(language) {
            @Override
            protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
                return new DemoPcodeUseropLibrary(language, StandAloneEmuExampleScript.this);
            }

            // Uncomment this to see instructions printed as they are decoded
            /*
            protected BytesPcodeThread createThread(String name) {
                return new BytesPcodeThread(name, this) {
                    @Override
                    protected SleighInstructionDecoder createInstructionDecoder(
                            PcodeExecutorState<byte[]> sharedState) {
                        return new SleighInstructionDecoder(language, sharedState) {
                            @Override
                            public Instruction decodeInstruction(Address address,
                                    RegisterValue context) {
                                Instruction instruction = super.decodeInstruction(address, context);
                                println("Decoded " + address + ": " + instruction);
                                return instruction;
                            }
                        };
                    }
                };
            }
            */
        };
        PcodeThread<byte[]> emuThread = emulator.newThread();
        // Each thread gets the full library as composed by the emulator
        PcodeUseropLibrary<byte[]> library = emuThread.getUseropLibrary();
        AddressSpace dyn = language.getDefaultSpace();

        /*
         * Assemble a small test program and copy it into the emulator.
         *
         * No system calls are really implemented here. SYSCALL merely serves to demonstrate
         * implementing a language-defined userop.
         */
        Address entry = dyn.getAddress(0x00400000);
        CodeBuffer program = new CodeBuffer(Assemblers.getAssembler(language), entry);
        program.assemble("MOV RCX, 0xdeadbeef");
        // Everything assembled so far precedes the injection point
        Address injectHere = program.getNext();
        program.assemble("MOV RAX, 1");
        program.assemble("SYSCALL");
        program.assemble("MOV RAX, 2"); // Causes the interrupt that ends the run
        program.assemble("SYSCALL");
        byte[] machineCode = program.getBytes();
        emulator.getSharedState()
                .setVar(dyn, entry.getOffset(), machineCode.length, true, machineCode);

        /*
         * Initialize the remaining emulator and thread state. The L suffix on 0xdeadbeefL
         * matters: without it, Java would sign-extend the (negative) int to a long.
         */
        byte[] greeting = "Hello, World!\n".getBytes(UTF8);
        emulator.getSharedState().setVar(dyn, 0xdeadbeefL, greeting.length, true, greeting);
        List<String> initSource = List.of(
            "RIP = 0x" + entry + ";",
            "RSP = 0x00001000;");
        PcodeProgram init =
            SleighProgramCompiler.compileProgram(language, "init", initSource, library);
        emuThread.getExecutor().execute(init, library);
        emuThread.overrideContextWithDefault();
        emuThread.reInitialize();

        /*
         * The language never invokes our custom print userop on its own, so inject a call to it.
         */
        List<String> injection = List.of(
            "print_utf8(RCX);",
            "emu_exec_decoded();");
        emulator.inject(injectHere, injection);

        /*
         * Run the experiment. The second SYSCALL should interrupt, since any RAX value other
         * than 1 invokes emu_swi.
         */
        try {
            emuThread.stepInstruction(10);
            printerr("We should not have completed 10 steps!");
        }
        catch (InterruptPcodeExecutionException e) {
            println("Terminated via interrupt. Good.");
        }

        /*
         * Inspect the machine. Direct state access always works, but for anything beyond a
         * simple variable, compiling an expression is usually more convenient.
         */
        boolean bigEndian = language.isBigEndian();
        byte[] rcxDirect = emuThread.getState().getVar(language.getRegister("RCX"));
        println("RCX = " + Utils.bytesToLong(rcxDirect, 8, bigEndian));

        byte[] rcxEvaluated = SleighProgramCompiler.compileExpression(language, "RCX")
                .evaluate(emuThread.getExecutor());
        println("RCX = " + Utils.bytesToLong(rcxEvaluated, 8, bigEndian));

        byte[] rcxPlus4 = SleighProgramCompiler.compileExpression(language, "RCX+4")
                .evaluate(emuThread.getExecutor());
        println("RCX+4 = " + Utils.bytesToLong(rcxPlus4, 8, bigEndian));
    }

    /**
     * Accumulates machine code assembled one line at a time, starting at a fixed entry address
     */
    public static class CodeBuffer {
        private final ByteArrayOutputStream assembled = new ByteArrayOutputStream();
        private final Assembler assembler;
        private final Address start;

        /**
         * Create an empty buffer
         *
         * @param asm the assembler used for each line
         * @param entry the address of the first assembled byte
         */
        public CodeBuffer(Assembler asm, Address entry) {
            this.assembler = asm;
            this.start = entry;
        }

        /**
         * Get the address just past the bytes assembled so far
         *
         * @return the address where the next line will be assembled
         */
        public Address getNext() {
            int length = assembled.size();
            return start.add(length);
        }

        /**
         * Assemble one line at the next address and append its bytes to the buffer
         *
         * @param line the assembly source line
         * @return the bytes generated for the line
         * @throws AssemblySyntaxException if the assembler rejects the line's syntax
         * @throws AssemblySemanticException if the assembler rejects the line's semantics
         * @throws IOException if the bytes cannot be appended
         */
        public byte[] assemble(String line)
                throws AssemblySyntaxException, AssemblySemanticException, IOException {
            Address at = getNext();
            byte[] encoded = assembler.assembleLine(at, line);
            assembled.write(encoded);
            return encoded;
        }

        /**
         * Get a copy of everything assembled so far
         *
         * @return the accumulated bytes
         */
        public byte[] getBytes() {
            return assembled.toByteArray();
        }
    }
}
@@ -0,0 +1,112 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//An example script for using Structured Sleigh stand alone
//@author
//@category Sleigh
//@keybinding
//@menupath
//@toolbar
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;
import java.util.Map;
import java.util.stream.Collectors;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.exec.SleighPcodeUseropDefinition;
import ghidra.pcode.struct.StructuredSleigh;
import ghidra.program.model.lang.LanguageID;
public class StandAloneStructuredSleighScript extends GhidraScript {
    private SleighLanguage language;

    /**
     * A base class whose main purpose is to grant the "compiler" access to the methods of
     * anonymous classes deriving from it. Anonymous classes are implicitly "private," so that
     * access is not ordinarily permitted. As a bonus, it provides a default constructor, sparing
     * each use from supplying the compiler spec.
     */
    class LookupStructuredSleigh extends StructuredSleigh {
        protected LookupStructuredSleigh() {
            super(language.getDefaultCompilerSpec());
        }

        @Override
        protected Lookup getMethodLookup() {
            return MethodHandles.lookup();
        }
    }

    /**
     * Generate Sleigh source for two example userops and print it to the console
     *
     * @throws Exception if the language cannot be obtained or generation fails
     */
    @Override
    protected void run() throws Exception {
        /*
         * DATA offers a minimal context. Substitute a target language if you have one in mind.
         */
        LanguageID languageId = new LanguageID("DATA:BE:64:default");
        language = (SleighLanguage) getLanguage(languageId);

        Map<String, SleighPcodeUseropDefinition<Object>> ops = new LookupStructuredSleigh() {
            /**
             * Add two in-memory 16-element vectors and store the result in memory
             *
             * @param d pointer to the destination vector
             * @param s1 pointer to the first operand vector
             * @param s2 pointer to the second operand vector
             */
            @StructuredUserop
            public void vector_add(
                    @Param(name = "d", type = "int *") Var d,
                    @Param(name = "s1", type = "int *") Var s1,
                    @Param(name = "s2", type = "int *") Var s2) {
                // A Java loop emits the statements 16 times, i.e., an unrolled loop.
                // A Sleigh loop is the alternative; weigh emulation and analysis tradeoffs.
                int element = 0;
                while (element < 16) {
                    // Element 0 gets a needless +0 offset, which is tolerable
                    d.index(element)
                            .deref()
                            .set(s1.index(element).deref().addi(s2.index(element).deref()));
                    element++;
                }
            }

            /**
             * Copy bytes from one memory buffer to another
             *
             * @param d pointer to the destination buffer
             * @param s pointer to the source buffer
             * @param n the number of bytes to copy
             */
            @StructuredUserop
            public void memcpy(
                    @Param(name = "d", type = "void *") Var d,
                    @Param(name = "s", type = "void *") Var s,
                    @Param(name = "n", type = "long") Var n) { // size_t is not built-in
                Var i = local("i", type("long"));
                // Neither of these casts emits a Sleigh statement
                Var db = d.cast(type("byte *"));
                Var sb = s.cast(type("byte *"));

                // The count is only known at run time, so this has to be a Sleigh loop
                _for(i.set(0), i.ltiu(n), i.inc(), () -> {
                    db.index(i).deref().set(sb.index(i).deref());
                });
            }
        }.generate();

        /*
         * Print the Sleigh source generated for each userop
         */
        for (SleighPcodeUseropDefinition<?> userop : ops.values()) {
            dumpUserop(userop);
        }
    }

    /**
     * Print one generated userop: its name, its comma-separated inputs, and its source lines
     *
     * @param userop the generated userop definition
     */
    private void dumpUserop(SleighPcodeUseropDefinition<?> userop) {
        print(userop.getName() + "(");
        print(String.join(",", userop.getInputs()));
        print(") {\n");
        for (String sourceLine : userop.getLines()) {
            print(sourceLine);
        }
        print("}\n\n");
    }
}
@@ -0,0 +1,233 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//An example emulation script that uses a stand-alone emulator with syscalls.
//It provides the set-up code and then demonstrates some use cases.
//@author
//@category Emulation
//@keybinding
//@menupath
//@toolbar
import java.nio.charset.Charset;
import java.util.List;
import ghidra.app.plugin.assembler.Assembler;
import ghidra.app.plugin.assembler.Assemblers;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.app.script.GhidraScript;
import ghidra.pcode.emu.PcodeEmulator;
import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.emu.sys.EmuInvalidSystemCallException;
import ghidra.pcode.emu.sys.EmuSyscallLibrary;
import ghidra.pcode.exec.*;
import ghidra.pcode.utils.Utils;
import ghidra.program.database.ProgramDB;
import ghidra.program.model.address.*;
import ghidra.program.model.data.DataTypeConflictHandler;
import ghidra.program.model.data.PointerDataType;
import ghidra.program.model.lang.*;
import ghidra.program.model.listing.Program;
import ghidra.program.model.mem.Memory;
import ghidra.program.model.mem.MemoryBlock;
import ghidra.program.model.symbol.SourceType;
import ghidra.util.database.UndoableTransaction;
/**
 * An example script that drives a stand-alone p-code emulator with simulated system calls.
 *
 * <p>
 * The script builds a throw-away x86-64 program containing a short assembled stub and a
 * "Hello, World!" string, populates a system call numbering map in that program, copies the
 * program's initialized memory into a {@link PcodeEmulator} whose userop library is a
 * {@code DemoSyscallLibrary}, and steps one thread. The stub's second {@code SYSCALL} uses a
 * number with no mapping, so stepping is expected to end with an
 * {@link EmuInvalidSystemCallException}. Finally, a few values are read back from the machine
 * state and printed.
 */
public class StandAloneSyscallEmuExampleScript extends GhidraScript {
	// Use the JDK constant instead of a by-name charset lookup; no extra import is required
	private final static Charset UTF8 = java.nio.charset.StandardCharsets.UTF_8;

	// Read by the anonymous emulator subclass in run() and released in its finally block
	Program program = null;

	/**
	 * Build the example program, emulate it, and print the resulting machine state.
	 *
	 * <p>
	 * The temporary program is always released before this method returns, whether or not the
	 * emulation succeeds.
	 *
	 * @throws Exception if any step of program construction or emulation fails
	 */
	@Override
	protected void run() throws Exception {
		/*
		 * First, get all the services and stuff:
		 */
		SleighLanguage language = (SleighLanguage) getLanguage(new LanguageID("x86:LE:64:default"));

		/*
		 * I'll generate a new program, because I don't want to require the user to pick something
		 * specific. It won't be displayed, though, so we'll just release it when we're done.
		 */
		Address entry;
		try {
			/*
			 * "gcc" is the name of the compiler spec, but we're really interested in the Linux
			 * syscall calling conventions.
			 */
			program =
				new ProgramDB("syscall_example", language,
					language.getCompilerSpecByID(new CompilerSpecID("gcc")), this);
			try (UndoableTransaction tid = UndoableTransaction.start(program, "Init", true)) {
				AddressSpace space = program.getAddressFactory().getDefaultAddressSpace();
				entry = space.getAddress(0x00400000);
				Address dataEntry = space.getAddress(0x00600000);
				Memory memory = program.getMemory();
				memory.createInitializedBlock(".text", entry, 0x1000, (byte) 0, monitor, false);
				Assembler asm = Assemblers.getAssembler(program);
				// Syscall 1 ("console") takes the string pointer in RDI; syscall 20 is unmapped
				asm.assemble(entry,
					"MOV RDI, 0x" + dataEntry,
					"MOV RAX, 1",
					"SYSCALL",
					"MOV RAX, 20",
					"SYSCALL");
				memory.createInitializedBlock(".data", dataEntry, 0x1000, (byte) 0, monitor, false);
				memory.setBytes(dataEntry, "Hello, World!\n".getBytes(UTF8));

				/*
				 * Because "pointer" is a built-in type, and the emulator does not modify the
				 * program, we must ensure it has been resolved on the program's data type manager.
				 */
				program.getDataTypeManager()
						.resolve(PointerDataType.dataType, DataTypeConflictHandler.DEFAULT_HANDLER);

				/*
				 * We must also populate the system call numbering map. Ordinarily, this would be done
				 * using the system call analyzer or another script. Here, we'll just fake it out.
				 */
				AddressSpace other =
					program.getAddressFactory().getAddressSpace(SpaceNames.OTHER_SPACE_NAME);
				MemoryBlock blockSyscall = program.getMemory()
						.createUninitializedBlock(EmuSyscallLibrary.SYSCALL_SPACE_NAME,
							other.getAddress(0), 0x1000, true);
				blockSyscall.setPermissions(true, false, true);

				AddressSpace syscall = program.getAddressFactory()
						.getAddressSpace(EmuSyscallLibrary.SYSCALL_SPACE_NAME);
				/*
				 * The system call names must match those from the EmuSyscall annotations in the
				 * system call library, in our case from DemoSyscallLibrary. Because the x64
				 * compiler specs define a "syscall" convention, we'll apply it. The syscall
				 * dispatcher will use that convention to fetch the parameters out of the machine
				 * state, pass them into the system call definition, and store the result back into
				 * the machine.
				 */
				// Map system call 0 to "write"
				program.getFunctionManager()
						.createFunction("write", syscall.getAddress(0),
							new AddressSet(syscall.getAddress(0)), SourceType.USER_DEFINED)
						.setCallingConvention(EmuSyscallLibrary.SYSCALL_CONVENTION_NAME);
				// Map system call 1 to "console"
				program.getFunctionManager()
						.createFunction("console", syscall.getAddress(1),
							new AddressSet(syscall.getAddress(1)), SourceType.USER_DEFINED)
						.setCallingConvention(EmuSyscallLibrary.SYSCALL_CONVENTION_NAME);
			}

			/*
			 * Create an emulator and start a thread
			 */
			PcodeEmulator emulator = new PcodeEmulator(language) {
				@Override
				protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
					return new DemoSyscallLibrary(this, program,
						StandAloneSyscallEmuExampleScript.this);
				}

				// Uncomment this to see instructions printed as they are decoded
				/*
				@Override
				protected BytesPcodeThread createThread(String name) {
					return new BytesPcodeThread(name, this) {
						@Override
						protected SleighInstructionDecoder createInstructionDecoder(
								PcodeExecutorState<byte[]> sharedState) {
							return new SleighInstructionDecoder(language, sharedState) {
								@Override
								public Instruction decodeInstruction(Address address,
										RegisterValue context) {
									Instruction instruction = super.decodeInstruction(address, context);
									println("Decoded " + address + ": " + instruction);
									return instruction;
								}
							};
						}
					};
				}
				*/
			};
			PcodeThread<byte[]> thread = emulator.newThread();
			// The emulator composes the full library for each thread
			PcodeUseropLibrary<byte[]> library = thread.getUseropLibrary();

			/*
			 * The library has a reference to the program and uses it to derive types and the system
			 * call numbering. However, the emulator itself does not have access to the program. If we
			 * followed the pattern in DebuggerEmuExampleScript, the emulator would have its state bound
			 * (indirectly) to the program. We'll need to copy the bytes in. Because we created blocks
			 * that were 0x1000 bytes in size, we can be fast and loose with our buffer. Ordinarily, you
			 * may want to copy in chunks rather than taking entire memory blocks at a time.
			 */
			byte[] data = new byte[0x1000];
			for (MemoryBlock block : program.getMemory().getBlocks()) {
				if (!block.isInitialized()) {
					continue; // Skip the syscall/OTHER block
				}
				Address addr = block.getStart();
				block.getBytes(addr, data);
				emulator.getSharedState()
						.setVar(addr.getAddressSpace(), addr.getOffset(), data.length, true, data);
			}

			/*
			 * Initialize the thread
			 */
			PcodeProgram init = SleighProgramCompiler.compileProgram(language, "init", List.of(
				"RIP = 0x" + entry + ";",
				"RSP = 0x00001000;"),
				library);
			thread.getExecutor().execute(init, library);
			thread.overrideContextWithDefault();
			thread.reInitialize();

			/*
			 * Run the experiment: This should interrupt on the second SYSCALL, because we didn't
			 * provide a system call name in OTHER space for 20.
			 */
			try {
				thread.stepInstruction(10);
				printerr("We should not have completed 10 steps!");
			}
			catch (EmuInvalidSystemCallException e) {
				println("Terminated via invalid syscall. Good.");
			}

			/*
			 * Inspect the machine. You can always do this by accessing the state directly, but for
			 * anything other than simple variables, you may find compiling an expression more
			 * convenient.
			 */
			// Direct read of the register from the thread's state
			println("RDI = " +
				Utils.bytesToLong(thread.getState().getVar(language.getRegister("RDI")), 8,
					language.isBigEndian()));

			// The same value, obtained by compiling and evaluating a Sleigh expression
			println("RDI = " + Utils.bytesToLong(
				SleighProgramCompiler.compileExpression(language, "RDI")
						.evaluate(thread.getExecutor()),
				8, language.isBigEndian()));

			println("RDI+4 = " +
				Utils.bytesToLong(SleighProgramCompiler.compileExpression(language, "RDI+4")
						.evaluate(thread.getExecutor()),
					8, language.isBigEndian()));
		}
		finally {
			// The program is null only if its construction failed before assignment
			if (program != null) {
				program.release(this);
			}
		}
	}
}
@@ -88,11 +88,11 @@ public abstract class DebuggerGoToTrait {
if (space == null) { if (space == null) {
throw new IllegalArgumentException("No such address space: " + spaceName); throw new IllegalArgumentException("No such address space: " + spaceName);
} }
SleighExpression expr = SleighProgramCompiler.compileExpression(slang, expression); PcodeExpression expr = SleighProgramCompiler.compileExpression(slang, expression);
return goToSleigh(space, expr); return goToSleigh(space, expr);
} }
public CompletableFuture<Boolean> goToSleigh(AddressSpace space, SleighExpression expression) { public CompletableFuture<Boolean> goToSleigh(AddressSpace space, PcodeExpression expression) {
AsyncPcodeExecutor<byte[]> executor = TracePcodeUtils.executorForCoordinates(current); AsyncPcodeExecutor<byte[]> executor = TracePcodeUtils.executorForCoordinates(current);
CompletableFuture<byte[]> result = expression.evaluate(executor); CompletableFuture<byte[]> result = expression.evaluate(executor);
return result.thenApply(offset -> { return result.thenApply(offset -> {
@@ -58,7 +58,7 @@ public class WatchRow {
private String typePath; private String typePath;
private DataType dataType; private DataType dataType;
private SleighExpression compiled; private PcodeExpression compiled;
private TraceMemoryState state; private TraceMemoryState state;
private Address address; private Address address;
private AddressSet reads; private AddressSet reads;
@@ -208,7 +208,7 @@ public class WatchRow {
@Override @Override
public PcodeFrame execute(PcodeProgram program, public PcodeFrame execute(PcodeProgram program,
SleighUseropLibrary<Pair<byte[], Address>> library) { PcodeUseropLibrary<Pair<byte[], Address>> library) {
depsState.reset(); depsState.reset();
return super.execute(program, library); return super.execute(program, library);
} }
@@ -36,8 +36,8 @@ public abstract class AbstractReadsTargetPcodeExecutorState
abstract class AbstractReadsTargetCachedSpace extends CachedSpace { abstract class AbstractReadsTargetCachedSpace extends CachedSpace {
public AbstractReadsTargetCachedSpace(Language language, AddressSpace space, public AbstractReadsTargetCachedSpace(Language language, AddressSpace space,
TraceMemorySpace source, long snap) { TraceMemorySpace backing, long snap) {
super(language, space, source, snap); super(language, space, backing, snap);
} }
protected abstract void fillUninitialized(AddressSet uninitialized); protected abstract void fillUninitialized(AddressSet uninitialized);
@@ -47,15 +47,15 @@ public abstract class AbstractReadsTargetPcodeExecutorState
} }
protected AddressSet computeUnknown(AddressSet uninitialized) { protected AddressSet computeUnknown(AddressSet uninitialized) {
return uninitialized.subtract(source.getAddressesWithState(snap, uninitialized, return uninitialized.subtract(backing.getAddressesWithState(snap, uninitialized,
s -> s != null && s != TraceMemoryState.UNKNOWN)); s -> s != null && s != TraceMemoryState.UNKNOWN));
} }
@Override @Override
public byte[] read(long offset, int size) { public byte[] read(long offset, int size) {
if (source != null) { if (backing != null) {
AddressSet uninitialized = AddressSet uninitialized =
addrSet(cache.getUninitialized(offset, offset + size - 1)); addrSet(bytes.getUninitialized(offset, offset + size - 1));
if (uninitialized.isEmpty()) { if (uninitialized.isEmpty()) {
return super.read(offset, size); return super.read(offset, size);
} }
@@ -63,7 +63,7 @@ public abstract class AbstractReadsTargetPcodeExecutorState
fillUninitialized(uninitialized); fillUninitialized(uninitialized);
AddressSet unknown = AddressSet unknown =
computeUnknown(addrSet(cache.getUninitialized(offset, offset + size - 1))); computeUnknown(addrSet(bytes.getUninitialized(offset, offset + size - 1)));
if (!unknown.isEmpty()) { if (!unknown.isEmpty()) {
warnUnknown(unknown); warnUnknown(unknown);
} }
@@ -40,8 +40,8 @@ public class ReadsTargetMemoryPcodeExecutorState
protected class ReadsTargetMemoryCachedSpace extends AbstractReadsTargetCachedSpace { protected class ReadsTargetMemoryCachedSpace extends AbstractReadsTargetCachedSpace {
public ReadsTargetMemoryCachedSpace(Language language, AddressSpace space, public ReadsTargetMemoryCachedSpace(Language language, AddressSpace space,
TraceMemorySpace source, long snap) { TraceMemorySpace backing, long snap) {
super(language, space, source, snap); super(language, space, backing, snap);
} }
@Override @Override
@@ -108,7 +108,7 @@ public class ReadsTargetMemoryPcodeExecutorState
" bytes"); " bytes");
} }
// write(lower - shift, data, 0 ,read); // write(lower - shift, data, 0 ,read);
cache.putData(lower - shift, data, 0, read); bytes.putData(lower - shift, data, 0, read);
} }
catch (MemoryAccessException | AddressOutOfBoundsException e) { catch (MemoryAccessException | AddressOutOfBoundsException e) {
throw new AssertionError(e); throw new AssertionError(e);
@@ -28,7 +28,7 @@ import ghidra.program.model.pcode.Varnode;
* An executor which can perform (some of) its work asynchronously * An executor which can perform (some of) its work asynchronously
* *
* <p> * <p>
* Note that a future returned from, e.g., {@link #executeAsync(SleighProgram, SleighUseropLibrary)} * Note that a future returned from, e.g., {@link #executeAsync(SleighProgram, PcodeUseropLibrary)}
* may complete before the computation has actually been performed. They complete when all of the * may complete before the computation has actually been performed. They complete when all of the
* operations have been scheduled, and the last future has been written into the state. (This * operations have been scheduled, and the last future has been written into the state. (This
* typically happens when any branch conditions have completed). Instead, a caller should read from * typically happens when any branch conditions have completed). Instead, a caller should read from
@@ -46,7 +46,7 @@ public class AsyncPcodeExecutor<T> extends PcodeExecutor<CompletableFuture<T>> {
} }
public CompletableFuture<Void> stepOpAsync(PcodeOp op, PcodeFrame frame, public CompletableFuture<Void> stepOpAsync(PcodeOp op, PcodeFrame frame,
SleighUseropLibrary<CompletableFuture<T>> library) { PcodeUseropLibrary<CompletableFuture<T>> library) {
if (op.getOpcode() == PcodeOp.CBRANCH) { if (op.getOpcode() == PcodeOp.CBRANCH) {
return executeConditionalBranchAsync(op, frame); return executeConditionalBranchAsync(op, frame);
} }
@@ -55,7 +55,7 @@ public class AsyncPcodeExecutor<T> extends PcodeExecutor<CompletableFuture<T>> {
} }
public CompletableFuture<Void> stepAsync(PcodeFrame frame, public CompletableFuture<Void> stepAsync(PcodeFrame frame,
SleighUseropLibrary<CompletableFuture<T>> library) { PcodeUseropLibrary<CompletableFuture<T>> library) {
try { try {
return stepOpAsync(frame.nextOp(), frame, library); return stepOpAsync(frame.nextOp(), frame, library);
} }
@@ -80,12 +80,12 @@ public class AsyncPcodeExecutor<T> extends PcodeExecutor<CompletableFuture<T>> {
} }
public CompletableFuture<Void> executeAsync(PcodeProgram program, public CompletableFuture<Void> executeAsync(PcodeProgram program,
SleighUseropLibrary<CompletableFuture<T>> library) { PcodeUseropLibrary<CompletableFuture<T>> library) {
return executeAsync(program.code, program.useropNames, library); return executeAsync(program.code, program.useropNames, library);
} }
protected CompletableFuture<Void> executeAsyncLoop(PcodeFrame frame, protected CompletableFuture<Void> executeAsyncLoop(PcodeFrame frame,
SleighUseropLibrary<CompletableFuture<T>> library) { PcodeUseropLibrary<CompletableFuture<T>> library) {
if (frame.isFinished()) { if (frame.isFinished()) {
return AsyncUtils.NIL; return AsyncUtils.NIL;
} }
@@ -94,7 +94,7 @@ public class AsyncPcodeExecutor<T> extends PcodeExecutor<CompletableFuture<T>> {
} }
public CompletableFuture<Void> executeAsync(List<PcodeOp> code, public CompletableFuture<Void> executeAsync(List<PcodeOp> code,
Map<Integer, String> useropNames, SleighUseropLibrary<CompletableFuture<T>> library) { Map<Integer, String> useropNames, PcodeUseropLibrary<CompletableFuture<T>> library) {
PcodeFrame frame = new PcodeFrame(language, code, useropNames); PcodeFrame frame = new PcodeFrame(language, code, useropNames);
return executeAsyncLoop(frame, library); return executeAsyncLoop(frame, library);
} }
@@ -83,4 +83,9 @@ public class AsyncWrappedPcodeArithmetic<T> implements PcodeArithmetic<Completab
} }
return arithmetic.toConcrete(cond.getNow(null), isContextreg); return arithmetic.toConcrete(cond.getNow(null), isContextreg);
} }
/**
 * Compute the size of a value that may not have completed yet.
 *
 * <p>
 * The returned future applies the wrapped arithmetic's {@code sizeOf} to the result of
 * {@code value}.
 *
 * @param value the future whose eventual result is to be sized
 * @return a future of the size, as computed by the wrapped arithmetic
 */
@Override
public CompletableFuture<T> sizeOf(CompletableFuture<T> value) {
	CompletableFuture<T> sizeFuture = value.thenApply(resolved -> {
		return arithmetic.sizeOf(resolved);
	});
	return sizeFuture;
}
} }
@@ -169,7 +169,7 @@ public class DebuggerPcodeStepperProviderTest extends AbstractGhidraHeadedDebugg
protected List<PcodeRow> format(List<String> sleigh) { protected List<PcodeRow> format(List<String> sleigh) {
SleighLanguage language = (SleighLanguage) getToyBE64Language(); SleighLanguage language = (SleighLanguage) getToyBE64Language();
PcodeProgram prog = SleighProgramCompiler.compileProgram(language, "test", sleigh, PcodeProgram prog = SleighProgramCompiler.compileProgram(language, "test", sleigh,
SleighUseropLibrary.nil()); PcodeUseropLibrary.nil());
PcodeExecutor<byte[]> executor = PcodeExecutor<byte[]> executor =
new PcodeExecutor<>(language, PcodeArithmetic.BYTES_BE, null); new PcodeExecutor<>(language, PcodeArithmetic.BYTES_BE, null);
PcodeFrame frame = executor.begin(prog); PcodeFrame frame = executor.begin(prog);
@@ -55,7 +55,7 @@ public class TraceRecorderAsyncPcodeExecTest extends AbstractGhidraHeadedDebugge
Trace trace = recorder.getTrace(); Trace trace = recorder.getTrace();
SleighLanguage language = (SleighLanguage) trace.getBaseLanguage(); SleighLanguage language = (SleighLanguage) trace.getBaseLanguage();
SleighExpression expr = SleighProgramCompiler PcodeExpression expr = SleighProgramCompiler
.compileExpression(language, "r0 + r1"); .compileExpression(language, "r0 + r1");
Register r0 = language.getRegister("r0"); Register r0 = language.getRegister("r0");
@@ -99,7 +99,7 @@ public class TraceRecorderAsyncPcodeExecTest extends AbstractGhidraHeadedDebugge
SleighLanguage language = (SleighLanguage) trace.getBaseLanguage(); SleighLanguage language = (SleighLanguage) trace.getBaseLanguage();
PcodeProgram prog = SleighProgramCompiler.compileProgram(language, "test", PcodeProgram prog = SleighProgramCompiler.compileProgram(language, "test",
List.of("r2 = r0 + r1;"), SleighUseropLibrary.NIL); List.of("r2 = r0 + r1;"), PcodeUseropLibrary.NIL);
Register r0 = language.getRegister("r0"); Register r0 = language.getRegister("r0");
Register r1 = language.getRegister("r1"); Register r1 = language.getRegister("r1");
@@ -119,7 +119,7 @@ public class TraceRecorderAsyncPcodeExecTest extends AbstractGhidraHeadedDebugge
AsyncPcodeExecutor<byte[]> executor = new AsyncPcodeExecutor<>( AsyncPcodeExecutor<byte[]> executor = new AsyncPcodeExecutor<>(
language, AsyncWrappedPcodeArithmetic.forLanguage(language), asyncState); language, AsyncWrappedPcodeArithmetic.forLanguage(language), asyncState);
waitOn(executor.executeAsync(prog, SleighUseropLibrary.nil())); waitOn(executor.executeAsync(prog, PcodeUseropLibrary.nil()));
waitOn(asyncState.getVar(language.getRegister("r2"))); waitOn(asyncState.getVar(language.getRegister("r2")));
assertEquals(BigInteger.valueOf(11), new BigInteger(1, regs.regVals.get("r2"))); assertEquals(BigInteger.valueOf(11), new BigInteger(1, regs.regVals.get("r2")));
@@ -36,16 +36,12 @@ public abstract class AbstractCheckedTraceCachedWriteBytesPcodeExecutorState
@Override @Override
public byte[] read(long offset, int size) { public byte[] read(long offset, int size) {
RangeSet<UnsignedLong> uninitialized = RangeSet<UnsignedLong> uninitialized =
cache.getUninitialized(offset, offset + size - 1); bytes.getUninitialized(offset, offset + size - 1);
if (!uninitialized.isEmpty()) { if (!uninitialized.isEmpty()) {
size = checkUninitialized(source, space.getAddress(offset), size, size = checkUninitialized(backing, space.getAddress(offset), size,
addrSet(uninitialized)); addrSet(uninitialized));
if (source != null) {
readUninitializedFromSource(uninitialized);
}
} }
return readCached(offset, size); return super.read(offset, size);
} }
} }
@@ -55,10 +51,10 @@ public abstract class AbstractCheckedTraceCachedWriteBytesPcodeExecutorState
} }
@Override @Override
protected CachedSpace newSpace(AddressSpace space, TraceMemorySpace source, long snap) { protected CachedSpace newSpace(AddressSpace space, TraceMemorySpace backing) {
return new CheckedCachedSpace(language, space, source, snap); return new CheckedCachedSpace(language, space, backing, snap);
} }
protected abstract int checkUninitialized(TraceMemorySpace source, Address start, int size, protected abstract int checkUninitialized(TraceMemorySpace backing, Address start, int size,
AddressSet uninitialized); AddressSet uninitialized);
} }
@@ -39,16 +39,16 @@ public class RequireIsKnownTraceCachedWriteBytesPcodeExecutorState
} }
@Override @Override
protected int checkUninitialized(TraceMemorySpace source, Address start, int size, protected int checkUninitialized(TraceMemorySpace backing, Address start, int size,
AddressSet uninitialized) { AddressSet uninitialized) {
if (source == null) { if (backing == null) {
if (!uninitialized.contains(start)) { if (!uninitialized.contains(start)) {
return (int) uninitialized.getMinAddress().subtract(start); return (int) uninitialized.getMinAddress().subtract(start);
} }
throw excFor(uninitialized); throw excFor(uninitialized);
} }
// TODO: Could find first instead? // TODO: Could find first instead?
AddressSetView unknown = uninitialized.subtract(getKnown(source)); AddressSetView unknown = uninitialized.subtract(getKnown(backing));
if (unknown.isEmpty()) { if (unknown.isEmpty()) {
return size; return size;
} }
@@ -16,27 +16,20 @@
package ghidra.pcode.exec.trace; package ghidra.pcode.exec.trace;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.*;
import com.google.common.collect.*; import com.google.common.collect.*;
import com.google.common.primitives.UnsignedLong; import com.google.common.primitives.UnsignedLong;
import ghidra.generic.util.datastruct.SemisparseByteArray; import ghidra.pcode.exec.AbstractBytesPcodeExecutorState;
import ghidra.pcode.exec.AbstractLongOffsetPcodeExecutorState; import ghidra.pcode.exec.BytesPcodeExecutorStateSpace;
import ghidra.pcode.exec.BytesPcodeArithmetic;
import ghidra.pcode.exec.trace.TraceCachedWriteBytesPcodeExecutorState.CachedSpace; import ghidra.pcode.exec.trace.TraceCachedWriteBytesPcodeExecutorState.CachedSpace;
import ghidra.pcode.utils.Utils; import ghidra.program.model.address.AddressSet;
import ghidra.program.model.address.*; import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.Language; import ghidra.program.model.lang.Language;
import ghidra.program.model.lang.Register;
import ghidra.program.model.mem.MemBuffer;
import ghidra.program.model.mem.Memory;
import ghidra.trace.model.Trace; import ghidra.trace.model.Trace;
import ghidra.trace.model.memory.TraceMemorySpace; import ghidra.trace.model.memory.TraceMemorySpace;
import ghidra.trace.model.thread.TraceThread; import ghidra.trace.model.thread.TraceThread;
import ghidra.trace.util.MemBufferAdapter;
import ghidra.util.MathUtilities; import ghidra.util.MathUtilities;
import ghidra.util.Msg;
/** /**
* A state which reads bytes from a trace, but caches writes internally. * A state which reads bytes from a trace, but caches writes internally.
@@ -47,41 +40,7 @@ import ghidra.util.Msg;
* later time. * later time.
*/ */
public class TraceCachedWriteBytesPcodeExecutorState public class TraceCachedWriteBytesPcodeExecutorState
extends AbstractLongOffsetPcodeExecutorState<byte[], CachedSpace> { extends AbstractBytesPcodeExecutorState<TraceMemorySpace, CachedSpace> {
protected class StateMemBuffer implements MemBufferAdapter {
protected final Address address;
protected final CachedSpace source;
public StateMemBuffer(Address address, CachedSpace source) {
this.address = address;
this.source = source;
}
@Override
public Address getAddress() {
return address;
}
@Override
public Memory getMemory() {
return null;
}
@Override
public boolean isBigEndian() {
return trace.getBaseLanguage().isBigEndian();
}
@Override
public int getBytes(ByteBuffer buffer, int addressOffset) {
byte[] data = source.read(address.getOffset() + addressOffset, buffer.remaining());
buffer.put(data);
return data.length;
}
}
protected final Map<AddressSpace, CachedSpace> spaces = new HashMap<>();
protected final Trace trace; protected final Trace trace;
protected final long snap; protected final long snap;
@@ -90,136 +49,53 @@ public class TraceCachedWriteBytesPcodeExecutorState
public TraceCachedWriteBytesPcodeExecutorState(Trace trace, long snap, TraceThread thread, public TraceCachedWriteBytesPcodeExecutorState(Trace trace, long snap, TraceThread thread,
int frame) { int frame) {
super(trace.getBaseLanguage(), BytesPcodeArithmetic.forLanguage(trace.getBaseLanguage())); super(trace.getBaseLanguage());
this.trace = trace; this.trace = trace;
this.snap = snap; this.snap = snap;
this.thread = thread; this.thread = thread;
this.frame = frame; this.frame = frame;
} }
protected static class CachedSpace { public static class CachedSpace extends BytesPcodeExecutorStateSpace<TraceMemorySpace> {
protected final SemisparseByteArray cache = new SemisparseByteArray();
protected final RangeSet<UnsignedLong> written = TreeRangeSet.create(); protected final RangeSet<UnsignedLong> written = TreeRangeSet.create();
protected final Language language; // For logging diagnostic
protected final AddressSpace space;
protected final TraceMemorySpace source;
protected final long snap; protected final long snap;
public CachedSpace(Language language, AddressSpace space, TraceMemorySpace source, public CachedSpace(Language language, AddressSpace space, TraceMemorySpace backing,
long snap) { long snap) {
this.language = language; super(language, space, backing);
this.space = space;
this.source = source;
this.snap = snap; this.snap = snap;
} }
public void write(long offset, byte[] buffer, int srcOffset, int length) { @Override
cache.putData(offset, buffer, srcOffset, length); public void write(long offset, byte[] val, int srcOffset, int length) {
super.write(offset, val, srcOffset, length);
UnsignedLong uLoc = UnsignedLong.fromLongBits(offset); UnsignedLong uLoc = UnsignedLong.fromLongBits(offset);
UnsignedLong uEnd = UnsignedLong.fromLongBits(offset + length); UnsignedLong uEnd = UnsignedLong.fromLongBits(offset + length);
written.add(Range.closedOpen(uLoc, uEnd)); written.add(Range.closedOpen(uLoc, uEnd));
} }
public static long lower(Range<UnsignedLong> rng) { @Override
return rng.lowerBoundType() == BoundType.CLOSED protected void readUninitializedFromBacking(RangeSet<UnsignedLong> uninitialized) {
? rng.lowerEndpoint().longValue()
: rng.lowerEndpoint().longValue() + 1;
}
public static long upper(Range<UnsignedLong> rng) {
return rng.upperBoundType() == BoundType.CLOSED
? rng.upperEndpoint().longValue()
: rng.upperEndpoint().longValue() - 1;
}
protected void readUninitializedFromSource(RangeSet<UnsignedLong> uninitialized) {
if (!uninitialized.isEmpty()) { if (!uninitialized.isEmpty()) {
// TODO: Warn or bail when reading UNKNOWN bytes
// NOTE: Read without regard to gaps
// NOTE: Cannot write those gaps, though!!!
Range<UnsignedLong> toRead = uninitialized.span(); Range<UnsignedLong> toRead = uninitialized.span();
assert toRead.hasUpperBound() && toRead.hasLowerBound(); assert toRead.hasUpperBound() && toRead.hasLowerBound();
long lower = lower(toRead); long lower = lower(toRead);
long upper = upper(toRead); long upper = upper(toRead);
ByteBuffer buf = ByteBuffer.allocate((int) (upper - lower + 1)); ByteBuffer buf = ByteBuffer.allocate((int) (upper - lower + 1));
source.getBytes(snap, space.getAddress(lower), buf); backing.getBytes(snap, space.getAddress(lower), buf);
for (Range<UnsignedLong> rng : uninitialized.asRanges()) { for (Range<UnsignedLong> rng : uninitialized.asRanges()) {
long l = lower(rng); long l = lower(rng);
long u = upper(rng); long u = upper(rng);
cache.putData(l, buf.array(), (int) (l - lower), (int) (u - l + 1)); bytes.putData(l, buf.array(), (int) (l - lower), (int) (u - l + 1));
} }
} }
} }
protected byte[] readCached(long offset, int size) {
byte[] data = new byte[size];
cache.getData(offset, data);
return data;
}
protected AddressRange addrRng(Range<UnsignedLong> rng) {
Address start = space.getAddress(lower(rng));
Address end = space.getAddress(upper(rng));
return new AddressRangeImpl(start, end);
}
protected AddressSet addrSet(RangeSet<UnsignedLong> set) {
AddressSet result = new AddressSet();
for (Range<UnsignedLong> rng : set.asRanges()) {
result.add(addrRng(rng));
}
return result;
}
protected Set<Register> getRegs(AddressSet set) {
Set<Register> regs = new TreeSet<>();
for (AddressRange rng : set) {
Register r = language.getRegister(rng.getMinAddress(), (int) rng.getLength());
if (r != null) {
regs.add(r);
}
else {
regs.addAll(Arrays.asList(language.getRegisters(rng.getMinAddress())));
}
}
return regs;
}
protected void warnState(AddressSet set, String message) {
Set<Register> regs = getRegs(set);
if (regs.isEmpty()) {
Msg.warn(this, message + ": " + set);
}
else {
Msg.warn(this, message + ": " + set + " (registers " + regs + ")");
}
}
protected void warnUninit(RangeSet<UnsignedLong> uninit) {
AddressSet uninitialized = addrSet(uninit);
Set<Register> regs = getRegs(uninitialized);
if (regs.isEmpty()) {
Msg.warn(this, "Emulator read from uninitialized state: " + uninit);
}
Msg.warn(this, "Emulator read from uninitialized state: " + uninit +
" (includes registers: " + regs + ")");
}
protected void warnUnknown(AddressSet unknown) { protected void warnUnknown(AddressSet unknown) {
Set<Register> regs = getRegs(unknown); warnAddressSet("Emulator state initialized from UNKNOWN", unknown);
Msg.warn(this, "Emulator state initialized from UNKNOWN: " + unknown +
"(includes registers: " + regs + ")");
}
public byte[] read(long offset, int size) {
if (source != null) {
// TODO: Warn or bail when reading UNKNOWN bytes
// NOTE: Read without regard to gaps
// NOTE: Cannot write those gaps, though!!!
readUninitializedFromSource(cache.getUninitialized(offset, offset + size - 1));
}
RangeSet<UnsignedLong> stillUninit = cache.getUninitialized(offset, offset + size - 1);
if (!stillUninit.isEmpty()) {
warnUninit(stillUninit);
}
return readCached(offset, size);
} }
// Must already have started a transaction // Must already have started a transaction
@@ -238,7 +114,7 @@ public class TraceCachedWriteBytesPcodeExecutorState
long fullLen = range.upperEndpoint().longValue() - lower; long fullLen = range.upperEndpoint().longValue() - lower;
while (fullLen > 0) { while (fullLen > 0) {
int len = MathUtilities.unsignedMin(data.length, fullLen); int len = MathUtilities.unsignedMin(data.length, fullLen);
cache.getData(lower, data, 0, len); bytes.getData(lower, data, 0, len);
buf.position(0); buf.position(0);
buf.limit(len); buf.limit(len);
mem.putBytes(snap, space.getAddress(lower), buf); mem.putBytes(snap, space.getAddress(lower), buf);
@@ -288,47 +164,12 @@ public class TraceCachedWriteBytesPcodeExecutorState
} }
@Override @Override
protected long offsetToLong(byte[] offset) { protected TraceMemorySpace getBacking(AddressSpace space) {
return Utils.bytesToLong(offset, offset.length, language.isBigEndian()); return TraceSleighUtils.getSpaceForExecution(space, trace, thread, frame, false);
} }
@Override @Override
public byte[] longToOffset(AddressSpace space, long l) { protected CachedSpace newSpace(AddressSpace space, TraceMemorySpace backing) {
return arithmetic.fromConst(l, space.getPointerSize()); return new CachedSpace(language, space, backing, snap);
}
protected CachedSpace newSpace(AddressSpace space, TraceMemorySpace source, long snap) {
return new CachedSpace(language, space, source, snap);
}
@Override
protected CachedSpace getForSpace(AddressSpace space, boolean toWrite) {
return spaces.computeIfAbsent(space, s -> {
TraceMemorySpace tms = s.isUniqueSpace() ? null
: TraceSleighUtils.getSpaceForExecution(s, trace, thread, frame, false);
return newSpace(s, tms, snap);
});
}
@Override
protected void setInSpace(CachedSpace space, long offset, int size, byte[] val) {
assert size == val.length;
space.write(offset, val, 0, val.length);
}
@Override
protected byte[] getFromSpace(CachedSpace space, long offset, int size) {
byte[] read = space.read(offset, size);
if (read.length != size) {
Address addr = space.space.getAddress(offset);
throw new UnknownStatePcodeExecutionException("Incomplete read (" + read.length +
" of " + size + " bytes)", language, addr.add(read.length), size - read.length);
}
return read;
}
@Override
public MemBuffer getConcreteBuffer(Address address) {
return new StateMemBuffer(address, getForSpace(address.getAddressSpace(), false));
} }
} }
@@ -59,4 +59,9 @@ public enum TraceMemoryStatePcodeArithmetic implements PcodeArithmetic<TraceMemo
public BigInteger toConcrete(TraceMemoryState value, boolean isContextreg) { public BigInteger toConcrete(TraceMemoryState value, boolean isContextreg) {
throw new AssertionError("Cannot make TraceMemoryState a 'concrete value'"); throw new AssertionError("Cannot make TraceMemoryState a 'concrete value'");
} }
/**
 * Unsupported operation: this implementation unconditionally fails.
 *
 * @param value ignored
 * @return never returns normally
 * @throws AssertionError always
 */
@Override
public TraceMemoryState sizeOf(TraceMemoryState value) {
throw new AssertionError("Cannot get size of a TraceMemoryState");
}
} }
@@ -18,10 +18,9 @@ package ghidra.pcode.exec.trace;
import com.google.common.collect.Range; import com.google.common.collect.Range;
import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emu.AbstractPcodeEmulator; import ghidra.pcode.emu.PcodeEmulator;
import ghidra.pcode.emu.PcodeThread; import ghidra.pcode.emu.PcodeThread;
import ghidra.pcode.exec.PcodeExecutorState; import ghidra.pcode.exec.PcodeExecutorState;
import ghidra.pcode.exec.SleighUseropLibrary;
import ghidra.program.model.lang.Language; import ghidra.program.model.lang.Language;
import ghidra.trace.model.Trace; import ghidra.trace.model.Trace;
import ghidra.trace.model.stack.TraceStack; import ghidra.trace.model.stack.TraceStack;
@@ -31,7 +30,7 @@ import ghidra.trace.model.thread.TraceThreadManager;
/** /**
* An emulator that can read initial state from a trace * An emulator that can read initial state from a trace
*/ */
public class TracePcodeEmulator extends AbstractPcodeEmulator { public class TracePcodeEmulator extends PcodeEmulator {
private static SleighLanguage assertSleigh(Language language) { private static SleighLanguage assertSleigh(Language language) {
if (!(language instanceof SleighLanguage)) { if (!(language instanceof SleighLanguage)) {
throw new IllegalArgumentException("Emulation requires a sleigh language"); throw new IllegalArgumentException("Emulation requires a sleigh language");
@@ -42,16 +41,12 @@ public class TracePcodeEmulator extends AbstractPcodeEmulator {
protected final Trace trace; protected final Trace trace;
protected final long snap; protected final long snap;
public TracePcodeEmulator(Trace trace, long snap, SleighUseropLibrary<byte[]> library) { public TracePcodeEmulator(Trace trace, long snap) {
super(assertSleigh(trace.getBaseLanguage()), library); super(assertSleigh(trace.getBaseLanguage()));
this.trace = trace; this.trace = trace;
this.snap = snap; this.snap = snap;
} }
public TracePcodeEmulator(Trace trace, long snap) {
this(trace, snap, SleighUseropLibrary.nil());
}
protected PcodeExecutorState<byte[]> newState(TraceThread thread) { protected PcodeExecutorState<byte[]> newState(TraceThread thread) {
return new TraceCachedWriteBytesPcodeExecutorState(trace, snap, thread, 0); return new TraceCachedWriteBytesPcodeExecutorState(trace, snap, thread, 0);
} }
@@ -73,7 +73,7 @@ public enum TraceSleighUtils {
paired); paired);
} }
public static byte[] evaluateBytes(SleighExpression expr, Trace trace, long snap, public static byte[] evaluateBytes(PcodeExpression expr, Trace trace, long snap,
TraceThread thread, int frame) { TraceThread thread, int frame) {
SleighLanguage language = expr.getLanguage(); SleighLanguage language = expr.getLanguage();
if (trace.getBaseLanguage() != language) { if (trace.getBaseLanguage() != language) {
@@ -84,14 +84,14 @@ public enum TraceSleighUtils {
return expr.evaluate(executor); return expr.evaluate(executor);
} }
public static BigInteger evaluate(SleighExpression expr, Trace trace, long snap, public static BigInteger evaluate(PcodeExpression expr, Trace trace, long snap,
TraceThread thread, int frame) { TraceThread thread, int frame) {
byte[] bytes = evaluateBytes(expr, trace, snap, thread, frame); byte[] bytes = evaluateBytes(expr, trace, snap, thread, frame);
return Utils.bytesToBigInteger(bytes, bytes.length, expr.getLanguage().isBigEndian(), return Utils.bytesToBigInteger(bytes, bytes.length, expr.getLanguage().isBigEndian(),
false); false);
} }
public static Pair<byte[], TraceMemoryState> evaluateBytesWithState(SleighExpression expr, public static Pair<byte[], TraceMemoryState> evaluateBytesWithState(PcodeExpression expr,
Trace trace, long snap, TraceThread thread, int frame) { Trace trace, long snap, TraceThread thread, int frame) {
SleighLanguage language = expr.getLanguage(); SleighLanguage language = expr.getLanguage();
if (trace.getBaseLanguage() != language) { if (trace.getBaseLanguage() != language) {
@@ -104,7 +104,7 @@ public enum TraceSleighUtils {
return expr.evaluate(executor); return expr.evaluate(executor);
} }
public static Pair<BigInteger, TraceMemoryState> evaluateWithState(SleighExpression expr, public static Pair<BigInteger, TraceMemoryState> evaluateWithState(PcodeExpression expr,
Trace trace, long snap, TraceThread thread, int frame) { Trace trace, long snap, TraceThread thread, int frame) {
Pair<byte[], TraceMemoryState> bytesPair = Pair<byte[], TraceMemoryState> bytesPair =
evaluateBytesWithState(expr, trace, snap, thread, frame); evaluateBytesWithState(expr, trace, snap, thread, frame);
@@ -28,8 +28,7 @@ import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressRangeImpl; import ghidra.program.model.address.AddressRangeImpl;
import ghidra.program.model.lang.Register; import ghidra.program.model.lang.Register;
import ghidra.program.model.listing.CodeUnit; import ghidra.program.model.listing.CodeUnit;
import ghidra.program.model.mem.Memory; import ghidra.program.model.mem.*;
import ghidra.program.model.mem.MemoryAccessException;
import ghidra.program.model.symbol.*; import ghidra.program.model.symbol.*;
import ghidra.trace.database.DBTrace; import ghidra.trace.database.DBTrace;
import ghidra.trace.database.symbol.DBTraceReference; import ghidra.trace.database.symbol.DBTraceReference;
@@ -40,7 +39,6 @@ import ghidra.trace.model.program.TraceProgramView;
import ghidra.trace.model.symbol.TraceReference; import ghidra.trace.model.symbol.TraceReference;
import ghidra.trace.model.symbol.TraceSymbol; import ghidra.trace.model.symbol.TraceSymbol;
import ghidra.trace.model.thread.TraceThread; import ghidra.trace.model.thread.TraceThread;
import ghidra.trace.util.MemBufferAdapter;
import ghidra.util.LockHold; import ghidra.util.LockHold;
import ghidra.util.Saveable; import ghidra.util.Saveable;
import ghidra.util.exception.NoValueException; import ghidra.util.exception.NoValueException;
@@ -20,8 +20,8 @@ import java.nio.ByteOrder;
import ghidra.program.model.address.Address; import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressOverflowException; import ghidra.program.model.address.AddressOverflowException;
import ghidra.program.model.mem.MemBufferAdapter;
import ghidra.program.model.mem.Memory; import ghidra.program.model.mem.Memory;
import ghidra.trace.util.MemBufferAdapter;
public class DBTraceMemBuffer implements MemBufferAdapter { public class DBTraceMemBuffer implements MemBufferAdapter {
private final DBTraceMemorySpace space; private final DBTraceMemorySpace space;
@@ -370,7 +370,7 @@ public class PatchStep implements Step {
protected Map<AddressSpace, SemisparseByteArray> getPatches(Language language) { protected Map<AddressSpace, SemisparseByteArray> getPatches(Language language) {
PcodeProgram prog = SleighProgramCompiler.compileProgram((SleighLanguage) language, PcodeProgram prog = SleighProgramCompiler.compileProgram((SleighLanguage) language,
"schedule", List.of(sleigh + ";"), SleighUseropLibrary.nil()); "schedule", List.of(sleigh + ";"), PcodeUseropLibrary.nil());
// SemisparseArray is a bit overkill, no? // SemisparseArray is a bit overkill, no?
Map<AddressSpace, SemisparseByteArray> result = new TreeMap<>(); Map<AddressSpace, SemisparseByteArray> result = new TreeMap<>();
for (PcodeOp op : prog.getCode()) { for (PcodeOp op : prog.getCode()) {
@@ -94,8 +94,8 @@ public class TracePcodeEmulatorTest extends AbstractGhidraHeadlessIntegrationTes
TraceSleighUtils.buildByteExecutor(tb.trace, 0, thread, 0); TraceSleighUtils.buildByteExecutor(tb.trace, 0, thread, 0);
PcodeProgram initProg = SleighProgramCompiler.compileProgram( PcodeProgram initProg = SleighProgramCompiler.compileProgram(
(SleighLanguage) tb.language, "test", stateInit, (SleighLanguage) tb.language, "test", stateInit,
SleighUseropLibrary.nil()); PcodeUseropLibrary.nil());
exec.execute(initProg, SleighUseropLibrary.nil()); exec.execute(initProg, PcodeUseropLibrary.nil());
} }
return thread; return thread;
} }
@@ -476,13 +476,13 @@ public class TracePcodeEmulatorTest extends AbstractGhidraHeadlessIntegrationTes
public void testInject() throws Throwable { public void testInject() throws Throwable {
try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "x86:LE:64:default")) { try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "x86:LE:64:default")) {
final StringBuilder dumped = new StringBuilder(); final StringBuilder dumped = new StringBuilder();
SleighUseropLibrary<byte[]> library = new AnnotatedSleighUseropLibrary<byte[]>() { PcodeUseropLibrary<byte[]> hexLib = new AnnotatedPcodeUseropLibrary<byte[]>() {
@Override @Override
protected Lookup getMethodLookup() { protected Lookup getMethodLookup() {
return MethodHandles.lookup(); return MethodHandles.lookup();
} }
@SleighUserop @PcodeUserop
public void hexdump(byte[] in) { public void hexdump(byte[] in) {
dumped.append(NumericUtilities.convertBytesToString(in)); dumped.append(NumericUtilities.convertBytesToString(in));
} }
@@ -495,7 +495,12 @@ public class TracePcodeEmulatorTest extends AbstractGhidraHeadlessIntegrationTes
"PUSH 0xdeadbeef", "PUSH 0xdeadbeef",
"PUSH 0xbaadf00d")); "PUSH 0xbaadf00d"));
TracePcodeEmulator emu = new TracePcodeEmulator(tb.trace, 0, library); TracePcodeEmulator emu = new TracePcodeEmulator(tb.trace, 0) {
@Override
protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
return hexLib;
}
};
emu.inject(tb.addr(0x00400006), List.of("hexdump(RSP);")); emu.inject(tb.addr(0x00400006), List.of("hexdump(RSP);"));
PcodeThread<byte[]> emuThread = emu.newThread(thread.getPath()); PcodeThread<byte[]> emuThread = emu.newThread(thread.getPath());
emuThread.overrideContextWithDefault(); emuThread.overrideContextWithDefault();
@@ -519,13 +524,13 @@ public class TracePcodeEmulatorTest extends AbstractGhidraHeadlessIntegrationTes
public void testInjectedInterrupt() throws Throwable { public void testInjectedInterrupt() throws Throwable {
try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "x86:LE:64:default")) { try (ToyDBTraceBuilder tb = new ToyDBTraceBuilder("Test", "x86:LE:64:default")) {
final StringBuilder dumped = new StringBuilder(); final StringBuilder dumped = new StringBuilder();
SleighUseropLibrary<byte[]> library = new AnnotatedSleighUseropLibrary<byte[]>() { PcodeUseropLibrary<byte[]> hexLib = new AnnotatedPcodeUseropLibrary<byte[]>() {
@Override @Override
protected Lookup getMethodLookup() { protected Lookup getMethodLookup() {
return MethodHandles.lookup(); return MethodHandles.lookup();
} }
@SleighUserop @PcodeUserop
public void hexdump(byte[] in) { public void hexdump(byte[] in) {
dumped.append(NumericUtilities.convertBytesToString(in)); dumped.append(NumericUtilities.convertBytesToString(in));
} }
@@ -538,7 +543,12 @@ public class TracePcodeEmulatorTest extends AbstractGhidraHeadlessIntegrationTes
"PUSH 0xdeadbeef", "PUSH 0xdeadbeef",
"PUSH 0xbaadf00d")); "PUSH 0xbaadf00d"));
TracePcodeEmulator emu = new TracePcodeEmulator(tb.trace, 0, library); TracePcodeEmulator emu = new TracePcodeEmulator(tb.trace, 0) {
@Override
protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
return hexLib;
}
};
emu.inject(tb.addr(0x00400006), List.of( emu.inject(tb.addr(0x00400006), List.of(
"hexdump(RSP);", "hexdump(RSP);",
"emu_swi();", "emu_swi();",
@@ -214,7 +214,7 @@ public class TraceSleighUtilsTest extends AbstractGhidraHeadlessIntegrationTest
"<else>", "<else>",
" r1 = 7;", " r1 = 7;",
"<done>"), "<done>"),
SleighUseropLibrary.NIL); PcodeUseropLibrary.NIL);
TraceThread thread; TraceThread thread;
try (UndoableTransaction tid = b.startTransaction()) { try (UndoableTransaction tid = b.startTransaction()) {
thread = b.getOrAddThread("Thread1", 0); thread = b.getOrAddThread("Thread1", 0);
@@ -222,7 +222,7 @@ public class TraceSleighUtilsTest extends AbstractGhidraHeadlessIntegrationTest
new PcodeExecutor<>(sp.getLanguage(), new PcodeExecutor<>(sp.getLanguage(),
BytesPcodeArithmetic.forLanguage(b.language), BytesPcodeArithmetic.forLanguage(b.language),
new TraceBytesPcodeExecutorState(b.trace, 0, thread, 0)); new TraceBytesPcodeExecutorState(b.trace, 0, thread, 0));
sp.execute(executor, SleighUseropLibrary.nil()); sp.execute(executor, PcodeUseropLibrary.nil());
} }
Register r1 = b.language.getRegister("r1"); Register r1 = b.language.getRegister("r1");
@@ -85,9 +85,9 @@ public class ToyDBTraceBuilder implements AutoCloseable {
public void exec(long snap, int frame, TraceThread thread, List<String> sleigh) { public void exec(long snap, int frame, TraceThread thread, List<String> sleigh) {
PcodeProgram program = SleighProgramCompiler.compileProgram((SleighLanguage) language, PcodeProgram program = SleighProgramCompiler.compileProgram((SleighLanguage) language,
"builder", sleigh, SleighUseropLibrary.nil()); "builder", sleigh, PcodeUseropLibrary.nil());
TraceSleighUtils.buildByteExecutor(trace, snap, thread, frame) TraceSleighUtils.buildByteExecutor(trace, snap, thread, frame)
.execute(program, SleighUseropLibrary.nil()); .execute(program, PcodeUseropLibrary.nil());
} }
public Address addr(AddressSpace space, long offset) { public Address addr(AddressSpace space, long offset) {
@@ -367,7 +367,7 @@ public class TraceScheduleTest extends AbstractGhidraHeadlessIntegrationTest {
@Override @Override
public PcodeExecutor<Void> getExecutor() { public PcodeExecutor<Void> getExecutor() {
return new PcodeExecutor<>(TOY_BE_64_LANG, machine.getArithmetic(), getState()) { return new PcodeExecutor<>(TOY_BE_64_LANG, machine.getArithmetic(), getState()) {
public PcodeFrame execute(PcodeProgram program, SleighUseropLibrary<Void> library) { public PcodeFrame execute(PcodeProgram program, PcodeUseropLibrary<Void> library) {
machine.record.add("x:" + name); machine.record.add("x:" + name);
// TODO: Verify the actual effect // TODO: Verify the actual effect
return null; //super.execute(program, library); return null; //super.execute(program, library);
@@ -376,7 +376,7 @@ public class TraceScheduleTest extends AbstractGhidraHeadlessIntegrationTest {
} }
@Override @Override
public SleighUseropLibrary<Void> getUseropLibrary() { public PcodeUseropLibrary<Void> getUseropLibrary() {
return null; return null;
} }
@@ -402,7 +402,7 @@ public class TraceScheduleTest extends AbstractGhidraHeadlessIntegrationTest {
protected final List<String> record = new ArrayList<>(); protected final List<String> record = new ArrayList<>();
public TestMachine() { public TestMachine() {
super(TOY_BE_64_LANG, null, null); super(TOY_BE_64_LANG, null);
} }
@Override @Override
@@ -419,6 +419,11 @@ public class TraceScheduleTest extends AbstractGhidraHeadlessIntegrationTest {
protected PcodeExecutorState<Void> createLocalState(PcodeThread<Void> thread) { protected PcodeExecutorState<Void> createLocalState(PcodeThread<Void> thread) {
return null; return null;
} }
/**
 * Supply the userop library for this test machine
 *
 * @return the library given by {@link PcodeUseropLibrary#nil()}
 */
@Override
protected PcodeUseropLibrary<Void> createUseropLibrary() {
return PcodeUseropLibrary.nil();
}
} }
@Test @Test
@@ -17,6 +17,7 @@ package ghidra.pcode.emu;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import ghidra.app.emulator.Emulator;
import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emulate.*; import ghidra.pcode.emulate.*;
import ghidra.pcode.exec.*; import ghidra.pcode.exec.*;
@@ -29,9 +30,23 @@ import ghidra.util.Msg;
/** /**
* A p-code thread which incorporates per-architecture state modifiers on concrete bytes * A p-code thread which incorporates per-architecture state modifiers on concrete bytes
*
* <p>
* For a complete example of a p-code emulator, see {@link PcodeEmulator}.
*
* <p>
* TODO: "State modifiers" are a feature of the older {@link Emulator}. They are crudely
* incorporated into threads extended from this abstract class, so that they do not yet need to be
* ported to this emulator.
*/ */
public abstract class AbstractModifiedPcodeThread<T> extends DefaultPcodeThread<T> { public abstract class AbstractModifiedPcodeThread<T> extends DefaultPcodeThread<T> {
/**
* Glue for incorporating state modifiers
*
* <p>
* This allows the modifiers to change the context and counter of the thread.
*/
protected class GlueEmulate extends Emulate { protected class GlueEmulate extends Emulate {
public GlueEmulate(SleighLanguage lang, MemoryState s, BreakTable b) { public GlueEmulate(SleighLanguage lang, MemoryState s, BreakTable b) {
super(lang, s, b); super(lang, s, b);
@@ -63,6 +78,12 @@ public abstract class AbstractModifiedPcodeThread<T> extends DefaultPcodeThread<
} }
} }
/**
* Glue for incorporating state modifiers
*
* <p>
* This allows the modifiers to access the thread's state (memory and registers).
*/
protected class GlueMemoryState extends MemoryState { protected class GlueMemoryState extends MemoryState {
public GlueMemoryState(Language language) { public GlueMemoryState(Language language) {
super(language); super(language);
@@ -85,6 +106,12 @@ public abstract class AbstractModifiedPcodeThread<T> extends DefaultPcodeThread<
} }
} }
/**
* Glue for incorporating state modifiers
*
* <p>
* This allows the modifiers to provide userop definitions.
*/
protected class GluePcodeThreadExecutor extends PcodeThreadExecutor { protected class GluePcodeThreadExecutor extends PcodeThreadExecutor {
public GluePcodeThreadExecutor(SleighLanguage language, PcodeArithmetic<T> arithmetic, public GluePcodeThreadExecutor(SleighLanguage language, PcodeArithmetic<T> arithmetic,
PcodeExecutorStatePiece<T, T> state) { PcodeExecutorStatePiece<T, T> state) {
@@ -93,7 +120,7 @@ public abstract class AbstractModifiedPcodeThread<T> extends DefaultPcodeThread<
@Override @Override
public void executeCallother(PcodeOp op, PcodeFrame frame, public void executeCallother(PcodeOp op, PcodeFrame frame,
SleighUseropLibrary<T> library) { PcodeUseropLibrary<T> library) {
// Prefer one in the library. Fall-back to state modifier's impl // Prefer one in the library. Fall-back to state modifier's impl
try { try {
super.executeCallother(op, frame, library); super.executeCallother(op, frame, library);
@@ -112,12 +139,19 @@ public abstract class AbstractModifiedPcodeThread<T> extends DefaultPcodeThread<
protected Address savedCounter; protected Address savedCounter;
/**
* Construct a new thread with the given name belonging to the given machine
*
* @see PcodeMachine#newThread(String)
* @param name the name of the new thread
* @param machine the machine to which the new thread belongs
*/
public AbstractModifiedPcodeThread(String name, AbstractPcodeMachine<T> machine) { public AbstractModifiedPcodeThread(String name, AbstractPcodeMachine<T> machine) {
super(name, machine); super(name, machine);
/** /**
* These two exist as a way to integrate the language-specific injects that are already * These two exist as a way to integrate the language-specific injects that are already
* written for the established concrete emulator. * written for {@link Emulator}.
*/ */
emulate = new GlueEmulate(language, new GlueMemoryState(language), emulate = new GlueEmulate(language, new GlueMemoryState(language),
new BreakTableCallBack(language)); new BreakTableCallBack(language));
@@ -162,7 +196,7 @@ public abstract class AbstractModifiedPcodeThread<T> extends DefaultPcodeThread<
} }
/** /**
* Called by the legacy state modifier to retrieve concrete bytes from the thread's state * Called by a state modifier to read concrete bytes from the thread's state
* *
* @see {@link MemoryState#getChunk(byte[], AddressSpace, long, int, boolean)} * @see {@link MemoryState#getChunk(byte[], AddressSpace, long, int, boolean)}
*/ */
@@ -170,7 +204,7 @@ public abstract class AbstractModifiedPcodeThread<T> extends DefaultPcodeThread<
boolean stopOnUnintialized); boolean stopOnUnintialized);
/** /**
* Called by the legacy state modifier to set concrete bytes in the thread's state * Called by a state modifier to write concrete bytes to the thread's state
* *
* @see {@link MemoryState#setChunk(byte[], AddressSpace, long, int)} * @see {@link MemoryState#setChunk(byte[], AddressSpace, long, int)}
*/ */
@@ -1,35 +0,0 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.exec.BytesPcodeArithmetic;
import ghidra.pcode.exec.SleighUseropLibrary;
/**
* A p-code machine which executes on concrete bytes and incorporates per-architecture state
* modifiers
*/
public abstract class AbstractPcodeEmulator extends AbstractPcodeMachine<byte[]> {
/**
 * Construct an emulator for the given language using the given userop library
 *
 * <p>
 * The arithmetic passed to the super constructor is obtained from
 * {@link BytesPcodeArithmetic#forLanguage}.
 *
 * @param language the language passed to the machine and used to select the arithmetic
 * @param library the userop library passed to the machine
 */
public AbstractPcodeEmulator(SleighLanguage language, SleighUseropLibrary<byte[]> library) {
super(language, BytesPcodeArithmetic.forLanguage(language), library);
}
/**
 * Create a {@link BytesPcodeThread} belonging to this machine
 *
 * @param name the name of the new thread
 * @return the new thread
 */
@Override
protected BytesPcodeThread createThread(String name) {
return new BytesPcodeThread(name, this);
}
}
@@ -25,13 +25,16 @@ import ghidra.util.classfinder.ClassSearcher;
/** /**
* An abstract implementation of {@link PcodeMachine} suitable as a base for most implementations * An abstract implementation of {@link PcodeMachine} suitable as a base for most implementations
*
* <p>
* For a complete example of a p-code emulator, see {@link PcodeEmulator}.
*/ */
public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> { public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
protected final SleighLanguage language; protected final SleighLanguage language;
protected final PcodeArithmetic<T> arithmetic; protected final PcodeArithmetic<T> arithmetic;
protected final SleighUseropLibrary<T> library; protected final PcodeUseropLibrary<T> library;
protected final SleighUseropLibrary<T> stubLibrary; protected final PcodeUseropLibrary<T> stubLibrary;
/* for abstract thread access */ PcodeStateInitializer initializer; /* for abstract thread access */ PcodeStateInitializer initializer;
private PcodeExecutorState<T> sharedState; private PcodeExecutorState<T> sharedState;
@@ -41,12 +44,17 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
protected final Map<Address, PcodeProgram> injects = new HashMap<>(); protected final Map<Address, PcodeProgram> injects = new HashMap<>();
public AbstractPcodeMachine(SleighLanguage language, PcodeArithmetic<T> arithmetic, /**
SleighUseropLibrary<T> library) { * Construct a p-code machine with the given language and arithmetic
*
* @param language the processor language to be emulated
* @param arithmetic the definition of arithmetic p-code ops to be used in emulation
*/
public AbstractPcodeMachine(SleighLanguage language, PcodeArithmetic<T> arithmetic) {
this.language = language; this.language = language;
this.arithmetic = arithmetic; this.arithmetic = arithmetic;
this.library = library;
this.library = createUseropLibrary();
this.stubLibrary = createThreadStubLibrary().compose(library); this.stubLibrary = createThreadStubLibrary().compose(library);
/** /**
@@ -57,6 +65,13 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
this.initializer = getPluggableInitializer(language); this.initializer = getPluggableInitializer(language);
} }
/**
* A factory method to create the userop library shared by all threads in this machine
*
* @return the library
*/
protected abstract PcodeUseropLibrary<T> createUseropLibrary();
@Override @Override
public SleighLanguage getLanguage() { public SleighLanguage getLanguage() {
return language; return language;
@@ -68,26 +83,50 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
} }
@Override @Override
public SleighUseropLibrary<T> getUseropLibrary() { public PcodeUseropLibrary<T> getUseropLibrary() {
return library; return library;
} }
@Override @Override
public SleighUseropLibrary<T> getStubUseropLibrary() { public PcodeUseropLibrary<T> getStubUseropLibrary() {
return stubLibrary; return stubLibrary;
} }
/**
* A factory method to create the (memory) state shared by all threads in this machine
*
* @return the shared state
*/
protected abstract PcodeExecutorState<T> createSharedState(); protected abstract PcodeExecutorState<T> createSharedState();
/**
* A factory method to create the (register) state local to the given thread
*
* @param thread the thread
* @return the thread-local state
*/
protected abstract PcodeExecutorState<T> createLocalState(PcodeThread<T> thread); protected abstract PcodeExecutorState<T> createLocalState(PcodeThread<T> thread);
protected SleighUseropLibrary<T> createThreadStubLibrary() { /**
return new DefaultPcodeThread.SleighEmulationLibrary<T>(null); * A factory method to create a stub library for compiling thread-local SLEIGH source
*
* <p>
* Because threads may introduce p-code userops using libraries unique to that thread, it
* becomes necessary to at least export stub symbols, so that p-code programs can be compiled
* from SLEIGH source before the thread has necessarily been created. A side effect of this
* strategy is that all threads, though they may have independent libraries, must export
* identically-named symbols.
*
* @return the stub library for all threads
*/
protected PcodeUseropLibrary<T> createThreadStubLibrary() {
return new DefaultPcodeThread.PcodeEmulationLibrary<T>(null);
} }
/** /**
* Extension point to override construction of this machine's threads * A factory method to create a new thread in this machine
* *
* @see #newThread(String)
* @param name the name of the new thread * @param name the name of the new thread
* @return the new thread * @return the new thread
*/ */
@@ -95,6 +134,26 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
return new DefaultPcodeThread<>(name, this); return new DefaultPcodeThread<>(name, this);
} }
/**
* Search the classpath for an applicable state initializer
*
* <p>
* If found, the initializer is executed immediately upon creating this machine's shared state
* and upon creating each thread.
*
* <p>
* TODO: This isn't really being used. At one point in development it was used to initialize
* x86's FS_OFFSET and GS_OFFSET registers. Those only exist in p-code, not the real processor,
* and replace what might have been {@code segment(FS)}. There seems more utility in detecting
* when those registers are uninitialized, requiring the user to initialize them, than it is to
* silently initialize them to 0. Unless we find utility in this, it will likely be removed in
* the near future.
*
* @see #doPluggableInitialization()
* @see DefaultPcodeThread#doPluggableInitialization()
* @param language the language requiring pluggable initialization
* @return the initializer
*/
protected static PcodeStateInitializer getPluggableInitializer(Language language) { protected static PcodeStateInitializer getPluggableInitializer(Language language) {
for (PcodeStateInitializer init : ClassSearcher.getInstances(PcodeStateInitializer.class)) { for (PcodeStateInitializer init : ClassSearcher.getInstances(PcodeStateInitializer.class)) {
if (init.isApplicable(language)) { if (init.isApplicable(language)) {
@@ -104,6 +163,11 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
return null; return null;
} }
/**
* Execute the initializer upon this machine, if applicable
*
* @see #getPluggableInitializer(Language)
*/
protected void doPluggableInitialization() { protected void doPluggableInitialization() {
if (initializer != null) { if (initializer != null) {
initializer.initializeMachine(this); initializer.initializeMachine(this);
@@ -148,6 +212,12 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
return sharedState; return sharedState;
} }
/**
* Check for a p-code injection (override) at the given address
*
* @param address the address, usually the program counter
* @return the injected program, most likely {@code null}
*/
protected PcodeProgram getInject(Address address) { protected PcodeProgram getInject(Address address) {
return injects.get(address); return injects.get(address);
} }
@@ -184,7 +254,9 @@ public abstract class AbstractPcodeMachine<T> implements PcodeMachine<T> {
/** /**
* TODO: The template build idea is probably more pertinent here. If a user places a * TODO: The template build idea is probably more pertinent here. If a user places a
* breakpoint with the purpose of single-stepping the p-code of that instruction, it won't * breakpoint with the purpose of single-stepping the p-code of that instruction, it won't
* work, because that p-code is occluded by emu_exec_decoded(). * work, because that p-code is occluded by emu_exec_decoded(). I suppose this could also be
* addressed by formalizing and better exposing the notion of p-code stacks (of p-code
* frames)
*/ */
PcodeProgram pcode = compileSleigh("breakpoint:" + address, List.of( PcodeProgram pcode = compileSleigh("breakpoint:" + address, List.of(
"if (!(" + sleighCondition + ")) goto <nobreak>;", "if (!(" + sleighCondition + ")) goto <nobreak>;",
@@ -17,7 +17,21 @@ package ghidra.pcode.emu;
import ghidra.program.model.address.AddressSpace; import ghidra.program.model.address.AddressSpace;
/**
* A simple p-code thread that operates on concrete bytes
*
* <p>
* For a complete example of a p-code emulator, see {@link PcodeEmulator}. This is the default
* thread for that emulator.
*/
public class BytesPcodeThread extends AbstractModifiedPcodeThread<byte[]> { public class BytesPcodeThread extends AbstractModifiedPcodeThread<byte[]> {
/**
* Construct a new thread
*
* @see PcodeMachine#newThread(String)
* @param name the thread's name
* @param machine the machine to which the thread belongs
*/
public BytesPcodeThread(String name, AbstractPcodeMachine<byte[]> machine) { public BytesPcodeThread(String name, AbstractPcodeMachine<byte[]> machine) {
super(name, machine); super(name, machine);
} }
@@ -18,11 +18,11 @@ package ghidra.pcode.emu;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.*; import java.util.*;
import ghidra.app.emulator.Emulator;
import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.exec.*; import ghidra.pcode.exec.*;
import ghidra.program.model.address.Address; import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Register; import ghidra.program.model.lang.*;
import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.Instruction; import ghidra.program.model.listing.Instruction;
import ghidra.program.model.pcode.PcodeOp; import ghidra.program.model.pcode.PcodeOp;
import ghidra.program.util.ProgramContextImpl; import ghidra.program.util.ProgramContextImpl;
@@ -30,16 +30,51 @@ import ghidra.util.Msg;
/** /**
* The default implementation of {@link PcodeThread} suitable for most applications * The default implementation of {@link PcodeThread} suitable for most applications
*
* <p>
* When emulating on concrete state, consider using {@link AbstractModifiedPcodeThread}, so that
* state modifiers from the older {@link Emulator} are incorporated. In either case, it may be
* worthwhile to examine existing state modifiers to ensure they are appropriately represented in
* any abstract state. It may be necessary to port them.
*
* <p>
* This class implements the control-flow logic of the target machine, cooperating with the p-code
* program flow implemented by the {@link PcodeExecutor}. This implementation exists primarily in
* {@link #beginInstructionOrInject()} and {@link #advanceAfterFinished()}.
*/ */
public class DefaultPcodeThread<T> implements PcodeThread<T> { public class DefaultPcodeThread<T> implements PcodeThread<T> {
protected static class SleighEmulationLibrary<T> extends AnnotatedSleighUseropLibrary<T> {
/**
* A userop library exporting some methods for emulated thread control
*
* <p>
* TODO: Since p-code userops can now receive the executor, it may be better to receive it, cast
* it, and obtain the thread, rather than binding a library to each thread.
*
* @param <T> no particular type, except to match the thread's
*/
public static class PcodeEmulationLibrary<T> extends AnnotatedPcodeUseropLibrary<T> {
private final DefaultPcodeThread<T> thread; private final DefaultPcodeThread<T> thread;
public SleighEmulationLibrary(DefaultPcodeThread<T> thread) { /**
* Construct a library to control the given thread
*
* @param thread the thread
*/
public PcodeEmulationLibrary(DefaultPcodeThread<T> thread) {
this.thread = thread; this.thread = thread;
} }
@SleighUserop /**
* Execute the actual machine instruction at the current program counter
*
* <p>
* Because "injects" override the machine instruction, injects which need to defer to the
* machine instruction must invoke this userop.
*
* @see #emu_skip_decoded()
*/
@PcodeUserop
public void emu_exec_decoded() { public void emu_exec_decoded() {
/** /**
* TODO: This idea of "pushing" a frame could be formalized, and the full stack made * TODO: This idea of "pushing" a frame could be formalized, and the full stack made
@@ -53,7 +88,18 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
thread.frame = saved; thread.frame = saved;
} }
@SleighUserop /**
* Advance the program counter beyond the current machine instruction
*
* <p>
* Because "injects" override the machine instruction, they must specify the effect on the
* program counter, lest the thread become caught in an infinite loop on the inject. To
* emulate fall-through without executing the machine instruction, the inject must invoke
* this userop.
*
* @see #emu_exec_decoded()
*/
@PcodeUserop
public void emu_skip_decoded() { public void emu_skip_decoded() {
PcodeFrame saved = thread.frame; PcodeFrame saved = thread.frame;
thread.dropInstruction(); thread.dropInstruction();
@@ -61,22 +107,46 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
thread.frame = saved; thread.frame = saved;
} }
@SleighUserop /**
* Interrupt execution
*
* <p>
* This immediately throws an {@link InterruptPcodeExecutionException}. To implement
* out-of-band breakpoints, inject an invocation of this userop at the desired address.
*
* @see PcodeMachine#addBreakpoint(Address, String)
*/
@PcodeUserop
public void emu_swi() { public void emu_swi() {
throw new InterruptPcodeExecutionException(null, null); throw new InterruptPcodeExecutionException(null, null);
} }
} }
protected class PcodeThreadExecutor extends PcodeExecutor<T> { /**
* An executor for the p-code thread
*
* <p>
* This executor checks for thread suspension and updates the program counter register upon
* execution of (external) branches.
*/
public class PcodeThreadExecutor extends PcodeExecutor<T> {
volatile boolean suspended = false; volatile boolean suspended = false;
/**
* Construct the executor
*
* @see DefaultPcodeThread#createExecutor()
* @param language the language of the containing machine
* @param arithmetic the arithmetic of the containing machine
* @param state the composite state assigned to the thread
*/
public PcodeThreadExecutor(SleighLanguage language, PcodeArithmetic<T> arithmetic, public PcodeThreadExecutor(SleighLanguage language, PcodeArithmetic<T> arithmetic,
PcodeExecutorStatePiece<T, T> state) { PcodeExecutorStatePiece<T, T> state) {
super(language, arithmetic, state); super(language, arithmetic, state);
} }
@Override @Override
public void stepOp(PcodeOp op, PcodeFrame frame, SleighUseropLibrary<T> library) { public void stepOp(PcodeOp op, PcodeFrame frame, PcodeUseropLibrary<T> library) {
if (suspended) { if (suspended) {
throw new SuspendedPcodeExecutionException(frame, null); throw new SuspendedPcodeExecutionException(frame, null);
} }
@@ -87,6 +157,10 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
protected void branchToAddress(Address target) { protected void branchToAddress(Address target) {
overrideCounter(target); overrideCounter(target);
} }
/**
 * Get the value of {@code instruction} as visible from this executor
 *
 * <p>
 * NOTE(review): {@code instruction} is not declared in this class within the visible code; it
 * is presumably the enclosing thread's current instruction, which the thread treats as
 * {@code null} when the frame resulted from an inject -- confirm against the field declaration.
 *
 * @return the instruction
 */
public Instruction getInstruction() {
return instruction;
}
} }
private final String name; private final String name;
@@ -95,7 +169,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
protected final PcodeArithmetic<T> arithmetic; protected final PcodeArithmetic<T> arithmetic;
protected final ThreadPcodeExecutorState<T> state; protected final ThreadPcodeExecutorState<T> state;
protected final InstructionDecoder decoder; protected final InstructionDecoder decoder;
protected final SleighUseropLibrary<T> library; protected final PcodeUseropLibrary<T> library;
protected final PcodeThreadExecutor executor; protected final PcodeThreadExecutor executor;
protected final Register pc; protected final Register pc;
@@ -110,6 +184,13 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
protected final ProgramContextImpl defaultContext; protected final ProgramContextImpl defaultContext;
protected final Map<Address, PcodeProgram> injects = new HashMap<>(); protected final Map<Address, PcodeProgram> injects = new HashMap<>();
/**
* Construct a new thread
*
* @see AbstractPcodeMachine#createThread(String)
* @param name the name of the thread
* @param machine the machine containing the thread
*/
public DefaultPcodeThread(String name, AbstractPcodeMachine<T> machine) { public DefaultPcodeThread(String name, AbstractPcodeMachine<T> machine) {
this.name = name; this.name = name;
this.machine = machine; this.machine = machine;
@@ -136,14 +217,34 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
this.reInitialize(); this.reInitialize();
} }
/**
* A factory method for the instruction decoder
*
* @param sharedState the machine's shared (memory) state
* @return the instruction decoder
*/
protected SleighInstructionDecoder createInstructionDecoder(PcodeExecutorState<T> sharedState) { protected SleighInstructionDecoder createInstructionDecoder(PcodeExecutorState<T> sharedState) {
return new SleighInstructionDecoder(language, sharedState); return new SleighInstructionDecoder(language, sharedState);
} }
protected SleighUseropLibrary<T> createUseropLibrary() { /**
return new SleighEmulationLibrary<>(this).compose(machine.library); * A factory method to create the complete userop library for this thread
*
* <p>
* The returned library must compose the containing machine's shared userop library. See
* {@link PcodeUseropLibrary#compose(PcodeUseropLibrary)}.
*
* @return the thread's complete userop library
*/
protected PcodeUseropLibrary<T> createUseropLibrary() {
return new PcodeEmulationLibrary<>(this).compose(machine.library);
} }
/**
* A factory method to create the executor for this thread
*
* @return the executor
*/
protected PcodeThreadExecutor createExecutor() { protected PcodeThreadExecutor createExecutor() {
return new PcodeThreadExecutor(language, arithmetic, state); return new PcodeThreadExecutor(language, arithmetic, state);
} }
@@ -203,6 +304,11 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
} }
} }
/**
* Execute the initializer upon this thread, if applicable
*
* @see AbstractPcodeMachine#getPluggableInitializer(Language)
*/
protected void doPluggableInitialization() { protected void doPluggableInitialization() {
if (machine.initializer != null) { if (machine.initializer != null) {
machine.initializer.initializeThread(this); machine.initializer.initializeThread(this);
@@ -258,6 +364,9 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
} }
} }
/**
* Start execution of the instruction or inject at the program counter
*/
protected void beginInstructionOrInject() { protected void beginInstructionOrInject() {
PcodeProgram inj = getInject(counter); PcodeProgram inj = getInject(counter);
if (inj != null) { if (inj != null) {
@@ -271,6 +380,9 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
} }
} }
/**
* Resolve a finished instruction, advancing the program counter if necessary
*/
protected void advanceAfterFinished() { protected void advanceAfterFinished() {
if (instruction == null) { // Frame resulted from an inject if (instruction == null) { // Frame resulted from an inject
frame = null; frame = null;
@@ -297,12 +409,19 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
return instruction; return instruction;
} }
/**
* A sanity-checking measure: Cannot start a new instruction while one is still being executed
*/
protected void assertCompletedInstruction() { protected void assertCompletedInstruction() {
if (frame != null) { if (frame != null) {
throw new IllegalStateException("The current instruction or inject has not finished."); throw new IllegalStateException("The current instruction or inject has not finished.");
} }
} }
/**
* A sanity-checking measure: Cannot finish an instruction unless one is currently being
* executed
*/
protected void assertMidInstruction() { protected void assertMidInstruction() {
if (frame == null) { if (frame == null) {
throw new IllegalStateException("There is no current instruction to finish."); throw new IllegalStateException("There is no current instruction to finish.");
@@ -311,6 +430,10 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
/** /**
* An extension point for hooking instruction execution before the fact * An extension point for hooking instruction execution before the fact
*
* <p>
* This is currently used for incorporating state modifiers from the older {@link Emulator}
* framework. There is likely utility here when porting those to this framework.
*/ */
protected void preExecuteInstruction() { protected void preExecuteInstruction() {
// Extension point // Extension point
@@ -318,6 +441,10 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
/** /**
* An extension point for hooking instruction execution after the fact * An extension point for hooking instruction execution after the fact
*
* <p>
* This is currently used for incorporating state modifiers from the older {@link Emulator}
* framework. There is likely utility here when porting those to this framework.
*/ */
protected void postExecuteInstruction() { protected void postExecuteInstruction() {
// Extension point // Extension point
@@ -380,7 +507,7 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
} }
@Override @Override
public SleighUseropLibrary<T> getUseropLibrary() { public PcodeUseropLibrary<T> getUseropLibrary() {
return library; return library;
} }
@@ -389,6 +516,15 @@ public class DefaultPcodeThread<T> implements PcodeThread<T> {
return state; return state;
} }
/**
* Check for a p-code injection (override) at the given address
*
* <p>
* This checks this thread's particular injects and then defers to the machine's injects.
*
* @param address the address, usually the program counter
* @return the injected program, most likely {@code null}
*/
protected PcodeProgram getInject(Address address) { protected PcodeProgram getInject(Address address) {
PcodeProgram inj = injects.get(address); PcodeProgram inj = injects.get(address);
if (inj != null) { if (inj != null) {
@@ -19,9 +19,12 @@ import ghidra.program.model.address.Address;
import ghidra.program.model.lang.RegisterValue; import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.Instruction; import ghidra.program.model.listing.Instruction;
/**
* A means of decoding machine instructions from the bytes contained in the machine state
*/
public interface InstructionDecoder { public interface InstructionDecoder {
/** /**
* Decode the instruction at the given address using the given context * Decode the instruction starting at the given address using the given context
* *
* <p> * <p>
* This method cannot return null. If a decode error occurs, it must throw an exception. * This method cannot return null. If a decode error occurs, it must throw an exception.
@@ -0,0 +1,143 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu;
import java.util.List;
import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emu.sys.EmuSyscallLibrary;
import ghidra.pcode.exec.*;
import ghidra.program.model.address.Address;
/**
* A p-code machine which executes on concrete bytes and incorporates per-architecture state
* modifiers
*
* <p>
* This is a simple concrete bytes emulator suitable for unit testing and scripting. More complex
* use cases likely benefit by extending this or one of its super types. Likewise, the factory
* methods will likely instantiate classes which extend the default or one of its super types. When
* creating such an extension, it helps to refer to this default implementation to understand the
* overall architecture of an emulator. The emulator was designed using hierarchies of abstract
* classes, each extension incorporating more complexity (and restrictions), finally culminating
* here.
* Every class should be extensible and have overridable factory methods so that those extensions
* can be incorporated into even more capable emulators. Furthermore, many components, e.g.,
* {@link PcodeExecutorState} were designed with composition in mind. Referring to examples, it is
* generally pretty easy to extend the emulator via composition. Search for references to
* {@link PairedPcodeExecutorState} to find such examples.
*
* <pre>{@code
* emulator : PcodeMachine<T>
* - language : SleighLanguage
* - arithmetic : PcodeArithmetic<T>
* - sharedState : PcodeExecutorState<T>
* - library : PcodeUseropLibrary<T>
* - injects : Map<Address, PcodeProgram>
* - threads : List<PcodeThread<T>>
* - [0] : PcodeThread<T>
* - decoder : InstructionDecoder
* - executor : PcodeExecutor<T>
* - frame : PcodeFrame
* - localState : PcodeExecutorState<T>
* - library : PcodeUseropLibrary<T>
* - injects : Map<Address, PcodeProgram>
* - [1] ...
* }</pre>
*
* <p>
* The root object of an emulator is the {@link PcodeEmulator}, usually ascribed the type
* {@link PcodeMachine}. At the very least, it must know the language of the processor it emulates.
* It then derives appropriate arithmetic definitions, a shared (memory) state, and a shared userop
* library. Initially, the machine has no threads. For many use cases creating a single
* {@link PcodeThread} suffices; however, this default implementation models multi-threaded
* execution "out of the box." Upon creation, each thread is assigned a local (register) state, and
* a userop library for controlling that particular thread. The thread's full state and userop
* library are composed from the machine's shared components and that thread's particular
* components. For state, the composition directs memory accesses to the machine's state and
* register accesses to the thread's state. (Accesses to the "unique" space are also directed to the
* thread's state.) This properly emulates the thread semantics of most platforms. For the userop
* library, composition is achieved simply via
* {@link PcodeUseropLibrary#compose(PcodeUseropLibrary)}. Thus, each invocation is directed to the
* library that exports the invoked userop.
*
* <p>
* Each thread creates an {@link InstructionDecoder} and a {@link PcodeExecutor}, providing the
* kernel of p-code emulation for that thread. That executor is bound to the thread's composed
* state, and to the machine's arithmetic. Together, the state and the arithmetic "define" all the
* p-code ops that the executor can invoke. Unsurprisingly, arithmetic operations are delegated to
* the {@link PcodeArithmetic}, and state operations (including memory operations and temporary
* variable access) are delegated to the {@link PcodeExecutorState}. The core execution loop easily
* follows: 1) decode the current instruction, 2) generate that instruction's p-code, 3) feed the
* code to the executor, 4) resolve the outcome and advance the program counter, then 5) repeat. So
* long as the arithmetic and state objects agree in type, a p-code machine can be readily
* implemented to manipulate values of that type. Both arithmetic and state are readily composed
* using {@link PairedPcodeArithmetic} and {@link PairedPcodeExecutorState} or
* {@link PairedPcodeExecutorStatePiece}.
*
* <p>
* This concrete emulator chooses a {@link BytesPcodeArithmetic} based on the endianness of the
* target language. Its threads are {@link BytesPcodeThread}. The shared and thread-local states are
* all {@link BytesPcodeExecutorState}. That state class can be extended to read through to some
* other backing object. For example, the memory state could read through to an imported program
* image, which allows the emulator's memory to be loaded lazily. The default userop library is
* empty. For many use cases, it will be necessary to override {@link #createUseropLibrary()} if
* only to implement the language-defined userops. If needed, simulation of the host operating
* system is typically achieved by implementing the {@code syscall} userop. The fidelity of that
* simulation depends on the use case. See {@link EmuSyscallLibrary} and its implementations to see
* what simulations are available "out of the box."
*
* <p>
* Alternatively, if the target program never invokes system calls directly, but rather via
* system-provided APIs, then it may suffice to stub out those imports. Typically, Ghidra will place
* a "thunk" at each import address with the name of the import. Stubbing an import is accomplished
* by injecting p-code at the import address. See {@link PcodeMachine#inject(Address, List)}. The
* inject will need to replicate the semantics of that call to the desired fidelity.
* <b>IMPORTANT:</b> The inject must also return control to the calling function, usually by
* replicating the conventions of the target platform.
*/
public class PcodeEmulator extends AbstractPcodeMachine<byte[]> {
    /**
     * Create a concrete-bytes emulator for the given language
     *
     * <p>
     * Invoking this constructor directly is the customary way to obtain an emulator. The
     * arithmetic handed to the super class is the one selected by
     * {@link BytesPcodeArithmetic#forLanguage(SleighLanguage)}.
     *
     * @param language the language of the target processor
     */
    public PcodeEmulator(SleighLanguage language) {
        super(language, BytesPcodeArithmetic.forLanguage(language));
    }

    /**
     * Instantiate a fresh concrete-bytes state for this machine's language
     *
     * @return the new, independent state
     */
    private BytesPcodeExecutorState newBytesState() {
        return new BytesPcodeExecutorState(language);
    }

    /**
     * Create a concrete-bytes thread belonging to this machine
     *
     * @param name the name of the new thread
     * @return the new thread
     */
    @Override
    protected BytesPcodeThread createThread(String name) {
        return new BytesPcodeThread(name, this);
    }

    /**
     * Create the state shared among all threads of this machine
     *
     * @return a new bytes state for this machine's language
     */
    @Override
    protected PcodeExecutorState<byte[]> createSharedState() {
        return newBytesState();
    }

    /**
     * Create the state local to the given thread
     *
     * <p>
     * The thread itself is not consulted; every thread receives its own new bytes state for this
     * machine's language.
     *
     * @param thread the thread to receive the state
     * @return a new bytes state for this machine's language
     */
    @Override
    protected PcodeExecutorState<byte[]> createLocalState(PcodeThread<byte[]> thread) {
        return newBytesState();
    }

    /**
     * Create the userop library shared by all threads of this machine
     *
     * @return the library given by {@link PcodeUseropLibrary#nil()}
     */
    @Override
    protected PcodeUseropLibrary<byte[]> createUseropLibrary() {
        return PcodeUseropLibrary.nil();
    }
}
@@ -19,7 +19,7 @@ import java.util.Collection;
import java.util.List; import java.util.List;
import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emu.DefaultPcodeThread.SleighEmulationLibrary; import ghidra.pcode.emu.DefaultPcodeThread.PcodeEmulationLibrary;
import ghidra.pcode.exec.*; import ghidra.pcode.exec.*;
import ghidra.program.model.address.Address; import ghidra.program.model.address.Address;
@@ -48,16 +48,16 @@ public interface PcodeMachine<T> {
* Get the userop library common to all threads in the machine. * Get the userop library common to all threads in the machine.
* *
* <p> * <p>
* Note that threads may have larger libraries, but each should contain all the userops in this * Note that threads may have larger libraries, but each contains all the userops in this
* library. * library.
* *
* @return the userop library * @return the userop library
*/ */
SleighUseropLibrary<T> getUseropLibrary(); PcodeUseropLibrary<T> getUseropLibrary();
/** /**
* Get a userop library which at least declares all userops available in thread userop * Get a userop library which at least declares all userops available in each thread userop
* libraries. * library.
* *
* <p> * <p>
* Thread userop libraries may have more userops than are defined in the machine's userop * Thread userop libraries may have more userops than are defined in the machine's userop
@@ -69,7 +69,7 @@ public interface PcodeMachine<T> {
* *
* @return the stub library * @return the stub library
*/ */
SleighUseropLibrary<T> getStubUseropLibrary(); PcodeUseropLibrary<T> getStubUseropLibrary();
/** /**
* Create a new thread with a default name in this machine * Create a new thread with a default name in this machine
@@ -134,7 +134,7 @@ public interface PcodeMachine<T> {
* will inject it at the given address. The resulting p-code <em>replaces</em> that which would * will inject it at the given address. The resulting p-code <em>replaces</em> that which would
* be executed by decoding the instruction at the given address. This means the machine will not * be executed by decoding the instruction at the given address. This means the machine will not
* decode, nor advance its counter, unless the SLEIGH causes it. In most cases, the SLEIGH will * decode, nor advance its counter, unless the SLEIGH causes it. In most cases, the SLEIGH will
* call {@link SleighEmulationLibrary#emu_exec_decoded()} to cause the machine to decode and * call {@link PcodeEmulationLibrary#emu_exec_decoded()} to cause the machine to decode and
* execute the overridden instruction. * execute the overridden instruction.
* *
* <p> * <p>
@@ -165,7 +165,7 @@ public interface PcodeMachine<T> {
* <p> * <p>
* Breakpoints are implemented at the p-code level using an inject, without modification to the * Breakpoints are implemented at the p-code level using an inject, without modification to the
* emulated image. As such, it cannot coexist with another inject. A client needing to break * emulated image. As such, it cannot coexist with another inject. A client needing to break
* during an inject must use {@link SleighEmulationLibrary#emu_swi()} in the injected SLEIGH. * during an inject must use {@link PcodeEmulationLibrary#emu_swi()} in the injected SLEIGH.
* *
* @param address the address at which to break * @param address the address at which to break
* @param sleighCondition a SLEIGH expression which controls the breakpoint * @param sleighCondition a SLEIGH expression which controls the breakpoint
@@ -23,7 +23,7 @@ import ghidra.util.classfinder.ExtensionPoint;
* *
* <p> * <p>
* As much as possible, it's highly-recommended to use SLEIGH execution to perform any * As much as possible, it's highly-recommended to use SLEIGH execution to perform any
* modifications. This will help it remain portable to various state types. * modifications. This will help it remain agnostic to various state types.
* *
* <p> * <p>
* TODO: Implement annotation-based {@link #isApplicable(Language)}? * TODO: Implement annotation-based {@link #isApplicable(Language)}?
@@ -39,8 +39,8 @@ public interface PcodeStateInitializer extends ExtensionPoint {
boolean isApplicable(Language language); boolean isApplicable(Language language);
/** /**
* The machine's memory state has just been initialized from a "real" target, and additional * The machine's memory state has just been initialized, and additional initialization is needed
* initialization is needed for SLEIGH execution * for SLEIGH execution
* *
* <p> * <p>
* There's probably not much preparation of memory * There's probably not much preparation of memory
@@ -52,8 +52,8 @@ public interface PcodeStateInitializer extends ExtensionPoint {
} }
/** /**
* The thread's register state has just been initialized from a "real" target, and additional * The thread's register state has just been initialized, and additional initialization is
* initialization is needed for SLEIGH execution * needed for SLEIGH execution
* *
* <p> * <p>
* Initialization generally consists of setting "virtual" registers using data from the real * Initialization generally consists of setting "virtual" registers using data from the real
@@ -18,9 +18,10 @@ package ghidra.pcode.emu;
import java.util.List; import java.util.List;
import ghidra.app.plugin.processors.sleigh.SleighLanguage; import ghidra.app.plugin.processors.sleigh.SleighLanguage;
import ghidra.pcode.emu.DefaultPcodeThread.SleighEmulationLibrary; import ghidra.pcode.emu.DefaultPcodeThread.PcodeEmulationLibrary;
import ghidra.pcode.exec.*; import ghidra.pcode.exec.*;
import ghidra.program.model.address.Address; import ghidra.program.model.address.Address;
import ghidra.program.model.lang.Register;
import ghidra.program.model.lang.RegisterValue; import ghidra.program.model.lang.RegisterValue;
import ghidra.program.model.listing.Instruction; import ghidra.program.model.listing.Instruction;
@@ -48,6 +49,7 @@ public interface PcodeThread<T> {
/** /**
* Set the emulator's counter without writing to its machine state * Set the emulator's counter without writing to its machine state
* *
* @see #overrideCounter(Address)
* @param counter the new target address * @param counter the new target address
*/ */
void setCounter(Address counter); void setCounter(Address counter);
@@ -62,23 +64,25 @@ public interface PcodeThread<T> {
/** /**
* Set the emulator's counter and write the PC of its machine state * Set the emulator's counter and write the PC of its machine state
* *
* @see #setCounter(Address)
* @param counter the new target address * @param counter the new target address
*/ */
void overrideCounter(Address counter); void overrideCounter(Address counter);
/** /**
* Adjust the emulator's parsing context without writing to its machine state * Adjust the emulator's decoding context without writing to its machine state
* *
* <p>
* As in {@link RegisterValue#assign(Register, RegisterValue)}, only those bits having a value
* in the given context are applied to the current context.
*
* @see #overrideContext(RegisterValue)
* @param context the new context * @param context the new context
*/ */
void assignContext(RegisterValue context); void assignContext(RegisterValue context);
/** /**
* Adjust the emulator's parsing context without writing to its machine state * Get the emulator's decoding context
*
* @param context the new context void assignContext(RegisterValue context);
*
* /** Get the emulator's parsing context
* *
* @return the context * @return the context
*/ */
@@ -87,6 +91,7 @@ public interface PcodeThread<T> {
/** /**
* Adjust the emulator's parsing context and write the contextreg of its machine state * Adjust the emulator's parsing context and write the contextreg of its machine state
* *
* @see #assignContext(RegisterValue)
* @param context the new context * @param context the new context
*/ */
void overrideContext(RegisterValue context); void overrideContext(RegisterValue context);
@@ -114,12 +119,23 @@ public interface PcodeThread<T> {
* *
* <p> * <p>
* Note because of the way Ghidra and Sleigh handle delay slots, the execution of an instruction * Note because of the way Ghidra and Sleigh handle delay slots, the execution of an instruction
* with delay slots cannot be separated from the following instructions filling them. It and its * with delay slots cannot be separated from the following instructions filling those slots. It
* slots are executed in a single "step." Stepping individual p-code ops which comprise the * and its slotted instructions are executed in a single "step." However, stepping the
* delay-slotted instruction is possible using {@link #stepPcodeOp(PcodeFrame)}. * individual p-code ops is still possible using {@link #stepPcodeOp(PcodeFrame)}.
*/ */
void stepInstruction(); void stepInstruction();
/**
 * Step the thread through several instructions in succession
 *
 * <p>
 * This invokes {@link #stepInstruction()} once per requested step. A count of zero or less
 * performs no steps. Any exception thrown by a step propagates immediately, abandoning the
 * remaining steps.
 *
 * @param count the number of instructions to step
 */
default void stepInstruction(long count) {
    long remaining = count;
    while (remaining > 0) {
        stepInstruction();
        remaining--;
    }
}
/** /**
* Step emulation a single p-code operation * Step emulation a single p-code operation
* *
@@ -130,19 +146,44 @@ public interface PcodeThread<T> {
* completed, the machine's program counter is advanced and the current frame is removed. * completed, the machine's program counter is advanced and the current frame is removed.
* *
* <p> * <p>
* In order to provide the most flexibility, there is no enforcement of various emulation state * Consider the case of a fall-through instruction: The first p-code step decodes the
* on this method. Expect strange behavior for strange call sequences. For example, the caller * instruction and sets up the p-code frame. The second p-code step executes the first p-code op
* should ensure that the given frame was in fact generated from the emulators current * of the frame. Each subsequent p-code step executes the next p-code op until no ops remain.
* instruction. Doing otherwise may cause the emulator to advance in strange ways. * The final p-code step detects the fall-through result, advances the counter, and disposes the
* frame. The next p-code step is actually the first p-code step of the next instruction.
*
* <p>
* Consider the case of a branching instruction: The first p-code step decodes the instruction
* and sets up the p-code frame. The second p-code step executes the first p-code op of the
* frame. Each subsequent p-code step executes the next p-code op until an (external) branch is
* executed. That branch itself sets the program counter appropriately. The final p-code step
* detects the branch result and simply disposes the frame. The next p-code step is actually the
* first p-code step of the next instruction.
*
* <p>
* The decode step in both examples is subject to p-code injections. In order to provide the
* most flexibility, there is no enforcement of various emulation state on this method. Expect
* strange behavior for strange call sequences.
* *
* <p> * <p>
* While this method heeds injects, such injects will obscure the p-code of the instruction * While this method heeds injects, such injects will obscure the p-code of the instruction
* itself. If the inject executes the instruction, the entire instruction will be executed when * itself. If the inject executes the instruction, the entire instruction will be executed when
* stepping the {@link SleighEmulationLibrary#emu_exec_decoded()} userop, since there is not * stepping the {@link PcodeEmulationLibrary#emu_exec_decoded()} userop, since there is not
* (currently) any way to "step into" a userop. * (currently) any way to "step into" a userop.
*/ */
void stepPcodeOp(); void stepPcodeOp();
/**
 * Step the thread through several p-code operations in succession
 *
 * <p>
 * This invokes {@link #stepPcodeOp()} once per requested step. A count of zero or less performs
 * no steps. Any exception thrown by a step propagates immediately, abandoning the remaining
 * steps.
 *
 * @param count the number of p-code operations to step
 */
default void stepPcodeOp(long count) {
    long remaining = count;
    while (remaining > 0) {
        stepPcodeOp();
        remaining--;
    }
}
/** /**
* Get the current frame, if present * Get the current frame, if present
* *
@@ -169,9 +210,9 @@ public interface PcodeThread<T> {
* Execute the next instruction, ignoring injects * Execute the next instruction, ignoring injects
* *
* <p> * <p>
* This method should likely only be used internally. It steps the current instruction, but * <b>WARNING:</b> This method should likely only be used internally. It steps the current
* without any consideration for user injects, e.g., breakpoints. Most clients should call * instruction, but without any consideration for user injects, e.g., breakpoints. Most clients
* {@link #stepInstruction()} instead. * should call {@link #stepInstruction()} instead.
* *
* @throws IllegalStateException if the emulator is still in the middle of an instruction. That * @throws IllegalStateException if the emulator is still in the middle of an instruction. That
* can happen if the machine is interrupted, or if the client has called * can happen if the machine is interrupted, or if the client has called
@@ -201,10 +242,11 @@ public interface PcodeThread<T> {
* If there is a current instruction, drop its frame of execution * If there is a current instruction, drop its frame of execution
* *
* <p> * <p>
* This does not revert any state changes caused by a partially-executed instruction. It is up * <b>WARNING:</b> This does not revert any state changes caused by a partially-executed
* to the client to revert the underlying machine state if desired. Note the thread's program * instruction. It is up to the client to revert the underlying machine state if desired. Note
* counter will not be advanced. Likely, the next call to {@link #stepInstruction()} will * the thread's program counter will not be advanced. Likely, the next call to
* re-start the same instruction. If there is no current instruction, this method has no effect. * {@link #stepInstruction()} will re-start the same instruction. If there is no current
* instruction, this method has no effect.
*/ */
void dropInstruction(); void dropInstruction();
@@ -216,7 +258,7 @@ public interface PcodeThread<T> {
* instruction is finished. By calling this method, you are "donating" the current Java thread * instruction is finished. By calling this method, you are "donating" the current Java thread
* to the emulator. This method will not likely return, but instead only terminates via * to the emulator. This method will not likely return, but instead only terminates via
* exception, e.g., hitting a user breakpoint or becoming suspended. Depending on the use case, * exception, e.g., hitting a user breakpoint or becoming suspended. Depending on the use case,
* this method might be invoked from a dedicated Java thread. * this method might be invoked from a Java thread dedicated to this emulated thread.
*/ */
void run(); void run();
@@ -226,8 +268,8 @@ public interface PcodeThread<T> {
* <p> * <p>
* When {@link #run()} is invoked by a dedicated thread, suspending the pcode thread is the most * When {@link #run()} is invoked by a dedicated thread, suspending the pcode thread is the most
* reliable way to halt execution. Note the emulator will halt mid instruction. If this is not * reliable way to halt execution. Note the emulator will halt mid instruction. If this is not
* desired, then upon catching the exception, the dedicated thread should un-suspend the machine * desired, then upon catching the exception, un-suspend the p-code thread and call
* and call {@link #finishInstruction()}. * {@link #finishInstruction()} or {@link #dropInstruction()}.
*/ */
void setSuspended(boolean suspended); void setSuspended(boolean suspended);
@@ -264,11 +306,12 @@ public interface PcodeThread<T> {
PcodeExecutor<T> getExecutor(); PcodeExecutor<T> getExecutor();
/** /**
* Get the userop library for controlling this thread's execution * Get the complete userop library for this thread, including userops for controlling this
* thread
* *
* @return the library * @return the library
*/ */
SleighUseropLibrary<T> getUseropLibrary(); PcodeUseropLibrary<T> getUseropLibrary();
/** /**
* Get the thread's memory and register state * Get the thread's memory and register state
@@ -283,6 +326,7 @@ public interface PcodeThread<T> {
/** /**
* Override the p-code at the given address with the given SLEIGH source for only this thread * Override the p-code at the given address with the given SLEIGH source for only this thread
* *
* <p>
* This works the same {@link PcodeMachine#inject(Address, List)} but on a per-thread basis. * This works the same {@link PcodeMachine#inject(Address, List)} but on a per-thread basis.
* Where there is both a machine-level and thread-level inject the thread inject takes * Where there is both a machine-level and thread-level inject the thread inject takes
* precedence. Furthermore, the machine-level inject cannot be accessed by the thread-level * precedence. Furthermore, the machine-level inject cannot be accessed by the thread-level
@@ -25,9 +25,15 @@ import ghidra.program.model.listing.Instruction;
import ghidra.util.Msg; import ghidra.util.Msg;
import ghidra.util.task.TaskMonitor; import ghidra.util.task.TaskMonitor;
/**
* The default instruction decoder, based on SLEIGH
*
* <p>
* This simply uses a {@link Disassembler} on the machine's memory state.
*/
public class SleighInstructionDecoder implements InstructionDecoder { public class SleighInstructionDecoder implements InstructionDecoder {
// TODO: Some sort of instruction decode caching? // TODO: Some sort of instruction decode caching?
// Not as imported for stepping small distances // Not as important for stepping small distances
// Could become important when dealing with "full system emulation," if we get there. // Could become important when dealing with "full system emulation," if we get there.
private static final String DEFAULT_ERROR = "Unknown disassembly error"; private static final String DEFAULT_ERROR = "Unknown disassembly error";
@@ -43,6 +49,14 @@ public class SleighInstructionDecoder implements InstructionDecoder {
private Instruction instruction; private Instruction instruction;
/**
* Construct a SLEIGH instruction decoder
*
* @see DefaultPcodeThread#createInstructionDecoder(PcodeExecutorState)
* @param language the language to decode
* @param state the state containing the target program, probably the shared state of the p-code
* machine. It must be possible to obtain concrete buffers on this state.
*/
public SleighInstructionDecoder(Language language, PcodeExecutorState<?> state) { public SleighInstructionDecoder(Language language, PcodeExecutorState<?> state) {
this.state = state; this.state = state;
addrFactory = language.getAddressFactory(); addrFactory = language.getAddressFactory();
@@ -67,6 +81,11 @@ public class SleighInstructionDecoder implements InstructionDecoder {
return instruction; return instruction;
} }
/**
* Compute the "length" of an instruction, including any delay-slotted instructions that follow
*
* @return the length
*/
protected int computeLength() { protected int computeLength() {
int length = instruction.getLength(); int length = instruction.getLength();
int slots = instruction.getDelaySlotDepth(); int slots = instruction.getDelaySlotDepth();
@@ -20,16 +20,35 @@ import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace; import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.mem.MemBuffer; import ghidra.program.model.mem.MemBuffer;
/**
* A p-code executor state that multiplexes shared and thread-local states for use in a
* multi-threaded emulator
*
* @param <T> the type of values stored in the states
*/
public class ThreadPcodeExecutorState<T> implements PcodeExecutorState<T> { public class ThreadPcodeExecutorState<T> implements PcodeExecutorState<T> {
protected final PcodeExecutorState<T> sharedState; protected final PcodeExecutorState<T> sharedState;
protected final PcodeExecutorState<T> localState; protected final PcodeExecutorState<T> localState;
/**
* Create a multiplexed state
*
* @see DefaultPcodeThread#DefaultPcodeThread(String, AbstractPcodeMachine)
* @param sharedState the shared part of the state
* @param localState the thread-local part of the state
*/
public ThreadPcodeExecutorState(PcodeExecutorState<T> sharedState, public ThreadPcodeExecutorState(PcodeExecutorState<T> sharedState,
PcodeExecutorState<T> localState) { PcodeExecutorState<T> localState) {
this.sharedState = sharedState; this.sharedState = sharedState;
this.localState = localState; this.localState = localState;
} }
/**
* Decide whether or not access to the given space is directed to thread-local state
*
* @param space the space
* @return true for thread-local state, false for shared state
*/
protected boolean isThreadLocalSpace(AddressSpace space) { protected boolean isThreadLocalSpace(AddressSpace space) {
return space.isRegisterSpace() || space.isUniqueSpace(); return space.isRegisterSpace() || space.isUniqueSpace();
} }
@@ -71,10 +90,20 @@ public class ThreadPcodeExecutorState<T> implements PcodeExecutorState<T> {
return sharedState.getConcreteBuffer(address); return sharedState.getConcreteBuffer(address);
} }
/**
* Get the shared state
*
* @return the shared state
*/
public PcodeExecutorState<T> getSharedState() { public PcodeExecutorState<T> getSharedState() {
return sharedState; return sharedState;
} }
/**
* Get the thread-local state
*
* @return the thread-local state
*/
public PcodeExecutorState<T> getLocalState() { public PcodeExecutorState<T> getLocalState() {
return localState; return localState;
} }
@@ -0,0 +1,109 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.linux;
import java.util.*;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.unix.*;
import ghidra.pcode.emu.unix.EmuUnixFileSystem.OpenFlag;
import ghidra.program.model.listing.Program;
/**
 * A processor-independent base library of Linux system calls
 *
 * <p>
 * This layer contributes the Linux-specific numeric encodings to the generic UNIX library: the
 * bits of the {@code open} flags and the mapping from simulator error codes to Linux
 * {@code errno} values.
 *
 * @param <T> the type of values processed by the library
 */
public abstract class AbstractEmuLinuxSyscallUseropLibrary<T>
		extends AbstractEmuUnixSyscallUseropLibrary<T> {
	/** Mask selecting the access-mode bits of the open flags */
	public static final int O_MASK_RDWR = 0x3;
	public static final int O_RDONLY = 0x0;
	public static final int O_WRONLY = 0x1;
	public static final int O_RDWR = 0x2;
	public static final int O_CREAT = 0x40;
	public static final int O_TRUNC = 0x200;
	public static final int O_APPEND = 0x400;

	/**
	 * TODO: A map from simulator-defined errno to Linux-defined errno
	 *
	 * <p>
	 * TODO: These may be applicable to all Linux, not just amd64....
	 */
	protected static final Map<Errno, Integer> ERRNOS = Map.of(Errno.EBADF, 9);

	/**
	 * Construct a new library
	 *
	 * @param machine the machine emulating the hardware
	 * @param fs the file system to export to the user-space program
	 * @param program a program containing the syscall definitions and conventions, likely the
	 *            target program
	 */
	public AbstractEmuLinuxSyscallUseropLibrary(PcodeMachine<T> machine, EmuUnixFileSystem<T> fs,
			Program program) {
		super(machine, fs, program);
	}

	/**
	 * Construct a new library
	 *
	 * @param machine the machine emulating the hardware
	 * @param fs the file system to export to the user-space program
	 * @param program a program containing the syscall definitions and conventions, likely the
	 *            target program
	 * @param user the "current user" to simulate
	 */
	public AbstractEmuLinuxSyscallUseropLibrary(PcodeMachine<T> machine, EmuUnixFileSystem<T> fs,
			Program program, EmuUnixUser user) {
		super(machine, fs, program, user);
	}

	/**
	 * Translate Linux {@code open} flag bits into the simulator's flag set
	 *
	 * @param flags the flags as encoded by Linux
	 * @return the equivalent set of simulator flags
	 */
	@Override
	protected Set<OpenFlag> convertFlags(int flags) {
		Set<OpenFlag> converted = EnumSet.noneOf(OpenFlag.class);

		// Exactly one access mode is encoded in the low two bits
		switch (flags & O_MASK_RDWR) {
			case O_RDONLY:
				converted.add(OpenFlag.O_RDONLY);
				break;
			case O_WRONLY:
				converted.add(OpenFlag.O_WRONLY);
				break;
			case O_RDWR:
				converted.add(OpenFlag.O_RDWR);
				break;
			default:
				// The remaining encoding (0x3) maps to no access flag
				break;
		}

		// The remaining flags are independent bits
		if ((flags & O_CREAT) != 0) {
			converted.add(OpenFlag.O_CREAT);
		}
		if ((flags & O_TRUNC) != 0) {
			converted.add(OpenFlag.O_TRUNC);
		}
		if ((flags & O_APPEND) != 0) {
			converted.add(OpenFlag.O_APPEND);
		}
		return converted;
	}

	/**
	 * Look up the Linux errno value for a simulator error code
	 *
	 * @param err the simulator-defined error
	 * @return the Linux-defined errno
	 * @throws AssertionError if {@link #ERRNOS} has no entry for the error
	 */
	@Override
	protected int getErrno(Errno err) {
		Integer linuxErrno = ERRNOS.get(err);
		if (linuxErrno != null) {
			return linuxErrno;
		}
		throw new AssertionError("Do not know errno value for " + err);
	}
}
@@ -0,0 +1,104 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.linux;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import generic.jar.ResourceFile;
import ghidra.framework.Application;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.unix.EmuUnixFileSystem;
import ghidra.pcode.emu.unix.EmuUnixUser;
import ghidra.pcode.exec.PcodeExecutor;
import ghidra.pcode.exec.PcodeExecutorStatePiece;
import ghidra.program.model.data.DataTypeManager;
import ghidra.program.model.data.FileDataTypeManager;
import ghidra.program.model.lang.Register;
import ghidra.program.model.listing.Program;
/**
 * A system call library simulating Linux for amd64 / x86_64
 *
 * <p>
 * The system call number is read from, and error codes are returned in, the {@code RAX}
 * register.
 *
 * @param <T> the type of values processed by the library
 */
public class EmuLinuxAmd64SyscallUseropLibrary<T> extends AbstractEmuLinuxSyscallUseropLibrary<T> {
	protected final Register regRAX;
	/**
	 * The C library type archive opened by {@link #getAdditionalArchives()}.
	 * 
	 * NOTE(review): presumably assigned while the superclass constructor runs, which would be
	 * why it is neither final nor given an initializer -- confirm against the superclass.
	 */
	protected FileDataTypeManager clib64;

	/**
	 * Construct the system call library for Linux-amd64
	 *
	 * @param machine the machine emulating the hardware
	 * @param fs the file system to export to the user-space program
	 * @param program a program containing syscall definitions and conventions, likely the target
	 *            program
	 */
	public EmuLinuxAmd64SyscallUseropLibrary(PcodeMachine<T> machine, EmuUnixFileSystem<T> fs,
			Program program) {
		super(machine, fs, program);
		regRAX = machine.getLanguage().getRegister("RAX");
	}

	/**
	 * Construct the system call library for Linux-amd64
	 *
	 * @param machine the machine emulating the hardware
	 * @param fs the file system to export to the user-space program
	 * @param program a program containing syscall definitions and conventions, likely the target
	 *            program
	 * @param user the "current user" to simulate
	 */
	public EmuLinuxAmd64SyscallUseropLibrary(PcodeMachine<T> machine, EmuUnixFileSystem<T> fs,
			Program program, EmuUnixUser user) {
		super(machine, fs, program, user);
		regRAX = machine.getLanguage().getRegister("RAX");
	}

	/**
	 * Open the generic 64-bit C library type archive
	 *
	 * @return a singleton list containing the opened archive
	 * @throws AssertionError if the archive cannot be found or opened
	 */
	@Override
	protected Collection<DataTypeManager> getAdditionalArchives() {
		String path = "typeinfo/generic/generic_clib_64.gdt";
		try {
			ResourceFile file = Application.findDataFileInAnyModule(path);
			if (file == null) {
				// The lookup yields null when no module provides the file. Fail with a
				// descriptive cause rather than a NullPointerException inside the opener.
				throw new java.io.FileNotFoundException(
					"Cannot find data type archive: " + path);
			}
			clib64 = FileDataTypeManager.openFileArchive(file, false);
			return List.of(clib64);
		}
		catch (IOException e) {
			throw new AssertionError(e);
		}
	}

	/**
	 * Close the archive opened by {@link #getAdditionalArchives()}, if any
	 */
	@Override
	protected void disposeAdditionalArchives() {
		if (clib64 != null) {
			clib64.close();
		}
	}

	/**
	 * Read the system call number from {@code RAX}
	 *
	 * @param state the state from which to read the register
	 * @return the concrete value of the register, as a long
	 */
	@Override
	public long readSyscallNumber(PcodeExecutorStatePiece<T, T> state) {
		return machine.getArithmetic().toConcrete(state.getVar(regRAX)).longValue();
	}

	/**
	 * Place the negated errno into {@code RAX}
	 *
	 * @param executor the executor whose state receives the value
	 * @param errno the (positive) errno value
	 * @return always true
	 */
	@Override
	protected boolean returnErrno(PcodeExecutor<T> executor, int errno) {
		executor.getState()
				.setVar(regRAX,
					executor.getArithmetic().fromConst(-errno, regRAX.getMinimumByteSize()));
		return true;
	}
}
@@ -0,0 +1,127 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.linux;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import generic.jar.ResourceFile;
import ghidra.framework.Application;
import ghidra.pcode.emu.DefaultPcodeThread;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.emu.unix.EmuUnixFileSystem;
import ghidra.pcode.emu.unix.EmuUnixUser;
import ghidra.pcode.exec.*;
import ghidra.program.model.data.DataTypeManager;
import ghidra.program.model.data.FileDataTypeManager;
import ghidra.program.model.lang.Register;
import ghidra.program.model.listing.Program;
/**
 * A system call library simulating Linux for x86 (32-bit)
 *
 * <p>
 * The system call number is read from, and error codes are returned in, the {@code EAX}
 * register. System calls are entered via the {@code swi} userop with interrupt number
 * {@code 0x80}.
 *
 * @param <T> the type of values processed by the library
 */
public class EmuLinuxX86SyscallUseropLibrary<T> extends AbstractEmuLinuxSyscallUseropLibrary<T> {
	protected final Register regEIP;
	protected final Register regEAX;
	/**
	 * The C library type archive opened by {@link #getAdditionalArchives()}.
	 * 
	 * NOTE(review): presumably assigned while the superclass constructor runs, which would be
	 * why it is neither final nor given an initializer -- confirm against the superclass.
	 */
	protected FileDataTypeManager clib32;

	/**
	 * Construct the system call library for Linux-x86
	 *
	 * @param machine the machine emulating the hardware
	 * @param fs the file system to export to the user-space program
	 * @param program a program containing syscall definitions and conventions, likely the target
	 *            program
	 */
	public EmuLinuxX86SyscallUseropLibrary(PcodeMachine<T> machine, EmuUnixFileSystem<T> fs,
			Program program) {
		this(machine, fs, program, EmuUnixUser.DEFAULT_USER);
	}

	/**
	 * Construct the system call library for Linux-x86
	 *
	 * @param machine the machine emulating the hardware
	 * @param fs the file system to export to the user-space program
	 * @param program a program containing syscall definitions and conventions, likely the target
	 *            program
	 * @param user the "current user" to simulate
	 */
	public EmuLinuxX86SyscallUseropLibrary(PcodeMachine<T> machine, EmuUnixFileSystem<T> fs,
			Program program, EmuUnixUser user) {
		super(machine, fs, program, user);
		regEIP = machine.getLanguage().getRegister("EIP");
		regEAX = machine.getLanguage().getRegister("EAX");
	}

	/**
	 * Open the generic 32-bit C library type archive
	 *
	 * @return a singleton list containing the opened archive
	 * @throws AssertionError if the archive cannot be found or opened
	 */
	@Override
	protected Collection<DataTypeManager> getAdditionalArchives() {
		String path = "typeinfo/generic/generic_clib.gdt";
		try {
			ResourceFile file = Application.findDataFileInAnyModule(path);
			if (file == null) {
				// The lookup yields null when no module provides the file. Fail with a
				// descriptive cause rather than a NullPointerException inside the opener.
				throw new java.io.FileNotFoundException(
					"Cannot find data type archive: " + path);
			}
			clib32 = FileDataTypeManager.openFileArchive(file, false);
			return List.of(clib32);
		}
		catch (IOException e) {
			throw new AssertionError(e);
		}
	}

	/**
	 * Close the archive opened by {@link #getAdditionalArchives()}, if any
	 */
	@Override
	protected void disposeAdditionalArchives() {
		if (clib32 != null) {
			clib32.close();
		}
	}

	/**
	 * Read the system call number from {@code EAX}
	 *
	 * @param state the state from which to read the register
	 * @return the concrete value of the register, as a long
	 */
	@Override
	public long readSyscallNumber(PcodeExecutorStatePiece<T, T> state) {
		return machine.getArithmetic().toConcrete(state.getVar(regEAX)).longValue();
	}

	/**
	 * Place the negated errno into {@code EAX}
	 *
	 * @param executor the executor whose state receives the value
	 * @param errno the (positive) errno value
	 * @return always true
	 */
	@Override
	protected boolean returnErrno(PcodeExecutor<T> executor, int errno) {
		executor.getState()
				.setVar(regEAX,
					executor.getArithmetic().fromConst(-errno, regEAX.getMinimumByteSize()));
		return true;
	}

	/**
	 * Implement the {@code swi} (software interrupt) userop
	 *
	 * <p>
	 * Only interrupt {@code 0x80} is recognized; it dispatches to the system call handler. Per
	 * the note in the body, the p-code that follows this userop branches indirectly to its
	 * result, so the fall-through address ({@code EIP} plus the length of the current
	 * instruction) is returned to make execution continue with the next instruction.
	 *
	 * @param executor the executor, which must be a thread executor of a
	 *            {@link DefaultPcodeThread}
	 * @param library the userop library passed on to the system call handler
	 * @param number the interrupt number
	 * @return the address of the instruction following the interrupt
	 * @throws PcodeExecutionException if the interrupt number is not {@code 0x80}
	 */
	@PcodeUserop
	public T swi(@OpExecutor PcodeExecutor<T> executor, @OpLibrary PcodeUseropLibrary<T> library,
			T number) {
		PcodeArithmetic<T> arithmetic = executor.getArithmetic();
		long intNo = arithmetic.toConcrete(number).longValue();
		if (intNo == 0x80) {
			// A CALLIND follows to the return of swi().... OK.
			// We'll just make that "fall through" instead
			// NB. The fall-through address is computed before the system call is dispatched
			T next = executor.getState().getVar(regEIP);
			DefaultPcodeThread<T>.PcodeThreadExecutor te =
				(DefaultPcodeThread<T>.PcodeThreadExecutor) executor;
			int pcSize = regEIP.getNumBytes();
			int iLen = te.getInstruction().getLength();
			next = arithmetic.binaryOp(PcodeArithmetic.INT_ADD, pcSize, pcSize, next, pcSize,
				arithmetic.fromConst(iLen, pcSize));
			syscall(executor, library);
			return next;
		}
		else {
			throw new PcodeExecutionException("Unknown interrupt: 0x" + Long.toString(intNo, 16));
		}
	}
}
@@ -0,0 +1,180 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.sys;
import java.lang.annotation.*;
import java.lang.reflect.Method;
import java.util.*;
import org.apache.commons.collections4.BidiMap;
import org.apache.commons.collections4.bidimap.DualHashBidiMap;
import ghidra.pcode.emu.PcodeMachine;
import ghidra.pcode.exec.AnnotatedPcodeUseropLibrary;
import ghidra.pcode.struct.StructuredSleigh;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DataTypeManager;
import ghidra.program.model.lang.CompilerSpec;
import ghidra.program.model.lang.PrototypeModel;
import ghidra.program.model.listing.Program;
import ghidra.util.Msg;
import utilities.util.AnnotationUtilities;
/**
 * A syscall library wherein Java methods are exported via a special annotation
 *
 * <p>
 * This library is both a system call and a sleigh userop library. To export a system call, it must
 * also be exported as a sleigh userop. The system call dispatcher itself does not require this;
 * however, this library binds each system call by wrapping the userop of the same method name, and
 * so it does require it. In general, exporting system calls as userops will make developers' and
 * users' lives easier. To avoid naming collisions, system calls can be exported with customized
 * names.
 *
 * @param <T> the type of data processed by the library, typically {@code byte[]}
 */
public abstract class AnnotatedEmuSyscallUseropLibrary<T> extends AnnotatedPcodeUseropLibrary<T>
		implements EmuSyscallLibrary<T> {
	public static final String SYSCALL_SPACE_NAME = "syscall";

	// NOTE(review): not consulted anywhere within this class; kept for subclasses that may use it
	protected static final Map<Class<?>, Set<Method>> CACHE_BY_CLASS = new HashMap<>();

	/**
	 * Collect the methods of the given class annotated with {@link EmuSyscall}
	 *
	 * @param cls the class to examine
	 * @return the annotated methods
	 */
	private static Set<Method> collectSyscalls(Class<?> cls) {
		return AnnotationUtilities.collectAnnotatedMethods(EmuSyscall.class, cls);
	}

	/**
	 * An annotation to export a method as a system call in the library.
	 *
	 * <p>
	 * The method must also be exported in the userop library, likely via {@link PcodeUserop}.
	 */
	@Retention(RetentionPolicy.RUNTIME)
	@Target(ElementType.METHOD)
	public @interface EmuSyscall {
		/**
		 * The name of the system call, used to look up its number in the program
		 * 
		 * @return the name
		 */
		String value();
	}

	private final SyscallPcodeUseropDefinition<T> syscallUserop =
		new SyscallPcodeUseropDefinition<>(this);

	protected final PcodeMachine<T> machine;
	protected final CompilerSpec cSpec;
	protected final Program program;
	protected final DataType dtMachineWord;
	protected final Map<Long, EmuSyscallDefinition<T>> syscallMap = new HashMap<>();

	protected final Collection<DataTypeManager> additionalArchives;

	/**
	 * Construct a new library including the "syscall" userop
	 *
	 * <p>
	 * Construction binds the annotated system calls of this class, generates the userops of the
	 * structured-sleigh part, and then binds the annotated system calls of that part. The
	 * additional archives are needed only while generating, so they are disposed as soon as
	 * generation finishes, whether or not it succeeds.
	 *
	 * @param machine the machine using this library
	 * @param program a program from which to derive syscall configuration, conventions, etc.
	 */
	public AnnotatedEmuSyscallUseropLibrary(PcodeMachine<T> machine, Program program) {
		this.machine = machine;
		this.program = program;

		this.cSpec = program.getCompilerSpec();
		// TODO: Take signatures / types from database
		this.dtMachineWord = UseropEmuSyscallDefinition.requirePointerDataType(program);
		mapAndBindSyscalls();

		additionalArchives = getAdditionalArchives();
		StructuredSleigh structured;
		try {
			structured = newStructuredPart();
			structured.generate(ops);
		}
		finally {
			// Release the archives even if generation fails, so they are not leaked
			disposeAdditionalArchives();
		}
		mapAndBindSyscalls(structured.getClass());
	}

	/**
	 * Get additional data type archives to make available to the structured-sleigh part
	 *
	 * <p>
	 * This is invoked during construction, before the structured part is created. The default
	 * provides none.
	 *
	 * @return the archives
	 */
	protected Collection<DataTypeManager> getAdditionalArchives() {
		return List.of();
	}

	/**
	 * Release whatever {@link #getAdditionalArchives()} acquired
	 *
	 * <p>
	 * This is invoked during construction, once the structured part has been generated. The
	 * default does nothing.
	 */
	protected void disposeAdditionalArchives() {
	}

	/**
	 * Create the structured-sleigh part of this library
	 *
	 * @return the structured part
	 */
	protected StructuredPart newStructuredPart() {
		return new StructuredPart();
	}

	/**
	 * Export a userop as a system call
	 *
	 * @param opdef the userop
	 * @param convention the calling convention by which the system call receives its arguments
	 * @return the syscall definition
	 */
	public UseropEmuSyscallDefinition<T> newBoundSyscall(PcodeUseropDefinition<T> opdef,
			PrototypeModel convention) {
		return new UseropEmuSyscallDefinition<>(opdef, program, convention, dtMachineWord);
	}

	/**
	 * Bind the {@link EmuSyscall}-annotated methods of the given class to system call numbers
	 *
	 * <p>
	 * Numbers and calling conventions are taken from the program. An annotated method whose name
	 * has no number in the program is skipped with a warning.
	 *
	 * @param cls the class whose annotated methods to bind
	 * @throws IllegalArgumentException if an annotated method is not also a userop, or if a
	 *             system call number is already bound
	 */
	protected void mapAndBindSyscalls(Class<?> cls) {
		BidiMap<Long, String> mapNames =
			new DualHashBidiMap<>(EmuSyscallLibrary.loadSyscallNumberMap(program));
		Map<Long, PrototypeModel> mapConventions =
			EmuSyscallLibrary.loadSyscallConventionMap(program);
		Set<Method> methods = collectSyscalls(cls);
		for (Method m : methods) {
			String name = m.getAnnotation(EmuSyscall.class).value();
			Long number = mapNames.getKey(name);
			if (number == null) {
				Msg.warn(cls, "Syscall " + name + " has no number");
				continue;
			}
			// The userop is looked up by method name, not by the annotated syscall name
			PcodeUseropDefinition<T> opdef = getUserops().get(m.getName());
			if (opdef == null) {
				throw new IllegalArgumentException("Method " + m.getName() +
					" annotated with @" + EmuSyscall.class.getSimpleName() +
					" must also be a p-code userop");
			}
			PrototypeModel convention = mapConventions.get(number);
			EmuSyscallDefinition<T> existed =
				syscallMap.put(number, newBoundSyscall(opdef, convention));
			if (existed != null) {
				throw new IllegalArgumentException("Duplicate @" +
					EmuSyscall.class.getSimpleName() + " annotated methods with name " + name);
			}
		}
	}

	/**
	 * Bind the {@link EmuSyscall}-annotated methods of this library's own class
	 */
	protected void mapAndBindSyscalls() {
		mapAndBindSyscalls(this.getClass());
	}

	@Override
	public PcodeUseropDefinition<T> getSyscallUserop() {
		return syscallUserop;
	}

	@Override
	public Map<Long, EmuSyscallDefinition<T>> getSyscalls() {
		return syscallMap;
	}

	/**
	 * The structured-sleigh part of the library, with access to the program's data types and
	 * those of the additional archives
	 */
	protected class StructuredPart extends StructuredSleigh {
		protected StructuredPart() {
			super(program);
			addDataTypeSources(additionalArchives);
		}
	}
}
@@ -0,0 +1,37 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.sys;
import java.io.IOException;
/**
 * An I/O error occurred within the simulated system
 *
 * <p>
 * This exception represents I/O errors of the simulated system only. A real {@link IOException}
 * caused by the host implementation should <em>not</em> be wrapped in this exception unless, e.g.,
 * a simulated file system intends to proxy the real file system.
 */
public class EmuIOException extends EmuInvalidSystemCallException {
	/**
	 * Construct an exception with a message only
	 *
	 * @param message a description of the error
	 */
	public EmuIOException(String message) {
		super(message);
	}

	/**
	 * Construct an exception with a message and an underlying cause
	 *
	 * @param message a description of the error
	 * @param cause the exception that led to this one
	 */
	public EmuIOException(String message, Throwable cause) {
		super(message, cause);
	}
}
@@ -0,0 +1,39 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.sys;
/**
 * The emulated program invoked a system call incorrectly
 */
public class EmuInvalidSystemCallException extends EmuSystemException {

	/**
	 * Construct an exception with a message and an underlying cause
	 *
	 * @param message a description of the error
	 * @param cause the exception that led to this one
	 */
	public EmuInvalidSystemCallException(String message, Throwable cause) {
		// The middle argument of the superclass constructor is deliberately left null
		super(message, null, cause);
	}

	/**
	 * Construct an exception with a message only
	 *
	 * @param message a description of the error
	 */
	public EmuInvalidSystemCallException(String message) {
		super(message);
	}

	/**
	 * The system call number was not valid
	 *
	 * @param number the system call number
	 */
	public EmuInvalidSystemCallException(long number) {
		this("Invalid system call number: " + number);
	}
}
@@ -0,0 +1,63 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.sys;
import java.math.BigInteger;
import ghidra.pcode.exec.PcodeArithmetic;
/**
 * A simulated process (or thread group) has exited
 */
public class EmuProcessExitedException extends EmuSystemException {

	/**
	 * Render a status value for display, concretizing it if possible
	 *
	 * <p>
	 * This is best effort: if the arithmetic cannot concretize the value for any reason, the
	 * value's own string representation is used instead. A null status renders as
	 * {@code "null"} rather than causing a {@link NullPointerException}.
	 *
	 * @param <T> the type of values processed by the library
	 * @param arithmetic the machine's arithmetic
	 * @param status the status value to render
	 * @return the rendered status
	 */
	public static <T> String tryConcereteToString(PcodeArithmetic<T> arithmetic, T status) {
		try {
			BigInteger value = arithmetic.toConcrete(status);
			return value.toString();
		}
		catch (Exception e) {
			// Deliberately broad: rendering must never prevent reporting the exit itself
			return String.valueOf(status);
		}
	}

	private final Object status;

	/**
	 * Construct a process-exited exception with the given status code
	 *
	 * <p>
	 * This will attempt to concretize the status according to the given arithmetic, for display
	 * purposes. The original status remains accessible via {@link #getStatus()}
	 *
	 * @param <T> the type of values processed by the library
	 * @param arithmetic the machine's arithmetic
	 * @param status the exit status, as a value of the machine's type
	 */
	public <T> EmuProcessExitedException(PcodeArithmetic<T> arithmetic, T status) {
		super("Process exited with status " + tryConcereteToString(arithmetic, status));
		this.status = status;
	}

	/**
	 * Get the status code as a {@code T} of the throwing machine
	 *
	 * @return the status
	 */
	public Object getStatus() {
		return status;
	}
}
@@ -0,0 +1,268 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.sys;
import java.io.*;
import java.util.*;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import generic.jar.ResourceFile;
import ghidra.framework.Application;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.AnnotatedPcodeUseropLibrary.*;
import ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.lang.PrototypeModel;
import ghidra.program.model.listing.Function;
import ghidra.program.model.listing.Program;
import ghidra.program.model.pcode.Varnode;
import ghidra.program.model.symbol.*;
/**
* A library of system calls
*
* <p>
* A system call library is a collection of p-code executable routines, invoked by a system call
* dispatcher. That dispatcher is represented by {@link #syscall(PcodeExecutor)}, and is exported as
* a sleigh userop. If this interface is "mixed in" with {@link AnnotatedPcodeUseropLibrary}, that
* userop is automatically included in the userop library. The simplest means of implementing a
* syscall library is probably via {@link AnnotatedEmuSyscallUseropLibrary}. It implements this
* interface and extends {@link AnnotatedPcodeUseropLibrary}. In addition, it provides its own
* annotation system for exporting Java methods as system calls.
*
* @param <T> the type of data processed by the system calls, typically {@code byte[]}
*/
public interface EmuSyscallLibrary<T> {
String SYSCALL_SPACE_NAME = "syscall";
String SYSCALL_CONVENTION_NAME = "syscall";
/**
 * Derive a syscall number to name map from the specification in a given file.
 *
 * <p>
 * Each line of the file gives a decimal number and a name, separated by whitespace. Lines
 * starting with {@code #} are comments. Blank lines are ignored.
 *
 * @param dataFileName the file name to be found in a module's data directory
 * @return the map
 * @throws FileNotFoundException if no module provides the file
 * @throws IOException if the file could not be read or is badly formatted
 */
public static Map<Long, String> loadSyscallNumberMap(String dataFileName) throws IOException {
	ResourceFile mapFile = Application.findDataFileInAnyModule(dataFileName);
	if (mapFile == null) {
		throw new FileNotFoundException("Cannot find syscall number map: " + dataFileName);
	}
	Map<Long, String> result = new HashMap<>();

	// Close the reader (and the underlying stream) on every exit path, including errors
	try (BufferedReader reader =
		new BufferedReader(new InputStreamReader(mapFile.getInputStream()))) {
		String line;
		while (null != (line = reader.readLine())) {
			line = line.strip();
			if (line.isEmpty() || line.startsWith("#")) {
				continue;
			}

			String[] parts = line.split("\\s+");
			if (parts.length != 2) {
				throw new IOException(
					"Badly formatted syscall number map: " + dataFileName + ". Line: " + line);
			}

			try {
				result.put(Long.parseLong(parts[0]), parts[1]);
			}
			catch (NumberFormatException e) {
				throw new IOException("Badly formatted syscall number map: " + dataFileName, e);
			}
		}
	}
	return result;
}
/**
 * Scrape functions from the given program's "syscall" space.
 *
 * <p>
 * Symbols are visited starting at the minimum address of the space, stopping at the first
 * symbol that lies in a different space. Each function symbol found is keyed by the offset of
 * its address, which serves as the syscall number.
 *
 * @param program the program
 * @return a map of syscall number to function
 * @throws IllegalStateException if the program has no "syscall" address space
 */
public static Map<Long, Function> loadSyscallFunctionMap(Program program) {
	AddressSpace syscallSpace =
		program.getAddressFactory().getAddressSpace(SYSCALL_SPACE_NAME);
	if (syscallSpace == null) {
		throw new IllegalStateException(
			"No syscall address space in program. Please analyze the syscalls first.");
	}

	Map<Long, Function> functionsByNumber = new HashMap<>();
	SymbolIterator symbols =
		program.getSymbolTable().getSymbolIterator(syscallSpace.getMinAddress(), true);
	while (symbols.hasNext()) {
		Symbol symbol = symbols.next();
		if (symbol.getAddress().getAddressSpace() != syscallSpace) {
			// Iteration has run past the end of the syscall space
			break;
		}
		if (symbol.getSymbolType() == SymbolType.FUNCTION) {
			long number = symbol.getAddress().getOffset();
			functionsByNumber.put(number, (Function) symbol.getObject());
		}
	}
	return functionsByNumber;
}
/**
 * Build a syscall number to name map from the functions in the program's "syscall" space.
 *
 * <p>
 * The functions are gathered by {@link #loadSyscallFunctionMap(Program)}; each one contributes
 * its name under its syscall number.
 *
 * @param program the program, likely analyzed for system calls already
 * @return the map
 */
public static Map<Long, String> loadSyscallNumberMap(Program program) {
	Map<Long, Function> functions = loadSyscallFunctionMap(program);
	return functions.entrySet()
			.stream()
			.collect(Collectors.toMap(entry -> entry.getKey(),
				entry -> entry.getValue().getName()));
}
/**
 * Build a syscall number to calling convention map from the functions in the program's
 * "syscall" space.
 *
 * <p>
 * The functions are gathered by {@link #loadSyscallFunctionMap(Program)}; each one contributes
 * its calling convention under its syscall number.
 *
 * <p>
 * NOTE(review): {@link Collectors#toMap(java.util.function.Function, java.util.function.Function)}
 * rejects null values, so a function without a calling convention would cause a
 * {@link NullPointerException} here -- confirm whether that can occur.
 *
 * @param program the program, likely analyzed for system calls already
 * @return the map of syscall number to calling convention
 */
public static Map<Long, PrototypeModel> loadSyscallConventionMap(Program program) {
	Map<Long, Function> functions = loadSyscallFunctionMap(program);
	return functions.entrySet()
			.stream()
			.collect(Collectors.toMap(entry -> entry.getKey(),
				entry -> entry.getValue().getCallingConvention()));
}
/**
 * A userop definition that forwards to
 * {@link EmuSyscallLibrary#syscall(PcodeExecutor, PcodeUseropLibrary)}
 *
 * <p>
 * The userop is named "syscall" and takes no inputs. Its output and input varnodes are ignored
 * when it executes.
 *
 * @param <T> the type of data processed by the userop, typically {@code byte[]}
 */
final class SyscallPcodeUseropDefinition<T> implements PcodeUseropDefinition<T> {
	/** The syscall library whose dispatcher this userop invokes */
	private final EmuSyscallLibrary<T> dispatcher;

	/**
	 * Wrap the dispatcher of the given syscall library
	 *
	 * @param syslib the syscall library
	 */
	public SyscallPcodeUseropDefinition(EmuSyscallLibrary<T> syslib) {
		dispatcher = syslib;
	}

	@Override
	public String getName() {
		return "syscall";
	}

	@Override
	public int getInputCount() {
		return 0;
	}

	@Override
	public void execute(PcodeExecutor<T> executor, PcodeUseropLibrary<T> library,
			Varnode outVar, List<Varnode> inVars) {
		dispatcher.syscall(executor, library);
	}
}
/**
 * The definition of a system call
 *
 * <p>
 * Definitions are looked up by system call number in the map returned by
 * {@link EmuSyscallLibrary#getSyscalls()} and invoked by
 * {@link EmuSyscallLibrary#syscall(PcodeExecutor, PcodeUseropLibrary)}.
 *
 * @param <T> the type of data processed by the system call, typically {@code byte[]}.
 */
interface EmuSyscallDefinition<T> {
	/**
	 * Invoke the system call
	 *
	 * <p>
	 * A {@link PcodeExecutionException} thrown from here is offered to
	 * {@link EmuSyscallLibrary#handleError(PcodeExecutor, PcodeExecutionException)} by the
	 * dispatcher before it is allowed to propagate.
	 *
	 * @param executor the executor for the system/thread invoking the call
	 * @param library the complete sleigh userop library for the system
	 */
	void invoke(PcodeExecutor<T> executor, PcodeUseropLibrary<T> library);
}
/**
 * Get the definition of the "syscall" userop for inclusion in a {@link PcodeUseropLibrary}.
 *
 * <p>
 * This is meant for implementations that are not an {@link AnnotatedEmuSyscallUseropLibrary}
 * or {@link AnnotatedPcodeUseropLibrary}. The default implementation constructs a new
 * {@link SyscallPcodeUseropDefinition} wrapping this library on every call, so implementors
 * may wish to override it and return a pre-constructed definition instead.
 *
 * @return the syscall userop definition
 */
default PcodeUseropDefinition<T> getSyscallUserop() {
	return new SyscallPcodeUseropDefinition<T>(this);
}
/**
 * Retrieve the desired system call number according to the emulated system's conventions
 *
 * <p>
 * The dispatcher, {@link #syscall(PcodeExecutor, PcodeUseropLibrary)}, calls this with the
 * state of the invoking executor and uses the result as the key into {@link #getSyscalls()}.
 *
 * <p>
 * TODO: This should go away in favor of some specification stored in the emulated program
 * database. Until then, we require system-specific implementations.
 *
 * @param state the executor's state
 * @return the system call number
 */
long readSyscallNumber(PcodeExecutorStatePiece<T, T> state);
/**
 * Try to handle an error, usually by returning it to the user program
 *
 * <p>
 * The dispatcher, {@link #syscall(PcodeExecutor, PcodeUseropLibrary)}, calls this when the
 * invoked system call definition throws a {@link PcodeExecutionException}. If this returns
 * false, the dispatcher rethrows that exception.
 *
 * <p>
 * If the particular error was not expected, it is best practice to return false, causing the
 * emulator to interrupt. Otherwise, some state is set in the machine that, by convention,
 * communicates the error back to the user program.
 *
 * @param executor the executor for the thread that caused the error
 * @param err the error
 * @return true if execution can continue uninterrupted
 */
boolean handleError(PcodeExecutor<T> executor, PcodeExecutionException err);
/**
 * Dispatch a system call on the given executor
 *
 * <p>
 * The executor's state must already be prepared according to the relevant system calling
 * conventions. The system call number is read via
 * {@link #readSyscallNumber(PcodeExecutorStatePiece)}, the matching definition is looked up in
 * {@link #getSyscalls()}, and that definition is invoked. A {@link PcodeExecutionException}
 * raised by the definition is first offered to
 * {@link #handleError(PcodeExecutor, PcodeExecutionException)}; it propagates only if the
 * handler declines it.
 *
 * @param executor the executor
 * @param library the library
 * @throws EmuInvalidSystemCallException if no definition exists for the system call number
 */
@PcodeUserop
default void syscall(@OpExecutor PcodeExecutor<T> executor,
		@OpLibrary PcodeUseropLibrary<T> library) {
	long number = readSyscallNumber(executor.getState());
	EmuSyscallDefinition<T> definition = getSyscalls().get(number);
	if (definition == null) {
		throw new EmuInvalidSystemCallException(number);
	}
	try {
		definition.invoke(executor, library);
	}
	catch (PcodeExecutionException error) {
		if (handleError(executor, error)) {
			// The error was communicated to the user program; carry on
			return;
		}
		throw error;
	}
}
/**
 * Get the map of syscalls by number
 *
 * <p>
 * Note this method will be invoked for every emulated syscall, so it should be a simple
 * accessor. Any computations needed to create the map should be done ahead of time.
 *
 * <p>
 * When the map yields no definition for a requested number, the dispatcher,
 * {@link #syscall(PcodeExecutor, PcodeUseropLibrary)}, throws an
 * {@link EmuInvalidSystemCallException}.
 *
 * @return the system call map
 */
Map<Long, EmuSyscallDefinition<T>> getSyscalls();
}
@@ -0,0 +1,36 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.sys;
import ghidra.pcode.exec.PcodeExecutionException;
import ghidra.pcode.exec.PcodeFrame;
/**
 * A p-code execution exception related to system simulation
 *
 * <p>
 * Each constructor simply forwards its arguments to the corresponding
 * {@link PcodeExecutionException} constructor.
 */
public class EmuSystemException extends PcodeExecutionException {
	/**
	 * Construct an exception with only a message
	 *
	 * @param message the message
	 */
	public EmuSystemException(String message) {
		super(message);
	}

	/**
	 * Construct an exception with a message and a p-code frame
	 *
	 * @param message the message
	 * @param frame the p-code frame, passed through to the superclass
	 */
	public EmuSystemException(String message, PcodeFrame frame) {
		super(message, frame);
	}

	/**
	 * Construct an exception with a message, a p-code frame, and a cause
	 *
	 * @param message the message
	 * @param frame the p-code frame, passed through to the superclass
	 * @param cause the underlying cause of this exception
	 */
	public EmuSystemException(String message, PcodeFrame frame, Throwable cause) {
		super(message, frame, cause);
	}
}
@@ -0,0 +1,121 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.sys;
import java.util.Arrays;
import java.util.List;
import ghidra.lifecycle.Unfinished;
import ghidra.pcode.emu.sys.EmuSyscallLibrary.EmuSyscallDefinition;
import ghidra.pcode.exec.*;
import ghidra.pcode.exec.PcodeUseropLibrary.PcodeUseropDefinition;
import ghidra.program.model.data.DataType;
import ghidra.program.model.lang.PrototypeModel;
import ghidra.program.model.listing.Program;
import ghidra.program.model.listing.VariableStorage;
import ghidra.program.model.pcode.Varnode;
/**
* A system call that is defined by delegating to a p-code userop
*
* <p>
* This is essentially a wrapper of the p-code userop. Knowing the number of inputs to the userop
* and by applying the calling conventions of the platform, the wrapper aliases each parameter's
* storage to its respective parameter of the userop. The userop's output is also aliased to the
* system call's return storage, again as defined by the platform's conventions.
*
* @see AnnotatedEmuSyscallUseropLibrary
* @param <T> the type of values processed by the library
*/
public class UseropEmuSyscallDefinition<T> implements EmuSyscallDefinition<T> {
/**
* Obtain the program's "pointer" data type, throwing an exception if absent
*
* @param program the program
* @return the "pointer" data type
*/
protected static DataType requirePointerDataType(Program program) {
DataType dtPointer = program.getDataTypeManager().getDataType("/pointer");
if (dtPointer == null) {
throw new IllegalArgumentException("No 'pointer' data type in " + program);
}
return dtPointer;
}
protected final PcodeUseropDefinition<T> opdef;
protected final List<Varnode> inVars;
protected final Varnode outVar;
/**
* Construct a syscall definition
*
* @see AnnotatedEmuSyscallUseropLibrary
* @param opdef the wrapped userop definition
* @param program the program, used for storage computation
* @param convention the "syscall" calling convention
* @param dtMachineWord the "pointer" data type
*/
public UseropEmuSyscallDefinition(PcodeUseropDefinition<T> opdef, Program program,
PrototypeModel convention, DataType dtMachineWord) {
this.opdef = opdef;
// getStorageLocations needs return(1) + parameters(n)
int inputCount = opdef.getInputCount();
if (inputCount < 0) {
throw new IllegalArgumentException("Variadic sleigh userop " + opdef.getName() +
" cannot be used as a syscall");
}
DataType[] locs = new DataType[inputCount + 1];
for (int i = 0; i < locs.length; i++) {
locs[i] = dtMachineWord;
}
VariableStorage[] vss = convention.getStorageLocations(program, locs, false);
outVar = getSingleVnStorage(vss[0]);
inVars = Arrays.asList(new Varnode[inputCount]);
for (int i = 0; i < inputCount; i++) {
inVars.set(i, getSingleVnStorage(vss[i + 1]));
}
}
/**
* Assert variable storage is a single varnode, and get that varnode
*
* @param vs the storage
* @return the single varnode
*/
protected Varnode getSingleVnStorage(VariableStorage vs) {
Varnode[] vns = vs.getVarnodes();
if (vns.length != 1) {
Unfinished.TODO();
}
return vns[0];
}
@Override
public void invoke(PcodeExecutor<T> executor, PcodeUseropLibrary<T> library) {
try {
opdef.execute(executor, library, outVar, inVars);
}
catch (PcodeExecutionException e) {
throw e;
}
catch (Throwable e) {
throw new EmuSystemException("Error during syscall", null, e);
}
}
}
@@ -0,0 +1,65 @@
/* ###
* IP: GHIDRA
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ghidra.pcode.emu.unix;
/**
 * A base implementation of a file residing in an emulated file system
 *
 * <p>
 * This models the file itself. Contrast this with {@link DefaultEmuUnixFileHandle}, which is a
 * particular process's handle when opening the file.
 *
 * @param <T> the type of values stored in the file
 */
public abstract class AbstractEmuUnixFile<T> implements EmuUnixFile<T> {
	/** The pathname given at construction */
	protected final String pathname;
	/** The file's {@code stat} structure, obtained from {@link #createStat()} */
	protected final EmuUnixFileStat stat = createStat();

	/**
	 * Construct a new file
	 *
	 * <p>
	 * The given mode is stored into the {@code st_mode} field of the file's {@code stat}
	 * structure.
	 *
	 * <p>
	 * TODO: Technically, a file can be hardlinked to several pathnames, but for simplicity, or for
	 * diagnostics, we let the file know its own original name.
	 *
	 * @see AbstractEmuUnixFileSystem#newFile(String)
	 * @param pathname the pathname of the file
	 * @param mode the mode of the file
	 */
	public AbstractEmuUnixFile(String pathname, int mode) {
		this.pathname = pathname;
		this.stat.st_mode = mode;
	}

	/**
	 * Create the {@code stat} structure for this file
	 *
	 * <p>
	 * This is invoked from the initializer of {@link #stat}, i.e., before the constructor body
	 * runs, so an override cannot rely on {@link #pathname} or on subclass fields being set.
	 *
	 * @return the stat structure.
	 */
	protected EmuUnixFileStat createStat() {
		return new EmuUnixFileStat();
	}

	@Override
	public String getPathname() {
		return this.pathname;
	}

	@Override
	public EmuUnixFileStat getStat() {
		return this.stat;
	}
}

Some files were not shown because too many files have changed in this diff Show More