diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutHeader.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutHeader.java new file mode 100644 index 0000000000..87f22f2323 --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutHeader.java @@ -0,0 +1,574 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ghidra.app.util.bin.format.unixaout; + +import java.io.IOException; + +import ghidra.app.util.bin.BinaryReader; +import ghidra.app.util.bin.ByteProvider; +import ghidra.app.util.bin.StructConverter; +import ghidra.program.model.address.Address; +import ghidra.program.model.data.CategoryPath; +import ghidra.program.model.data.DataType; +import ghidra.program.model.data.Structure; +import ghidra.program.model.data.StructureDataType; +import ghidra.program.model.listing.Listing; +import ghidra.program.model.listing.Program; +import ghidra.program.model.util.CodeUnitInsertionException; +import ghidra.util.exception.DuplicateNameException; + +public class UnixAoutHeader implements StructConverter { + + public enum AoutType { + OMAGIC, NMAGIC, ZMAGIC, QMAGIC, CMAGIC, UNKNOWN + } + + private BinaryReader reader; + + private long binarySize; + private AoutType exeType; + private boolean machineTypeValid; + private String languageSpec; + private String compilerSpec = "default"; + private long pageSize; + + 
private boolean isNetBSD = false; + private boolean isSparc = false; + + private long a_magic; + private long a_text; + private long a_data; + private long a_bss; + private long a_syms; + private long a_entry; + private long a_trsize; + private long a_drsize; + + private long strSize; + + private long txtOffset; + private long datOffset; + private long txtRelOffset; + private long datRelOffset; + private long symOffset; + private long strOffset; + + private long txtAddr; + private long txtEndAddr; + private long datAddr; + private long bssAddr; + + // The Linux implementation of a.out appears to start the .text content at + // file offset 0x400 (rather than immediately after the 0x20 bytes of header + // data). It's possible that there exist Linux a.out executabes with other + // (unintended?) header sizes caused by a mixture of 32- and 64-bit integers + // being padded out in the struct. The intended size is eight 32-bit words + // (32 bytes total.) + private static final int sizeOfExecHeader = 0x20; + private static final int sizeOfLongExecHeader = 0x400; + + /** + * Interprets binary data as an exec header from a UNIX-style a.out executable, + * and validates the contained fields. + * + * @param provider Source of header binary data + * @param isLittleEndian Flag indicating whether to interpret the data as + * little-endian. 
+ * @throws IOException + */ + public UnixAoutHeader(ByteProvider provider, boolean isLittleEndian) throws IOException { + this.reader = new BinaryReader(provider, isLittleEndian); + + this.a_magic = reader.readNextUnsignedInt(); + this.a_text = reader.readNextUnsignedInt(); + this.a_data = reader.readNextUnsignedInt(); + this.a_bss = reader.readNextUnsignedInt(); + this.a_syms = reader.readNextUnsignedInt(); + this.a_entry = reader.readNextUnsignedInt(); + this.a_trsize = reader.readNextUnsignedInt(); + this.a_drsize = reader.readNextUnsignedInt(); + this.binarySize = reader.length(); + + checkExecutableType(); + + // NOTE: In NetBSD/i386 examples of a.out, the "new-style" 32-bit a_magic/midmag + // word + // is written in big-endian regardless of the data endianness in the rest of the + // file. + if ((this.exeType == AoutType.UNKNOWN) && isLittleEndian) { + this.a_magic = Integer.reverseBytes((int) this.a_magic); + checkExecutableType(); + } + + checkMachineTypeValidity(isLittleEndian); + determineTextOffset(reader, isLittleEndian); + + this.datOffset = this.txtOffset + this.a_text; + this.txtRelOffset = this.datOffset + this.a_data; + this.datRelOffset = this.txtRelOffset + this.a_trsize; + this.symOffset = this.datRelOffset + this.a_drsize; + this.strOffset = this.symOffset + this.a_syms; + + this.strSize = 0; + if (this.strOffset != 0 && (this.strOffset + 4) <= binarySize) { + this.strSize = reader.readUnsignedInt(this.strOffset); + } + + determineTextAddr(); + this.txtEndAddr = this.txtAddr + this.a_text; + this.datAddr = (this.exeType == AoutType.OMAGIC) ? this.txtEndAddr : segmentRound(this.txtEndAddr); + this.bssAddr = this.datAddr + this.a_data; + } + + public BinaryReader getReader() { + return this.reader; + } + + /** + * Returns the processor/language specified by this header. + */ + public String getLanguageSpec() { + return this.languageSpec; + } + + /** + * Returns the compiler used by this executable. 
This is left as 'default' for + * all machine types other than i386, where it is assumed to be gcc. + */ + public String getCompilerSpec() { + return this.compilerSpec; + } + + /** + * Returns the enumerated type of executable contained in this A.out file. + */ + public AoutType getExecutableType() { + return this.exeType; + } + + /** + * Returns an indication of whether this header's fields are all valid; this + * includes the machine type, executable type, and section offsets. + */ + public boolean isValid() { + return isMachineTypeValid() && + (this.exeType != AoutType.UNKNOWN) && + areOffsetsValid(); + } + + public long getTextSize() { + return this.a_text; + } + + public long getDataSize() { + return this.a_data; + } + + public long getBssSize() { + return this.a_bss; + } + + public long getSymSize() { + return this.a_syms; + } + + public long getStrSize() { + return this.strSize; + } + + public long getEntryPoint() { + return this.a_entry; + } + + public long getTextRelocSize() { + return this.a_trsize; + } + + public long getDataRelocSize() { + return this.a_drsize; + } + + public long getTextOffset() { + return this.txtOffset; + } + + public long getDataOffset() { + return this.datOffset; + } + + public long getTextRelocOffset() { + return this.txtRelOffset; + } + + public long getDataRelocOffset() { + return this.datRelOffset; + } + + public long getSymOffset() { + return this.symOffset; + } + + public long getStrOffset() { + return this.strOffset; + } + + public long getTextAddr() { + return this.txtAddr; + } + + public long getDataAddr() { + return this.datAddr; + } + + public long getBssAddr() { + return this.bssAddr; + } + + /** + * Checks the magic word in the header for a known machine type ID, and sets the + * languageSpec string accordingly. 
+ */ + private void checkMachineTypeValidity(boolean readingAsLittleEndian) { + + this.machineTypeValid = true; + this.pageSize = 4096; + final short machtype = (short) ((this.a_magic >> 16) & 0xFF); + final String readEndianness = readingAsLittleEndian ? "LE" : "BE"; + + switch (machtype) { + /** + * Motorola 68K family + */ + case UnixAoutMachineType.M_68010: + this.languageSpec = "68000:BE:32:MC68010"; + break; + case UnixAoutMachineType.M_68020: + this.languageSpec = "68000:BE:32:MC68020"; + break; + case UnixAoutMachineType.M_M68K_NETBSD: + this.pageSize = 8192; + case UnixAoutMachineType.M_M68K4K_NETBSD: + this.isNetBSD = true; + this.languageSpec = "68000:BE:32:default"; + break; + + /** + * SPARC family + */ + case UnixAoutMachineType.M_SPARC_NETBSD: + this.isNetBSD = true; + case UnixAoutMachineType.M_SPARC: + case UnixAoutMachineType.M_SPARCLET: + this.isSparc = true; + this.pageSize = 8192; + this.languageSpec = "sparc:BE:32:default"; + break; + case UnixAoutMachineType.M_SPARC64_NETBSD: + this.isNetBSD = true; + this.isSparc = true; + this.languageSpec = "sparc:BE:64:default"; + break; + + /** + * MIPS family + */ + case UnixAoutMachineType.M_PMAX_NETBSD: + this.isNetBSD = true; + case UnixAoutMachineType.M_MIPS1: + case UnixAoutMachineType.M_MIPS2: + case UnixAoutMachineType.M_R3000: + this.languageSpec = "MIPS:LE:32:default"; + break; + case UnixAoutMachineType.M_MIPS: + this.languageSpec = "MIPS:BE:32:default"; + break; + + /** + * National Semiconductor NS32000 family + */ + case UnixAoutMachineType.M_532_NETBSD: + this.isNetBSD = true; + case UnixAoutMachineType.M_NS32032: + case UnixAoutMachineType.M_NS32532: + this.languageSpec = "UNKNOWN:LE:32:default"; + break; + + /** + * x86 family + */ + case UnixAoutMachineType.M_386_NETBSD: + this.isNetBSD = true; + case UnixAoutMachineType.M_386: + case UnixAoutMachineType.M_386_DYNIX: + this.compilerSpec = "gcc"; + this.languageSpec = "x86:LE:32:default"; + break; + case 
UnixAoutMachineType.M_X86_64_NETBSD: + this.compilerSpec = "gcc"; + this.languageSpec = "x86:LE:64:default"; + break; + + /** + * ARM family + */ + case UnixAoutMachineType.M_ARM6_NETBSD: + this.isNetBSD = true; + case UnixAoutMachineType.M_ARM: + this.languageSpec = "ARM:" + readEndianness + ":32:default"; + break; + case UnixAoutMachineType.M_AARCH64: + this.languageSpec = "AARCH64:" + readEndianness + ":64:default"; + break; + + /** + * RISC family + */ + case UnixAoutMachineType.M_OR1K: + this.languageSpec = "UNKNOWN:BE:32:default"; + break; + case UnixAoutMachineType.M_RISCV: + this.languageSpec = "RISCV:LE:32:default"; + break; + case UnixAoutMachineType.M_HPPA_OPENBSD: + this.languageSpec = "pa-risc:BE:32:default"; + break; + + /** + * PowerPC family + */ + case UnixAoutMachineType.M_POWERPC_NETBSD: + this.isNetBSD = true; + this.languageSpec = "PowerPC:" + readEndianness + ":32:default"; + break; + case UnixAoutMachineType.M_POWERPC64: + this.languageSpec = "PowerPC:" + readEndianness + ":64:default"; + break; + + /** + * SuperH family + * NOTE: It's unclear if there is support for SuperH SH-3 or SH-5 cores; + * the primary SuperH language seems to support SH-1 and SH-2 variants + * and the alternative is the SuperH4 language. 
+ */ + case UnixAoutMachineType.M_SH3: + case UnixAoutMachineType.M_SH5_32: + this.languageSpec = "SuperH:BE:32:default"; + break; + case UnixAoutMachineType.M_SH5_64: + this.languageSpec = "SuperH:BE:64:default"; + break; + + /** + * VAX family + */ + case UnixAoutMachineType.M_VAX_NETBSD: + this.pageSize = 512; + case UnixAoutMachineType.M_VAX4K_NETBSD: + this.isNetBSD = true; + this.languageSpec = "UNKNOWN:LE:32:default"; + break; + + /** + * Other + */ + case UnixAoutMachineType.M_CRIS: + this.languageSpec = "UNKNOWN:LE:32:default"; + break; + case UnixAoutMachineType.M_ALPHA_NETBSD: + this.isNetBSD = true; + case UnixAoutMachineType.M_IA64: + this.languageSpec = "UNKNOWN:" + readEndianness + ":64:default"; + break; + case UnixAoutMachineType.M_29K: + case UnixAoutMachineType.M_88K_OPENBSD: + this.languageSpec = "UNKNOWN:" + readEndianness + ":32:default"; + break; + case UnixAoutMachineType.M_UNKNOWN: + this.languageSpec = "UNKNOWN:" + readEndianness + ":32:default"; + break; + default: + this.machineTypeValid = false; + } + + // Check that the detected architecture's endianness matches the endianness + // with which we're reading the file; if there's a mismatch, clear the + // machineTypeValid flag because this was evidently a false reading. + if (this.machineTypeValid) { + String[] languageTokens = this.languageSpec.split(":"); + if ((languageTokens.length < 2) || + !languageTokens[1].equalsIgnoreCase(readEndianness)) { + this.machineTypeValid = false; + } + } + } + + /** + * Returns a flag indicating whether the header contains a known machine type + * ID. + */ + private boolean isMachineTypeValid() { + return this.machineTypeValid; + } + + /** + * Returns a flag indicating whether this header contains a representation of a + * valid executable type. 
+ */ + private void checkExecutableType() { + final short exetypeMagic = (short) (this.a_magic & 0xFFFF); + + switch (exetypeMagic) { + case 0x111: // 0421: core file + this.exeType = AoutType.CMAGIC; + break; + case 0x108: // 0410: pure executable + this.exeType = AoutType.NMAGIC; + break; + case 0x107: // 0407: object file or impure executable + this.exeType = AoutType.OMAGIC; + break; + case 0x0CC: // 0314: demand-paged exe w/ header in .text + this.exeType = AoutType.QMAGIC; + break; + case 0x10B: // 0413: demand-paged executable + this.exeType = AoutType.ZMAGIC; + break; + default: + this.exeType = AoutType.UNKNOWN; + } + } + + /** + * Determines the offset in the binary file at which the .text segment begins. + * This routine should attempt to replicate the logic from the N_TXTOFF macro + * that appears in the different incarnations of a.out.h. + * + * NOTE: The FreeBSD imgact_aout.h implies that, if the a_magic word contains + * ZMAGIC when read as little endian, the file offset for .text is __LDPGSZ; + * otherwise, if a_magic contains ZMAGIC when read as big endian, the file + * offset + * for .text is 0. Indeed, it looks like NetBSD uses big-endian ordering for + * the a_magic word even when the file contains code for a little-endian + * processor. + */ + private void determineTextOffset(BinaryReader reader, boolean isLittleEndian) { + + boolean isLinuxStyle = false; + final long fixedContentSize = this.a_text + this.a_data + this.a_syms + this.a_trsize + this.a_drsize; + + // If the file is large enough to read at least one word beyond a long-style + // header + // of 0x400 bytes plus all the sections whose sizes are specified in the + // header... + if (reader.isValidIndex(sizeOfLongExecHeader + fixedContentSize)) { + try { + // The word that immediately follows the symbol table will contain the size of + // the string table. 
+ final long stringTableLength = reader.readUnsignedInt(sizeOfLongExecHeader + fixedContentSize); + final long longHeaderExpectedFileSize = sizeOfLongExecHeader + fixedContentSize + stringTableLength; + + // If the size of the file exactly matches what we'd expect if the .text content + // starts at offset 0x400 rather than 0, this implies that the a.out is a + // Linux-style binary. + if (this.binarySize == longHeaderExpectedFileSize) { + isLinuxStyle = true; + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + if (isLinuxStyle && (this.exeType == AoutType.ZMAGIC)) { + // Linux ZMAGICs don't start the .text content until 0x400 + this.txtOffset = sizeOfLongExecHeader; + + } else if ((this.exeType == AoutType.QMAGIC) || + (this.exeType == AoutType.ZMAGIC)) { + // ZMAGIC for other platforms (as well as QMAGIC) include the file header itself + // in the .text content + this.txtOffset = 0; + + } else { + // Otherwise, the .text content starts immediately after the 0x20-byte header + this.txtOffset = sizeOfExecHeader; + } + } + + /** + * Uses the combination of executable type and architecture to set the + * appropriate + * base address of the .text segment when loaded. + */ + private void determineTextAddr() { + + if ((this.isSparc && (this.exeType == AoutType.NMAGIC)) || + (this.isNetBSD) || + (this.exeType == AoutType.QMAGIC)) { + this.txtAddr = this.pageSize; + + } else { + this.txtAddr = 0; + } + } + + /** + * Returns a flag indicating whether all the file offsets in the header + * (for the segments of nonzero size) fall within the size of the file. + */ + private boolean areOffsetsValid() { + // Note that we can't check the string table validity because, if it + // doesn't exist, its offset will be computed to be beyond the end of + // the file. The string table is also not given an explicit size in + // the header. 
+ boolean status = ((this.a_text == 0) || (this.txtOffset < this.binarySize) && + ((this.a_data == 0) || (this.datOffset < this.binarySize)) && + ((this.a_trsize == 0) || (this.txtRelOffset < this.binarySize)) && + ((this.a_drsize == 0) || (this.datRelOffset < this.binarySize)) && + ((this.a_syms == 0) || (this.symOffset < this.binarySize))); + return status; + } + + /** + * Rounds the provided address up to the next page boundary. + */ + private long segmentRound(long addr) { + final long mask = this.pageSize - 1; + long rounded = ((addr + mask) & ~mask); + return rounded; + } + + @Override + public DataType toDataType() throws DuplicateNameException, IOException { + String dtName = "exec"; + Structure struct = new StructureDataType(new CategoryPath("/AOUT"), dtName, 0); + struct.add(DWORD, "a_midmag", null); + struct.add(DWORD, "a_text", null); + struct.add(DWORD, "a_data", null); + struct.add(DWORD, "a_bss", null); + struct.add(DWORD, "a_syms", null); + struct.add(DWORD, "a_entry", null); + struct.add(DWORD, "a_trsize", null); + struct.add(DWORD, "a_drsize", null); + + return struct; + } + + public void markup(Program program, Address headerAddress) throws CodeUnitInsertionException, DuplicateNameException, IOException { + Listing listing = program.getListing(); + listing.createData(headerAddress, toDataType()); + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutMachineType.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutMachineType.java new file mode 100644 index 0000000000..102f8e861e --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutMachineType.java @@ -0,0 +1,87 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/**
 * Machine ID values that can appear in the machine-type field of an a.out magic word.
 *
 * <p>The values are collected from several sources, including NetBSD's aout_mids.h and
 * the GNU BFD Library's libaout.h.
 *
 * <p>Some a.out header files also list a few HP values (for the 300 Series, 800 Series,
 * etc.) that exceed a full eight-bit count. Occasionally this is accounted for by
 * extending the Machine ID field of the a_magic word two bits higher, leaving only six
 * bits in the MSB for other flags. This may not be correct, because those high-value HP
 * machine IDs probably only appear in HP UX binaries, which use a different format. (This
 * format is still named "a.out", but has a completely different header and internal
 * organization.) The 10-bit Machine ID field would also interfere with flags used by
 * VxWorks, NetBSD, and probably others.
 *
 * <p>Machine IDs that should only appear in the incompatible HP UX a.out format:
 * <ul>
 * <li>HP300 (68020+68881): 0x12c</li>
 * <li>HP200/300: 0x20c</li>
 * <li>HP800: 0x20b</li>
 * </ul>
 */
public class UnixAoutMachineType {

    public static final short M_UNKNOWN = 0x00;
    public static final short M_68010 = 0x01;
    public static final short M_68020 = 0x02;
    public static final short M_SPARC = 0x03;
    public static final short M_R3000 = 0x04;

    public static final short M_NS32032 = 0x40;
    public static final short M_NS32532 = 0x45;

    public static final short M_386 = 0x64;
    /** AMD 29000 */
    public static final short M_29K = 0x65;
    /** i386-based Sequent machine running DYNIX */
    public static final short M_386_DYNIX = 0x66;
    public static final short M_ARM = 0x67;

    /** Sparclet = M_SPARC + 128 */
    public static final short M_SPARCLET = 0x83;
    /** NetBSD/i386 */
    public static final short M_386_NETBSD = 0x86;
    /** NetBSD/m68k, 8K pages */
    public static final short M_M68K_NETBSD = 0x87;
    /** NetBSD/m68k, 4K pages */
    public static final short M_M68K4K_NETBSD = 0x88;
    /** NetBSD/ns32k */
    public static final short M_532_NETBSD = 0x89;
    /** NetBSD/sparc */
    public static final short M_SPARC_NETBSD = 0x8A;
    /** NetBSD/pmax (MIPS little-endian) */
    public static final short M_PMAX_NETBSD = 0x8B;
    /** NetBSD/VAX (1K pages?) */
    public static final short M_VAX_NETBSD = 0x8C;
    /** NetBSD/Alpha */
    public static final short M_ALPHA_NETBSD = 0x8D;
    /** big-endian */
    public static final short M_MIPS = 0x8E;
    /** NetBSD/arm32 */
    public static final short M_ARM6_NETBSD = 0x8F;

    public static final short M_SH3 = 0x91;
    /** PowerPC 64 */
    public static final short M_POWERPC64 = 0x94;
    /** NetBSD/PowerPC (big-endian) */
    public static final short M_POWERPC_NETBSD = 0x95;
    /** NetBSD/VAX (4K pages) */
    public static final short M_VAX4K_NETBSD = 0x96;
    /** MIPS R2000/R3000 */
    public static final short M_MIPS1 = 0x97;
    /** MIPS R4000/R6000 */
    public static final short M_MIPS2 = 0x98;
    /** OpenBSD/m88k */
    public static final short M_88K_OPENBSD = 0x99;
    /** OpenBSD/hppa (PA-RISC) */
    public static final short M_HPPA_OPENBSD = 0x9A;
    /** SuperH 64-bit */
    public static final short M_SH5_64 = 0x9B;
    /** NetBSD/sparc64 */
    public static final short M_SPARC64_NETBSD = 0x9C;
    /** NetBSD/amd64 */
    public static final short M_X86_64_NETBSD = 0x9D;
    /** SuperH 32-bit (ILP 32) */
    public static final short M_SH5_32 = 0x9E;
    /** Itanium */
    public static final short M_IA64 = 0x9F;

    /** ARM AARCH64 */
    public static final short M_AARCH64 = 0xB7;
    /** OpenRISC 1000 */
    public static final short M_OR1K = 0xB8;
    /** RISC-V */
    public static final short M_RISCV = 0xB9;
    /** Axis ETRAX CRIS */
    public static final short M_CRIS = 0xFF;
}
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ghidra.app.util.bin.format.unixaout; + +import ghidra.app.util.opinion.UnixAoutProgramLoader; + +/** + * Represents the content of a single entry in the relocation table format used + * by the UNIX a.out executable. + */ +public class UnixAoutRelocation { + public long address; + public int symbolNum; + public byte flags; + public boolean pcRelativeAddressing; + public byte pointerLength; + public boolean extern; + public boolean baseRelative; + public boolean jmpTable; + public boolean relative; + public boolean copy; + + /** + * + * @param address First of the two words in the table entry (a 32-bit address) + * @param flags Second of the two words in the table entry (containing several + * bitfields) + */ + public UnixAoutRelocation(long address, long flags, boolean bigEndian) { + this.address = (0xFFFFFFFF & address); + + if (bigEndian) { + this.symbolNum = (int) ((flags & 0xFFFFFF00) >> 8); + this.flags = (byte) (flags & 0xFF); + this.pcRelativeAddressing = ((flags & 0x80) != 0); + this.pointerLength = (byte) (1 << ((flags & 0x60) >> 5)); + this.extern = ((flags & 0x10) != 0); + this.baseRelative = ((flags & 0x8) != 0); + this.jmpTable = ((flags & 0x4) != 0); + this.relative = ((flags & 0x2) != 0); + this.copy = ((flags & 0x1) != 0); + } else { + this.symbolNum = (int) (flags & 0x00FFFFFF); + this.flags = (byte) ((flags & 0xFF000000) >> 24); + this.pcRelativeAddressing = ((this.flags & 0x01) != 0); + this.pointerLength = (byte) (1 << ((this.flags & 0x06) >> 1)); + this.extern = ((this.flags & 0x08) != 0); + 
this.baseRelative = ((this.flags & 0x10) != 0); + this.jmpTable = ((this.flags & 0x20) != 0); + this.relative = ((this.flags & 0x40) != 0); + this.copy = ((this.flags & 0x80) != 0); + } + } + + public String getSymbolName(UnixAoutSymbolTable symtab) { + if (extern == true && symbolNum < symtab.size()) { + return symtab.get(symbolNum).name; + } else if (extern == false) { + switch (symbolNum) { + case 4: + return UnixAoutProgramLoader.dot_text; + case 6: + return UnixAoutProgramLoader.dot_data; + case 8: + return UnixAoutProgramLoader.dot_bss; + } + } + + return null; + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutRelocationTable.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutRelocationTable.java new file mode 100644 index 0000000000..a782345075 --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutRelocationTable.java @@ -0,0 +1,110 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.util.bin.format.unixaout; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; + +import ghidra.app.util.bin.BinaryReader; +import ghidra.app.util.bin.StructConverter; +import ghidra.program.model.data.ArrayDataType; +import ghidra.program.model.data.CategoryPath; +import ghidra.program.model.data.DataType; +import ghidra.program.model.data.InvalidDataTypeException; +import ghidra.program.model.data.Structure; +import ghidra.program.model.data.StructureDataType; +import ghidra.program.model.listing.CodeUnit; +import ghidra.program.model.listing.Data; +import ghidra.program.model.listing.Listing; +import ghidra.program.model.listing.Program; +import ghidra.program.model.mem.MemoryBlock; +import ghidra.program.model.util.CodeUnitInsertionException; +import ghidra.util.exception.DuplicateNameException; + +public class UnixAoutRelocationTable implements Iterable, StructConverter { + private static final int ENTRY_SIZE = 8; + + private final long fileSize; + private final List relocations; + private final UnixAoutSymbolTable symtab; + + public UnixAoutRelocationTable(BinaryReader reader, long fileOffset, long fileSize, UnixAoutSymbolTable symtab) + throws IOException { + this.fileSize = fileSize; + this.relocations = new ArrayList<>(); + this.symtab = symtab; + + reader.setPointerIndex(fileOffset); + + // read each relocation table entry + while (reader.getPointerIndex() < (fileOffset + fileSize)) { + long address = reader.readNextUnsignedInt(); + long flags = reader.readNextUnsignedInt(); + + UnixAoutRelocation relocation = new UnixAoutRelocation(address, flags, reader.isBigEndian()); + relocations.add(relocation); + } + } + + @Override + public Iterator iterator() { + return relocations.iterator(); + } + + @Override + public DataType toDataType() throws DuplicateNameException, IOException { + String dtName = "relocation_info"; + Structure 
struct = new StructureDataType(new CategoryPath("/AOUT"), dtName, 0); + struct.setPackingEnabled(true); + try { + struct.add(DWORD, "r_address", null); + struct.addBitField(DWORD, 24, "r_symbolnum", null); + struct.addBitField(BYTE, 1, "r_pcrel", null); + struct.addBitField(BYTE, 2, "r_length", null); + struct.addBitField(BYTE, 1, "r_extern", null); + struct.addBitField(BYTE, 1, "r_baserel", null); + struct.addBitField(BYTE, 1, "r_jmptable", null); + struct.addBitField(BYTE, 1, "r_relative", null); + struct.addBitField(BYTE, 1, "r_copy", null); + } catch (InvalidDataTypeException e) { + throw new RuntimeException(e); + } + + return new ArrayDataType(struct, (int) (fileSize / ENTRY_SIZE), ENTRY_SIZE); + } + + public void markup(Program program, MemoryBlock block) + throws CodeUnitInsertionException, DuplicateNameException, IOException { + Listing listing = program.getListing(); + Data array = listing.createData(block.getStart(), toDataType()); + + int idx = 0; + for (UnixAoutRelocation relocation : this) { + String name = relocation.getSymbolName(symtab); + + if (!StringUtils.isBlank(name)) { + Data structData = array.getComponent(idx); + structData.setComment(CodeUnit.EOL_COMMENT, name); + } + + idx++; + } + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutStringTable.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutStringTable.java new file mode 100644 index 0000000000..9678b30af6 --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutStringTable.java @@ -0,0 +1,63 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.util.bin.format.unixaout; + +import java.io.IOException; + +import ghidra.app.util.bin.BinaryReader; +import ghidra.app.util.bin.StructConverter; +import ghidra.program.model.address.Address; +import ghidra.program.model.data.TerminatedStringDataType; +import ghidra.program.model.listing.Data; +import ghidra.program.model.listing.Listing; +import ghidra.program.model.listing.Program; +import ghidra.program.model.mem.MemoryBlock; +import ghidra.program.model.util.CodeUnitInsertionException; + +public class UnixAoutStringTable { + private final BinaryReader reader; + private final long fileOffset; + + public UnixAoutStringTable(BinaryReader reader, long fileOffset, long fileSize) { + this.reader = reader; + this.fileOffset = fileOffset; + } + + public String readString(long stringOffset) { + if (fileOffset < 0) { + return null; + } + try { + return reader.readUtf8String(fileOffset + stringOffset).trim(); + } catch (IOException e) { + // FIXME + } + return null; + } + + public void markup(Program program, MemoryBlock block) throws CodeUnitInsertionException { + Listing listing = program.getListing(); + Address address = block.getStart(); + listing.createData(address, StructConverter.DWORD); + + int strlen = 4; + while ((address.getOffset() + strlen) < block.getEnd().getOffset()) { + address = address.add(strlen); + Data str = listing.createData(address, TerminatedStringDataType.dataType, -1); + strlen = str.getLength(); + } + } +} diff --git 
/**
 * One decoded entry of the symbol table format used by UNIX a.out
 * executables.
 *
 * The constructor stores the raw fields and derives three values from them:
 * {@link #isExt} from bit 0 of the type byte, {@link #type} from the remaining
 * bits of the type byte, and {@link #kind} from the low nibble of the "other"
 * byte. {@link #name} is not set by the constructor.
 */
public class UnixAoutSymbol {

    public enum SymbolType {
        N_UNDF, N_ABS, N_TEXT, N_DATA, N_BSS, N_INDR, N_FN, N_STAB, UNKNOWN
    }

    public enum SymbolKind {
        AUX_FUNC, AUX_OBJECT, AUX_LABEL, UNKNOWN
    }

    /**
     * Types selected by the even type codes 0, 2, 4, 6, 8 and 10; the array
     * index is the type code divided by two.
     */
    private static final SymbolType[] LOW_TYPES = {
        SymbolType.N_UNDF,
        SymbolType.N_ABS,
        SymbolType.N_TEXT,
        SymbolType.N_DATA,
        SymbolType.N_BSS,
        SymbolType.N_INDR
    };

    public long nameStringOffset;
    public String name;
    public SymbolType type;
    public SymbolKind kind;
    public byte otherByte;
    public short desc;
    public long value;
    public boolean isExt;

    /**
     * @param nameStringOffset offset of the symbol name, stored as given
     * @param typeByte         raw type byte; bit 0 yields {@link #isExt}, the
     *                         other bits yield {@link #type}
     * @param otherByte        raw "other" byte; its low nibble yields {@link #kind}
     * @param desc             raw descriptor field, stored as given
     * @param value            raw value field, stored as given
     */
    public UnixAoutSymbol(long nameStringOffset, byte typeByte, byte otherByte,
            short desc, long value) {
        this.nameStringOffset = nameStringOffset;
        this.otherByte = otherByte;
        this.desc = desc;
        this.value = value;

        this.isExt = (typeByte & 0x01) != 0;
        this.type = classifyType(typeByte & 0xfe);
        this.kind = classifyKind(otherByte & 0x0f);
    }

    /**
     * Maps the type byte with bit 0 cleared (an even value in 0..0xfe) to a
     * symbol type.
     */
    private static SymbolType classifyType(int typeCode) {
        int slot = typeCode >> 1;
        if (slot < LOW_TYPES.length) {
            return LOW_TYPES[slot];
        }
        return (typeCode >= 0x20) ? SymbolType.N_STAB : SymbolType.UNKNOWN;
    }

    /**
     * Maps the low nibble of the "other" byte to a symbol kind.
     */
    private static SymbolKind classifyKind(int kindCode) {
        if (kindCode == 1) {
            return SymbolKind.AUX_OBJECT;
        }
        if (kindCode == 2) {
            return SymbolKind.AUX_FUNC;
        }
        if (kindCode == 3) {
            return SymbolKind.AUX_LABEL;
        }
        return SymbolKind.UNKNOWN;
    }
}
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.util.bin.format.unixaout; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; + +import ghidra.app.util.bin.BinaryReader; +import ghidra.app.util.bin.StructConverter; +import ghidra.app.util.importer.MessageLog; +import ghidra.app.util.opinion.UnixAoutProgramLoader; +import ghidra.program.model.data.ArrayDataType; +import ghidra.program.model.data.CategoryPath; +import ghidra.program.model.data.DataType; +import ghidra.program.model.data.Structure; +import ghidra.program.model.data.StructureDataType; +import ghidra.program.model.listing.CodeUnit; +import ghidra.program.model.listing.Data; +import ghidra.program.model.listing.Listing; +import ghidra.program.model.listing.Program; +import ghidra.program.model.mem.MemoryBlock; +import ghidra.program.model.util.CodeUnitInsertionException; +import ghidra.util.exception.DuplicateNameException; + +public class UnixAoutSymbolTable implements Iterable, StructConverter { + private static final int ENTRY_SIZE = 12; + + private final long fileSize; + private List symbols; + + public UnixAoutSymbolTable(BinaryReader reader, long fileOffset, long fileSize, UnixAoutStringTable strtab, MessageLog log) + throws IOException { + this.fileSize = fileSize; + this.symbols = new ArrayList<>(); + + reader.setPointerIndex(fileOffset); + int idx = 0; + + // read each symbol table entry + while (reader.getPointerIndex() < (fileOffset + fileSize)) { + long strOffset = 
reader.readNextUnsignedInt(); + byte typeByte = reader.readNextByte(); + byte otherByte = reader.readNextByte(); + short desc = reader.readNextShort(); + long value = reader.readNextUnsignedInt(); + + UnixAoutSymbol symbol = new UnixAoutSymbol(strOffset, typeByte, otherByte, desc, value); + if (symbol.type == UnixAoutSymbol.SymbolType.UNKNOWN) { + log.appendMsg(UnixAoutProgramLoader.dot_symtab, String.format("Unknown symbol type 0x%02x at symbol index %d", typeByte, idx)); + } + symbols.add(symbol); + + idx++; + } + + // lookup and set each string table symbol name + for (UnixAoutSymbol symbol : this) { + symbol.name = strtab.readString(symbol.nameStringOffset); + } + } + + @Override + public Iterator iterator() { + return symbols.iterator(); + } + + @Override + public DataType toDataType() throws DuplicateNameException, IOException { + String dtName = "nlist"; + Structure struct = new StructureDataType(new CategoryPath("/AOUT"), dtName, 0); + struct.add(DWORD, "n_strx", null); + struct.add(BYTE, "n_type", null); + struct.add(BYTE, "n_other", null); + struct.add(WORD, "n_desc", null); + struct.add(DWORD, "n_value", null); + return new ArrayDataType(struct, (int) (fileSize / ENTRY_SIZE), ENTRY_SIZE); + } + + public UnixAoutSymbol get(int symbolNum) { + return symbols.get(symbolNum); + } + + public long size() { + return symbols.size(); + } + + public void markup(Program program, MemoryBlock block) throws CodeUnitInsertionException, DuplicateNameException, IOException { + Listing listing = program.getListing(); + Data array = listing.createData(block.getStart(), toDataType()); + + int idx = 0; + for (UnixAoutSymbol symbol : this) { + if (!StringUtils.isBlank(symbol.name)) { + Data structData = array.getComponent(idx); + + if (structData != null) { + structData.setComment(CodeUnit.EOL_COMMENT, symbol.name); + } + } + + idx++; + } + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/UnixAoutLoader.java 
b/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/UnixAoutLoader.java new file mode 100644 index 0000000000..86e1ad1d8a --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/UnixAoutLoader.java @@ -0,0 +1,165 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.util.opinion; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import ghidra.app.util.Option; +import ghidra.app.util.OptionException; +import ghidra.app.util.bin.ByteProvider; +import ghidra.app.util.bin.format.unixaout.UnixAoutHeader; +import ghidra.app.util.importer.MessageLog; +import ghidra.framework.model.DomainObject; +import ghidra.program.model.address.Address; +import ghidra.program.model.address.AddressFactory; +import ghidra.program.model.address.AddressSpace; +import ghidra.program.model.lang.LanguageCompilerSpecPair; +import ghidra.program.model.listing.Program; +import ghidra.util.exception.CancelledException; +import ghidra.util.task.TaskMonitor; + +/** + * A {@link Loader} for processing UNIX-style A.out executables + * + * This style was also used by UNIX-like systems such as SunOS, BSD, and + * VxWorks, as well as some early distributions of Linux. Although there do + * exist implementations of A.out with 64-bit and GNU extensions, this loader + * does not currently support them. 
+ * + * @see OSDev.org A.out + * @see FreeBSD + * manpage + */ +public class UnixAoutLoader extends AbstractProgramWrapperLoader { + public static final String OPTION_NAME_BASE_ADDR = "Base Address"; + + @Override + public String getName() { + return "UNIX A.out executable"; + } + + /** + * Retrieves the Address offset given in the "Base Address" option. + * Returns 0 if the option could not be found or contains an invalid value. + */ + private long getBaseAddrOffset(List