diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutHeader.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutHeader.java new file mode 100644 index 0000000000..06c145021e --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutHeader.java @@ -0,0 +1,552 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ghidra.app.util.bin.format.unixaout; + +import java.io.IOException; + +import ghidra.app.util.bin.*; +import ghidra.program.model.address.Address; +import ghidra.program.model.data.*; +import ghidra.program.model.listing.Listing; +import ghidra.program.model.listing.Program; +import ghidra.program.model.util.CodeUnitInsertionException; +import ghidra.util.exception.DuplicateNameException; + +public class UnixAoutHeader implements StructConverter { + + public enum AoutType { + OMAGIC, NMAGIC, ZMAGIC, QMAGIC, CMAGIC, UNKNOWN + } + + private BinaryReader reader; + + private long binarySize; + private AoutType exeType; + private boolean machineTypeValid; + private String languageSpec; + private String compilerSpec = "default"; + private long pageSize; + + private boolean isNetBSD = false; + private boolean isSparc = false; + + private long a_magic; + private long a_text; + private long a_data; + private long a_bss; + private long a_syms; + private long a_entry; + private long a_trsize; + private long 
a_drsize; + + private long strSize; + + private long txtOffset; + private long datOffset; + private long txtRelOffset; + private long datRelOffset; + private long symOffset; + private long strOffset; + + private long txtAddr; + private long txtEndAddr; + private long datAddr; + private long bssAddr; + + // The Linux implementation of a.out appears to start the .text content at + // file offset 0x400 (rather than immediately after the 0x20 bytes of header + // data). It's possible that there exist Linux a.out executabLes with other + // (unintended?) header sizes caused by a mixture of 32- and 64-bit integers + // being padded out in the struct. The intended size is eight 32-bit words + // (32 bytes total.) + private static final int SIZE_OF_EXEC_HEADER = 0x20; + private static final int SIZE_OF_LONG_EXEC_HEADER = 0x400; + + /** + * Interprets binary data as an exec header from a UNIX-style a.out executable, and validates + * the contained fields. + * + * @param provider Source of header binary data + * @param isLittleEndian Flag indicating whether to interpret the data as little-endian. + * @throws IOException if an IO-related error occurred + */ + public UnixAoutHeader(ByteProvider provider, boolean isLittleEndian) throws IOException { + reader = new BinaryReader(provider, isLittleEndian); + + a_magic = reader.readNextUnsignedInt(); + a_text = reader.readNextUnsignedInt(); + a_data = reader.readNextUnsignedInt(); + a_bss = reader.readNextUnsignedInt(); + a_syms = reader.readNextUnsignedInt(); + a_entry = reader.readNextUnsignedInt(); + a_trsize = reader.readNextUnsignedInt(); + a_drsize = reader.readNextUnsignedInt(); + binarySize = reader.length(); + + setExecutableType(a_magic); + + // NOTE: In NetBSD/i386 examples of a.out, the "new-style" 32-bit a_magic/midmag word is + // written in big-endian regardless of the data endianness in the rest of the file. 
+ if ((exeType == AoutType.UNKNOWN) && isLittleEndian) { + a_magic = Integer.reverseBytes((int) a_magic); + setExecutableType(a_magic); + } + + checkMachineTypeValidity(isLittleEndian); + determineTextOffset(); + + datOffset = txtOffset + a_text; + txtRelOffset = datOffset + a_data; + datRelOffset = txtRelOffset + a_trsize; + symOffset = datRelOffset + a_drsize; + strOffset = symOffset + a_syms; + + strSize = 0; + if (strOffset != 0 && (strOffset + 4) <= binarySize) { + strSize = reader.readUnsignedInt(strOffset); + } + + determineTextAddr(); + txtEndAddr = txtAddr + a_text; + datAddr = (exeType == AoutType.OMAGIC) ? txtEndAddr : segmentRound(txtEndAddr); + bssAddr = datAddr + a_data; + } + + public BinaryReader getReader() { + return reader; + } + + /** + * {@return the processor/language specified by this header.} + */ + public String getLanguageSpec() { + return languageSpec; + } + + /** + * {@return the compiler used by this executable. This is left as 'default' for + * all machine types other than i386, where it is assumed to be gcc.} + */ + public String getCompilerSpec() { + return compilerSpec; + } + + /** + * {@return the enumerated type of executable contained in this A.out file.} + */ + public AoutType getExecutableType() { + return exeType; + } + + /** + * {@return an indication of whether this header's fields are all valid; this + * includes the machine type, executable type, and section offsets.} + */ + public boolean isValid() { + return isMachineTypeValid() && + (exeType != AoutType.UNKNOWN) && + areOffsetsValid(); + } + + public long getTextSize() { + return a_text; + } + + public long getDataSize() { + return a_data; + } + + public long getBssSize() { + return a_bss; + } + + public long getSymSize() { + return a_syms; + } + + public long getStrSize() { + return strSize; + } + + public long getEntryPoint() { + return a_entry; + } + + public long getTextRelocSize() { + return a_trsize; + } + + public long getDataRelocSize() { + return a_drsize; + } 
+ + public long getTextOffset() { + return txtOffset; + } + + public long getDataOffset() { + return datOffset; + } + + public long getTextRelocOffset() { + return txtRelOffset; + } + + public long getDataRelocOffset() { + return datRelOffset; + } + + public long getSymOffset() { + return symOffset; + } + + public long getStrOffset() { + return strOffset; + } + + public long getTextAddr() { + return txtAddr; + } + + public long getDataAddr() { + return datAddr; + } + + public long getBssAddr() { + return bssAddr; + } + + /** + * Checks the magic word in the header for a known machine type ID, and sets the + * languageSpec string accordingly. + */ + private void checkMachineTypeValidity(boolean readingAsLittleEndian) { + + machineTypeValid = true; + pageSize = 4096; + final short machtype = (short) ((a_magic >> 16) & 0xFF); + final String readEndianness = readingAsLittleEndian ? "LE" : "BE"; + + switch (machtype) { + /** + * Motorola 68K family + */ + case UnixAoutMachineType.M_68010: + languageSpec = "68000:BE:32:MC68010"; + break; + case UnixAoutMachineType.M_68020: + languageSpec = "68000:BE:32:MC68020"; + break; + case UnixAoutMachineType.M_M68K_NETBSD: + pageSize = 8192; + case UnixAoutMachineType.M_M68K4K_NETBSD: + isNetBSD = true; + languageSpec = "68000:BE:32:default"; + break; + + /** + * SPARC family + */ + case UnixAoutMachineType.M_SPARC_NETBSD: + isNetBSD = true; + case UnixAoutMachineType.M_SPARC: + case UnixAoutMachineType.M_SPARCLET: + isSparc = true; + pageSize = 8192; + languageSpec = "sparc:BE:32:default"; + break; + case UnixAoutMachineType.M_SPARC64_NETBSD: + isNetBSD = true; + isSparc = true; + languageSpec = "sparc:BE:64:default"; + break; + + /** + * MIPS family + */ + case UnixAoutMachineType.M_PMAX_NETBSD: + isNetBSD = true; + case UnixAoutMachineType.M_MIPS1: + case UnixAoutMachineType.M_MIPS2: + case UnixAoutMachineType.M_R3000: + languageSpec = "MIPS:LE:32:default"; + break; + case UnixAoutMachineType.M_MIPS: + languageSpec = 
"MIPS:BE:32:default"; + break; + + /** + * National Semiconductor NS32000 family + */ + case UnixAoutMachineType.M_532_NETBSD: + isNetBSD = true; + case UnixAoutMachineType.M_NS32032: + case UnixAoutMachineType.M_NS32532: + languageSpec = "UNKNOWN:LE:32:default"; + break; + + /** + * x86 family + */ + case UnixAoutMachineType.M_386_NETBSD: + isNetBSD = true; + case UnixAoutMachineType.M_386: + case UnixAoutMachineType.M_386_DYNIX: + compilerSpec = "gcc"; + languageSpec = "x86:LE:32:default"; + break; + case UnixAoutMachineType.M_X86_64_NETBSD: + compilerSpec = "gcc"; + languageSpec = "x86:LE:64:default"; + break; + + /** + * ARM family + */ + case UnixAoutMachineType.M_ARM6_NETBSD: + isNetBSD = true; + case UnixAoutMachineType.M_ARM: + languageSpec = "ARM:" + readEndianness + ":32:default"; + break; + case UnixAoutMachineType.M_AARCH64: + languageSpec = "AARCH64:" + readEndianness + ":64:default"; + break; + + /** + * RISC family + */ + case UnixAoutMachineType.M_OR1K: + languageSpec = "UNKNOWN:BE:32:default"; + break; + case UnixAoutMachineType.M_RISCV: + languageSpec = "RISCV:LE:32:default"; + break; + case UnixAoutMachineType.M_HPPA_OPENBSD: + languageSpec = "pa-risc:BE:32:default"; + break; + + /** + * PowerPC family + */ + case UnixAoutMachineType.M_POWERPC_NETBSD: + isNetBSD = true; + languageSpec = "PowerPC:" + readEndianness + ":32:default"; + break; + case UnixAoutMachineType.M_POWERPC64: + languageSpec = "PowerPC:" + readEndianness + ":64:default"; + break; + + /** + * SuperH family + * NOTE: It's unclear if there is support for SuperH SH-3 or SH-5 cores; + * the primary SuperH language seems to support SH-1 and SH-2 variants + * and the alternative is the SuperH4 language. 
+ */ + case UnixAoutMachineType.M_SH3: + case UnixAoutMachineType.M_SH5_32: + languageSpec = "SuperH:BE:32:default"; + break; + case UnixAoutMachineType.M_SH5_64: + languageSpec = "SuperH:BE:64:default"; + break; + + /** + * VAX family + */ + case UnixAoutMachineType.M_VAX_NETBSD: + pageSize = 512; + case UnixAoutMachineType.M_VAX4K_NETBSD: + isNetBSD = true; + languageSpec = "UNKNOWN:LE:32:default"; + break; + + /** + * Other + */ + case UnixAoutMachineType.M_CRIS: + languageSpec = "UNKNOWN:LE:32:default"; + break; + case UnixAoutMachineType.M_ALPHA_NETBSD: + isNetBSD = true; + case UnixAoutMachineType.M_IA64: + languageSpec = "UNKNOWN:" + readEndianness + ":64:default"; + break; + case UnixAoutMachineType.M_29K: + case UnixAoutMachineType.M_88K_OPENBSD: + languageSpec = "UNKNOWN:" + readEndianness + ":32:default"; + break; + case UnixAoutMachineType.M_UNKNOWN: + languageSpec = "UNKNOWN:" + readEndianness + ":32:default"; + break; + default: + machineTypeValid = false; + } + + // Check that the detected architecture's endianness matches the endianness + // with which we're reading the file; if there's a mismatch, clear the + // machineTypeValid flag because this was evidently a false reading. + if (machineTypeValid) { + String[] languageTokens = languageSpec.split(":"); + if ((languageTokens.length < 2) || + !languageTokens[1].equalsIgnoreCase(readEndianness)) { + machineTypeValid = false; + } + } + } + + /** + * Returns a flag indicating whether the header contains a known machine type + * ID. 
+ */ + private boolean isMachineTypeValid() { + return machineTypeValid; + } + + /** + * Sets the executable type based on the given magic + * + * @param magic The magic + */ + private void setExecutableType(long magic) { + exeType = switch ((short) (magic & 0xFFFF)) { + case 0x111 -> AoutType.CMAGIC; // 0421: core file + case 0x108 -> AoutType.NMAGIC; // 0410: pure executable + case 0x107 -> AoutType.OMAGIC; // 0407: object file or impure executable + case 0x0CC -> AoutType.QMAGIC; // 0314: demand-paged exe w/ header in .text + case 0x10B -> AoutType.ZMAGIC; // 0413: demand-paged executable + default -> AoutType.UNKNOWN; + }; + } + + /** + * Determines the offset in the binary file at which the .text segment begins. + * This routine should attempt to replicate the logic from the N_TXTOFF macro + * that appears in the different incarnations of a.out.h. + * + * NOTE: The FreeBSD imgact_aout.h implies that, if the a_magic word contains + * ZMAGIC when read as little endian, the file offset for .text is __LDPGSZ; + * otherwise, if a_magic contains ZMAGIC when read as big endian, the file + * offset + * for .text is 0. Indeed, it looks like NetBSD uses big-endian ordering for + * the a_magic word even when the file contains code for a little-endian + * processor. + */ + private void determineTextOffset() { + + boolean isLinuxStyle = false; + final long fixedContentSize = a_text + a_data + a_syms + a_trsize + a_drsize; + + // If the file is large enough to read at least one word beyond a long-style + // header + // of 0x400 bytes plus all the sections whose sizes are specified in the + // header... + if (reader.isValidIndex(SIZE_OF_LONG_EXEC_HEADER + fixedContentSize)) { + try { + // The word that immediately follows the symbol table will contain the size of + // the string table. 
+ final long stringTableLength = + reader.readUnsignedInt(SIZE_OF_LONG_EXEC_HEADER + fixedContentSize); + final long longHeaderExpectedFileSize = + SIZE_OF_LONG_EXEC_HEADER + fixedContentSize + stringTableLength; + + // If the size of the file exactly matches what we'd expect if the .text content + // starts at offset 0x400 rather than 0, this implies that the a.out is a + // Linux-style binary. + if (binarySize == longHeaderExpectedFileSize) { + isLinuxStyle = true; + } + } + catch (IOException e) { + e.printStackTrace(); + } + } + + if (isLinuxStyle && (exeType == AoutType.ZMAGIC)) { + // Linux ZMAGICs don't start the .text content until 0x400 + txtOffset = SIZE_OF_LONG_EXEC_HEADER; + + } + else if ((exeType == AoutType.QMAGIC) || + (exeType == AoutType.ZMAGIC)) { + // ZMAGIC for other platforms (as well as QMAGIC) include the file header itself + // in the .text content + txtOffset = 0; + + } + else { + // Otherwise, the .text content starts immediately after the 0x20-byte header + txtOffset = SIZE_OF_EXEC_HEADER; + } + } + + /** + * Uses the combination of executable type and architecture to set the + * appropriate + * base address of the .text segment when loaded. + */ + private void determineTextAddr() { + txtAddr = (isSparc && exeType == AoutType.NMAGIC) || isNetBSD || exeType == AoutType.QMAGIC + ? pageSize + : 0; + } + + /** + * Returns a flag indicating whether all the file offsets in the header + * (for the segments of nonzero size) fall within the size of the file. + */ + private boolean areOffsetsValid() { + // Note that we can't check the string table validity because, if it + // doesn't exist, its offset will be computed to be beyond the end of + // the file. The string table is also not given an explicit size in + // the header. 
+ boolean status = ((a_text == 0) || (txtOffset < binarySize) && + ((a_data == 0) || (datOffset < binarySize)) && + ((a_trsize == 0) || (txtRelOffset < binarySize)) && + ((a_drsize == 0) || (datRelOffset < binarySize)) && + ((a_syms == 0) || (symOffset < binarySize))); + return status; + } + + /** + * Rounds the provided address up to the next page boundary. + */ + private long segmentRound(long addr) { + final long mask = pageSize - 1; + long rounded = ((addr + mask) & ~mask); + return rounded; + } + + @Override + public DataType toDataType() throws DuplicateNameException, IOException { + Structure struct = new StructureDataType(new CategoryPath("/AOUT"), "exec", 0); + struct.add(DWORD, "a_midmag", "magic (network byte order)"); + struct.add(DWORD, "a_text", "the size of the text segment in bytes"); + struct.add(DWORD, "a_data", "the size of the data segment in bytes"); + struct.add(DWORD, "a_bss", "the number of bytes in the bss segment"); + struct.add(DWORD, "a_syms", "the size in bytes of the symbol table section"); + struct.add(DWORD, "a_entry", "the address of the entry point"); + struct.add(DWORD, "a_trsize", "the size in bytes of the text relocation table"); + struct.add(DWORD, "a_drsize", "the size in bytes of the data relocation table"); + return struct; + } + + public void markup(Program program, Address headerAddress) + throws CodeUnitInsertionException, DuplicateNameException, IOException { + Listing listing = program.getListing(); + listing.createData(headerAddress, toDataType()); + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutMachineType.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutMachineType.java new file mode 100644 index 0000000000..15c69bae3a --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutMachineType.java @@ -0,0 +1,78 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the 
/**
 * Machine ID values that can appear in the machine-type field of an a.out magic word.
 *
 * <p>The values are collected from several sources, among them NetBSD's aout_mids.h and
 * libaout.h from the GNU BFD Library.
 *
 * <p>Some a.out header files also list HP values (300 Series, 800 Series, etc.) that do not fit
 * in eight bits. This is occasionally handled by widening the Machine ID field of the a_magic
 * word by two bits, which leaves only six bits in the MSB for other flags. That may not be
 * correct: those large HP machine IDs probably only occur in HP UX binaries, which use a
 * different format (still called "a.out", but with a completely different header and internal
 * organization). A 10-bit Machine ID field would also collide with flags used by VxWorks,
 * NetBSD, and probably others.
 */
public class UnixAoutMachineType {

    public static final short M_UNKNOWN = 0x00;
    public static final short M_68010 = 0x01;
    public static final short M_68020 = 0x02;
    public static final short M_SPARC = 0x03;
    public static final short M_R3000 = 0x04;
    public static final short M_NS32032 = 0x40;
    public static final short M_NS32532 = 0x45;
    public static final short M_386 = 0x64;
    /** AMD 29000 */
    public static final short M_29K = 0x65;
    /** i386-based Sequent machine running DYNIX */
    public static final short M_386_DYNIX = 0x66;
    public static final short M_ARM = 0x67;
    /** Sparclet = M_SPARC + 128 */
    public static final short M_SPARCLET = 0x83;
    /** NetBSD/i386 */
    public static final short M_386_NETBSD = 0x86;
    /** NetBSD/m68k, 8K pages */
    public static final short M_M68K_NETBSD = 0x87;
    /** NetBSD/m68k, 4K pages */
    public static final short M_M68K4K_NETBSD = 0x88;
    /** NetBSD/ns32k */
    public static final short M_532_NETBSD = 0x89;
    /** NetBSD/sparc */
    public static final short M_SPARC_NETBSD = 0x8a;
    /** NetBSD/pmax (MIPS little-endian) */
    public static final short M_PMAX_NETBSD = 0x8b;
    /** NetBSD/VAX (1K pages?) */
    public static final short M_VAX_NETBSD = 0x8c;
    /** NetBSD/Alpha */
    public static final short M_ALPHA_NETBSD = 0x8d;
    /** big-endian */
    public static final short M_MIPS = 0x8e;
    /** NetBSD/arm32 */
    public static final short M_ARM6_NETBSD = 0x8f;
    public static final short M_SH3 = 0x91;
    /** PowerPC 64 */
    public static final short M_POWERPC64 = 0x94;
    /** NetBSD/PowerPC (big-endian) */
    public static final short M_POWERPC_NETBSD = 0x95;
    /** NetBSD/VAX (4K pages) */
    public static final short M_VAX4K_NETBSD = 0x96;
    /** MIPS R2000/R3000 */
    public static final short M_MIPS1 = 0x97;
    /** MIPS R4000/R6000 */
    public static final short M_MIPS2 = 0x98;
    /** OpenBSD/m88k */
    public static final short M_88K_OPENBSD = 0x99;
    /** OpenBSD/hppa (PA-RISC) */
    public static final short M_HPPA_OPENBSD = 0x9a;
    /** SuperH 64-bit */
    public static final short M_SH5_64 = 0x9b;
    /** NetBSD/sparc64 */
    public static final short M_SPARC64_NETBSD = 0x9c;
    /** NetBSD/amd64 */
    public static final short M_X86_64_NETBSD = 0x9d;
    /** SuperH 32-bit (ILP 32) */
    public static final short M_SH5_32 = 0x9e;
    /** Itanium */
    public static final short M_IA64 = 0x9f;
    /** ARM AARCH64 */
    public static final short M_AARCH64 = 0xb7;
    /** OpenRISC 1000 */
    public static final short M_OR1K = 0xb8;
    /** RISC-V */
    public static final short M_RISCV = 0xb9;
    /** Axis ETRAX CRIS */
    public static final short M_CRIS = 0xff;

    // Machine IDs that should only appear in the incompatible HP UX a.out format:
    //
    //   HP300 (68020+68881): 0x12c
    //   HP200/300          : 0x20c
    //   HP800              : 0x20b
}
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ghidra.app.util.bin.format.unixaout; + +import ghidra.app.util.opinion.UnixAoutProgramLoader; + +/** + * Represents the content of a single entry in the relocation table format used + * by the UNIX a.out executable. + */ +public class UnixAoutRelocation { + public long address; + public int symbolNum; + public byte flags; + public boolean pcRelativeAddressing; + public byte pointerLength; + public boolean extern; + public boolean baseRelative; + public boolean jmpTable; + public boolean relative; + public boolean copy; + + /** + * + * @param address First of the two words in the table entry (a 32-bit address) + * @param flags Second of the two words in the table entry (containing several bitfields) + * @param bigEndian True if big endian; otherwise, false + */ + public UnixAoutRelocation(long address, long flags, boolean bigEndian) { + this.address = (0xFFFFFFFF & address); + + if (bigEndian) { + this.symbolNum = (int) ((flags & 0xFFFFFF00) >> 8); + this.flags = (byte) (flags & 0xFF); + this.pcRelativeAddressing = ((flags & 0x80) != 0); + this.pointerLength = (byte) (1 << ((flags & 0x60) >> 5)); + this.extern = ((flags & 0x10) != 0); + this.baseRelative = ((flags & 0x8) != 0); + this.jmpTable = ((flags & 0x4) != 0); + this.relative = ((flags & 0x2) != 0); + this.copy = ((flags & 0x1) != 0); + } + else { + this.symbolNum = (int) (flags & 0x00FFFFFF); + this.flags = (byte) ((flags & 0xFF000000) >> 24); + this.pcRelativeAddressing = ((this.flags & 0x01) != 0); + this.pointerLength = (byte) (1 << ((this.flags & 0x06) >> 1)); + 
this.extern = ((this.flags & 0x08) != 0); + this.baseRelative = ((this.flags & 0x10) != 0); + this.jmpTable = ((this.flags & 0x20) != 0); + this.relative = ((this.flags & 0x40) != 0); + this.copy = ((this.flags & 0x80) != 0); + } + } + + public String getSymbolName(UnixAoutSymbolTable symtab) { + if (extern && symbolNum < symtab.size()) { + return symtab.get(symbolNum).name; + } + else if (!extern) { + return switch (symbolNum) { + case 4 -> UnixAoutProgramLoader.dot_text; + case 6 -> UnixAoutProgramLoader.dot_data; + case 8 -> UnixAoutProgramLoader.dot_bss; + default -> null; + }; + } + + return null; + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutRelocationTable.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutRelocationTable.java new file mode 100644 index 0000000000..613da8cf15 --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutRelocationTable.java @@ -0,0 +1,102 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.util.bin.format.unixaout; + +import java.io.IOException; +import java.util.*; + +import org.apache.commons.lang3.StringUtils; + +import ghidra.app.util.bin.BinaryReader; +import ghidra.app.util.bin.StructConverter; +import ghidra.program.model.data.*; +import ghidra.program.model.listing.*; +import ghidra.program.model.mem.MemoryBlock; +import ghidra.program.model.util.CodeUnitInsertionException; +import ghidra.util.exception.DuplicateNameException; + +public class UnixAoutRelocationTable implements Iterable, StructConverter { + private static final int ENTRY_SIZE = 8; + + private final long fileSize; + private final List relocations; + private final UnixAoutSymbolTable symtab; + + public UnixAoutRelocationTable(BinaryReader reader, long fileOffset, long fileSize, + UnixAoutSymbolTable symtab) throws IOException { + this.fileSize = fileSize; + this.relocations = new ArrayList<>(); + this.symtab = symtab; + + reader.setPointerIndex(fileOffset); + + // read each relocation table entry + while (reader.getPointerIndex() < (fileOffset + fileSize)) { + long address = reader.readNextUnsignedInt(); + long flags = reader.readNextUnsignedInt(); + + UnixAoutRelocation relocation = + new UnixAoutRelocation(address, flags, reader.isBigEndian()); + relocations.add(relocation); + } + } + + @Override + public Iterator iterator() { + return relocations.iterator(); + } + + @Override + public DataType toDataType() throws DuplicateNameException, IOException { + String dtName = "relocation_info"; + Structure struct = new StructureDataType(new CategoryPath("/AOUT"), dtName, 0); + struct.setPackingEnabled(true); + try { + struct.add(DWORD, "r_address", null); + struct.addBitField(DWORD, 24, "r_symbolnum", null); + struct.addBitField(BYTE, 1, "r_pcrel", null); + struct.addBitField(BYTE, 2, "r_length", null); + struct.addBitField(BYTE, 1, "r_extern", null); + struct.addBitField(BYTE, 1, "r_baserel", null); + struct.addBitField(BYTE, 1, "r_jmptable", null); + 
struct.addBitField(BYTE, 1, "r_relative", null); + struct.addBitField(BYTE, 1, "r_copy", null); + } + catch (InvalidDataTypeException e) { + throw new RuntimeException(e); + } + + return new ArrayDataType(struct, (int) (fileSize / ENTRY_SIZE), ENTRY_SIZE); + } + + public void markup(Program program, MemoryBlock block) + throws CodeUnitInsertionException, DuplicateNameException, IOException { + Listing listing = program.getListing(); + Data array = listing.createData(block.getStart(), toDataType()); + + int idx = 0; + for (UnixAoutRelocation relocation : this) { + String name = relocation.getSymbolName(symtab); + + if (!StringUtils.isBlank(name)) { + Data structData = array.getComponent(idx); + structData.setComment(CommentType.EOL, name); + } + + idx++; + } + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutStringTable.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutStringTable.java new file mode 100644 index 0000000000..2709811cae --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutStringTable.java @@ -0,0 +1,62 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.util.bin.format.unixaout; + +import java.io.IOException; + +import ghidra.app.util.bin.BinaryReader; +import ghidra.app.util.bin.StructConverter; +import ghidra.program.model.address.Address; +import ghidra.program.model.data.TerminatedStringDataType; +import ghidra.program.model.listing.*; +import ghidra.program.model.mem.MemoryBlock; +import ghidra.program.model.util.CodeUnitInsertionException; + +public class UnixAoutStringTable { + private final BinaryReader reader; + private final long fileOffset; + + public UnixAoutStringTable(BinaryReader reader, long fileOffset, long fileSize) { + this.reader = reader; + this.fileOffset = fileOffset; + } + + public String readString(long stringOffset) { + if (fileOffset < 0) { + return null; + } + try { + return reader.readUtf8String(fileOffset + stringOffset).trim(); + } + catch (IOException e) { + // FIXME + } + return null; + } + + public void markup(Program program, MemoryBlock block) throws CodeUnitInsertionException { + Listing listing = program.getListing(); + Address address = block.getStart(); + listing.createData(address, StructConverter.DWORD); + + int strlen = 4; + while ((address.getOffset() + strlen) < block.getEnd().getOffset()) { + address = address.add(strlen); + Data str = listing.createData(address, TerminatedStringDataType.dataType, -1); + strlen = str.getLength(); + } + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutSymbol.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutSymbol.java new file mode 100644 index 0000000000..1d31a58da7 --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/bin/format/unixaout/UnixAoutSymbol.java @@ -0,0 +1,66 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/**
 * One entry of the symbol table used by UNIX a.out executables, with its type and kind decoded
 * from the raw type/other bytes.
 */
public class UnixAoutSymbol {

	public enum SymbolType {
		N_UNDF, N_ABS, N_TEXT, N_DATA, N_BSS, N_INDR, N_FN, N_STAB, UNKNOWN
	}

	public enum SymbolKind {
		AUX_FUNC, AUX_OBJECT, AUX_LABEL, UNKNOWN
	}

	public long nameStringOffset;
	public String name;
	public SymbolType type;
	public SymbolKind kind;
	public byte otherByte;
	public short desc;
	public long value;
	public boolean isExt;

	/**
	 * Builds a symbol from the raw fields of a symbol table entry. The {@code name} field is
	 * left {@code null}; it is expected to be filled in by whoever resolves the string offset.
	 *
	 * @param nameStringOffset offset of the symbol's name in the string table
	 * @param typeByte raw type byte; bit 0 becomes {@link #isExt}, the remaining bits select
	 *            {@link #type}
	 * @param otherByte raw "other" byte; its low nibble selects {@link #kind}
	 * @param desc raw description field
	 * @param value raw symbol value
	 */
	public UnixAoutSymbol(long nameStringOffset, byte typeByte, byte otherByte, short desc,
			long value) {
		this.nameStringOffset = nameStringOffset;
		this.otherByte = otherByte;
		this.desc = desc;
		this.value = value;

		this.isExt = (typeByte & 1) != 0;
		this.type = decodeType(typeByte & 0xfe);
		this.kind = decodeKind(otherByte & 0x0f);
	}

	/**
	 * Maps the type byte (already masked with 0xfe, so in the range 0..254) to a symbol type.
	 */
	private static SymbolType decodeType(int typeBits) {
		switch (typeBits) {
			case 0:
				return SymbolType.N_UNDF;
			case 2:
				return SymbolType.N_ABS;
			case 4:
				return SymbolType.N_TEXT;
			case 6:
				return SymbolType.N_DATA;
			case 8:
				return SymbolType.N_BSS;
			case 10:
				return SymbolType.N_INDR;
			default:
				// Anything from 0x20 upwards is treated as a stab entry.
				if (typeBits < 0x20) {
					return SymbolType.UNKNOWN;
				}
				return SymbolType.N_STAB;
		}
	}

	/**
	 * Maps the low nibble of the "other" byte to a symbol kind.
	 */
	private static SymbolKind decodeKind(int kindBits) {
		if (kindBits == 1) {
			return SymbolKind.AUX_OBJECT;
		}
		if (kindBits == 2) {
			return SymbolKind.AUX_FUNC;
		}
		if (kindBits == 3) {
			return SymbolKind.AUX_LABEL;
		}
		return SymbolKind.UNKNOWN;
	}
}
+ */ +package ghidra.app.util.bin.format.unixaout; + +import java.io.IOException; +import java.util.*; + +import org.apache.commons.lang3.StringUtils; + +import ghidra.app.util.bin.BinaryReader; +import ghidra.app.util.bin.StructConverter; +import ghidra.app.util.importer.MessageLog; +import ghidra.app.util.opinion.UnixAoutProgramLoader; +import ghidra.program.model.data.*; +import ghidra.program.model.listing.*; +import ghidra.program.model.mem.MemoryBlock; +import ghidra.program.model.util.CodeUnitInsertionException; +import ghidra.util.exception.DuplicateNameException; + +public class UnixAoutSymbolTable implements Iterable, StructConverter { + private static final int ENTRY_SIZE = 12; + + private final long fileSize; + private List symbols; + + public UnixAoutSymbolTable(BinaryReader reader, long fileOffset, long fileSize, + UnixAoutStringTable strtab, MessageLog log) throws IOException { + this.fileSize = fileSize; + this.symbols = new ArrayList<>(); + + reader.setPointerIndex(fileOffset); + int idx = 0; + + // read each symbol table entry + while (reader.getPointerIndex() < (fileOffset + fileSize)) { + long strOffset = reader.readNextUnsignedInt(); + byte typeByte = reader.readNextByte(); + byte otherByte = reader.readNextByte(); + short desc = reader.readNextShort(); + long value = reader.readNextUnsignedInt(); + + UnixAoutSymbol symbol = new UnixAoutSymbol(strOffset, typeByte, otherByte, desc, value); + if (symbol.type == UnixAoutSymbol.SymbolType.UNKNOWN) { + log.appendMsg(UnixAoutProgramLoader.dot_symtab, + String.format("Unknown symbol type 0x%02x at symbol index %d", typeByte, idx)); + } + symbols.add(symbol); + + idx++; + } + + // lookup and set each string table symbol name + for (UnixAoutSymbol symbol : this) { + symbol.name = strtab.readString(symbol.nameStringOffset); + } + } + + @Override + public Iterator iterator() { + return symbols.iterator(); + } + + @Override + public DataType toDataType() throws DuplicateNameException, IOException { + 
String dtName = "nlist"; + Structure struct = new StructureDataType(new CategoryPath("/AOUT"), dtName, 0); + struct.add(DWORD, "n_strx", null); + struct.add(BYTE, "n_type", null); + struct.add(BYTE, "n_other", null); + struct.add(WORD, "n_desc", null); + struct.add(DWORD, "n_value", null); + return new ArrayDataType(struct, (int) (fileSize / ENTRY_SIZE), ENTRY_SIZE); + } + + public UnixAoutSymbol get(int symbolNum) { + return symbols.get(symbolNum); + } + + public long size() { + return symbols.size(); + } + + public void markup(Program program, MemoryBlock block) + throws CodeUnitInsertionException, DuplicateNameException, IOException { + Listing listing = program.getListing(); + Data array = listing.createData(block.getStart(), toDataType()); + + int idx = 0; + for (UnixAoutSymbol symbol : this) { + if (!StringUtils.isBlank(symbol.name)) { + Data structData = array.getComponent(idx); + + if (structData != null) { + structData.setComment(CommentType.EOL, symbol.name); + } + } + + idx++; + } + } +} diff --git a/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/UnixAoutLoader.java b/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/UnixAoutLoader.java new file mode 100644 index 0000000000..24f6cb80e3 --- /dev/null +++ b/Ghidra/Features/Base/src/main/java/ghidra/app/util/opinion/UnixAoutLoader.java @@ -0,0 +1,165 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.util.opinion; + +import java.io.IOException; +import java.util.*; + +import ghidra.app.util.Option; +import ghidra.app.util.OptionException; +import ghidra.app.util.bin.ByteProvider; +import ghidra.app.util.bin.format.unixaout.UnixAoutHeader; +import ghidra.app.util.importer.MessageLog; +import ghidra.framework.model.DomainObject; +import ghidra.program.model.address.*; +import ghidra.program.model.lang.LanguageCompilerSpecPair; +import ghidra.program.model.listing.Program; +import ghidra.util.exception.CancelledException; +import ghidra.util.task.TaskMonitor; + +/** + * A {@link Loader} for processing UNIX-style A.out executables + *

+ * This style was also used by UNIX-like systems such as SunOS, BSD, and VxWorks, as well as some + * early distributions of Linux. Although there do exist implementations of A.out with 64-bit and \ + * GNU extensions, this loader does not currently support them. + * + * @see OSDev.org A.out + * @see FreeBSD manpage + */ +public class UnixAoutLoader extends AbstractProgramWrapperLoader { + + public final static String UNIX_AOUT_NAME = "UNIX A.out"; + + public static final String OPTION_NAME_BASE_ADDR = "Base Address"; + + @Override + public Collection findSupportedLoadSpecs(ByteProvider provider) throws IOException { + List loadSpecs = new ArrayList<>(); + + // Attempt to parse the header as both little- and big-endian. + // It is likely that only one of these will produce sensible values. + UnixAoutHeader hdrBE = new UnixAoutHeader(provider, false); + UnixAoutHeader hdrLE = new UnixAoutHeader(provider, true); + boolean beValid = false; + + if (hdrBE.isValid()) { + final String lang = hdrBE.getLanguageSpec(); + final String comp = hdrBE.getCompilerSpec(); + loadSpecs.add(new LoadSpec(this, 0, new LanguageCompilerSpecPair(lang, comp), true)); + beValid = true; + } + if (hdrLE.isValid()) { + final String lang = hdrLE.getLanguageSpec(); + final String comp = hdrLE.getCompilerSpec(); + loadSpecs + .add(new LoadSpec(this, 0, new LanguageCompilerSpecPair(lang, comp), !beValid)); + } + + return loadSpecs; + } + + @Override + protected void load(ByteProvider provider, LoadSpec loadSpec, List