GT-3261_emteere Sped up string table reading

This commit is contained in:
emteere
2019-11-05 07:55:21 -05:00
parent 9b3de9fa5a
commit 90fe49f819
5 changed files with 94 additions and 30 deletions
@@ -16,6 +16,8 @@ Unwind_SjLj_Resume
assert_rtn
pthread_exit
errx
abort_message
clang_call_terminate
ZL11unreachablePKc
ZN5swift10fatalErrorEjPKcz
ZN5swift24swift_abortRetainUnownedEPKv
@@ -31,3 +33,5 @@ ZNKSt3__120__vector_base_commonILb1EE20__throw_length_errorEv
ZNKSt3__121__basic_string_commonILb1EE20__throw_length_errorEv
T0s9_abstracts5NeverOs12StaticStringV4file_Su4linetF
ZN5swift24swift_dynamicCastFailureEPKvPKcS1_S3_S3_
ZSt11__terminatePFvvE
ZSt12__unexpectedPFvvE
@@ -20,7 +20,10 @@ import java.io.IOException;
import ghidra.app.util.bin.StructConverter;
import ghidra.app.util.bin.format.FactoryBundledWithBinaryReader;
import ghidra.app.util.bin.format.macho.MachConstants;
import ghidra.program.model.data.*;
import ghidra.program.model.data.CategoryPath;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.StructureDataType;
import ghidra.util.exception.AssertException;
import ghidra.util.exception.DuplicateNameException;
/**
@@ -39,9 +42,9 @@ public class NList implements StructConverter {
private boolean is32bit;
public static NList createNList(FactoryBundledWithBinaryReader reader,
boolean is32bit, long stringTableOffset) throws IOException {
boolean is32bit) throws IOException {
NList nList = (NList) reader.getFactory().create(NList.class);
nList.initNList(reader, is32bit, stringTableOffset);
nList.initNList(reader, is32bit);
return nList;
}
@@ -50,8 +53,7 @@ public class NList implements StructConverter {
*/
public NList() {}
private void initNList(FactoryBundledWithBinaryReader reader, boolean is32bit,
long stringTableOffset) throws IOException {
private void initNList(FactoryBundledWithBinaryReader reader, boolean is32bit) throws IOException {
this.is32bit = is32bit;
n_strx = reader.readNextInt();
@@ -64,6 +66,22 @@ public class NList implements StructConverter {
else {
n_value = reader.readNextLong();
}
}
/**
* Initialize the string from the string table.
*
* You MUST call this method after the NList element is created!
*
* Reading a large NList table can cause a large performance issue if the strings
* are initialized as the NList entry is created. The string table indexes are
* scattered. Initializing the strings linearly from the string table is much
* faster.
*
* @param reader the reader used to read the string from the string table
* @param stringTableOffset offset of the string table
*/
public void initString(FactoryBundledWithBinaryReader reader, long stringTableOffset) {
try {
string = reader.readAsciiString(stringTableOffset + n_strx);
}
@@ -95,6 +113,9 @@ public class NList implements StructConverter {
* @return the symbol string
*/
public String getString() {
	// The string is only populated by initString(); asking for it before that
	// has happened is treated as a programming error rather than returning null.
	if (string != null) {
		return string;
	}
	throw new AssertException("initString must be called first");
}
@@ -18,10 +18,12 @@ package ghidra.app.util.bin.format.macho.commands;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import ghidra.app.util.bin.format.FactoryBundledWithBinaryReader;
import ghidra.app.util.bin.format.macho.MachConstants;
import ghidra.app.util.bin.format.macho.MachHeader;
import ghidra.app.util.bin.format.macho.dyld.DyldCacheAccelerateInfo;
import ghidra.app.util.importer.MessageLog;
import ghidra.program.flatapi.FlatProgramAPI;
import ghidra.program.model.address.Address;
@@ -73,11 +75,26 @@ public class SymbolTableCommand extends LoadCommand {
long index = reader.getPointerIndex();
reader.setPointerIndex(header.getStartIndexInProvider() + symoff);
List<NList> nlistList = new ArrayList<>(nsyms);
long startIndex = header.getStartIndexInProvider();
boolean is32bit = header.is32bit();
reader.setPointerIndex(startIndex + symoff);
for (int i = 0; i < nsyms; ++i) {
NList symbol = NList.createNList(reader, header.is32bit(), stroff);
symbols.add(symbol);
nlistList.add(NList.createNList(reader, is32bit));
}
// Sort the entries by their index in the string table so the strings can be read
// in increasing offset order instead of jumping around the table.
// Integer.compare is used rather than subtracting the two indexes: subtraction
// can overflow for widely separated (or negative) values and produce an
// inconsistent ordering.
List<NList> sortedList = nlistList.stream()
	.sorted((o1, o2) -> Integer.compare(o1.getStringTableIndex(), o2.getStringTableIndex()))
	.collect(Collectors.toList());
// initialize the NList strings from string table
long stringTableOffset = stroff;
sortedList.forEach(entry -> {
entry.initString(reader, stringTableOffset);
symbols.add(entry);
} );
reader.setPointerIndex(index);
}
@@ -18,6 +18,8 @@ package ghidra.app.util.bin.format.macho.dyld;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import generic.continues.RethrowContinuesFactory;
import ghidra.app.util.bin.BinaryReader;
@@ -28,7 +30,10 @@ import ghidra.app.util.bin.format.macho.MachConstants;
import ghidra.app.util.bin.format.macho.commands.NList;
import ghidra.app.util.importer.MessageLog;
import ghidra.program.model.address.Address;
import ghidra.program.model.data.*;
import ghidra.program.model.data.CategoryPath;
import ghidra.program.model.data.DataType;
import ghidra.program.model.data.DataUtilities;
import ghidra.program.model.data.StructureDataType;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Program;
import ghidra.program.model.util.CodeUnitInsertionException;
@@ -149,14 +154,28 @@ public class DyldCacheLocalSymbolsInfo implements StructConverter {
FactoryBundledWithBinaryReader nListReader = new FactoryBundledWithBinaryReader(
RethrowContinuesFactory.INSTANCE, reader.getByteProvider(), reader.isLittleEndian());
monitor.setMessage("Parsing DYLD nlist symbol table...");
monitor.initialize(nlistCount);
monitor.initialize(nlistCount*2);
nListReader.setPointerIndex(startIndex + nlistOffset);
try {
for (int i = 0; i < nlistCount; ++i) {
nlistList.add(NList.createNList(nListReader, is32bit, startIndex + stringsOffset));
nlistList.add(NList.createNList(nListReader, is32bit));
monitor.checkCanceled();
monitor.incrementProgress(1);
}
// Sort the entries by their index in the string table so the strings can be read
// in increasing offset order instead of jumping around the table.
// Integer.compare is used rather than subtracting the two indexes: subtraction
// can overflow for widely separated (or negative) values and produce an
// inconsistent ordering.
List<NList> sortedList = nlistList.stream()
	.sorted((o1, o2) -> Integer.compare(o1.getStringTableIndex(), o2.getStringTableIndex()))
	.collect(Collectors.toList());
// initialize the NList strings from string table
long stringTableOffset = startIndex + stringsOffset;
sortedList.forEach(entry -> {
if (!monitor.isCancelled()) {
entry.initString(nListReader, stringTableOffset);
monitor.incrementProgress(1);
}
} );
}
catch (IOException e) {
log.appendMsg(DyldCacheAccelerateInfo.class.getSimpleName(), "Failed to parse nlist.");
@@ -39,10 +39,8 @@ import ghidra.app.util.importer.MessageLogContinuesFactory;
import ghidra.program.database.mem.FileBytes;
import ghidra.program.model.address.Address;
import ghidra.program.model.address.AddressSpace;
import ghidra.program.model.data.DataUtilities;
import ghidra.program.model.data.Pointer64DataType;
import ghidra.program.model.listing.CodeUnit;
import ghidra.program.model.listing.Data;
import ghidra.program.model.listing.Program;
import ghidra.program.model.listing.ProgramFragment;
import ghidra.program.model.mem.MemoryAccessException;
@@ -286,6 +284,9 @@ public class DyldCacheProgramBuilder extends MachoProgramBuilder {
if (pageEntry == DYLD_CACHE_SLIDE_PAGE_ATTR_NO_REBASE) {
continue;
}
List<Address> unchainedLocList = new ArrayList<Address>(1024);
if ((pageEntry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) != 0) {
// go into extras and process list of chain entries for the same page
int extraIndex = (pageEntry & CHAIN_OFFSET_MASK);
@@ -293,15 +294,27 @@ public class DyldCacheProgramBuilder extends MachoProgramBuilder {
pageEntry = ((int) extraEntries[extraIndex]) & 0xffff;
long pageOffset = (pageEntry & CHAIN_OFFSET_MASK) * BYTES_PER_CHAIN_OFFSET;
fixedAddressCount += processPointerChain(page, pageOffset, deltaMask, deltaShift, valueAdd);
processPointerChain(unchainedLocList, page, pageOffset, deltaMask, deltaShift, valueAdd);
extraIndex++;
} while ((pageEntry & DYLD_CACHE_SLIDE_PAGE_ATTR_EXTRA) == 0);
}
else {
long pageOffset = pageEntry * BYTES_PER_CHAIN_OFFSET;
fixedAddressCount += processPointerChain(page, pageOffset, deltaMask, deltaShift, valueAdd);
processPointerChain(unchainedLocList, page, pageOffset, deltaMask, deltaShift, valueAdd);
}
fixedAddressCount += unchainedLocList.size();
unchainedLocList.forEach(entry -> {
// create a pointer at the fixed up chain pointer location
try {
// don't use data utilities. does too much extra checking work
listing.createData(entry, Pointer64DataType.dataType);
}
catch (CodeUnitInsertionException e) {
// No worries, something presumably more important was there already
}
});
}
log.appendMsg("Fixed " + fixedAddressCount + " chained pointers. Creating Pointers");
@@ -312,6 +325,7 @@ public class DyldCacheProgramBuilder extends MachoProgramBuilder {
/**
* Fixes up any chained pointers, starting at the given address.
*
* @param unchainedLocList list of locations that were unchained
* @param page within data pages that has pointers to be unchained
* @param nextOff offset within the page that is the chain start
* @param deltaMask delta offset mask for each value
@@ -322,17 +336,16 @@ public class DyldCacheProgramBuilder extends MachoProgramBuilder {
* @throws MemoryAccessException
* @throws CancelledException
*/
private long processPointerChain(long page, long nextOff, long deltaMask, long deltaShift, long valueAdd)
private void processPointerChain(List<Address> unchainedLocList, long page, long nextOff, long deltaMask, long deltaShift, long valueAdd)
throws MemoryAccessException, CancelledException {
// TODO: should the image base be used to perform the ASLR slide on the pointers?
// Currently the image is kept at its initial location with no ASLR.
Address chainStart = memory.getProgram().getLanguage().getDefaultSpace().getAddress(page);
long fixedAddressCount = 0;
byte origBytes[] = new byte[8];
long valueMask = ~deltaMask;
long valueMask = 0xffffffffffffffffL >>> (64 - deltaShift);
long delta = -1;
while (delta != 0) {
@@ -356,21 +369,11 @@ public class DyldCacheProgramBuilder extends MachoProgramBuilder {
memory.setLong(chainLoc, chainValue);
// create a pointer at the fixed up chain pointer location
try {
// don't use data utilities. does too much extra checking work
listing.createData(chainLoc, Pointer64DataType.dataType);
}
catch (CodeUnitInsertionException e) {
// No worries, something presumably more important was there already
}
fixedAddressCount++;
// delay creating data until after memory has been changed
unchainedLocList.add(chainLoc);
nextOff += (delta * 4);
}
return fixedAddressCount;
}