diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/Doxyfile b/Ghidra/Features/Decompiler/src/decompile/cpp/Doxyfile index d0808edede..a6c2c41360 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/Doxyfile +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/Doxyfile @@ -457,7 +457,7 @@ RECURSIVE = NO # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. -EXCLUDE = unify.hh unify.cc rulecompile.hh rulecompile.cc slghparse.cc slghparse.hh slghscan.cc slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc context.hh context.cc consolemain.cc sleighexample.cc xml.cc double.hh double.cc paramid.hh paramid.cc prefersplit.hh prefersplit.cc +EXCLUDE = unify.hh unify.cc rulecompile.hh rulecompile.cc slghparse.cc slghparse.hh slghscan.cc slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc consolemain.cc sleighexample.cc xml.cc double.hh double.cc paramid.hh paramid.cc prefersplit.hh prefersplit.cc # The EXCLUDE_SYMLINKS tag can be used select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/context.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/context.cc index 1a0a35f3e1..272a3e376f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/context.cc +++ 
b/Ghidra/Features/Decompiler/src/decompile/cpp/context.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,6 +19,39 @@ namespace ghidra { +ConstructState::ConstructState(void) + +{ + ct = (Constructor *)0; + resolve = (ConstructState **)0; + parent = (ConstructState *)0; + length = 0; + offset = 0; +} + +/// The array holding pointers to child nodes is preallocated. +/// \param numOperands is maximum number of children this node can have +ConstructState::ConstructState(int4 numOperands) + +{ + ct = (Constructor *)0; + parent = (ConstructState *)0; + length = 0; + offset = 0; + resolve = new ConstructState *[numOperands]; + for(int4 i=0;i=16) - throw BadDataError("Instruction is using more than 16 bytes"); + if (off >= MAX_INSTRUCTION_LEN) + throw BadDataError("Instruction is using more than " + to_string(MAX_INSTRUCTION_LEN) + " bytes"); const uint1 *ptr = buf + off; uintm res = 0; for(int4 i=0;i= 16) - throw BadDataError("Instruction is using more than 16 bytes"); + if (off >= MAX_INSTRUCTION_LEN) + throw BadDataError("Instruction is using more than " + to_string(MAX_INSTRUCTION_LEN) + " bytes"); const uint1 *ptr = buf + off; startbit = startbit % 8; int4 bytesize = (startbit+size-1)/8 + 1; @@ -93,9 +145,12 @@ uintm ParserContext::getInstructionBits(int4 startbit,int4 size,uint4 off) const return res; } +/// \param bytestart is the offset of the first byte to grab +/// \param size is the number of bytes to grab +/// \return the context bytes in a packed value uintm ParserContext::getContextBytes(int4 bytestart,int4 size) const -{ // Get bytes from 
context into a uintm +{ int4 intstart = bytestart / sizeof(uintm); uintm res = context[ intstart ]; int4 byteOffset = bytestart % sizeof(uintm); @@ -112,6 +167,9 @@ uintm ParserContext::getContextBytes(int4 bytestart,int4 size) const return res; } +/// \param startbit is the offset of the first bit +/// \param size is the number of bits to return +/// \return the requested range of bits (in the least significant positions and padded out with zero bits) uintm ParserContext::getContextBits(int4 startbit,int4 size) const { @@ -131,6 +189,11 @@ uintm ParserContext::getContextBits(int4 startbit,int4 size) const return res; } +/// \param sym is a symbol that resolves to the address where the setting takes effect +/// \param num is the index of the context word being affected +/// \param mask indicates the bits within the context word that are affected +/// \param flow is \b true if the context change \e flows forward from the point where it is set +/// \param point is the parse point where the change was made void ParserContext::addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point) { @@ -191,9 +254,29 @@ void ParserContext::applyCommits(void) } } +/// This can be called in the middle of a parse to accommodate larger constructor trees. +/// \param amount is the number of additional nodes to add +void ParserContext::expandState(int4 amount) + +{ + state.insert(state.begin(),amount,(ConstructState *)0); + for(int4 i=0;ict != ct) { @@ -220,17 +303,18 @@ void ParserWalker::setOutOfBandState(Constructor *ct,int4 index,ConstructState * breadcrumb[0] = 0; } +/// This assumes all the current node's operands have been parsed into the tree. 
+/// \param length is the minimum length of the current constructor +/// \param numopers is the number of operands void ParserWalkerChange::calcCurrentLength(int4 length,int4 numopers) -{ // Calculate the length of the current constructor - // state assuming all its operands are constructed +{ length += point->offset; // Convert relative length to absolute length for(int4 i=0;iresolve[i]; int4 sublength = subpoint->length + subpoint->offset; - // Since subpoint->offset is an absolute offset - // (relative to beginning of instruction) sublength - if (sublength > length) // is absolute and must be compared to absolute length + // Since subpoint->offset is an absolute offset (relative to beginning of instruction) + if (sublength > length) // sublength is absolute and must be compared to absolute length length = sublength; } point->length = length - point->offset; // Convert back to relative length diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/context.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/context.hh index 79fbe1eff6..abe1701de1 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/context.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/context.hh @@ -4,15 +4,17 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ +/// \file context.hh +/// \brief Objects for describing the context around the parsing of an instruction by the SLEIGH engine #ifndef __CONTEXT_HH__ #define __CONTEXT_HH__ @@ -21,184 +23,272 @@ namespace ghidra { -class Token { // A multiple-byte sized chunk of pattern in a bitstream - string name; - int4 size; // Number of bytes in token; - int4 index; // Index of this token, for resolving offsets - bool bigendian; +/// \brief A multiple-byte sized chunk of pattern in the instruction byte stream +class Token { + string name; ///< Name of the token + int4 size; ///< Number of bytes in token + int4 index; ///< Index of \b this token, for resolving offsets + bool bigendian; ///< Set to \b true if encodings within \b this token are big endian public: - Token(const string &nm,int4 sz,bool be,int4 ind) : name(nm) { size = sz; bigendian=be; index = ind; } - int4 getSize(void) const { return size; } - bool isBigEndian(void) const { return bigendian; } - int4 getIndex(void) const { return index; } - const string &getName(void) const { return name; } + Token(const string &nm,int4 sz,bool be,int4 ind) : name(nm) { size = sz; bigendian=be; index = ind; } ///< Constructor + int4 getSize(void) const { return size; } ///< Get the size in bytes + bool isBigEndian(void) const { return bigendian; } ///< Return \b true if encodings within \b this are big endian + int4 getIndex(void) const { return index; } ///< Get the index associated with \b this token + const string &getName(void) const { return name; } ///< Get the name of the token }; -struct FixedHandle { // A handle that is fully resolved - AddrSpace *space; - uint4 size; - AddrSpace *offset_space; // Either null or where dynamic offset is stored - uintb offset_offset; // Either static offset or ptr offset - uint4 offset_size; // Size of pointer - AddrSpace *temp_space; // Consistent temporary location for value - uintb temp_offset; +/// \brief A resolved version of (or pointer to) a SLEIGH defined Varnode +/// +/// For a 
static Varnode, this is the triple (address space, offset, size) for the Varnode. +/// For a dynamic Varnode, this also encodes the pointer Varnode containing the dynamic offset +/// and a temporary storage location for the dereferenced value. +struct FixedHandle { + AddrSpace *space; ///< The address space of the Varnode + uint4 size; ///< Number of bytes in the Varnode + AddrSpace *offset_space; ///< Null \e or the space where the dynamic offset is stored + uintb offset_offset; ///< The offset for the static Varnode \e or the offset for the pointer + uint4 offset_size; ///< Size of pointer + AddrSpace *temp_space; ///< Address space for temporary location for value + uintb temp_offset; ///< Offset of the temporary location }; class Constructor; + +/// \brief A node in a tree of subconstructors +/// +/// This knows its position in the tree (parent node, child nodes) and the underlying SLEIGH constructor that was matched. +/// Child nodes correspond to the operands for the specific constructor. 
struct ConstructState { - Constructor *ct; - FixedHandle hand; - vector resolve; - ConstructState *parent; - int4 length; // Length of this instantiation of the constructor - uint4 offset; // Absolute offset (from start of instruction) + Constructor *ct; ///< The matched Constructor + FixedHandle hand; ///< Resolved Varnode associated with the Constructor + ConstructState **resolve; ///< An array of pointers to child nodes + ConstructState *parent; ///< Pointer to parent node + int4 length; ///< Length of this instantiation of the constructor + uint4 offset; ///< Absolute offset (from start of instruction) + ConstructState(void); ///< Construct a node with no children + ConstructState(int4 numOperands); ///< Construct a node with given number of possible children + ~ConstructState(void); ///< Destructor }; class TripleSymbol; -struct ContextSet { // Instructions for setting a global context value - TripleSymbol *sym; // Resolves to address where setting takes effect - ConstructState *point; // Point at which context set was made - int4 num; // Number of context word affected - uintm mask; // Bits within word affected - uintm value; // New setting for bits - bool flow; // Does the new context flow from its set point + +/// \brief Command for globally setting a formal SLEIGH context value +struct ContextSet { + TripleSymbol *sym; ///< Symbol resolving to address where setting takes effect + ConstructState *point; ///< Point at which context set was made + int4 num; ///< Index of the specific context word affected + uintm mask; ///< Bits within word affected + uintm value; ///< New setting for bits + bool flow; ///< Does the new context flow from its set point }; class ParserWalker; // Forward declaration class ParserWalkerChange; class Translate; +/// \brief Context maintained while parsing a single instruction +/// +/// This contains: +/// - the bytes encoding the instruction +/// - the tree structure of the SLEIGH Constructors encountered while parsing the 
instruction +/// - any formal named SLEIGH context values referenced by the instruction class ParserContext { friend class ParserWalker; friend class ParserWalkerChange; public: - enum { // Possible states of the ParserContext - uninitialized = 0, // Instruction has not been parsed at all - disassembly = 1, // Instruction is parsed in preparation for disassembly - pcode = 2 // Instruction is parsed in preparation for generating p-code + static constexpr int4 MAX_DEPTH = 32; ///< Maximum subconstructor depth in a single instruction + static constexpr int4 MAX_OPERAND = 20; ///< Maximum operands for a single constructor + static constexpr int4 MAX_INSTRUCTION_LEN = 16; ///< Maximum number of bytes in a single instruction + static constexpr int4 INITIAL_STATE_NUM = 64; ///< Recommended number of initial states + static constexpr int4 STATE_GROWTH = 64; ///< Number of states to add for each expansion + /// \brief Possible states of the ParserContext + enum parse_state { + uninitialized = 0, ///< Instruction has not been parsed at all + disassembly = 1, ///< Instruction is parsed in preparation for disassembly + pcode = 2 ///< Instruction is parsed in preparation for generating p-code }; private: - Translate *translate; // Instruction parser - int4 parsestate; - AddrSpace *const_space; - uint1 buf[16]; // Buffer of bytes in the instruction stream - uintm *context; // Pointer to local context - int4 contextsize; // Number of entries in context array - ContextCache *contcache; // Interface for getting/setting context - vector contextcommit; - Address addr; // Address of start of instruction - Address naddr; // Address of next instruction - mutable Address n2addr; // Address of instruction after the next - Address calladdr; // For injections, this is the address of the call being overridden - vector state; // Current resolved instruction - ConstructState *base_state; - int4 alloc; // Number of ConstructState's allocated - int4 delayslot; // delayslot depth + Translate 
*translate; ///< The parent instruction parser + parse_state parsestate; ///< Overall state of the parse + AddrSpace *const_space; ///< Address space for constants + uint1 buf[MAX_INSTRUCTION_LEN]; ///< Buffer of bytes in the instruction stream + uintm *context; ///< Pointer to local context + int4 contextsize; ///< Number of entries in local context array + ContextCache *contcache; ///< Interface for getting/setting context + vector contextcommit; ///< Changes to SLEIGH context slated by this instruction + Address addr; ///< Address of start of instruction + Address naddr; ///< Address of next instruction + mutable Address n2addr; ///< Address of instruction after the next + Address calladdr; ///< For injections, this is the address of the call being overridden + vector state; ///< Available nodes for the constructor tree + ConstructState *base_state; ///< Root node of the constructor tree + int4 alloc; ///< Number of unallocated ConstructState nodes remaining + int4 delayslot; ///< delayslot depth public: - ParserContext(ContextCache *ccache,Translate *trans); - ~ParserContext(void) { if (context != (uintm *)0) delete [] context; } - uint1 *getBuffer(void) { return buf; } - void initialize(int4 maxstate,int4 maxparam,AddrSpace *spc); - int4 getParserState(void) const { return parsestate; } - void setParserState(int4 st) { parsestate = st; } - void deallocateState(ParserWalkerChange &walker); - void allocateOperand(int4 i,ParserWalkerChange &walker); - void setAddr(const Address &ad) { addr = ad; n2addr = Address(); } - void setNaddr(const Address &ad) { naddr = ad; } - void setCalladdr(const Address &ad) { calladdr = ad; } - void addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point); - void clearCommits(void) { contextcommit.clear(); } - void applyCommits(void); - const Address &getAddr(void) const { return addr; } - const Address &getNaddr(void) const { return naddr; } - const Address &getN2addr(void) const; - const Address 
&getDestAddr(void) const { return calladdr; } - const Address &getRefAddr(void) const { return calladdr; } - AddrSpace *getCurSpace(void) const { return addr.getSpace(); } - AddrSpace *getConstSpace(void) const { return const_space; } - uintm getInstructionBytes(int4 byteoff,int4 numbytes,uint4 off) const; - uintm getContextBytes(int4 byteoff,int4 numbytes) const; - uintm getInstructionBits(int4 startbit,int4 size,uint4 off) const; - uintm getContextBits(int4 startbit,int4 size) const; - void setContextWord(int4 i,uintm val,uintm mask) { context[i] = (context[i]&(~mask))|(mask&val); } - void loadContext(void) { contcache->getContext(addr,context); } - int4 getLength(void) const { return base_state->length; } - void setDelaySlot(int4 val) { delayslot = val; } - int4 getDelaySlot(void) const { return delayslot; } + ParserContext(ContextCache *ccache,Translate *trans); ///< Constructor + ~ParserContext(void); ///< Destructor + uint1 *getBuffer(void) { return buf; } ///< Get bytes in the stream at the point this instruction is encoded + void initialize(AddrSpace *spc,int4 maxstate = INITIAL_STATE_NUM); ///< Preallocate nodes for constructor trees + parse_state getParserState(void) const { return parsestate; } ///< Get the overall state of the parse + void setParserState(parse_state st) { parsestate = st; } ///< Update the overall parse state + void deallocateState(ParserWalkerChange &walker); ///< Clear any existing constructor tree + void allocateOperand(int4 i,ParserWalkerChange &walker); ///< Allocate a new child node in the constructor tree + void setAddr(const Address &ad) { addr = ad; n2addr = Address(); } ///< Set the starting address of the instruction + void setNaddr(const Address &ad) { naddr = ad; } ///< Set the ending address of the instruction + void setCalladdr(const Address &ad) { calladdr = ad; } ///< Set the address of the call being overridden + void addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point); ///< Add a formal 
SLEIGH context change command + void clearCommits(void) { contextcommit.clear(); } ///< Clear all context commits + void applyCommits(void); ///< Apply any pending commits to the context cache + const Address &getAddr(void) const { return addr; } ///< Get the starting address of the current instruction + const Address &getNaddr(void) const { return naddr; } ///< Get the address of the next instruction + const Address &getN2addr(void) const; ///< Get the address of the instruction after the next + const Address &getDestAddr(void) const { return calladdr; } ///< Get the destination address (inst_dest) for the overridden call + const Address &getRefAddr(void) const { return calladdr; } ///< Get the reference address (inst_ref) for the p-code snippet + AddrSpace *getCurSpace(void) const { return addr.getSpace(); } ///< Get the address space of the current instruction + AddrSpace *getConstSpace(void) const { return const_space; } ///< Get the address space for constants + uintm getInstructionBytes(int4 byteoff,int4 numbytes,uint4 off) const; ///< Get the specified instruction bytes + uintm getContextBytes(int4 byteoff,int4 numbytes) const; ///< Get bytes from the local context + uintm getInstructionBits(int4 startbit,int4 size,uint4 off) const; ///< Get the specific range of bits from the instruction stream + uintm getContextBits(int4 startbit,int4 size) const; ///< Get the specific range of bits from the local context + void setContextWord(int4 i,uintm val,uintm mask) { context[i] = (context[i]&(~mask))|(mask&val); } ///< Modify a context word, using given mask and value + void loadContext(void) { contcache->getContext(addr,context); } ///< Pull context words associated with the starting address into the local array + int4 getLength(void) const { return base_state->length; } ///< Get the length of the current instruction + void setDelaySlot(int4 val) { delayslot = val; } ///< Set the number of instruction bytes in the delay slot + int4 getDelaySlot(void) const { 
return delayslot; } ///< Get the number of instruction bytes in the delay slot + void expandState(int4 amount); ///< Expand the number of available nodes for the constructor tree }; - -class ParserWalker { // A class for walking the ParserContext - const ParserContext *const_context; - const ParserContext *cross_context; + +/// \brief A class for walking the constructor tree (ParserContext) +class ParserWalker { +private: + const ParserContext *const_context; ///< Context for the main instruction parse + const ParserContext *cross_context; ///< Context for an additional instruction parse needed to resolve a \e crossbuild protected: - ConstructState *point; // The current node being visited - int4 depth; // Depth of the current node - int4 breadcrumb[32]; // Path of operands from root + ConstructState *point; ///< The current node being visited + int4 depth; ///< Depth of the current node + int4 breadcrumb[ParserContext::MAX_DEPTH]; ///< Path of operands from root public: - ParserWalker(const ParserContext *c) { const_context = c; cross_context = (const ParserContext *)0; } - ParserWalker(const ParserContext *c,const ParserContext *cross) { const_context = c; cross_context = cross; } - const ParserContext *getParserContext(void) const { return const_context; } - void baseState(void) { point = const_context->base_state; depth=0; breadcrumb[0] = 0; } + ParserWalker(const ParserContext *c) { const_context = c; cross_context = (const ParserContext *)0; } ///< Constructor + ParserWalker(const ParserContext *c,const ParserContext *cross) { const_context = c; cross_context = cross; } ///< Constructor for crossbuilds + const ParserContext *getParserContext(void) const { return const_context; } ///< Get the current context + void baseState(void) { point = const_context->base_state; depth=0; breadcrumb[0] = 0; } ///< Initialize for a new walk void setOutOfBandState(Constructor *ct,int4 index,ConstructState *tempstate,const ParserWalker &otherwalker); - bool isState(void) 
const { return (point != (ConstructState *)0); } - void pushOperand(int4 i) { breadcrumb[depth++] = i+1; point = point->resolve[i]; breadcrumb[depth] = 0; } - void popOperand(void) { point = point->parent; depth-= 1; } + bool isState(void) const { return (point != (ConstructState *)0); } ///< Return \b true if there are more nodes to traverse + void pushOperand(int4 i); ///< Make the indicated child (operand) the current node + void popOperand(void) { point = point->parent; depth-= 1; } ///< Make the parent constructor the current node + uint4 getOffset(int4 i) const { if (i<0) return point->offset; - ConstructState *op=point->resolve[i]; return op->offset + op->length; } - Constructor *getConstructor(void) const { return point->ct; } - int4 getOperand(void) const { return breadcrumb[depth]; } - FixedHandle &getParentHandle(void) { return point->hand; } - const FixedHandle &getFixedHandle(int4 i) const { return point->resolve[i]->hand; } - AddrSpace *getCurSpace(void) const { return const_context->getCurSpace(); } - AddrSpace *getConstSpace(void) const { return const_context->getConstSpace(); } + ConstructState *op=point->resolve[i]; return op->offset + op->length; } ///< Get the byte offset of the indicated operand within the instruction stream + Constructor *getConstructor(void) const { return point->ct; } ///< Get the current constructor + int4 getOperand(void) const { return breadcrumb[depth]; } ///< Get the operand index of the next constructor in the walk + FixedHandle &getParentHandle(void) { return point->hand; } ///< Get the resolved value associated with the current constructor + const FixedHandle &getFixedHandle(int4 i) const { return point->resolve[i]->hand; } ///< Get the resolved value associated with the indicated child operand + AddrSpace *getCurSpace(void) const { return const_context->getCurSpace(); } ///< Get the address space associated with the instruction stream + AddrSpace *getConstSpace(void) const { return const_context->getConstSpace(); } 
///< Get the constant address space + + /// \brief Get the starting address of the instruction const Address &getAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getAddr(); } return const_context->getAddr(); } + /// \brief Get the address of the next instruction const Address &getNaddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getNaddr();} return const_context->getNaddr(); } + /// \brief Get the address of the instruction after next const Address &getN2addr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getN2addr();} return const_context->getN2addr(); } + /// \brief Get the reference address (inst_ref) for the p-code snippet const Address &getRefAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getRefAddr();} return const_context->getRefAddr(); } + /// \brief Get the destination address (inst_dest) for the overridden call const Address &getDestAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getDestAddr();} return const_context->getDestAddr(); } - int4 getLength(void) const { return const_context->getLength(); } + + int4 getLength(void) const { return const_context->getLength(); } ///< Get the length of the instruction in bytes + + /// \brief Get packed instruction bytes associated with the current constructor + /// + /// \param byteoff is an offset from the starting point associated with the constructor + /// \param numbytes is the number of bytes to pack + /// \return the packed instruction bytes in big endian encoding uintm getInstructionBytes(int4 byteoff,int4 numbytes) const { return const_context->getInstructionBytes(byteoff,numbytes,point->offset); } + + /// \brief Get packed context bytes from the local context + /// + /// \param byteoff is the offset of the first byte to grab + /// \param numbytes is the number of bytes to grab + /// \return the context 
bytes in a packed value uintm getContextBytes(int4 byteoff,int4 numbytes) const { return const_context->getContextBytes(byteoff,numbytes); } + + /// \brief Get bits from the instruction stream associated with the current constructor + /// + /// \param startbit is the offset of the first bit (relative to the starting point associated with the constructor) + /// \param size is the number of bits to grab + /// \return the requested range of bits (in the least significant positions and padded out with zero bits) uintm getInstructionBits(int4 startbit,int4 size) const { return const_context->getInstructionBits(startbit,size,point->offset); } - uintm getContextBits(int4 startbit,int4 size) const { + + /// \brief Get a range of bits from the local context + /// + /// \param startbit is the offset of the first bit + /// \param size is the number of bits to return + /// \return the requested range of bits (in the least significant positions and padded out with zero bits) + uintm getContextBits(int4 startbit,int4 size) const { return const_context->getContextBits(startbit,size); } }; -class ParserWalkerChange : public ParserWalker { // Extension to walker that allows for on the fly modifications to tree +/// \brief A walker extension that allows for on the fly modifications to the constructor tree +/// +/// This is used to build the constructor tree as the instruction is parsed (Sleigh::resolve) +class ParserWalkerChange : public ParserWalker { friend class ParserContext; - ParserContext *context; + ParserContext *context; ///< The (currently active) context public: - ParserWalkerChange(ParserContext *c) : ParserWalker(c) { context = c; } - ParserContext *getParserContext(void) { return context; } - ConstructState *getPoint(void) { return point; } - void setOffset(uint4 off) { point->offset = off; } - void setConstructor(Constructor *c) { point->ct = c; } - void setCurrentLength(int4 len) { point->length = len; } - void calcCurrentLength(int4 length,int4 numopers); + 
ParserWalkerChange(ParserContext *c) : ParserWalker(c) { context = c; } ///< Constructor + ParserContext *getParserContext(void) { return context; } ///< Get the currently active context + ConstructState *getPoint(void) { return point; } ///< Get the current node in the constructor tree + void setOffset(uint4 off) { point->offset = off; } ///< Set the absolute offset (from start of instruction) of the current node + void setConstructor(Constructor *c) { point->ct = c; } ///< Set the underlying Constructor for the current node + void setCurrentLength(int4 len) { point->length = len; } ///< Set the length associated with the current constructor + void calcCurrentLength(int4 length,int4 numopers); ///< Calculate the length of the current constructor }; +/// \brief Exception thrown by the SLEIGH engine struct SleighError : public LowlevelError { - SleighError(const string &s) : LowlevelError(s) {} + SleighError(const string &s) : LowlevelError(s) {} ///< Constructor }; +/// The tree is reset to a single root node and the walker is prepared for a new parse +/// \param walker is the walker to initialize for a traversal inline void ParserContext::deallocateState(ParserWalkerChange &walker) { - alloc = 1; + alloc = state.size() - 2; // Number of allocations left walker.context=this; walker.baseState(); } +/// The next available node is linked to the current active node in the walker at the given operand index. +/// The child node becomes the new active node for the walker. The underlying constructor is not yet assigned. 
+/// \param i is the operand index of the new child +/// \param walker is the walker for the parse inline void ParserContext::allocateOperand(int4 i,ParserWalkerChange &walker) { - ConstructState *opstate = &state[alloc++]; + if (i >= MAX_OPERAND) + throw LowlevelError("SLEIGH parser out of state space"); + if (alloc < 0) + expandState(STATE_GROWTH); + ConstructState *opstate = state[alloc--]; opstate->parent = walker.point; opstate->ct = (Constructor *)0; walker.point->resolve[i] = opstate; + if (walker.depth > MAX_DEPTH-2) + throw LowlevelError("SLEIGH exceeded maximum parse depth"); walker.breadcrumb[walker.depth++] += 1; walker.point = opstate; walker.breadcrumb[walker.depth] = 0; } +/// \param i is the index of child/operand +inline void ParserWalker::pushOperand(int4 i) { + if (depth > ParserContext::MAX_DEPTH-2) + throw LowlevelError("SLEIGH exceeded maximum parse depth"); + breadcrumb[depth++] = i+1; + point = point->resolve[i]; + breadcrumb[depth] = 0; +} + } // End namespace ghidra #endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.cc index d1c550e535..4cd05df8a5 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/inject_sleigh.cc @@ -382,7 +382,7 @@ void PcodeInjectLibrarySleigh::parseInject(InjectPayload *payload) } if (contextCache.pos == (ParserContext *)0) { // Make sure we have a context contextCache.pos = new ParserContext((ContextCache *)0,(Translate *)0); - contextCache.pos->initialize(8,8,slgh->getConstantSpace()); + contextCache.pos->initialize(slgh->getConstantSpace(),8); } PcodeSnippet compiler(slgh); // compiler.clear(); // Not necessary unless we reuse diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/sleigh.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/sleigh.cc index 4a6b0090a6..f3543d67bf 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/sleigh.cc +++ 
b/Ghidra/Features/Decompiler/src/decompile/cpp/sleigh.cc @@ -453,7 +453,7 @@ void DisassemblyCache::initialize(int4 min,int4 hashsize) hashtable = new ParserContext *[hashsize]; for(int4 i=0;iinitialize(75,20,constspace); + pos->initialize(constspace); list[i] = pos; } ParserContext *pos = list[0]; @@ -587,11 +587,11 @@ void Sleigh::initialize(DocumentStorage &store) /// \param addr is the given address of the instruction /// \param state is the desired parse state. /// \return the parse tree object (ParseContext) -ParserContext *Sleigh::obtainContext(const Address &addr,int4 state) const +ParserContext *Sleigh::obtainContext(const Address &addr,ParserContext::parse_state state) const { ParserContext *pos = discache->getParserContext(addr); - int4 curstate = pos->getParserState(); + ParserContext::parse_state curstate = pos->getParserState(); if (curstate >= state) return pos; if (curstate == ParserContext::uninitialized) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/sleigh.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/sleigh.hh index 32ef8565ef..1c6ebd72c0 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/sleigh.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/sleigh.hh @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -167,7 +167,7 @@ class Sleigh : public SleighBase { mutable PcodeCacher pcode_cache; ///< Cache of p-code data just prior to emitting void clearForDelete(void); ///< Delete the context and disassembly caches protected: - ParserContext *obtainContext(const Address &addr,int4 state) const; + ParserContext *obtainContext(const Address &addr,ParserContext::parse_state state) const; void resolve(ParserContext &pos) const; ///< Generate a parse tree suitable for disassembly void resolveHandles(ParserContext &pos) const; ///< Prepare the parse tree for p-code generation public: diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc index 75bebffcb0..451a9d8243 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc @@ -180,30 +180,35 @@ SubtableSymbol *WithBlock::getCurrentSubtable(const list &stack) return (SubtableSymbol *)0; } -void ConsistencyChecker::OptimizeRecord::copyFromExcludingSize(ConsistencyChecker::OptimizeRecord &that) +/// \param records is the list of records to merge +ConsistencyChecker::OptimizeRecord::OptimizeRecord(vector &records) { - this->writeop = that.writeop; - this->readop = that.readop; - this->inslot = that.inslot; - this->writecount = that.writecount; - this->readcount = that.readcount; - this->writesection = that.writesection; - this->readsection = that.readsection; - this->opttype = that.opttype; -} + uintb minOff = -1; + uintb maxOff = -1; + vector::iterator iter; -void ConsistencyChecker::OptimizeRecord::update(int4 opIdx, int4 slotIdx, int4 secNum) - -{ - if (slotIdx >= 0) { - updateRead(opIdx, slotIdx, secNum); + for (iter = records.begin(); iter != records.end(); ++iter) { + if (minOff == -1 || (*iter)->offset < minOff) { + minOff = (*iter)->offset; + } + if (maxOff == -1 || (*iter)->offset + (*iter)->size > maxOff) { + maxOff = (*iter)->offset + (*iter)->size; + } } 
- else { - updateWrite(opIdx, secNum); + + offset = minOff; + size = maxOff - minOff; + writeop = -1; readop = -1; inslot=-1; writecount=0; readcount=0; writesection=-2; readsection=-2; opttype=-1; + + for (iter = records.begin(); iter != records.end(); ++iter) { + updateCombine(**iter); } } +/// \param i is the index of the op reading the range +/// \param inslot is the input slot of the op reading the range +/// \param secNum is the constructor section number of the op void ConsistencyChecker::OptimizeRecord::updateRead(int4 i, int4 inslot, int4 secNum) { @@ -213,6 +218,8 @@ void ConsistencyChecker::OptimizeRecord::updateRead(int4 i, int4 inslot, int4 se this->readsection = secNum; } +/// \param i is the index of the op writing to the range +/// \param secNum is the constructor section number of the op void ConsistencyChecker::OptimizeRecord::updateWrite(int4 i, int4 secNum) { @@ -221,17 +228,18 @@ void ConsistencyChecker::OptimizeRecord::updateWrite(int4 i, int4 secNum) this->writesection = secNum; } -void ConsistencyChecker::OptimizeRecord::updateExport() +void ConsistencyChecker::OptimizeRecord::updateExport(void) { this->writeop = 0; this->readop = 0; - this->writecount = 2; + this->writecount = 2; // Simulate a high count so the register cannot be optimized away this->readcount = 2; this->readsection = -2; this->writesection = -2; } +/// \param that is the other record to pull read/write info from void ConsistencyChecker::OptimizeRecord::updateCombine(ConsistencyChecker::OptimizeRecord &that) { @@ -1195,6 +1203,8 @@ void ConsistencyChecker::setPostOrder(SubtableSymbol *root) } } +/// \param offset is the given offset +/// \return an iterator to the last record before \b offset or end() if no records come before map::iterator ConsistencyChecker::UniqueState::lesserIter(uintb offset) { @@ -1209,42 +1219,22 @@ map::iterator ConsistencyChecker::Uniq return std::prev(iter); } -ConsistencyChecker::OptimizeRecord ConsistencyChecker::UniqueState::coalesce(vector 
&records) - -{ - uintb minOff = -1; - uintb maxOff = -1; - vector::iterator iter; - - for (iter = records.begin(); iter != records.end(); ++iter) { - if (minOff == -1 || (*iter)->offset < minOff) { - minOff = (*iter)->offset; - } - if (maxOff == -1 || (*iter)->offset + (*iter)->size > maxOff) { - maxOff = (*iter)->offset + (*iter)->size; - } - } - - OptimizeRecord result(minOff, maxOff - minOff); - - for (iter = records.begin(); iter != records.end(); ++iter) { - result.updateCombine(**iter); - } - - return result; -} - -void ConsistencyChecker::UniqueState::set(uintb offset, int4 size, OptimizeRecord &rec) +/// Any overlaps with the new record are merged, maintaining a disjoint collection of records +/// \param rec is the record to add +void ConsistencyChecker::UniqueState::set(OptimizeRecord &rec) { vector records; - getDefinitions(records, offset, size); + getDefinitions(records, rec.offset, rec.size); records.push_back(&rec); - OptimizeRecord coalesced = coalesce(records); + OptimizeRecord coalesced(records); recs.erase(recs.lower_bound(coalesced.offset), recs.lower_bound(coalesced.offset+coalesced.size)); recs.insert(pair(coalesced.offset, coalesced)); } +/// \param result holds all the overlapping records +/// \param offset is the start of the given range +/// \param size is the number of bytes in the range void ConsistencyChecker::UniqueState::getDefinitions(vector &result, uintb offset, int4 size) { @@ -1396,7 +1386,7 @@ void ConsistencyChecker::examineVn(UniqueState &state, else { OptimizeRecord rec(offset,size); rec.updateWrite(i,secnum); - state.set(offset,size,rec); + state.set(rec); } } @@ -1750,13 +1740,6 @@ void ConsistencyChecker::optimizeAll(void) } } -ostream& operator<<(ostream &os, const ConsistencyChecker::OptimizeRecord &rec) { - os << "{writeop=" << rec.writeop << " readop=" << rec.readop << " inslot=" << rec.inslot << - " writecount=" << rec.writecount << " readcount=" << rec.readcount << - " opttype=" << rec.opttype << "}"; - return os; -} 
- /// Sort based on the containing Varnode, then on the bit boundary /// \param op2 is a field to compare with \b this /// \return \b true if \b this should be sorted before the other field diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.hh index a193c7dbdb..204008f3cb 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.hh @@ -158,30 +158,32 @@ public: mutable int4 opttype; ///< 0 = register read by a COPY, 1 = register written by a COPY (-1 otherwise) /// \brief Construct a record, initializing counts + /// + /// \param offset is the starting offset of the temporary range + /// \param size is the number of bytes in the range OptimizeRecord(uintb offset, int4 size) { this->offset = offset; this->size = size; writeop = -1; readop = -1; inslot=-1; writecount=0; readcount=0; writesection=-2; readsection=-2; opttype=-1; } - void copyFromExcludingSize(OptimizeRecord &that); - void update(int4 opIdx, int4 slotIdx, int4 secNum); - void updateRead(int4 i, int4 inslot, int4 secNum); - void updateWrite(int4 i, int4 secNum); - void updateExport(); - void updateCombine(OptimizeRecord &that); + OptimizeRecord(vector &records); ///< Constructor merging the given overlapping records into a single record + void updateRead(int4 i, int4 inslot, int4 secNum); ///< Mark \b this range as being read by a specific op + void updateWrite(int4 i, int4 secNum); ///< Mark \b this range as being written by a specific op + void updateExport(void); ///< Mark \b this range as exported from the constructor + void updateCombine(OptimizeRecord &that); ///< Merge another record's read and write references into \b this }; private: + /// \brief Container of OptimizeRecords for possibly overlapping temporary registers class UniqueState { - map recs; - static uintb endOf(map::iterator &iter) { return iter->first + iter->second.size; } 
- OptimizeRecord coalesce(vector &records); - map::iterator lesserIter(uintb offset); + map recs; ///< Map from unique space offset to OptimizeRecord + static uintb endOf(map::iterator &iter) { return iter->first + iter->second.size; } ///< Get ending offset record + map::iterator lesserIter(uintb offset); ///< Get last record less than the given offset public: - void clear(void) { recs.clear(); } - void set(uintb offset, int4 size, OptimizeRecord &rec); - void getDefinitions(vector &result, uintb offset, int4 size); - map::const_iterator begin(void) const { return recs.begin(); } - map::const_iterator end(void) const { return recs.end(); } + void clear(void) { recs.clear(); } ///< Clear all records + void set(OptimizeRecord &rec); ///< Add a new record to the collection + void getDefinitions(vector &result, uintb offset, int4 size); ///< Get any definitions overlapping the given range + map::const_iterator begin(void) const { return recs.begin(); } ///< Get starting iterator to all records + map::const_iterator end(void) const { return recs.end(); } ///< Get ending iterator to all records }; SleighCompile *compiler; ///< Parsed form of the SLEIGH file being examined @@ -480,8 +482,6 @@ public: int4 run_compilation(const string &filein,const string &fileout); }; -ostream& operator<<(ostream &os, const ConsistencyChecker::OptimizeRecord &rec); - extern SleighCompile *slgh; ///< A global reference to the SLEIGH compiler accessible to the parse functions extern int yydebug; ///< Debug state for the SLEIGH parse functions