GP-6554 Refactor ConstructState allocation in ParserContext

This commit is contained in:
caheckman
2026-03-06 23:37:48 +00:00
parent a7a795b335
commit 5328fa2c6d
8 changed files with 374 additions and 217 deletions

View File

@@ -457,7 +457,7 @@ RECURSIVE = NO
# excluded from the INPUT source files. This way you can easily exclude a
# subdirectory from a directory tree whose root is specified with the INPUT tag.
EXCLUDE = unify.hh unify.cc rulecompile.hh rulecompile.cc slghparse.cc slghparse.hh slghscan.cc slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc context.hh context.cc consolemain.cc sleighexample.cc xml.cc double.hh double.cc paramid.hh paramid.cc prefersplit.hh prefersplit.cc
EXCLUDE = unify.hh unify.cc rulecompile.hh rulecompile.cc slghparse.cc slghparse.hh slghscan.cc slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc consolemain.cc sleighexample.cc xml.cc double.hh double.cc paramid.hh paramid.cc prefersplit.hh prefersplit.cc
# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
# directories that are symbolic links (a Unix filesystem feature) are excluded

View File

@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,6 +19,39 @@
namespace ghidra {
/// The node starts detached: no matched constructor, no parent and no array of child pointers.
/// The \b hand field is not initialized here.
ConstructState::ConstructState(void)
  : ct(nullptr), resolve(nullptr), parent(nullptr), length(0), offset(0)
{
}
/// Storage for the child pointers is allocated up front and every slot starts out null.
/// \param numOperands is maximum number of children this node can have
ConstructState::ConstructState(int4 numOperands)
  : ct(nullptr),
    resolve(new ConstructState *[numOperands]()),	// Trailing () value-initializes each pointer to null
    parent(nullptr), length(0), offset(0)
{
}
/// Only the array of child pointers is released; the child nodes it points to are not deleted here.
/// NOTE(review): no copy constructor or assignment is visible for this struct, so copying a node
/// that has a child array would free it twice -- confirm nodes are never copied.
ConstructState::~ConstructState(void)
{
  delete [] resolve;		// Deleting a null pointer is a no-op
}
/// \param ccache is the cache to use for formal context changes
/// \param trans is the parent parser
ParserContext::ParserContext(ContextCache *ccache,Translate *trans)
{
@@ -35,15 +68,25 @@ ParserContext::ParserContext(ContextCache *ccache,Translate *trans)
}
}
void ParserContext::initialize(int4 maxstate,int4 maxparam,AddrSpace *spc)
/// Release the local context array and every node held in \b state.
ParserContext::~ParserContext(void)
{
  delete [] context;		// Deleting a null pointer is a no-op
  for(ConstructState *node : state)
    delete node;
}
/// \param spc is the address space used for constants
/// \param maxstate is the number of nodes to allocate (initially)
void ParserContext::initialize(AddrSpace *spc,int4 maxstate)
{
const_space = spc;
state.resize(maxstate);
state[0].parent = (ConstructState *)0;
for(int4 i=0;i<maxstate;++i)
state[i].resolve.resize(maxparam);
base_state = &state[0];
state[i] = new ConstructState(MAX_OPERAND);
base_state = state[maxstate-1];
}
const Address &ParserContext::getN2addr(void) const
@@ -58,13 +101,17 @@ const Address &ParserContext::getN2addr(void) const
return n2addr;
}
/// Get bytes from the instruction stream into a packed value assuming a big endian encoding.
/// \param bytestart is the number of bytes to skip
/// \param size is the number of bytes to pack
/// \param off is the number of bytes in the instruction already read
/// \return the packed bytes from the instruction
uintm ParserContext::getInstructionBytes(int4 bytestart,int4 size,uint4 off) const
{ // Get bytes from the instruction stream into a intm
// (assuming big endian format)
{
off += bytestart;
if (off >=16)
throw BadDataError("Instruction is using more than 16 bytes");
if (off >= MAX_INSTRUCTION_LEN)
throw BadDataError("Instruction is using more than " + to_string(MAX_INSTRUCTION_LEN) + " bytes");
const uint1 *ptr = buf + off;
uintm res = 0;
for(int4 i=0;i<size;++i) {
@@ -74,12 +121,17 @@ uintm ParserContext::getInstructionBytes(int4 bytestart,int4 size,uint4 off) con
return res;
}
/// Get bits from the instruction stream assuming big endian encoding.
/// \param startbit is the offset of the first bit (within the instruction stream)
/// \param size is the number of bits to grab
/// \param off is the number of bytes in the instruction already read
/// \return the requested range of bits (in the least significant positions and padded out with zero bits)
uintm ParserContext::getInstructionBits(int4 startbit,int4 size,uint4 off) const
{
off += (startbit/8);
if (off >= 16)
throw BadDataError("Instruction is using more than 16 bytes");
if (off >= MAX_INSTRUCTION_LEN)
throw BadDataError("Instruction is using more than " + to_string(MAX_INSTRUCTION_LEN) + " bytes");
const uint1 *ptr = buf + off;
startbit = startbit % 8;
int4 bytesize = (startbit+size-1)/8 + 1;
@@ -93,9 +145,12 @@ uintm ParserContext::getInstructionBits(int4 startbit,int4 size,uint4 off) const
return res;
}
/// \param bytestart is the offset of the first byte to grab
/// \param size is the number of bytes to grab
/// \return the context bytes in a packed value
uintm ParserContext::getContextBytes(int4 bytestart,int4 size) const
{ // Get bytes from context into a uintm
{
int4 intstart = bytestart / sizeof(uintm);
uintm res = context[ intstart ];
int4 byteOffset = bytestart % sizeof(uintm);
@@ -112,6 +167,9 @@ uintm ParserContext::getContextBytes(int4 bytestart,int4 size) const
return res;
}
/// \param startbit is the offset of the first bit
/// \param size is the number of bits to return
/// \return the requested range of bits (in the least significant positions and padded out with zero bits)
uintm ParserContext::getContextBits(int4 startbit,int4 size) const
{
@@ -131,6 +189,11 @@ uintm ParserContext::getContextBits(int4 startbit,int4 size) const
return res;
}
/// \param sym is a symbol that resolves to the address where the setting takes effect
/// \param num is the index of the context word being affected
/// \param mask indicates the bits within the context word that are affected
/// \param flow is \b true if the context change \e flows forward from the point where it is set
/// \param point is the parse point where the change was made
void ParserContext::addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point)
{
@@ -191,9 +254,29 @@ void ParserContext::applyCommits(void)
}
}
/// This can be called in the middle of a parse to accommodate larger constructor trees.
/// The new nodes are placed at the front of \b state, so nodes already present move to higher
/// indices, and \b alloc is advanced by the same amount.
/// \param amount is the number of additional nodes to add
void ParserContext::expandState(int4 amount)
{
  state.insert(state.begin(),amount,(ConstructState *)0);	// Open up empty slots at the front
  int4 slot = 0;
  while(slot < amount) {
    state[slot] = new ConstructState(MAX_OPERAND);
    slot += 1;
  }
  alloc += amount;
}
/// \brief Initialize \b this from another walker assuming a given constructor and operand is the current position in the walk
///
/// The constructor tree state is simulated using only a single provided node.
/// This allows TokenField to behave as if it were just parsed so its getValue() will return the correct value.
/// \param ct is the given constructor
/// \param index is the index of the operand
/// \param tempstate is provided storage used to simulate the mid-walk tree node
/// \param otherwalker is the walker with the complete parse state
void ParserWalker::setOutOfBandState(Constructor *ct,int4 index,ConstructState *tempstate,const ParserWalker &otherwalker)
{ // Initialize walker for future calls into getInstructionBytes assuming -ct- is the current position in the walk
{
const ConstructState *pt = otherwalker.point;
int4 curdepth = otherwalker.depth;
while(pt->ct != ct) {
@@ -220,17 +303,18 @@ void ParserWalker::setOutOfBandState(Constructor *ct,int4 index,ConstructState *
breadcrumb[0] = 0;
}
/// This assumes all the current node's operands have been parsed into the tree.
/// \param length is the minimum length of the current constructor
/// \param numopers is the number of operands
void ParserWalkerChange::calcCurrentLength(int4 length,int4 numopers)
{ // Calculate the length of the current constructor
// state assuming all its operands are constructed
{
length += point->offset; // Convert relative length to absolute length
for(int4 i=0;i<numopers;++i) {
ConstructState *subpoint = point->resolve[i];
int4 sublength = subpoint->length + subpoint->offset;
// Since subpoint->offset is an absolute offset
// (relative to beginning of instruction) sublength
if (sublength > length) // is absolute and must be compared to absolute length
// Since subpoint->offset is an absolute offset (relative to beginning of instruction)
if (sublength > length) // sublength is absolute and must be compared to absolute length
length = sublength;
}
point->length = length - point->offset; // Convert back to relative length

View File

@@ -4,15 +4,17 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// \file context.hh
/// \brief Objects for describing the context around the parsing of an instruction by the SLEIGH engine
#ifndef __CONTEXT_HH__
#define __CONTEXT_HH__
@@ -21,184 +23,272 @@
namespace ghidra {
class Token { // A multiple-byte sized chunk of pattern in a bitstream
string name;
int4 size; // Number of bytes in token;
int4 index; // Index of this token, for resolving offsets
bool bigendian;
/// \brief A multiple-byte sized chunk of pattern in the instruction byte stream
class Token {
string name; ///< Name of the token
int4 size; ///< Number of bytes in token
int4 index; ///< Index of \b this token, for resolving offsets
bool bigendian; ///< Set to \b true if encodings within \b this token are big endian
public:
Token(const string &nm,int4 sz,bool be,int4 ind) : name(nm) { size = sz; bigendian=be; index = ind; }
int4 getSize(void) const { return size; }
bool isBigEndian(void) const { return bigendian; }
int4 getIndex(void) const { return index; }
const string &getName(void) const { return name; }
Token(const string &nm,int4 sz,bool be,int4 ind) : name(nm) { size = sz; bigendian=be; index = ind; } ///< Constructor
int4 getSize(void) const { return size; } ///< Get the size in bytes
bool isBigEndian(void) const { return bigendian; } ///< Return \b true if encodings within \b this are big endian
int4 getIndex(void) const { return index; } ///< Get the index associated with \b this token
const string &getName(void) const { return name; } ///< Get the name of the token
};
struct FixedHandle { // A handle that is fully resolved
AddrSpace *space;
uint4 size;
AddrSpace *offset_space; // Either null or where dynamic offset is stored
uintb offset_offset; // Either static offset or ptr offset
uint4 offset_size; // Size of pointer
AddrSpace *temp_space; // Consistent temporary location for value
uintb temp_offset;
/// \brief A resolved version of (or pointer to) a SLEIGH defined Varnode
///
/// For a static Varnode, this is the triple (address space, offset, size) for the Varnode.
/// For a dynamic Varnode, this also encodes the pointer Varnode containing the dynamic offset
/// and a temporary storage location for the dereferenced value.
struct FixedHandle {
AddrSpace *space; ///< The address space of the Varnode
uint4 size; ///< Number of bytes in the Varnode
AddrSpace *offset_space; ///< Null \e or the space where the dynamic offset is stored
uintb offset_offset; ///< The offset for the static Varnode \e or the offset for the pointer
uint4 offset_size; ///< Size of pointer
AddrSpace *temp_space; ///< Address space for temporary location for value
uintb temp_offset; ///< Offset of the temporary location
};
class Constructor;
/// \brief A node in a tree of subconstructors
///
/// This knows its position in the tree (parent node, child nodes) and the underlying SLEIGH constructor that was matched.
/// Child nodes correspond to the operands for the specific constructor.
struct ConstructState {
Constructor *ct;
FixedHandle hand;
vector<ConstructState *> resolve;
ConstructState *parent;
int4 length; // Length of this instantiation of the constructor
uint4 offset; // Absolute offset (from start of instruction)
Constructor *ct; ///< The matched Constructor
FixedHandle hand; ///< Resolved Varnode associated with the Constructor
ConstructState **resolve; ///< An array of pointers to child nodes
ConstructState *parent; ///< Pointer to parent node
int4 length; ///< Length of this instantiation of the constructor
uint4 offset; ///< Absolute offset (from start of instruction)
ConstructState(void); ///< Construct a node with no children
ConstructState(int4 numOperands); ///< Construct a node with given number of possible children
~ConstructState(void); ///< Destructor
};
class TripleSymbol;
struct ContextSet { // Instructions for setting a global context value
TripleSymbol *sym; // Resolves to address where setting takes effect
ConstructState *point; // Point at which context set was made
int4 num; // Number of context word affected
uintm mask; // Bits within word affected
uintm value; // New setting for bits
bool flow; // Does the new context flow from its set point
/// \brief Command for globally setting a formal SLEIGH context value
struct ContextSet {
TripleSymbol *sym; ///< Symbol resolving to address where setting takes effect
ConstructState *point; ///< Point at which context set was made
int4 num; ///< Index of the specific context word affected
uintm mask; ///< Bits within word affected
uintm value; ///< New setting for bits
bool flow; ///< Does the new context flow from its set point
};
class ParserWalker; // Forward declaration
class ParserWalkerChange;
class Translate;
/// \brief Context maintained while parsing a single instruction
///
/// This contains:
/// - the bytes encoding the instruction
/// - the tree structure of the SLEIGH Constructors encountered while parsing the instruction
/// - any formal named SLEIGH context values referenced by the instruction
class ParserContext {
friend class ParserWalker;
friend class ParserWalkerChange;
public:
enum { // Possible states of the ParserContext
uninitialized = 0, // Instruction has not been parsed at all
disassembly = 1, // Instruction is parsed in preparation for disassembly
pcode = 2 // Instruction is parsed in preparation for generating p-code
static constexpr int4 MAX_DEPTH = 32; ///< Maximum subconstructor depth in a single instruction
static constexpr int4 MAX_OPERAND = 20; ///< Maximum operands for a single constructor
static constexpr int4 MAX_INSTRUCTION_LEN = 16; ///< Maximum number of bytes in a single instruction
static constexpr int4 INITIAL_STATE_NUM = 64; ///< Recommended number of initial states
static constexpr int4 STATE_GROWTH = 64; ///< Number of states to add for each expansion
/// \brief Possible states of the ParserContext
enum parse_state {
uninitialized = 0, ///< Instruction has not been parsed at all
disassembly = 1, ///< Instruction is parsed in preparation for disassembly
pcode = 2 ///< Instruction is parsed in preparation for generating p-code
};
private:
Translate *translate; // Instruction parser
int4 parsestate;
AddrSpace *const_space;
uint1 buf[16]; // Buffer of bytes in the instruction stream
uintm *context; // Pointer to local context
int4 contextsize; // Number of entries in context array
ContextCache *contcache; // Interface for getting/setting context
vector<ContextSet> contextcommit;
Address addr; // Address of start of instruction
Address naddr; // Address of next instruction
mutable Address n2addr; // Address of instruction after the next
Address calladdr; // For injections, this is the address of the call being overridden
vector<ConstructState> state; // Current resolved instruction
ConstructState *base_state;
int4 alloc; // Number of ConstructState's allocated
int4 delayslot; // delayslot depth
Translate *translate; ///< The parent instruction parser
parse_state parsestate; ///< Overall state of the parse
AddrSpace *const_space; ///< Address space for constants
uint1 buf[MAX_INSTRUCTION_LEN]; ///< Buffer of bytes in the instruction stream
uintm *context; ///< Pointer to local context
int4 contextsize; ///< Number of entries in local context array
ContextCache *contcache; ///< Interface for getting/setting context
vector<ContextSet> contextcommit; ///< Changes to SLEIGH context slated by this instruction
Address addr; ///< Address of start of instruction
Address naddr; ///< Address of next instruction
mutable Address n2addr; ///< Address of instruction after the next
Address calladdr; ///< For injections, this is the address of the call being overridden
vector<ConstructState *> state; ///< Available nodes for the constructor tree
ConstructState *base_state; ///< Root node of the constructor tree
int4 alloc; ///< Index of the next available ConstructState node in \b state (negative when none remain)
int4 delayslot; ///< delayslot depth
public:
ParserContext(ContextCache *ccache,Translate *trans);
~ParserContext(void) { if (context != (uintm *)0) delete [] context; }
uint1 *getBuffer(void) { return buf; }
void initialize(int4 maxstate,int4 maxparam,AddrSpace *spc);
int4 getParserState(void) const { return parsestate; }
void setParserState(int4 st) { parsestate = st; }
void deallocateState(ParserWalkerChange &walker);
void allocateOperand(int4 i,ParserWalkerChange &walker);
void setAddr(const Address &ad) { addr = ad; n2addr = Address(); }
void setNaddr(const Address &ad) { naddr = ad; }
void setCalladdr(const Address &ad) { calladdr = ad; }
void addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point);
void clearCommits(void) { contextcommit.clear(); }
void applyCommits(void);
const Address &getAddr(void) const { return addr; }
const Address &getNaddr(void) const { return naddr; }
const Address &getN2addr(void) const;
const Address &getDestAddr(void) const { return calladdr; }
const Address &getRefAddr(void) const { return calladdr; }
AddrSpace *getCurSpace(void) const { return addr.getSpace(); }
AddrSpace *getConstSpace(void) const { return const_space; }
uintm getInstructionBytes(int4 byteoff,int4 numbytes,uint4 off) const;
uintm getContextBytes(int4 byteoff,int4 numbytes) const;
uintm getInstructionBits(int4 startbit,int4 size,uint4 off) const;
uintm getContextBits(int4 startbit,int4 size) const;
void setContextWord(int4 i,uintm val,uintm mask) { context[i] = (context[i]&(~mask))|(mask&val); }
void loadContext(void) { contcache->getContext(addr,context); }
int4 getLength(void) const { return base_state->length; }
void setDelaySlot(int4 val) { delayslot = val; }
int4 getDelaySlot(void) const { return delayslot; }
ParserContext(ContextCache *ccache,Translate *trans); ///< Constructor
~ParserContext(void); ///< Destructor
uint1 *getBuffer(void) { return buf; } ///< Get bytes in the stream at the point this instruction is encoded
void initialize(AddrSpace *spc,int4 maxstate = INITIAL_STATE_NUM); ///< Preallocate nodes for constructor trees
parse_state getParserState(void) const { return parsestate; } ///< Get the overall state of the parse
void setParserState(parse_state st) { parsestate = st; } ///< Update the overall parse state
void deallocateState(ParserWalkerChange &walker); ///< Clear any existing constructor tree
void allocateOperand(int4 i,ParserWalkerChange &walker); ///< Allocate a new child node in the constructor tree
void setAddr(const Address &ad) { addr = ad; n2addr = Address(); } ///< Set the starting address of the instruction
void setNaddr(const Address &ad) { naddr = ad; } ///< Set the ending address of the instruction
void setCalladdr(const Address &ad) { calladdr = ad; } ///< Set the address of the call being overridden
void addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point); ///< Add a formal SLEIGH context change command
void clearCommits(void) { contextcommit.clear(); } ///< Clear all context commits
void applyCommits(void); ///< Apply any pending commits to the context cache
const Address &getAddr(void) const { return addr; } ///< Get the starting address of the current instruction
const Address &getNaddr(void) const { return naddr; } ///< Get the address of the next instruction
const Address &getN2addr(void) const; ///< Get the address of the instruction after the next
const Address &getDestAddr(void) const { return calladdr; } ///< Get the destination address (inst_dest) for the overridden call
const Address &getRefAddr(void) const { return calladdr; } ///< Get the reference address (inst_ref) for the p-code snippet
AddrSpace *getCurSpace(void) const { return addr.getSpace(); } ///< Get the address space of the current instruction
AddrSpace *getConstSpace(void) const { return const_space; } ///< Get the address space for constants
uintm getInstructionBytes(int4 byteoff,int4 numbytes,uint4 off) const; ///< Get the specified instruction bytes
uintm getContextBytes(int4 byteoff,int4 numbytes) const; ///< Get bytes from the local context
uintm getInstructionBits(int4 startbit,int4 size,uint4 off) const; ///< Get the specific range of bits from the instruction stream
uintm getContextBits(int4 startbit,int4 size) const; ///< Get the specific range of bits from the local context
void setContextWord(int4 i,uintm val,uintm mask) { context[i] = (context[i]&(~mask))|(mask&val); } ///< Modify a context word, using given mask and value
void loadContext(void) { contcache->getContext(addr,context); } ///< Pull context words associated with the starting address into the local array
int4 getLength(void) const { return base_state->length; } ///< Get the length of the current instruction
void setDelaySlot(int4 val) { delayslot = val; } ///< Set the number of instruction bytes in the delay slot
int4 getDelaySlot(void) const { return delayslot; } ///< Get the number of instruction bytes in the delay slot
void expandState(int4 amount); ///< Expand the number of available nodes for the constructor tree
};
class ParserWalker { // A class for walking the ParserContext
const ParserContext *const_context;
const ParserContext *cross_context;
/// \brief A class for walking the constructor tree (ParserContext)
class ParserWalker {
private:
const ParserContext *const_context; ///< Context for the main instruction parse
const ParserContext *cross_context; ///< Context for an additional instruction parse needed to resolve a \e crossbuild
protected:
ConstructState *point; // The current node being visited
int4 depth; // Depth of the current node
int4 breadcrumb[32]; // Path of operands from root
ConstructState *point; ///< The current node being visited
int4 depth; ///< Depth of the current node
int4 breadcrumb[ParserContext::MAX_DEPTH]; ///< Path of operands from root
public:
ParserWalker(const ParserContext *c) { const_context = c; cross_context = (const ParserContext *)0; }
ParserWalker(const ParserContext *c,const ParserContext *cross) { const_context = c; cross_context = cross; }
const ParserContext *getParserContext(void) const { return const_context; }
void baseState(void) { point = const_context->base_state; depth=0; breadcrumb[0] = 0; }
ParserWalker(const ParserContext *c) { const_context = c; cross_context = (const ParserContext *)0; } ///< Constructor
ParserWalker(const ParserContext *c,const ParserContext *cross) { const_context = c; cross_context = cross; } ///< Constructor for crossbuilds
const ParserContext *getParserContext(void) const { return const_context; } ///< Get the current context
void baseState(void) { point = const_context->base_state; depth=0; breadcrumb[0] = 0; } ///< Initialize for a new walk
void setOutOfBandState(Constructor *ct,int4 index,ConstructState *tempstate,const ParserWalker &otherwalker);
bool isState(void) const { return (point != (ConstructState *)0); }
void pushOperand(int4 i) { breadcrumb[depth++] = i+1; point = point->resolve[i]; breadcrumb[depth] = 0; }
void popOperand(void) { point = point->parent; depth-= 1; }
bool isState(void) const { return (point != (ConstructState *)0); } ///< Return \b true if there are more nodes to traverse
void pushOperand(int4 i); ///< Make the indicated child (operand) the current node
void popOperand(void) { point = point->parent; depth-= 1; } ///< Make the parent constructor the current node
uint4 getOffset(int4 i) const { if (i<0) return point->offset;
ConstructState *op=point->resolve[i]; return op->offset + op->length; }
Constructor *getConstructor(void) const { return point->ct; }
int4 getOperand(void) const { return breadcrumb[depth]; }
FixedHandle &getParentHandle(void) { return point->hand; }
const FixedHandle &getFixedHandle(int4 i) const { return point->resolve[i]->hand; }
AddrSpace *getCurSpace(void) const { return const_context->getCurSpace(); }
AddrSpace *getConstSpace(void) const { return const_context->getConstSpace(); }
ConstructState *op=point->resolve[i]; return op->offset + op->length; } ///< Get the byte offset of the indicated operand within the instruction stream
Constructor *getConstructor(void) const { return point->ct; } ///< Get the current constructor
int4 getOperand(void) const { return breadcrumb[depth]; } ///< Get the operand index of the next constructor in the walk
FixedHandle &getParentHandle(void) { return point->hand; } ///< Get the resolved value associated with the current constructor
const FixedHandle &getFixedHandle(int4 i) const { return point->resolve[i]->hand; } ///< Get the resolved value associated with the indicated child operand
AddrSpace *getCurSpace(void) const { return const_context->getCurSpace(); } ///< Get the address space associated with the instruction stream
AddrSpace *getConstSpace(void) const { return const_context->getConstSpace(); } ///< Get the constant address space
/// \brief Get the starting address of the instruction
const Address &getAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getAddr(); } return const_context->getAddr(); }
/// \brief Get the address of the next instruction
const Address &getNaddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getNaddr();} return const_context->getNaddr(); }
/// \brief Get the address of the instruction after next
const Address &getN2addr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getN2addr();} return const_context->getN2addr(); }
/// \brief Get the reference address (inst_ref) for the p-code snippet
const Address &getRefAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getRefAddr();} return const_context->getRefAddr(); }
/// \brief Get the destination address (inst_dest) for the overridden call
const Address &getDestAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getDestAddr();} return const_context->getDestAddr(); }
int4 getLength(void) const { return const_context->getLength(); }
int4 getLength(void) const { return const_context->getLength(); } ///< Get the length of the instruction in bytes
/// \brief Get packed instruction bytes associated with the current constructor
///
/// \param byteoff is an offset from the starting point associated with the constructor
/// \param numbytes is the number of bytes to pack
/// \return the packed instruction bytes in big endian encoding
uintm getInstructionBytes(int4 byteoff,int4 numbytes) const {
return const_context->getInstructionBytes(byteoff,numbytes,point->offset); }
/// \brief Get packed context bytes from the local context
///
/// \param byteoff is the offset of the first byte to grab
/// \param numbytes is the number of bytes to grab
/// \return the context bytes in a packed value
uintm getContextBytes(int4 byteoff,int4 numbytes) const {
return const_context->getContextBytes(byteoff,numbytes); }
/// \brief Get bits from the instruction stream associated with the current constructor
///
/// \param startbit is the offset of the first bit (relative to the starting point associated with the constructor)
/// \param size is the number of bits to grab
/// \return the requested range of bits (in the least significant positions and padded out with zero bits)
uintm getInstructionBits(int4 startbit,int4 size) const {
return const_context->getInstructionBits(startbit,size,point->offset); }
uintm getContextBits(int4 startbit,int4 size) const {
/// \brief Get a range of bits from the local context
///
/// \param startbit is the offset of the first bit
/// \param size is the number of bits to return
/// \return the requested range of bits (in the least significant positions and padded out with zero bits)
uintm getContextBits(int4 startbit,int4 size) const {
return const_context->getContextBits(startbit,size); }
};
class ParserWalkerChange : public ParserWalker { // Extension to walker that allows for on the fly modifications to tree
/// \brief A walker extension that allows for on the fly modifications to the constructor tree
///
/// This is used to build the constructor tree as the instruction is parsed (Sleigh::resolve)
class ParserWalkerChange : public ParserWalker {
friend class ParserContext;
ParserContext *context;
ParserContext *context; ///< The (currently active) context
public:
ParserWalkerChange(ParserContext *c) : ParserWalker(c) { context = c; }
ParserContext *getParserContext(void) { return context; }
ConstructState *getPoint(void) { return point; }
void setOffset(uint4 off) { point->offset = off; }
void setConstructor(Constructor *c) { point->ct = c; }
void setCurrentLength(int4 len) { point->length = len; }
void calcCurrentLength(int4 length,int4 numopers);
ParserWalkerChange(ParserContext *c) : ParserWalker(c) { context = c; } ///< Constructor
ParserContext *getParserContext(void) { return context; } ///< Get the currently active context
ConstructState *getPoint(void) { return point; } ///< Get the current node in the constructor tree
void setOffset(uint4 off) { point->offset = off; } ///< Set the absolute offset (from start of instruction) of the current node
void setConstructor(Constructor *c) { point->ct = c; } ///< Set the underlying Constructor for the current node
void setCurrentLength(int4 len) { point->length = len; } ///< Set the length associated with the current constructor
void calcCurrentLength(int4 length,int4 numopers); ///< Calculate the length of the current constructor
};
/// \brief Exception thrown by the SLEIGH engine
struct SleighError : public LowlevelError {
SleighError(const string &s) : LowlevelError(s) {}
SleighError(const string &s) : LowlevelError(s) {} ///< Constructor
};
/// The tree is reset to a single root node and the walker is prepared for a new parse
/// \param walker is the walker to initialize for a traversal
inline void ParserContext::deallocateState(ParserWalkerChange &walker) {
alloc = 1;
alloc = state.size() - 2; // Number of allocations left
walker.context=this;
walker.baseState();
}
/// The next available node is linked to the current active node in the walker at the given operand index.
/// The child node becomes the new active node for the walker. The underlying constructor is not yet assigned.
/// A LowlevelError is thrown if \b i is not below MAX_OPERAND or if the walker depth exceeds MAX_DEPTH-2.
/// \param i is the operand index of the new child
/// \param walker is the walker for the parse
inline void ParserContext::allocateOperand(int4 i,ParserWalkerChange &walker) {
ConstructState *opstate = &state[alloc++];
if (i >= MAX_OPERAND)
throw LowlevelError("SLEIGH parser out of state space");
if (alloc < 0)
expandState(STATE_GROWTH);
ConstructState *opstate = state[alloc--];
opstate->parent = walker.point; // Child points back at the currently active node
opstate->ct = (Constructor *)0; // No constructor has been chosen for the child yet
walker.point->resolve[i] = opstate; // Install the child as operand i of the active node
if (walker.depth > MAX_DEPTH-2)
throw LowlevelError("SLEIGH exceeded maximum parse depth");
walker.breadcrumb[walker.depth++] += 1;
walker.point = opstate; // Descend: the child is now the active node
walker.breadcrumb[walker.depth] = 0; // The new level's breadcrumb starts at zero
}
/// Step down from the current node to one of its children, recording the step in the breadcrumb array.
/// A LowlevelError is thrown if the walker depth already exceeds ParserContext::MAX_DEPTH-2.
/// \param i is the index of child/operand
inline void ParserWalker::pushOperand(int4 i) {
  if (ParserContext::MAX_DEPTH-2 < depth)
    throw LowlevelError("SLEIGH exceeded maximum parse depth");
  breadcrumb[depth] = i+1;		// Record i+1 at the level being left
  depth += 1;
  point = point->resolve[i];		// The child becomes the current node
  breadcrumb[depth] = 0;		// Fresh level starts at zero
}
} // End namespace ghidra
#endif

View File

@@ -382,7 +382,7 @@ void PcodeInjectLibrarySleigh::parseInject(InjectPayload *payload)
}
if (contextCache.pos == (ParserContext *)0) { // Make sure we have a context
contextCache.pos = new ParserContext((ContextCache *)0,(Translate *)0);
contextCache.pos->initialize(8,8,slgh->getConstantSpace());
contextCache.pos->initialize(slgh->getConstantSpace(),8);
}
PcodeSnippet compiler(slgh);
// compiler.clear(); // Not necessary unless we reuse

View File

@@ -453,7 +453,7 @@ void DisassemblyCache::initialize(int4 min,int4 hashsize)
hashtable = new ParserContext *[hashsize];
for(int4 i=0;i<minimumreuse;++i) {
ParserContext *pos = new ParserContext(contextcache,translate);
pos->initialize(75,20,constspace);
pos->initialize(constspace);
list[i] = pos;
}
ParserContext *pos = list[0];
@@ -587,11 +587,11 @@ void Sleigh::initialize(DocumentStorage &store)
/// \param addr is the given address of the instruction
/// \param state is the desired parse state.
/// \return the parse tree object (ParserContext)
ParserContext *Sleigh::obtainContext(const Address &addr,int4 state) const
ParserContext *Sleigh::obtainContext(const Address &addr,ParserContext::parse_state state) const
{
ParserContext *pos = discache->getParserContext(addr);
int4 curstate = pos->getParserState();
ParserContext::parse_state curstate = pos->getParserState();
if (curstate >= state)
return pos;
if (curstate == ParserContext::uninitialized) {

View File

@@ -4,9 +4,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -167,7 +167,7 @@ class Sleigh : public SleighBase {
mutable PcodeCacher pcode_cache; ///< Cache of p-code data just prior to emitting
void clearForDelete(void); ///< Delete the context and disassembly caches
protected:
ParserContext *obtainContext(const Address &addr,int4 state) const;
ParserContext *obtainContext(const Address &addr,ParserContext::parse_state state) const;
void resolve(ParserContext &pos) const; ///< Generate a parse tree suitable for disassembly
void resolveHandles(ParserContext &pos) const; ///< Prepare the parse tree for p-code generation
public:

View File

@@ -180,30 +180,35 @@ SubtableSymbol *WithBlock::getCurrentSubtable(const list<WithBlock> &stack)
return (SubtableSymbol *)0;
}
void ConsistencyChecker::OptimizeRecord::copyFromExcludingSize(ConsistencyChecker::OptimizeRecord &that)
/// \param records is the list of records to merge
ConsistencyChecker::OptimizeRecord::OptimizeRecord(vector<ConsistencyChecker::OptimizeRecord*> &records)
{
this->writeop = that.writeop;
this->readop = that.readop;
this->inslot = that.inslot;
this->writecount = that.writecount;
this->readcount = that.readcount;
this->writesection = that.writesection;
this->readsection = that.readsection;
this->opttype = that.opttype;
}
uintb minOff = -1;
uintb maxOff = -1;
vector<OptimizeRecord*>::iterator iter;
void ConsistencyChecker::OptimizeRecord::update(int4 opIdx, int4 slotIdx, int4 secNum)
{
if (slotIdx >= 0) {
updateRead(opIdx, slotIdx, secNum);
for (iter = records.begin(); iter != records.end(); ++iter) {
if (minOff == -1 || (*iter)->offset < minOff) {
minOff = (*iter)->offset;
}
if (maxOff == -1 || (*iter)->offset + (*iter)->size > maxOff) {
maxOff = (*iter)->offset + (*iter)->size;
}
}
else {
updateWrite(opIdx, secNum);
offset = minOff;
size = maxOff - minOff;
writeop = -1; readop = -1; inslot=-1; writecount=0; readcount=0; writesection=-2; readsection=-2; opttype=-1;
for (iter = records.begin(); iter != records.end(); ++iter) {
updateCombine(**iter);
}
}
/// \param i is the index of the op reading the range
/// \param inslot is the input slot of the op reading the range
/// \param secNum is the constructor section number of the op
void ConsistencyChecker::OptimizeRecord::updateRead(int4 i, int4 inslot, int4 secNum)
{
@@ -213,6 +218,8 @@ void ConsistencyChecker::OptimizeRecord::updateRead(int4 i, int4 inslot, int4 se
this->readsection = secNum;
}
/// \param i is the index of the op writing to the range
/// \param secNum is the constructor section number of the op
void ConsistencyChecker::OptimizeRecord::updateWrite(int4 i, int4 secNum)
{
@@ -221,17 +228,18 @@ void ConsistencyChecker::OptimizeRecord::updateWrite(int4 i, int4 secNum)
this->writesection = secNum;
}
void ConsistencyChecker::OptimizeRecord::updateExport()
void ConsistencyChecker::OptimizeRecord::updateExport(void)
{
this->writeop = 0;
this->readop = 0;
this->writecount = 2;
this->writecount = 2; // Simulate a high count so the register cannot be optimized away
this->readcount = 2;
this->readsection = -2;
this->writesection = -2;
}
/// \param that is the other record to pull read/write info from
void ConsistencyChecker::OptimizeRecord::updateCombine(ConsistencyChecker::OptimizeRecord &that)
{
@@ -1195,6 +1203,8 @@ void ConsistencyChecker::setPostOrder(SubtableSymbol *root)
}
}
/// \param offset is the given offset
/// \return an iterator to the last record before \b offset or end() if no records come before
map<uintb,ConsistencyChecker::OptimizeRecord>::iterator ConsistencyChecker::UniqueState::lesserIter(uintb offset)
{
@@ -1209,42 +1219,22 @@ map<uintb,ConsistencyChecker::OptimizeRecord>::iterator ConsistencyChecker::Uniq
return std::prev(iter);
}
ConsistencyChecker::OptimizeRecord ConsistencyChecker::UniqueState::coalesce(vector<ConsistencyChecker::OptimizeRecord*> &records)
{
uintb minOff = -1;
uintb maxOff = -1;
vector<OptimizeRecord*>::iterator iter;
for (iter = records.begin(); iter != records.end(); ++iter) {
if (minOff == -1 || (*iter)->offset < minOff) {
minOff = (*iter)->offset;
}
if (maxOff == -1 || (*iter)->offset + (*iter)->size > maxOff) {
maxOff = (*iter)->offset + (*iter)->size;
}
}
OptimizeRecord result(minOff, maxOff - minOff);
for (iter = records.begin(); iter != records.end(); ++iter) {
result.updateCombine(**iter);
}
return result;
}
void ConsistencyChecker::UniqueState::set(uintb offset, int4 size, OptimizeRecord &rec)
/// Any overlaps with the new record are merged, maintaining a disjoint collection of records
/// \param rec is the record to add
void ConsistencyChecker::UniqueState::set(OptimizeRecord &rec)
{
vector<OptimizeRecord*> records;
getDefinitions(records, offset, size);
getDefinitions(records, rec.offset, rec.size);
records.push_back(&rec);
OptimizeRecord coalesced = coalesce(records);
OptimizeRecord coalesced(records);
// Drop every existing record that starts inside the merged range before inserting the merged record
recs.erase(recs.lower_bound(coalesced.offset), recs.lower_bound(coalesced.offset+coalesced.size));
// The pair's key type matches the map's uintb key so the offset is not narrowed through uint4
recs.insert(pair<uintb,OptimizeRecord>(coalesced.offset, coalesced));
}
/// \param result holds all the overlapping records
/// \param offset is the start of the given range
/// \param size is the number of bytes in the range
void ConsistencyChecker::UniqueState::getDefinitions(vector<ConsistencyChecker::OptimizeRecord*> &result, uintb offset, int4 size)
{
@@ -1396,7 +1386,7 @@ void ConsistencyChecker::examineVn(UniqueState &state,
else {
OptimizeRecord rec(offset,size);
rec.updateWrite(i,secnum);
state.set(offset,size,rec);
state.set(rec);
}
}
@@ -1750,13 +1740,6 @@ void ConsistencyChecker::optimizeAll(void)
}
}
/// Write a one-line summary of the op references and counts held by a record
/// \param os is the stream to write to
/// \param rec is the record to print
/// \return the stream
ostream& operator<<(ostream &os, const ConsistencyChecker::OptimizeRecord &rec) {
  os << "{writeop=" << rec.writeop;
  os << " readop=" << rec.readop;
  os << " inslot=" << rec.inslot;
  os << " writecount=" << rec.writecount;
  os << " readcount=" << rec.readcount;
  os << " opttype=" << rec.opttype;
  os << "}";
  return os;
}
/// Sort based on the containing Varnode, then on the bit boundary
/// \param op2 is a field to compare with \b this
/// \return \b true if \b this should be sorted before the other field

View File

@@ -158,30 +158,32 @@ public:
mutable int4 opttype; ///< 0 = register read by a COPY, 1 = register written by a COPY (-1 otherwise)
/// \brief Build a record for a range with no read or write references yet
///
/// \param offset is the starting offset of the temporary range
/// \param size is the number of bytes in the range
OptimizeRecord(uintb offset, int4 size) {
  this->offset = offset;
  this->size = size;
  // Write side: no op, zero count, section -2
  writeop = -1;
  writecount = 0;
  writesection = -2;
  // Read side: no op or slot, zero count, section -2
  readop = -1;
  inslot = -1;
  readcount = 0;
  readsection = -2;
  opttype = -1;
}
void copyFromExcludingSize(OptimizeRecord &that);
void update(int4 opIdx, int4 slotIdx, int4 secNum);
void updateRead(int4 i, int4 inslot, int4 secNum);
void updateWrite(int4 i, int4 secNum);
void updateExport();
void updateCombine(OptimizeRecord &that);
OptimizeRecord(vector<OptimizeRecord *> &records); ///< Constructor merging the given overlapping records into a single record
void updateRead(int4 i, int4 inslot, int4 secNum); ///< Mark \b this range as being read by a specific op
void updateWrite(int4 i, int4 secNum); ///< Mark \b this range as being written by a specific op
void updateExport(void); ///< Mark \b this range as exported from the constructor
void updateCombine(OptimizeRecord &that); ///< Merge another record's read and write references into \b this
};
private:
/// \brief Container of OptimizeRecords for possibly overlapping temporary registers
class UniqueState {
map<uintb,OptimizeRecord> recs;
static uintb endOf(map<uintb,OptimizeRecord>::iterator &iter) { return iter->first + iter->second.size; }
OptimizeRecord coalesce(vector<OptimizeRecord*> &records);
map<uintb,OptimizeRecord>::iterator lesserIter(uintb offset);
map<uintb,OptimizeRecord> recs; ///< Map from unique space offset to OptimizeRecord
static uintb endOf(map<uintb,OptimizeRecord>::iterator &iter) { return iter->first + iter->second.size; } ///< Get the offset just past the end of a record
map<uintb,OptimizeRecord>::iterator lesserIter(uintb offset); ///< Get last record less than the given offset
public:
void clear(void) { recs.clear(); }
void set(uintb offset, int4 size, OptimizeRecord &rec);
void getDefinitions(vector<OptimizeRecord*> &result, uintb offset, int4 size);
map<uintb,OptimizeRecord>::const_iterator begin(void) const { return recs.begin(); }
map<uintb,OptimizeRecord>::const_iterator end(void) const { return recs.end(); }
void clear(void) { recs.clear(); } ///< Clear all records
void set(OptimizeRecord &rec); ///< Add a new record to the collection
void getDefinitions(vector<OptimizeRecord*> &result, uintb offset, int4 size); ///< Get any definitions overlapping the given range
map<uintb,OptimizeRecord>::const_iterator begin(void) const { return recs.begin(); } ///< Get starting iterator to all records
map<uintb,OptimizeRecord>::const_iterator end(void) const { return recs.end(); } ///< Get ending iterator to all records
};
SleighCompile *compiler; ///< Parsed form of the SLEIGH file being examined
@@ -480,8 +482,6 @@ public:
int4 run_compilation(const string &filein,const string &fileout);
};
ostream& operator<<(ostream &os, const ConsistencyChecker::OptimizeRecord &rec);
extern SleighCompile *slgh; ///< A global reference to the SLEIGH compiler accessible to the parse functions
extern int yydebug; ///< Debug state for the SLEIGH parse functions