GP-6554 Refactor ConstructState allocation in ParserContext

2026-05-09 20:18:01 +08:00 · 2026-03-06 23:37:48 +00:00
parent a7a795b335
commit 5328fa2c6d
8 changed files with 374 additions and 217 deletions
@@ -457,7 +457,7 @@ RECURSIVE              = NO
 # excluded from the INPUT source files. This way you can easily exclude a 
 # subdirectory from a directory tree whose root is specified with the INPUT tag.

-EXCLUDE                = unify.hh unify.cc rulecompile.hh rulecompile.cc slghparse.cc slghparse.hh slghscan.cc slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc context.hh context.cc consolemain.cc sleighexample.cc xml.cc double.hh double.cc paramid.hh paramid.cc prefersplit.hh prefersplit.cc
+EXCLUDE                = unify.hh unify.cc rulecompile.hh rulecompile.cc slghparse.cc slghparse.hh slghscan.cc slghpattern.hh slghpattern.cc slghpatexpress.hh slghpatexpress.cc slghsymbol.hh slghsymbol.cc codedata.hh codedata.cc semantics.hh semantics.cc grammar.hh grammar.cc callgraph.hh callgraph.cc filemanage.hh filemanage.cc graph.hh graph.cc loadimage_bfd.hh loadimage_bfd.cc pcodecompile.cc pcodecompile.hh pcodeparse.hh pcodeparse.cc consolemain.cc sleighexample.cc xml.cc double.hh double.cc paramid.hh paramid.cc prefersplit.hh prefersplit.cc

 # The EXCLUDE_SYMLINKS tag can be used select whether or not files or 
 # directories that are symbolic links (a Unix filesystem feature) are excluded 
@@ -4,9 +4,9 @@
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
- * 
+ *
 *      http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,6 +19,39 @@

 namespace ghidra {

+ConstructState::ConstructState(void)
+
+{
+  ct = (Constructor *)0;
+  resolve = (ConstructState **)0;
+  parent = (ConstructState *)0;
+  length = 0;
+  offset = 0;
+}
+
+/// The array holding pointers to child nodes is preallocated.
+/// \param numOperands is maximum number of children this node can have
+ConstructState::ConstructState(int4 numOperands)
+
+{
+  ct = (Constructor *)0;
+  parent = (ConstructState *)0;
+  length = 0;
+  offset = 0;
+  resolve = new ConstructState *[numOperands];
+  for(int4 i=0;i<numOperands;++i)
+    resolve[i] = (ConstructState *)0;
+}
+
+ConstructState::~ConstructState(void)
+
+{
+  if (resolve != (ConstructState **)0)
+    delete [] resolve;
+}
+
+/// \param ccache is the cache to use for formal context changes
+/// \param trans is the parent parser
 ParserContext::ParserContext(ContextCache *ccache,Translate *trans)

 {
@@ -35,15 +68,25 @@ ParserContext::ParserContext(ContextCache *ccache,Translate *trans)
  }
 }

-void ParserContext::initialize(int4 maxstate,int4 maxparam,AddrSpace *spc)
+ParserContext::~ParserContext(void)
+
+{
+  if (context != (uintm *)0)
+    delete [] context;
+  for(int4 i=0;i<state.size();++i)
+    delete state[i];
+}
+
+/// \param spc is the address space used for constants
+/// \param maxstate is the number of nodes to allocate (initially)
+void ParserContext::initialize(AddrSpace *spc,int4 maxstate)

 {
  const_space = spc;
  state.resize(maxstate);
-  state[0].parent = (ConstructState *)0;
  for(int4 i=0;i<maxstate;++i)
-    state[i].resolve.resize(maxparam);
-  base_state = &state[0];
+    state[i] = new ConstructState(MAX_OPERAND);
+  base_state = state[maxstate-1];
 }

 const Address &ParserContext::getN2addr(void) const
@@ -58,13 +101,17 @@ const Address &ParserContext::getN2addr(void) const
  return n2addr;
 }

+/// Get bytes from the instruction stream into a packed value assuming a big endian encoding.
+/// \param bytestart is the number of bytes to skip
+/// \param size is the number of bytes to pack
+/// \param off is the number of bytes in the instruction already read
+/// \return the packed bytes from the instruction
 uintm ParserContext::getInstructionBytes(int4 bytestart,int4 size,uint4 off) const

-{				// Get bytes from the instruction stream into a intm
-				// (assuming big endian format)
+{
  off += bytestart;
-  if (off >=16)
-    throw BadDataError("Instruction is using more than 16 bytes"); 
+  if (off >= MAX_INSTRUCTION_LEN)
+    throw BadDataError("Instruction is using more than " + to_string(MAX_INSTRUCTION_LEN) + " bytes");
  const uint1 *ptr = buf + off;
  uintm res = 0;
  for(int4 i=0;i<size;++i) {
@@ -74,12 +121,17 @@ uintm ParserContext::getInstructionBytes(int4 bytestart,int4 size,uint4 off) con
  return res;
 }

+/// Get bits from the instruction stream assuming big endian encoding.
+/// \param startbit is the offset of the first bit (within the instruction stream)
+/// \param size is the number of bits to grab
+/// \param off is the number of bytes in the instruction already read
+/// \return the requested range of bits (in the least significant positions and padded out with zero bits)
 uintm ParserContext::getInstructionBits(int4 startbit,int4 size,uint4 off) const

 {
  off += (startbit/8);
-  if (off >= 16)
-    throw BadDataError("Instruction is using more than 16 bytes");
+  if (off >= MAX_INSTRUCTION_LEN)
+    throw BadDataError("Instruction is using more than " + to_string(MAX_INSTRUCTION_LEN) + " bytes");
  const uint1 *ptr = buf + off;
  startbit = startbit % 8;
  int4 bytesize = (startbit+size-1)/8 + 1;
@@ -93,9 +145,12 @@ uintm ParserContext::getInstructionBits(int4 startbit,int4 size,uint4 off) const
  return res;
 }

+/// \param bytestart is the offset of the first byte to grab
+/// \param size is the number of bytes to grab
+/// \return the context bytes in a packed value
 uintm ParserContext::getContextBytes(int4 bytestart,int4 size) const

-{				// Get bytes from context into a uintm
+{
  int4 intstart = bytestart / sizeof(uintm);
  uintm res = context[ intstart ];
  int4 byteOffset = bytestart % sizeof(uintm);
@@ -112,6 +167,9 @@ uintm ParserContext::getContextBytes(int4 bytestart,int4 size) const
  return res;
 }

+/// \param startbit is the offset of the first bit
+/// \param size is the number of bits to return
+/// \return the requested range of bits (in the least significant positions and padded out with zero bits)
 uintm ParserContext::getContextBits(int4 startbit,int4 size) const

 {
@@ -131,6 +189,11 @@ uintm ParserContext::getContextBits(int4 startbit,int4 size) const
  return res;
 }

+/// \param sym is a symbol that resolves to the address where the setting takes effect
+/// \param num is the index of the context word being affected
+/// \param mask indicates the bits within the context word that are affected
+/// \param flow is \b true if the context change \e flows forward from the point where it is set
+/// \param point is the parse point where the change was made
 void ParserContext::addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point)

 {
@@ -191,9 +254,29 @@ void ParserContext::applyCommits(void)
  }
 }

+/// This can be called in the middle of a parse to accommodate larger constructor trees.
+/// \param amount is the number of additional nodes to add
+void ParserContext::expandState(int4 amount)
+
+{
+  state.insert(state.begin(),amount,(ConstructState *)0);
+  for(int4 i=0;i<amount;++i)
+    state[i] = new ConstructState(MAX_OPERAND);
+
+  alloc += amount;
+}
+
+/// \brief Initialize \b this from another walker assuming a given constructor and operand is the current position in the walk
+///
+/// The constructor tree state is simulated using only a single provided node.
+/// This allows TokenField to behave as if it were just parsed so its getValue() will return the correct value.
+/// \param ct is the given constructor
+/// \param index is the index of the operand
+/// \param tempstate is provided storage used to simulate the mid-walk tree node
+/// \param otherwalker is the walker with the complete parse state
 void ParserWalker::setOutOfBandState(Constructor *ct,int4 index,ConstructState *tempstate,const ParserWalker &otherwalker)

-{ // Initialize walker for future calls into getInstructionBytes assuming -ct- is the current position in the walk
+{
  const ConstructState *pt = otherwalker.point;
  int4 curdepth = otherwalker.depth;
  while(pt->ct != ct) {
@@ -220,17 +303,18 @@ void ParserWalker::setOutOfBandState(Constructor *ct,int4 index,ConstructState *
  breadcrumb[0] = 0;
 }

+/// This assumes all of the current node's operands have been parsed into the tree.
+/// \param length is the minimum length of the current constructor
+/// \param numopers is the number of operands
 void ParserWalkerChange::calcCurrentLength(int4 length,int4 numopers)

-{				// Calculate the length of the current constructor
-				// state assuming all its operands are constructed
+{
  length += point->offset;	// Convert relative length to absolute length
  for(int4 i=0;i<numopers;++i) {
    ConstructState *subpoint = point->resolve[i];
    int4 sublength = subpoint->length + subpoint->offset;
-				// Since subpoint->offset is an absolute offset
-				// (relative to beginning of instruction) sublength
-    if (sublength > length)	// is absolute and must be compared to absolute length
+				// Since subpoint->offset is an absolute offset (relative to beginning of instruction)
+    if (sublength > length)	// sublength is absolute and must be compared to absolute length
      length = sublength;
  }
  point->length = length - point->offset; // Convert back to relative length
@@ -4,15 +4,17 @@
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
- * 
+ *
 *      http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+/// \file context.hh
+/// \brief Objects for describing the context around the parsing of an instruction by the SLEIGH engine
 #ifndef __CONTEXT_HH__
 #define __CONTEXT_HH__

@@ -21,184 +23,272 @@

 namespace ghidra {

-class Token {			// A multiple-byte sized chunk of pattern in a bitstream
-  string name;
-  int4 size;			// Number of bytes in token;
-  int4 index;			// Index of this token, for resolving offsets
-  bool bigendian;
+/// \brief A multiple-byte sized chunk of pattern in the instruction byte stream
+class Token {
+  string name;			///< Name of the token
+  int4 size;			///< Number of bytes in token
+  int4 index;			///< Index of \b this token, for resolving offsets
+  bool bigendian;		///< Set to \b true if encodings within \b this token are big endian
 public:
-  Token(const string &nm,int4 sz,bool be,int4 ind) : name(nm) { size = sz; bigendian=be; index = ind; }
-  int4 getSize(void) const { return size; }
-  bool isBigEndian(void) const { return bigendian; }
-  int4 getIndex(void) const { return index; }
-  const string &getName(void) const { return name; }
+  Token(const string &nm,int4 sz,bool be,int4 ind) : name(nm) { size = sz; bigendian=be; index = ind; }	///< Constructor
+  int4 getSize(void) const { return size; }		///< Get the size in bytes
+  bool isBigEndian(void) const { return bigendian; }	///< Return \b true if encodings within \b this are big endian
+  int4 getIndex(void) const { return index; }		///< Get the index associated with \b this token
+  const string &getName(void) const { return name; }	///< Get the name of the token
 };

-struct FixedHandle {		// A handle that is fully resolved
-  AddrSpace *space;
-  uint4 size;
-  AddrSpace *offset_space;	// Either null or where dynamic offset is stored
-  uintb offset_offset;		// Either static offset or ptr offset
-  uint4 offset_size;		// Size of pointer
-  AddrSpace *temp_space;	// Consistent temporary location for value
-  uintb temp_offset;
+/// \brief A resolved version of (or pointer to) a SLEIGH defined Varnode
+///
+/// For a static Varnode, this is the triple  (address space, offset, size) for the Varnode.
+/// For a dynamic Varnode, this also encodes the pointer Varnode containing the dynamic offset
+/// and a temporary storage location for the dereferenced value.
+struct FixedHandle {
+  AddrSpace *space;		///< The address space of the Varnode
+  uint4 size;			///< Number of bytes in the Varnode
+  AddrSpace *offset_space;	///< Null \e or the space where the dynamic offset is stored
+  uintb offset_offset;		///< The offset for the static Varnode \e or the offset for the pointer
+  uint4 offset_size;		///< Size of pointer
+  AddrSpace *temp_space;	///< Address space for temporary location for value
+  uintb temp_offset;		///< Offset of the temporary location
 };

 class Constructor;
+
+/// \brief A node in a tree of subconstructors
+///
+/// This knows its position in the tree (parent node, child nodes) and the underlying SLEIGH constructor that was matched.
+/// Child nodes correspond to the operands for the specific constructor.
 struct ConstructState {
-  Constructor *ct;
-  FixedHandle hand;
-  vector<ConstructState *> resolve;
-  ConstructState *parent;
-  int4 length;			// Length of this instantiation of the constructor
-  uint4 offset;			// Absolute offset (from start of instruction)
+  Constructor *ct;		///< The matched Constructor
+  FixedHandle hand;		///< Resolved Varnode associated with the Constructor
+  ConstructState **resolve;	///< An array of pointers to child nodes
+  ConstructState *parent;	///< Pointer to parent node
+  int4 length;			///< Length of this instantiation of the constructor
+  uint4 offset;			///< Absolute offset (from start of instruction)
+  ConstructState(void);		///< Construct a node with no children
+  ConstructState(int4 numOperands);	///< Construct a node with given number of possible children
+  ~ConstructState(void);	///< Destructor
 };

 class TripleSymbol;
-struct ContextSet {		// Instructions for setting a global context value
-  TripleSymbol *sym;		// Resolves to address where setting takes effect
-  ConstructState *point;	// Point at which context set was made
-  int4 num;			// Number of context word affected
-  uintm mask;			// Bits within word affected
-  uintm value;			// New setting for bits
-  bool flow;			// Does the new context flow from its set point
+
+/// \brief Command for globally setting a formal SLEIGH context value
+struct ContextSet {
+  TripleSymbol *sym;		///< Symbol resolving to address where setting takes effect
+  ConstructState *point;	///< Point at which context set was made
+  int4 num;			///< Index of the specific context word affected
+  uintm mask;			///< Bits within word affected
+  uintm value;			///< New setting for bits
+  bool flow;			///< Does the new context flow from its set point
 };

 class ParserWalker;		// Forward declaration
 class ParserWalkerChange;
 class Translate;

+/// \brief Context maintained while parsing a single instruction
+///
+/// This contains:
+///   - the bytes encoding the instruction
+///   - the tree structure of the SLEIGH Constructors encountered while parsing the instruction
+///   - any formal named SLEIGH context values referenced by the instruction
 class ParserContext {
  friend class ParserWalker;
  friend class ParserWalkerChange;
 public:
-  enum {			// Possible states of the ParserContext
-    uninitialized = 0,		// Instruction has not been parsed at all
-    disassembly = 1,		// Instruction is parsed in preparation for disassembly
-    pcode = 2			// Instruction is parsed in preparation for generating p-code
+  static constexpr int4 MAX_DEPTH = 32;			///< Maximum subconstructor depth in a single instruction
+  static constexpr int4 MAX_OPERAND = 20;		///< Maximum operands for a single constructor
+  static constexpr int4 MAX_INSTRUCTION_LEN = 16;	///< Maximum number of bytes in a single instruction
+  static constexpr int4 INITIAL_STATE_NUM = 64;		///< Recommended number of initial states
+  static constexpr int4 STATE_GROWTH = 64;		///< Number of states to add for each expansion
+  /// \brief Possible states of the ParserContext
+  enum parse_state {
+    uninitialized = 0,		///< Instruction has not been parsed at all
+    disassembly = 1,		///< Instruction is parsed in preparation for disassembly
+    pcode = 2			///< Instruction is parsed in preparation for generating p-code
  };
 private:
-  Translate *translate;		// Instruction parser
-  int4 parsestate;
-  AddrSpace *const_space;
-  uint1 buf[16];		// Buffer of bytes in the instruction stream
-  uintm *context;		// Pointer to local context
-  int4 contextsize;		// Number of entries in context array
-  ContextCache *contcache;   // Interface for getting/setting context
-  vector<ContextSet> contextcommit;
-  Address addr;		// Address of start of instruction
-  Address naddr;		// Address of next instruction
-  mutable Address n2addr;	// Address of instruction after the next
-  Address calladdr;		// For injections, this is the address of the call being overridden
-  vector<ConstructState> state; // Current resolved instruction
-  ConstructState *base_state;
-  int4 alloc;			// Number of ConstructState's allocated
-  int4 delayslot;		// delayslot depth
+  Translate *translate;			///< The parent instruction parser
+  parse_state parsestate;		///< Overall state of the parse
+  AddrSpace *const_space;		///< Address space for constants
+  uint1 buf[MAX_INSTRUCTION_LEN];	///< Buffer of bytes in the instruction stream
+  uintm *context;			///< Pointer to local context
+  int4 contextsize;			///< Number of entries in local context array
+  ContextCache *contcache;   		///< Interface for getting/setting context
+  vector<ContextSet> contextcommit;	///< Changes to SLEIGH context slated by this instruction
+  Address addr;				///< Address of start of instruction
+  Address naddr;			///< Address of next instruction
+  mutable Address n2addr;		///< Address of instruction after the next
+  Address calladdr;			///< For injections, this is the address of the call being overridden
+  vector<ConstructState *> state; 	///< Available nodes for the constructor tree
+  ConstructState *base_state;		///< Root node of the constructor tree
+  int4 alloc;				///< Index of the next available ConstructState node (negative if none remain)
+  int4 delayslot;			///< delayslot depth
 public:
-  ParserContext(ContextCache *ccache,Translate *trans);
-  ~ParserContext(void) { if (context != (uintm *)0) delete [] context; }
-  uint1 *getBuffer(void) { return buf; }
-  void initialize(int4 maxstate,int4 maxparam,AddrSpace *spc);
-  int4 getParserState(void) const { return parsestate; }
-  void setParserState(int4 st) { parsestate = st; }
-  void deallocateState(ParserWalkerChange &walker);
-  void allocateOperand(int4 i,ParserWalkerChange &walker);
-  void setAddr(const Address &ad) { addr = ad; n2addr = Address(); }
-  void setNaddr(const Address &ad) { naddr = ad; }
-  void setCalladdr(const Address &ad) { calladdr = ad; }
-  void addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point);
-  void clearCommits(void) { contextcommit.clear(); }
-  void applyCommits(void);
-  const Address &getAddr(void) const { return addr; }
-  const Address &getNaddr(void) const { return naddr; }
-  const Address &getN2addr(void) const;
-  const Address &getDestAddr(void) const { return calladdr; }
-  const Address &getRefAddr(void) const { return calladdr; }
-  AddrSpace *getCurSpace(void) const { return addr.getSpace(); }
-  AddrSpace *getConstSpace(void) const { return const_space; }
-  uintm getInstructionBytes(int4 byteoff,int4 numbytes,uint4 off) const;
-  uintm getContextBytes(int4 byteoff,int4 numbytes) const;
-  uintm getInstructionBits(int4 startbit,int4 size,uint4 off) const;
-  uintm getContextBits(int4 startbit,int4 size) const;
-  void setContextWord(int4 i,uintm val,uintm mask) { context[i] = (context[i]&(~mask))|(mask&val); }
-  void loadContext(void) { contcache->getContext(addr,context); }
-  int4 getLength(void) const { return base_state->length; }
-  void setDelaySlot(int4 val) { delayslot = val; }
-  int4 getDelaySlot(void) const { return delayslot; }
+  ParserContext(ContextCache *ccache,Translate *trans);	///< Constructor
+  ~ParserContext(void);					///< Destructor
+  uint1 *getBuffer(void) { return buf; }		///< Get bytes in the stream at the point this instruction is encoded
+  void initialize(AddrSpace *spc,int4 maxstate = INITIAL_STATE_NUM);	///< Preallocate nodes for constructor trees
+  parse_state getParserState(void) const { return parsestate; }		///< Get the overall state of the parse
+  void setParserState(parse_state st) { parsestate = st; }		///< Update the overall parse state
+  void deallocateState(ParserWalkerChange &walker);			///< Clear any existing constructor tree
+  void allocateOperand(int4 i,ParserWalkerChange &walker);		///< Allocate a new child node in the constructor tree
+  void setAddr(const Address &ad) { addr = ad; n2addr = Address(); }	///< Set the starting address of the instruction
+  void setNaddr(const Address &ad) { naddr = ad; }			///< Set the address of the next instruction
+  void setCalladdr(const Address &ad) { calladdr = ad; }		///< Set the address of the call being overridden
+  void addCommit(TripleSymbol *sym,int4 num,uintm mask,bool flow,ConstructState *point);	///< Add a formal SLEIGH context change command
+  void clearCommits(void) { contextcommit.clear(); }			///< Clear all context commits
+  void applyCommits(void);						///< Apply any pending commits to the context cache
+  const Address &getAddr(void) const { return addr; }			///< Get the starting address of the current instruction
+  const Address &getNaddr(void) const { return naddr; }			///< Get the address of the next instruction
+  const Address &getN2addr(void) const;					///< Get the address of the instruction after the next
+  const Address &getDestAddr(void) const { return calladdr; }		///< Get the destination address (inst_dest) for the overridden call
+  const Address &getRefAddr(void) const { return calladdr; }		///< Get the reference address (inst_ref) for the p-code snippet
+  AddrSpace *getCurSpace(void) const { return addr.getSpace(); }	///< Get the address space of the current instruction
+  AddrSpace *getConstSpace(void) const { return const_space; }		///< Get the address space for constants
+  uintm getInstructionBytes(int4 byteoff,int4 numbytes,uint4 off) const;	///< Get the specified instruction bytes
+  uintm getContextBytes(int4 byteoff,int4 numbytes) const;		///< Get bytes from the local context
+  uintm getInstructionBits(int4 startbit,int4 size,uint4 off) const;	///< Get the specific range of bits from the instruction stream
+  uintm getContextBits(int4 startbit,int4 size) const;			///< Get the specific range of bits from the local context
+  void setContextWord(int4 i,uintm val,uintm mask) { context[i] = (context[i]&(~mask))|(mask&val); }	///< Modify a context word, using given mask and value
+  void loadContext(void) { contcache->getContext(addr,context); }	///< Pull context words associated with the starting address into the local array
+  int4 getLength(void) const { return base_state->length; }		///< Get the length of the current instruction
+  void setDelaySlot(int4 val) { delayslot = val; }			///< Set the number of instruction bytes in the delay slot
+  int4 getDelaySlot(void) const { return delayslot; }			///< Get the number of instruction bytes in the delay slot
+  void expandState(int4 amount);					///< Expand the number of available nodes for the constructor tree
 };
-  
-class ParserWalker {		// A class for walking the ParserContext
-  const ParserContext *const_context;
-  const ParserContext *cross_context;
+
+/// \brief A class for walking the constructor tree (ParserContext)
+class ParserWalker {
+private:
+  const ParserContext *const_context;		///< Context for the main instruction parse
+  const ParserContext *cross_context;		///< Context for an additional instruction parse needed to resolve a \e crossbuild
 protected:
-  ConstructState *point;	// The current node being visited
-  int4 depth;			// Depth of the current node
-  int4 breadcrumb[32];	// Path of operands from root
+  ConstructState *point;			///< The current node being visited
+  int4 depth;					///< Depth of the current node
+  int4 breadcrumb[ParserContext::MAX_DEPTH];	///< Path of operands from root
 public:
-  ParserWalker(const ParserContext *c) { const_context = c; cross_context = (const ParserContext *)0; }
-  ParserWalker(const ParserContext *c,const ParserContext *cross) { const_context = c; cross_context = cross; }
-  const ParserContext *getParserContext(void) const { return const_context; }
-  void baseState(void) { point = const_context->base_state; depth=0; breadcrumb[0] = 0; }
+  ParserWalker(const ParserContext *c) { const_context = c; cross_context = (const ParserContext *)0; }	///< Constructor
+  ParserWalker(const ParserContext *c,const ParserContext *cross) { const_context = c; cross_context = cross; }	///< Constructor for crossbuilds
+  const ParserContext *getParserContext(void) const { return const_context; }	///< Get the current context
+  void baseState(void) { point = const_context->base_state; depth=0; breadcrumb[0] = 0; }	///< Initialize for a new walk
  void setOutOfBandState(Constructor *ct,int4 index,ConstructState *tempstate,const ParserWalker &otherwalker);
-  bool isState(void) const { return (point != (ConstructState *)0); }
-  void pushOperand(int4 i) { breadcrumb[depth++] = i+1; point = point->resolve[i]; breadcrumb[depth] = 0; }
-  void popOperand(void) { point = point->parent; depth-= 1; }
+  bool isState(void) const { return (point != (ConstructState *)0); }	///< Return \b true if there are more nodes to traverse
+  void pushOperand(int4 i);						///< Make the indicated child (operand) the current node
+  void popOperand(void) { point = point->parent; depth-= 1; }		///< Make the parent constructor the current node
+
  uint4 getOffset(int4 i) const { if (i<0) return point->offset; 
-    ConstructState *op=point->resolve[i]; return op->offset + op->length; }
-  Constructor *getConstructor(void) const { return point->ct; }
-  int4 getOperand(void) const { return breadcrumb[depth]; }
-  FixedHandle &getParentHandle(void) { return point->hand; }
-  const FixedHandle &getFixedHandle(int4 i) const { return point->resolve[i]->hand; }
-  AddrSpace *getCurSpace(void) const { return const_context->getCurSpace(); }
-  AddrSpace *getConstSpace(void) const { return const_context->getConstSpace(); }
+    ConstructState *op=point->resolve[i]; return op->offset + op->length; }	///< Get the byte offset of the indicated operand within the instruction stream
+  Constructor *getConstructor(void) const { return point->ct; }		///< Get the current constructor
+  int4 getOperand(void) const { return breadcrumb[depth]; }		///< Get the operand index of the next constructor in the walk
+  FixedHandle &getParentHandle(void) { return point->hand; }		///< Get the resolved value associated with the current constructor
+  const FixedHandle &getFixedHandle(int4 i) const { return point->resolve[i]->hand; }	///< Get the resolved value associated with the indicated child operand
+  AddrSpace *getCurSpace(void) const { return const_context->getCurSpace(); }	///< Get the address space associated with the instruction stream
+  AddrSpace *getConstSpace(void) const { return const_context->getConstSpace(); }	///< Get the constant address space
+
+  /// \brief Get the starting address of the instruction
  const Address &getAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getAddr(); } return const_context->getAddr(); }
+  /// \brief Get the address of the next instruction
  const Address &getNaddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getNaddr();} return const_context->getNaddr(); }
+  /// \brief Get the address of the instruction after next
  const Address &getN2addr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getN2addr();} return const_context->getN2addr(); }
+  /// \brief Get the reference address (inst_ref) for the p-code snippet
  const Address &getRefAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getRefAddr();} return const_context->getRefAddr(); }
+  /// \brief Get the destination address (inst_dest) for the overridden call
  const Address &getDestAddr(void) const { if (cross_context != (const ParserContext *)0) { return cross_context->getDestAddr();} return const_context->getDestAddr(); }
-  int4 getLength(void) const { return const_context->getLength(); }
+
+  int4 getLength(void) const { return const_context->getLength(); }	///< Get the length of the instruction in bytes
+
+  /// \brief Get packed instruction bytes associated with the current constructor
+  ///
+  /// \param byteoff is an offset from the starting point associated with the constructor
+  /// \param numbytes is the number of bytes to pack
+  /// \return the packed instruction bytes in big endian encoding
  uintm getInstructionBytes(int4 byteoff,int4 numbytes) const {
    return const_context->getInstructionBytes(byteoff,numbytes,point->offset); }
+
+  /// \brief Get packed context bytes from the local context
+  ///
+  /// \param byteoff is the offset of the first byte to grab
+  /// \param numbytes is the number of bytes to grab
+  /// \return the context bytes in a packed value
  uintm getContextBytes(int4 byteoff,int4 numbytes) const {
    return const_context->getContextBytes(byteoff,numbytes); }
+
+  /// \brief Get bits from the instruction stream associated with the current constructor
+  ///
+  /// \param startbit is the offset of the first bit (relative to the starting point associated with the constructor)
+  /// \param size is the number of bits to grab
+  /// \return the requested range of bits (in the least significant positions and padded out with zero bits)
  uintm getInstructionBits(int4 startbit,int4 size) const {
    return const_context->getInstructionBits(startbit,size,point->offset); }
-  uintm getContextBits(int4 startbit,int4 size) const {
+
+  /// \brief Get a range of bits from the local context
+  ///
+  /// \param startbit is the offset of the first bit
+  /// \param size is the number of bits to return
+  /// \return the requested range of bits (in the least significant positions and padded out with zero bits)
+ uintm getContextBits(int4 startbit,int4 size) const {
    return const_context->getContextBits(startbit,size); }
 };

-class ParserWalkerChange : public ParserWalker { // Extension to walker that allows for on the fly modifications to tree
+/// \brief A walker extension that allows for on the fly modifications to the constructor tree
+///
+/// This is used to build the constructor tree as the instruction is parsed (Sleigh::resolve)
+class ParserWalkerChange : public ParserWalker {
  friend class ParserContext;
-  ParserContext *context;
+  ParserContext *context;		///< The (currently active) context
 public:
-  ParserWalkerChange(ParserContext *c) : ParserWalker(c) { context = c; }
-  ParserContext *getParserContext(void) { return context; }
-  ConstructState *getPoint(void) { return point; }
-  void setOffset(uint4 off) { point->offset = off; }
-  void setConstructor(Constructor *c) { point->ct = c; }
-  void setCurrentLength(int4 len) { point->length = len; }
-  void calcCurrentLength(int4 length,int4 numopers);
+  ParserWalkerChange(ParserContext *c) : ParserWalker(c) { context = c; }	///< Constructor
+  ParserContext *getParserContext(void) { return context; }	///< Get the currently active context
+  ConstructState *getPoint(void) { return point; }		///< Get the current node in the constructor tree
+  void setOffset(uint4 off) { point->offset = off; }		///< Set the absolute offset (from start of instruction) of the current node
+  void setConstructor(Constructor *c) { point->ct = c; }	///< Set the underlying Constructor for the current node
+  void setCurrentLength(int4 len) { point->length = len; }	///< Set the length associated with the current constructor
+  void calcCurrentLength(int4 length,int4 numopers);		///< Calculate the length of the current constructor
 };

+/// \brief Exception thrown by the SLEIGH engine
 struct SleighError : public LowlevelError {
-  SleighError(const string &s) : LowlevelError(s) {}
+  SleighError(const string &s) : LowlevelError(s) {}	///< Constructor
 };

+/// The tree is reset to a single root node and the walker is prepared for a new parse
+/// \param walker is the walker to initialize for a traversal
 inline void ParserContext::deallocateState(ParserWalkerChange &walker) {
-  alloc = 1;
+  alloc = state.size() - 2;	// Number of allocations left
  walker.context=this;
  walker.baseState();
 }

+/// The next available node is linked to the current active node in the walker at the given operand index.
+/// The child node becomes the new active node for the walker. The underlying constructor is not yet assigned.
+/// \param i is the operand index of the new child
+/// \param walker is the walker for the parse
 inline void ParserContext::allocateOperand(int4 i,ParserWalkerChange &walker) {
-  ConstructState *opstate = &state[alloc++];
+  if (i >= MAX_OPERAND)		// Each node's child array is preallocated with MAX_OPERAND slots
+    throw LowlevelError("SLEIGH constructor exceeds maximum number of operands");
+  if (alloc < 0)		// No free nodes remain: grow the pool before taking one
+    expandState(STATE_GROWTH);
+  ConstructState *opstate = state[alloc--];
  opstate->parent = walker.point;
  opstate->ct = (Constructor *)0;
  walker.point->resolve[i] = opstate;
+  if (walker.depth > MAX_DEPTH-2)	// breadcrumb needs room for both depth and depth+1
+    throw LowlevelError("SLEIGH exceeded maximum parse depth");
  walker.breadcrumb[walker.depth++] += 1;
  walker.point = opstate;
  walker.breadcrumb[walker.depth] = 0;
 }

+/// \param i is the index of child/operand
+inline void ParserWalker::pushOperand(int4 i) {
+  if (depth > ParserContext::MAX_DEPTH-2)
+    throw LowlevelError("SLEIGH exceeded maximum parse depth");
+  breadcrumb[depth++] = i+1;
+  point = point->resolve[i];
+  breadcrumb[depth] = 0;
+}
+
 } // End namespace ghidra
 #endif
@@ -382,7 +382,7 @@ void PcodeInjectLibrarySleigh::parseInject(InjectPayload *payload)
  }
  if (contextCache.pos == (ParserContext *)0) {	// Make sure we have a context
    contextCache.pos = new ParserContext((ContextCache *)0,(Translate *)0);
-    contextCache.pos->initialize(8,8,slgh->getConstantSpace());
+    contextCache.pos->initialize(slgh->getConstantSpace(),8);
  }
  PcodeSnippet compiler(slgh);
 //  compiler.clear();			// Not necessary unless we reuse
@@ -453,7 +453,7 @@ void DisassemblyCache::initialize(int4 min,int4 hashsize)
  hashtable = new ParserContext *[hashsize];
  for(int4 i=0;i<minimumreuse;++i) {
    ParserContext *pos = new ParserContext(contextcache,translate);
-    pos->initialize(75,20,constspace);
+    pos->initialize(constspace);
    list[i] = pos;
  }
  ParserContext *pos = list[0];
@@ -587,11 +587,11 @@ void Sleigh::initialize(DocumentStorage &store)
 /// \param addr is the given address of the instruction
 /// \param state is the desired parse state.
 /// \return the parse tree object (ParseContext)
-ParserContext *Sleigh::obtainContext(const Address &addr,int4 state) const
+ParserContext *Sleigh::obtainContext(const Address &addr,ParserContext::parse_state state) const

 {
  ParserContext *pos = discache->getParserContext(addr);
-  int4 curstate = pos->getParserState();
+  ParserContext::parse_state curstate = pos->getParserState();
  if (curstate >= state)
    return pos;
  if (curstate == ParserContext::uninitialized) {
@@ -4,9 +4,9 @@
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
- * 
+ *
 *      http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -167,7 +167,7 @@ class Sleigh : public SleighBase {
  mutable PcodeCacher pcode_cache;	///< Cache of p-code data just prior to emitting
  void clearForDelete(void);		///< Delete the context and disassembly caches
 protected:
-  ParserContext *obtainContext(const Address &addr,int4 state) const;
+  ParserContext *obtainContext(const Address &addr,ParserContext::parse_state state) const;
  void resolve(ParserContext &pos) const;	///< Generate a parse tree suitable for disassembly
  void resolveHandles(ParserContext &pos) const;	///< Prepare the parse tree for p-code generation
 public:
@@ -180,30 +180,35 @@ SubtableSymbol *WithBlock::getCurrentSubtable(const list<WithBlock> &stack)
  return (SubtableSymbol *)0;
 }

-void ConsistencyChecker::OptimizeRecord::copyFromExcludingSize(ConsistencyChecker::OptimizeRecord &that)
+/// \param records is the list of records to merge; the new record spans from the lowest offset to the highest end offset among them
+ConsistencyChecker::OptimizeRecord::OptimizeRecord(vector<ConsistencyChecker::OptimizeRecord*> &records)

 {
-  this->writeop = that.writeop;
-  this->readop = that.readop;
-  this->inslot = that.inslot;
-  this->writecount = that.writecount;
-  this->readcount = that.readcount;
-  this->writesection = that.writesection;
-  this->readsection = that.readsection;
-  this->opttype = that.opttype;
-}
+  uintb minOff = -1;	// All-ones value doubles as the "not yet set" marker
+  uintb maxOff = -1;	// Same marker for the running maximum
+  vector<OptimizeRecord*>::iterator iter;

-void ConsistencyChecker::OptimizeRecord::update(int4 opIdx, int4 slotIdx, int4 secNum)
-
-{
-  if (slotIdx >= 0) {
-    updateRead(opIdx, slotIdx, secNum);
+  for (iter = records.begin(); iter != records.end(); ++iter) {
+    if (minOff == -1 || (*iter)->offset < minOff) {	// First record seen, or a lower starting offset
+      minOff = (*iter)->offset;
+    }
+    if (maxOff == -1 || (*iter)->offset + (*iter)->size > maxOff) {	// First record seen, or a higher ending offset (offset + size)
+      maxOff = (*iter)->offset + (*iter)->size;
+    }
   }
-  else {
-    updateWrite(opIdx, secNum);
+
+  offset = minOff;	// With an empty list this stays at the all-ones marker
+  size = maxOff - minOff;	// Byte length of the merged range (0 for an empty list)
+  writeop = -1; readop = -1; inslot=-1; writecount=0; readcount=0; writesection=-2; readsection=-2; opttype=-1;	// Same starting values as the (offset,size) constructor
+
+  for (iter = records.begin(); iter != records.end(); ++iter) {
+    updateCombine(**iter);	// Fold in the read/write info of each input record
   }
 }

+/// \param i is the index of the op reading the range
+/// \param inslot is the input slot of the op reading the range
+/// \param secNum is the constructor section number of the op
 void ConsistencyChecker::OptimizeRecord::updateRead(int4 i, int4 inslot, int4 secNum)

 {
@@ -213,6 +218,8 @@ void ConsistencyChecker::OptimizeRecord::updateRead(int4 i, int4 inslot, int4 se
  this->readsection = secNum;
 }

+/// \param i is the index of the op writing to the range
+/// \param secNum is the constructor section number of the op
 void ConsistencyChecker::OptimizeRecord::updateWrite(int4 i, int4 secNum)

 {
@@ -221,17 +228,18 @@ void ConsistencyChecker::OptimizeRecord::updateWrite(int4 i, int4 secNum)
  this->writesection = secNum;
 }

-void ConsistencyChecker::OptimizeRecord::updateExport()
+void ConsistencyChecker::OptimizeRecord::updateExport(void)	// Overwrites both op indices with 0, both counts with 2 and both sections with -2

 {
   this->writeop = 0;
   this->readop = 0;
-  this->writecount = 2;
+  this->writecount = 2;		// Simulate a high count so the register cannot be optimized away
   this->readcount = 2;
   this->readsection = -2;
   this->writesection = -2;
 }

+/// \param that is the other record to pull read/write info from
 void ConsistencyChecker::OptimizeRecord::updateCombine(ConsistencyChecker::OptimizeRecord &that)

 {
@@ -1195,6 +1203,8 @@ void ConsistencyChecker::setPostOrder(SubtableSymbol *root)
  }
 }

+/// \param offset is the given offset
+/// \return an iterator to the last record before \b offset or end() if no records come before
 map<uintb,ConsistencyChecker::OptimizeRecord>::iterator ConsistencyChecker::UniqueState::lesserIter(uintb offset)

 {
@@ -1209,42 +1219,22 @@ map<uintb,ConsistencyChecker::OptimizeRecord>::iterator ConsistencyChecker::Uniq
  return std::prev(iter);
 }

-ConsistencyChecker::OptimizeRecord ConsistencyChecker::UniqueState::coalesce(vector<ConsistencyChecker::OptimizeRecord*> &records)
-
-{
-  uintb minOff = -1;
-  uintb maxOff = -1;
-  vector<OptimizeRecord*>::iterator iter;
-
-  for (iter = records.begin(); iter != records.end(); ++iter) {
-    if (minOff == -1 || (*iter)->offset < minOff) {
-      minOff = (*iter)->offset;
-    }
-    if (maxOff == -1 || (*iter)->offset + (*iter)->size > maxOff) {
-      maxOff = (*iter)->offset + (*iter)->size;
-    }
-  }
-
-  OptimizeRecord result(minOff, maxOff - minOff);
-
-  for (iter = records.begin(); iter != records.end(); ++iter) {
-    result.updateCombine(**iter);
-  }
-
-  return result;
-}
-
-void ConsistencyChecker::UniqueState::set(uintb offset, int4 size, OptimizeRecord &rec)
+/// Any overlaps with the new record are merged, maintaining a disjoint collection of records
+/// \param rec is the record to add
+void ConsistencyChecker::UniqueState::set(OptimizeRecord &rec)

 {
   vector<OptimizeRecord*> records;
-  getDefinitions(records, offset, size);
+  getDefinitions(records, rec.offset, rec.size);	// Collect the existing records that overlap the new range
   records.push_back(&rec);
-  OptimizeRecord coalesced = coalesce(records);
+  OptimizeRecord coalesced(records);	// Merge the overlapping records and the new one into a single record
   recs.erase(recs.lower_bound(coalesced.offset), recs.lower_bound(coalesced.offset+coalesced.size));
-  recs.insert(pair<uint4,OptimizeRecord>(coalesced.offset, coalesced));
+  recs.insert(pair<uintb,OptimizeRecord>(coalesced.offset, coalesced));	// Key type matches the map's uintb key; a uint4 key would truncate offsets above 32 bits
 }

+/// \param result holds all the overlapping records
+/// \param offset is the start of the given range
+/// \param size is the number of bytes in the range
 void ConsistencyChecker::UniqueState::getDefinitions(vector<ConsistencyChecker::OptimizeRecord*> &result, uintb offset, int4 size)

 {
@@ -1396,7 +1386,7 @@ void ConsistencyChecker::examineVn(UniqueState &state,
  else {
    OptimizeRecord rec(offset,size);
    rec.updateWrite(i,secnum);
-    state.set(offset,size,rec);
+    state.set(rec);
  }
 }

@@ -1750,13 +1740,6 @@ void ConsistencyChecker::optimizeAll(void)
  }
 }

-ostream& operator<<(ostream &os, const ConsistencyChecker::OptimizeRecord &rec) {
-  os << "{writeop=" << rec.writeop << " readop=" << rec.readop << " inslot=" << rec.inslot <<
-        " writecount=" << rec.writecount << " readcount=" << rec.readcount <<
-	" opttype=" << rec.opttype << "}";
-  return os;
-}
-
 /// Sort based on the containing Varnode, then on the bit boundary
 /// \param op2 is a field to compare with \b this
 /// \return \b true if \b this should be sorted before the other field
@@ -158,30 +158,32 @@ public:
    mutable int4 opttype; ///< 0 = register read by a COPY, 1 = register written by a COPY (-1 otherwise)

    /// \brief Construct a record, initializing counts
+    ///
+    /// \param offset is the starting offset of the temporary range
+    /// \param size is the number of bytes in the range
    OptimizeRecord(uintb offset, int4 size) {
      this->offset = offset;
      this->size = size;
      writeop = -1; readop = -1; inslot=-1; writecount=0; readcount=0; writesection=-2; readsection=-2; opttype=-1;
    }
-    void copyFromExcludingSize(OptimizeRecord &that);
-    void update(int4 opIdx, int4 slotIdx, int4 secNum);
-    void updateRead(int4 i, int4 inslot, int4 secNum);
-    void updateWrite(int4 i, int4 secNum);
-    void updateExport();
-    void updateCombine(OptimizeRecord &that);
+    OptimizeRecord(vector<OptimizeRecord *> &records);	///< Constructor merging the given overlapping records into a single record
+    void updateRead(int4 i, int4 inslot, int4 secNum);	///< Mark \b this range as being read by a specific op
+    void updateWrite(int4 i, int4 secNum);		///< Mark \b this range as being written by a specific op
+    void updateExport(void);				///< Mark \b this range as exported from the constructor
+    void updateCombine(OptimizeRecord &that);		///< Merge another record's read and write references into \b this
  };
 private:
+  /// \brief Container of OptimizeRecords for possibly overlapping temporary registers
   class UniqueState {
-    map<uintb,OptimizeRecord> recs;
-    static uintb endOf(map<uintb,OptimizeRecord>::iterator &iter) { return iter->first + iter->second.size; }
-    OptimizeRecord coalesce(vector<OptimizeRecord*> &records);
-    map<uintb,OptimizeRecord>::iterator lesserIter(uintb offset);
+    map<uintb,OptimizeRecord> recs;	///< Map from unique space offset to OptimizeRecord
+    static uintb endOf(map<uintb,OptimizeRecord>::iterator &iter) { return iter->first + iter->second.size; }	///< Get the offset one past the end of the given record
+    map<uintb,OptimizeRecord>::iterator lesserIter(uintb offset);	///< Get the last record before the given offset (or end() if none)
   public:
-    void clear(void) { recs.clear(); }
-    void set(uintb offset, int4 size, OptimizeRecord &rec);
-    void getDefinitions(vector<OptimizeRecord*> &result, uintb offset, int4 size);
-    map<uintb,OptimizeRecord>::const_iterator begin(void) const { return recs.begin(); }
-    map<uintb,OptimizeRecord>::const_iterator end(void) const { return recs.end(); }
+    void clear(void) { recs.clear(); }					///< Clear all records
+    void set(OptimizeRecord &rec);					///< Add a new record, merging it with any records it overlaps
+    void getDefinitions(vector<OptimizeRecord*> &result, uintb offset, int4 size);	///< Get any definitions overlapping the given range
+    map<uintb,OptimizeRecord>::const_iterator begin(void) const { return recs.begin(); }	///< Get starting iterator to all records
+    map<uintb,OptimizeRecord>::const_iterator end(void) const { return recs.end(); }		///< Get ending iterator to all records
   };

  SleighCompile *compiler;	///< Parsed form of the SLEIGH file being examined
@@ -480,8 +482,6 @@ public:
  int4 run_compilation(const string &filein,const string &fileout);
 };

-ostream& operator<<(ostream &os, const ConsistencyChecker::OptimizeRecord &rec);
-
 extern SleighCompile *slgh;		///< A global reference to the SLEIGH compiler accessible to the parse functions
 extern int yydebug;			///< Debug state for the SLEIGH parse functions