Merge remote-tracking branch 'origin/caheckman_recentBranches'

2026-05-22 08:33:46 +08:00 · 2020-05-22 13:29:33 -04:00
parent 7af55169c0 39cbcd33ae
commit 875eed4c3b
60 changed files with 2939 additions and 1043 deletions
@@ -252,9 +252,11 @@
        <H3><A name="c_cpp"/><A name="Options_C_C__"/>C/C++</H3>

        <BLOCKQUOTE>
-          <P>Create a C/C++ file containing all datatypes from the program's <A href=
-          "help/topics/DataTypeManagerPlugin/data_type_manager_description.htm">data type
-          manager</A> and all of the functions in the program.&nbsp;</P>
+          <P>Create a C/C++ file containing functions decompiled from the program and, optionally,
+          containing definitions of all datatypes from the program's
+          <A href="help/topics/DataTypeManagerPlugin/data_type_manager_description.htm">
+          data type manager</A>. The datatype definitions and a prototype declaration for each function
+          can be placed in a separate header file.&nbsp;</P>

          <H4>C/C++ Options</H4>

@@ -266,6 +268,14 @@
            <LI><B>Create C File (.c)</B> - Select to create a .c file.</LI>

            <LI><B>Use C++ Style Comments (//)</B> - Select to use // style comments; deselect to use /* style comments.</LI>
+            
+            <LI><B>Emit data-type definitions</B> - Select to export a C/C++ definition for each data-type.</LI>
+            
+            <LI><B>Function tags to filter</B> - Optionally list function tags to filter which functions are exported.
+            Multiple tags should be comma separated.</LI>
+            
+            <LI><B>Function tags excluded</B> - Select to exclude all tagged functions from being exported. Deselect to
+            export only tagged functions.</LI>
          </UL>
        </BLOCKQUOTE>

@@ -270,6 +270,7 @@ model {
 		                include "database.cc"
 		                include "cpool.cc"
 		                include "comment.cc"
+						include "stringmanage.cc"
 		                include "fspec.cc"
 		                include "action.cc"
 		                include "loadimage.cc"
@@ -320,6 +321,7 @@ model {
 		                include "cpool_ghidra.cc"
 		                include "ghidra_process.cc"
 		                include "comment_ghidra.cc"
+						include "string_ghidra.cc"
 		         //       include "callgraph.cc"			// uncomment for debug
 		         //       include "ifacedecomp.cc"		// uncomment for debug
 		         //       include "ifaceterm.cc"			// uncomment for debug
@@ -75,7 +75,7 @@ EXTERNAL_CONSOLEEXT_NAMES=$(subst .cc,,$(notdir $(EXTERNAL_CONSOLEEXT_SOURCE)))
 CORE=	xml space float address pcoderaw translate opcodes globalcontext
 # Additional core files for any projects that decompile
 DECCORE=capability architecture options graph cover block cast typeop database cpool \
-	comment fspec action loadimage grammar varnode op \
+	comment stringmanage fspec action loadimage grammar varnode op \
 	type variable varmap jumptable emulate emulateutil flow userop \
 	funcdata funcdata_block funcdata_op funcdata_varnode pcodeinject \
 	heritage prefersplit rangeutil ruleaction subflow blockaction merge double \
@@ -87,7 +87,7 @@ SLEIGH=	sleigh pcodeparse pcodecompile sleighbase slghsymbol \
 # Additional files for the GHIDRA specific build
 GHIDRA=	ghidra_arch inject_ghidra ghidra_translate loadimage_ghidra \
 	typegrp_ghidra database_ghidra ghidra_context cpool_ghidra \
-	ghidra_process comment_ghidra $(GHIDRAEXT_NAMES)
+	ghidra_process comment_ghidra string_ghidra $(GHIDRAEXT_NAMES)
 # Additional files specific to the sleigh compiler
 SLACOMP=slgh_compile slghparse slghscan
 # Additional special files that should not be considered part of the library
@@ -182,6 +182,12 @@ bool Action::setBreakPoint(uint4 tp,const string &specify)
  return false;
 }

+void Action::clearBreakPoints(void)	// Clear all breakpoints set on this Action (base implementation of the virtual method)
+
+{
+  breakpoint = 0;		// Reset the breakpoint field to 0
+}
+
 /// If enabled, a warning will be printed whenever this action applies.
 /// The warning can be toggled for \b this Action or some sub-action by
 /// specifying its name.
@@ -371,6 +377,15 @@ void ActionGroup::addAction(Action *ac)
  list.push_back(ac);
 }

+void ActionGroup::clearBreakPoints(void)	// Clear breakpoints on every Action in the group, then on the group itself
+
+{
+  vector<Action *>::const_iterator iter;
+  for(iter=list.begin();iter!= list.end();++iter)
+    (*iter)->clearBreakPoints();	// Virtual call: each member Action clears its own (and any nested) breakpoints
+  Action::clearBreakPoints();		// Finally reset the breakpoint field belonging to this group
+}
+
 Action *ActionGroup::clone(const ActionGroupList &grouplist) const

 {
@@ -870,6 +885,15 @@ int4 ActionPool::apply(Funcdata &data)
  return 0;			// Indicate successful completion
 }

+void ActionPool::clearBreakPoints(void)	// Clear breakpoints on every Rule in the pool, then on the pool itself
+
+{
+  vector<Rule *>::const_iterator iter;
+  for(iter=allrules.begin();iter!=allrules.end();++iter)
+    (*iter)->clearBreakPoints();	// Rule::clearBreakPoints() resets that Rule's breakpoint field
+  Action::clearBreakPoints();		// Finally reset the breakpoint field belonging to this pool
+}
+
 Action *ActionPool::clone(const ActionGroupList &grouplist) const

 {
@@ -955,13 +979,26 @@ ActionDatabase::~ActionDatabase(void)
    delete (*iter).second;
 }

-/// This provides the database with the single Action from which all other
-/// \e root Actions are derived.  The Action has a reserved name "universal"
-/// \param act is the universal Action
-void ActionDatabase::registerUniversal(Action *act)
+/// Delete every registered root Action except the \e universal Action (they may have been altered),
+/// rebuild the default groups, and make "decompile" the current root Action.
+void ActionDatabase::resetDefaults(void)

 {
-  registerAction(universalname,act);
+  Action *universalAction = (Action *)0;
+  map<string,Action *>::iterator iter;
+  iter = actionmap.find(universalname);
+  if (iter != actionmap.end())
+    universalAction = (*iter).second;	// Remember the universal Action so that it is not deleted below
+  for(iter = actionmap.begin();iter!=actionmap.end();++iter) {
+    Action *curAction = (*iter).second;
+    if (curAction != universalAction)
+      delete curAction;		// Clear out any old (modified) root actions
+  }
+  actionmap.clear();		// Forget all entries, including the universal one, which is re-registered next
+  registerAction(universalname, universalAction);	// Pointer is null if no universal Action was found above
+
+  buildDefaultGroups();		// Returns immediately if the groups are still flagged as the defaults
+  setCurrent("decompile");	// The default root action
 }

 const ActionGroupList &ActionDatabase::getGroup(const string &grp) const
@@ -1019,13 +1056,15 @@ Action *ActionDatabase::toggleAction(const string &grp, const string &basegrp,bo
 /// \param argv is a list of static char pointers, which must end with a NULL pointer, or a zero length string.
 void ActionDatabase::setGroup(const string &grp,const char **argv)

-{  ActionGroupList &curgrp( groupmap[ grp ] );
+{
+  ActionGroupList &curgrp( groupmap[ grp ] );	// map::operator[] creates an empty grouplist if grp is new
   curgrp.list.clear();		// Clear out any old members
   for(int4 i=0;;++i) {
     if (argv[i] == (char *)0) break;	// A NULL pointer terminates the list
     if (argv[i][0] == '\0') break;	// A zero length string also terminates the list
     curgrp.list.insert( argv[i] );
   }
+  isDefaultGroups = false;	// Groups were modified; buildDefaultGroups() sets this back to true after its own calls
 }

 /// Copy an existing \e root Action by copying its grouplist, giving it a new name.
@@ -1038,6 +1077,7 @@ void ActionDatabase::cloneGroup(const string &oldname,const string &newname)
 {
  const ActionGroupList &curgrp(getGroup(oldname)); // Should already exist
  groupmap[ newname ] = curgrp;	// Copy the group
+  isDefaultGroups = false;
 }

 /// Add a group to the grouplist for a particular \e root Action.
@@ -1046,7 +1086,9 @@ void ActionDatabase::cloneGroup(const string &oldname,const string &newname)
 /// \param basegroup is the group to add
 /// \return \b true for a new addition, \b false if the group was already present
 bool ActionDatabase::addToGroup(const string &grp, const string &basegroup)
+
 {
+  isDefaultGroups = false;	// Set unconditionally (even if nothing is inserted) so buildDefaultGroups() will rebuild
   ActionGroupList &curgrp( groupmap[ grp ] );	// map::operator[] creates an empty grouplist if grp is new
   return curgrp.list.insert( basegroup ).second;	// .second reports whether a new element was actually inserted
 }
@@ -1057,7 +1099,9 @@ bool ActionDatabase::addToGroup(const string &grp, const string &basegroup)
 /// \param basegrp is the group to remove
 /// \return \b true if the group existed and was removed
 bool ActionDatabase::removeFromGroup(const string &grp, const string &basegrp)
+
 {
+  isDefaultGroups = false;	// Set unconditionally (even if nothing is erased) so buildDefaultGroups() will rebuild
   ActionGroupList &curgrp( groupmap[ grp ] );	// map::operator[] creates an empty grouplist if grp is new
   return (curgrp.list.erase(basegrp) > 0);	// erase() returns the number of elements removed
 }
@@ -99,6 +99,7 @@ public:
  virtual void printStatistics(ostream &s) const;		///< Dump statistics to stream
  int4 perform(Funcdata &data); 				///< Perform this action (if necessary)
  bool setBreakPoint(uint4 tp,const string &specify);		///< Set a breakpoint on this action
+  virtual void clearBreakPoints(void);				///< Clear all breakpoints set on \b this Action
  bool setWarning(bool val,const string &specify);		///< Set a warning on this action
  bool disableRule(const string &specify);			///< Disable a specific Rule within \b this
  bool enableRule(const string &specify);			///< Enable a specific Rule within \b this
@@ -147,6 +148,7 @@ public:
  ActionGroup(uint4 f,const string &nm) : Action(f,nm,"") {}	///< Construct given properties and a name
  virtual ~ActionGroup(void);				///< Destructor
  void addAction(Action *ac);				///< Add an Action to the group
+  virtual void clearBreakPoints(void);
  virtual Action *clone(const ActionGroupList &grouplist) const;
  virtual void reset(Funcdata &data);
  virtual void resetStats(void);
@@ -216,6 +218,7 @@ public:
  uint4 getNumApply(void) { return count_apply; }		///< Get number of successful applications
  void setBreak(uint4 tp) { breakpoint |= tp; }			///< Set a breakpoint on \b this Rule
  void clearBreak(uint4 tp) { breakpoint &= ~tp; }		///< Clear a breakpoint on \b this Rule
+  void clearBreakPoints(void) { breakpoint = 0; }		///< Clear all breakpoints on \b this Rule
  void turnOnWarnings(void) { flags |= warnings_on; }		///< Enable warnings for \b this Rule
  void turnOffWarnings(void) { flags &= ~warnings_on; }		///< Disable warnings for \b this Rule
  bool isDisabled(void) const { return ((flags & type_disable)!=0); }	///< Return \b true if \b this Rule is disabled
@@ -266,6 +269,7 @@ public:
  ActionPool(uint4 f,const string &nm) : Action(f,nm,"") {}	///< Construct providing properties and name
  virtual ~ActionPool(void);				///< Destructor
  void addRule(Rule *rl);				///< Add a Rule to the pool
+  virtual void clearBreakPoints(void);
  virtual Action *clone(const ActionGroupList &grouplist) const;
  virtual void reset(Funcdata &data);
  virtual void resetStats(void);
@@ -296,14 +300,16 @@ class ActionDatabase {
  string currentactname;			///< The name associated with the current root Action
  map<string,ActionGroupList> groupmap;		///< Map from root Action name to the grouplist it uses
  map<string,Action *> actionmap;		///< Map from name to root Action
+  bool isDefaultGroups;				///< \b true if only the default groups are set
  static const char universalname[];		///< The name of the \e universal root Action
  void registerAction(const string &nm,Action *act);	///< Register a \e root Action
+  void buildDefaultGroups(void);		///< Set up descriptions of preconfigured root Actions
  Action *getAction(const string &nm) const;				///< Look up a \e root Action by name
  Action *deriveAction(const string &baseaction,const string &grp);	///< Derive a \e root Action
 public:
-  ActionDatabase(void) { currentact = (Action *)0; }	///< Constructor
+  ActionDatabase(void) { currentact = (Action *)0; isDefaultGroups = false; }	///< Constructor
  ~ActionDatabase(void);				///< Destructor
-  void registerUniversal(Action *act);			///< Register the \e universal root Action
+  void resetDefaults(void);			///< (Re)set the default configuration
  Action *getCurrent(void) const { return currentact; }	///< Get the current \e root Action
  const string &getCurrentName(void) const { return currentactname; }	///< Get the name of the current \e root Action
  const ActionGroupList &getGroup(const string &grp) const;	///< Get a specific grouplist by name
@@ -314,6 +320,7 @@ public:
  void cloneGroup(const string &oldname,const string &newname);		///< Clone a \e root Action
  bool addToGroup(const string &grp,const string &basegroup);		///< Add a group to a \e root Action
  bool removeFromGroup(const string &grp,const string &basegroup);	///< Remove a group from a \e root Action
+  void universalAction(Architecture *glb);		///< Build the universal action
 };

 #endif
@@ -16,6 +16,7 @@
 // Set up decompiler for specific architectures

 #include "coreaction.hh"
+#include "flow.hh"
 #ifdef CPUI_RULECOMPILE
 #include "rulecompile.hh"
 #endif
@@ -86,17 +87,10 @@ Architecture::Architecture(void)

 {
  //  endian = -1;
-  trim_recurse_max = 5;		// Reasonable default value
-  max_implied_ref = 2;		// 2 is best, in specific cases a higher number might be good
-  max_term_duplication = 2;	// 2 and 3 (4) are pretty reasonable
-  max_basetype_size = 10;	// Needs to be 8 or bigger
+  resetDefaultsInternal();
  min_funcsymbol_size = 1;
  aggressive_ext_trim = false;
-  readonlypropagate = false;
-  infer_pointers = true;
  funcptr_align = 0;
-  flowoptions = 0;
-  alias_block_level = 2;	// Block structs and arrays by default
  defaultfp = (ProtoModel *)0;
  defaultReturnAddr.space = (AddrSpace *)0;
  evalfp_current = (ProtoModel *)0;
@@ -106,6 +100,7 @@ Architecture::Architecture(void)
  loader = (LoadImage *)0;
  pcodeinjectlib = (PcodeInjectLibrary *)0;
  commentdb = (CommentDatabase *)0;
+  stringManager = (StringManager *)0;
  cpool = (ConstantPool *)0;
  symboltab = new Database(this);
  context = (ContextDatabase *)0;
@@ -158,6 +153,8 @@ Architecture::~Architecture(void)
    delete pcodeinjectlib;
  if (commentdb != (CommentDatabase *)0)
    delete commentdb;
+  if (stringManager != (StringManager *)0)
+    delete stringManager;
  if (cpool != (ConstantPool *)0)
    delete cpool;
  if (context != (ContextDatabase *)0)
@@ -274,6 +271,7 @@ void Architecture::clearAnalysis(Funcdata *fd)
  fd->clear();			// Clear stuff internal to function
  // Clear out any analysis generated comments
  commentdb->clearType(fd->getAddress(),Comment::warning|Comment::warningheader);
+  stringManager->clear();
 }

 /// Symbols do not necessarily need to be available for the decompiler.
@@ -411,6 +409,7 @@ void Architecture::saveXml(ostream &s) const
  symboltab->saveXml(s);
  context->saveXml(s);
  commentdb->saveXml(s);
+  stringManager->saveXml(s);
  if (!cpool->empty())
    cpool->saveXml(s);
  s << "</save_state>\n";
@@ -443,6 +442,8 @@ void Architecture::restoreXml(DocumentStorage &store)
      context->restoreXml(subel,this);
    else if (subel->getName() == "commentdb")
      commentdb->restoreXml(subel,this);
+    else if (subel->getName() == "stringmanage")
+      stringManager->restoreXml(subel,this);
    else if (subel->getName() == "constantpool")
      cpool->restoreXml(subel,*types);
    else if (subel->getName() == "optionslist")
@@ -508,8 +509,8 @@ void Architecture::buildAction(DocumentStorage &store)

 {
  parseExtraRules(store);	// Look for any additional rules
-  universal_action(this);
-  allacts.setCurrent("decompile");
+  allacts.universalAction(this);
+  allacts.resetDefaults();
 }

 /// This builds the database which holds the status registers settings and other
@@ -581,6 +582,14 @@ void Architecture::buildCommentDB(DocumentStorage &store)
  commentdb = new CommentDatabaseInternal();
 }

+/// Build the container that holds decoded strings; the object is deleted by the Architecture destructor
+/// \param store may hold configuration information (not consulted by this implementation)
+void Architecture::buildStringManager(DocumentStorage &store)
+
+{
+  stringManager = new StringManagerUnicode(this,2048);	// NOTE(review): 2048 is presumably a size limit for decoded strings -- confirm against StringManagerUnicode
+}
+
 /// Some processor models (Java byte-code) need a database of constants.
 /// The database is always built, but may remain empty.
 /// \param store may hold configuration information
@@ -1243,6 +1252,7 @@ void Architecture::init(DocumentStorage &store)
  buildContext(store);
  buildTypegrp(store);
  buildCommentDB(store);
+  buildStringManager(store);
  buildConstantPool(store);

  restoreFromSpec(store);
@@ -1253,6 +1263,31 @@ void Architecture::init(DocumentStorage &store)
  fillinReadOnlyFromLoader();
 }

+void Architecture::resetDefaultsInternal(void)	// Assign default values to the option fields stored directly in Architecture; called by the constructor and resetDefaults()
+
+{
+  trim_recurse_max = 5;		// Reasonable default value
+  max_implied_ref = 2;		// 2 is best, in specific cases a higher number might be good
+  max_term_duplication = 2;	// 2 and 3 (4) are reasonable
+  max_basetype_size = 10;	// Needs to be 8 or bigger
+  flowoptions = FlowInfo::error_toomanyinstructions;	// The only flow option enabled by default
+  max_instructions = 100000;	// Maximum instructions that can be processed in one function
+  infer_pointers = true;
+  readonlypropagate = false;
+  alias_block_level = 2;	// Block structs and arrays by default
+}
+
+/// Reset options that can be modified by the OptionDatabase. This includes
+/// options specific to this class and options under PrintLanguage and ActionDatabase
+void Architecture::resetDefaults(void)
+
+{
+  resetDefaultsInternal();	// Options stored directly in this class
+  allacts.resetDefaults();	// Root Actions and groups held by the ActionDatabase
+  for(int4 i=0;i<printlist.size();++i)	// NOTE(review): signed int4 compared against unsigned size() -- harmless here but may warn
+    printlist[i]->resetDefaults();	// Every supported printer is reset, not only the current one
+}
+
 Address SegmentedResolver::resolve(uintb val,int4 sz,const Address &point,uintb &fullEncoding)

 {
@@ -28,6 +28,7 @@
 #include "loadimage.hh"
 #include "globalcontext.hh"
 #include "comment.hh"
+#include "stringmanage.hh"
 #include "userop.hh"
 #include "options.hh"
 #include "transform.hh"
@@ -130,6 +131,7 @@ public:
  vector<AddrSpace *> inferPtrSpaces;	///< Set of address spaces in which a pointer constant is inferable
  int4 funcptr_align;		///< How many bits of alignment a function ptr has
  uint4 flowoptions;            ///< options passed to flow following engine
+  uint4 max_instructions;	///< Maximum instructions that can be processed in one function
  int4 alias_block_level;	///< Aliases blocked by 0=none, 1=struct, 2=array, 3=all
  vector<Rule *> extra_pool_rules; ///< Extra rules that go in the main pool (cpu specific, experimental)

@@ -146,6 +148,7 @@ public:
  PcodeInjectLibrary *pcodeinjectlib;	///< Pcode injection manager
  RangeList nohighptr;          ///< Ranges for which high-level pointers are not possible
  CommentDatabase *commentdb;	///< Comments for this architecture
+  StringManager *stringManager;	///< Manager of decoded strings
  ConstantPool *cpool;		///< Deferred constant values
  PrintLanguage *print;	        ///< Current high-level language printer
  vector<PrintLanguage *> printlist;	///< List of high-level language printers supported
@@ -164,6 +167,8 @@ public:
 #endif
  Architecture(void);		///< Construct an uninitialized Architecture
  void init(DocumentStorage &store); ///< Load the image and configure architecture
+  void resetDefaultsInternal(void);	///< Reset default values for options specific to Architecture
+  void resetDefaults(void);		///< Reset default values for options owned by \b this
  ProtoModel *getModel(const string &nm) const;		///< Get a specific PrototypeModel
  bool hasModel(const string &nm) const;		///< Does this Architecture have a specific PrototypeModel
  bool highPtrPossible(const Address &loc,int4 size) const; ///< Are pointers possible to the given location?
@@ -224,6 +229,7 @@ protected:

  virtual void buildTypegrp(DocumentStorage &store);		///< Build the data-type factory/container
  virtual void buildCommentDB(DocumentStorage &store);		///< Build the comment database
+  virtual void buildStringManager(DocumentStorage &store);	///< Build the string manager
  virtual void buildConstantPool(DocumentStorage &store);	///< Build the constant pool
  virtual void buildInstructions(DocumentStorage &store);	///< Register the p-code operations
  virtual void buildAction(DocumentStorage &store);		///< Build the Action framework
@@ -232,9 +232,14 @@ Datatype *CastStrategyC::castStandard(Datatype *reqtype,Datatype *curtype,
    isptr = true;
  }
  if (curtype == reqtype) return (Datatype *)0;	// Different typedefs could point to the same type
-  if ((reqbase->getMetatype()==TYPE_VOID)||(reqbase->getMetatype()==TYPE_VOID))
+  if ((reqbase->getMetatype()==TYPE_VOID)||(curtype->getMetatype()==TYPE_VOID))
    return (Datatype *)0;	// Don't cast from or to VOID
-  if (reqbase->getSize() != curbase->getSize()) return reqtype; // Always cast change in size
+  if (reqbase->getSize() != curbase->getSize()) {
+    if (reqbase->isVariableLength() && isptr && reqbase->hasSameVariableBase(curbase)) {
+      return (Datatype *)0;	// Don't need a cast
+    }
+    return reqtype; // Otherwise, always cast change in size
+  }
  switch(reqbase->getMetatype()) {
  case TYPE_UNKNOWN:
    return (Datatype *)0;
@@ -370,7 +375,7 @@ Datatype *CastStrategyJava::castStandard(Datatype *reqtype,Datatype *curtype,
  if ((reqbase->getMetatype()==TYPE_PTR)||(curbase->getMetatype()==TYPE_PTR))
    return (Datatype *)0;		// There must be explicit cast op between objects, so assume no cast necessary

-  if ((reqbase->getMetatype()==TYPE_VOID)||(reqbase->getMetatype()==TYPE_VOID))
+  if ((reqbase->getMetatype()==TYPE_VOID)||(curtype->getMetatype()==TYPE_VOID))
    return (Datatype *)0;	// Don't cast from or to VOID
  if (reqbase->getSize() != curbase->getSize()) return reqtype; // Always cast change in size
  switch(reqbase->getMetatype()) {
@@ -759,6 +759,15 @@ bool ConditionalExecution::verify(void)
  return true;
 }

+/// Set up for testing ConditionalExecution on multiple iblocks of a single function
+/// \param f is the function to do testing on
+ConditionalExecution::ConditionalExecution(Funcdata *f)
+
+{
+  fd = f;
+  buildHeritageArray();	// Cache an array depending on the particular heritage pass (built once, at construction)
+}
+
 /// The given block is tested as a possible \b iblock. If this configuration
 /// works and is not a \b directsplit, \b true is returned.
 /// If the configuration works as a \b directsplit, then recursively check that
@@ -772,7 +781,6 @@ bool ConditionalExecution::trial(BlockBasic *ib)

 {
  iblock = ib;
-  buildHeritageArray();
  if (!verify()) return false;

  PcodeOp *cbranch_copy;
@@ -160,7 +160,7 @@ class ConditionalExecution {
  void fixReturnOp(void);
  bool verify(void);				///< Verify that we have a removable \b iblock
 public:
-  ConditionalExecution(Funcdata *f) { fd = f; }	///< Constructor
+  ConditionalExecution(Funcdata *f);		///< Constructor
  bool trial(BlockBasic *ib);			///< Test for a modifiable configuration around the given block
  void execute(void);				///< Eliminate the unnecessary path join at \b iblock
 };
@@ -170,29 +170,31 @@ int main(int argc,char **argv)
 {
  const char *initscript = (const char *)0;

-  vector<string> extrapaths;
-  int4 i=1;
-  while((i<argc)&&(argv[i][0]=='-')) {
-    if (argv[i][1]=='i')
-      initscript = argv[++i];
-    else if (argv[i][1]=='s')
-      extrapaths.push_back(argv[++i]);
-    i += 1;
-  }
-
-  string ghidraroot = FileManage::discoverGhidraRoot(argv[0]);
-  if (ghidraroot.size() == 0) {
-    const char *sleighhomepath = getenv("SLEIGHHOME");
-    if (sleighhomepath == (const char *)0) {
-      if (extrapaths.empty()) {
-	cerr << "Could not discover root of Ghidra installation" << endl;
-	exit(1);
-      }
+  {
+    vector<string> extrapaths;
+    int4 i = 1;
+    while ((i < argc) && (argv[i][0] == '-')) {
+      if (argv[i][1] == 'i')
+	initscript = argv[++i];
+      else if (argv[i][1] == 's')
+	extrapaths.push_back(argv[++i]);
+      i += 1;
    }
-    else
-      ghidraroot = sleighhomepath;
+
+    string ghidraroot = FileManage::discoverGhidraRoot(argv[0]);
+    if (ghidraroot.size() == 0) {
+      const char *sleighhomepath = getenv("SLEIGHHOME");
+      if (sleighhomepath == (const char*) 0) {
+	if (extrapaths.empty()) {
+	  cerr << "Could not discover root of Ghidra installation" << endl;
+	  exit(1);
+	}
+      }
+      else
+	ghidraroot = sleighhomepath;
+    }
+    startDecompilerLibrary(ghidraroot.c_str(), extrapaths);
  }
-  startDecompilerLibrary(ghidraroot.c_str(),extrapaths);

  IfaceStatus *status;
  try {
@@ -607,6 +607,7 @@ int4 ActionLaneDivide::apply(Funcdata &data)
    if (allStorageProcessed) break;
  }
  data.clearLanedAccessMap();
+  data.setLanedRegGenerated();
  return 0;
 }

@@ -1209,31 +1210,12 @@ int4 ActionDeindirect::apply(Funcdata &data)
  return 0;
 }

-/// Check if the given Varnode has a matching LanedRegister record. If so, add its
-/// storage location to the given function's laned access list.
-/// \param data is the given function
-/// \param vn is the given Varnode
-void ActionVarnodeProps::markLanedVarnode(Funcdata &data,Varnode *vn)
-
-{
-  if (vn->isConstant()) return;
-  Architecture *glb = data.getArch();
-  const LanedRegister *lanedRegister  = glb->getLanedRegister(vn->getAddr(),vn->getSize());
-  if (lanedRegister != (const LanedRegister *)0)
-    data.markLanedVarnode(vn,lanedRegister);
-}
-
 int4 ActionVarnodeProps::apply(Funcdata &data)

 {
  Architecture *glb = data.getArch();
  bool cachereadonly = glb->readonlypropagate;
-  int4 minLanedSize = 1000000;		// Default size meant to filter no Varnodes
-  if (!data.isLanedRegComplete()) {
-    int4 sz = glb->getMinimumLanedRegisterSize();
-    if (sz > 0)
-      minLanedSize = sz;
-  }
+  int4 pass = data.getHeritagePass();
  VarnodeLocSet::const_iterator iter;
  Varnode *vn;

@@ -1242,9 +1224,29 @@ int4 ActionVarnodeProps::apply(Funcdata &data)
    vn = *iter++;		// Advance iterator in case vn is deleted
    if (vn->isAnnotation()) continue;
    int4 vnSize = vn->getSize();
-    if (vnSize >= minLanedSize)
-      markLanedVarnode(data, vn);
-    if (vn->hasActionProperty()) {
+    if (vn->isAutoLiveHold()) {
+      if (pass > 0) {
+	if (vn->isWritten()) {
+	  PcodeOp *loadOp = vn->getDef();
+	  if (loadOp->code() == CPUI_LOAD) {
+	    Varnode *ptr = loadOp->getIn(1);
+	    if (ptr->isConstant() || ptr->isReadOnly())
+	      continue;
+	    if (ptr->isWritten()) {
+	      PcodeOp *copyOp = ptr->getDef();
+	      if (copyOp->code() == CPUI_COPY) {
+		ptr = copyOp->getIn(0);
+		if (ptr->isConstant() || ptr->isReadOnly())
+		  continue;
+	      }
+	    }
+	  }
+	}
+	vn->clearAutoLiveHold();
+	count += 1;
+      }
+    }
+    else if (vn->hasActionProperty()) {
      if (cachereadonly&&vn->isReadOnly()) {
 	if (data.fillinReadOnly(vn)) // Try to replace vn with its lookup in LoadImage
 	  count += 1;
@@ -3404,6 +3406,84 @@ uintb ActionDeadCode::gatherConsumedReturn(Funcdata &data)
  return consumeVal;
 }

+/// \brief Determine if the given Varnode may eventually collapse to a constant
+///
+/// Recursively check, looking through any COPYs, if the Varnode is either:
+///   - A constant, or an INT_ADD whose two inputs both qualify
+///   - A shift or INT_MULT by a constant, or an INT_ZEXT/INT_SEXT, of a qualifying value
+///   - Loaded from a pointer that itself qualifies
+///
+/// \param vn is the given Varnode
+/// \param addCount is the number of CPUI_INT_ADD operations seen so far
+/// \param loadCount is the number of CPUI_LOAD operations seen so far
+/// \return \b true if the Varnode (might) collapse to a constant
+bool ActionDeadCode::isEventualConstant(Varnode *vn,int4 addCount,int4 loadCount)
+
+{
+  if (vn->isConstant()) return true;
+  if (!vn->isWritten()) return false;
+  PcodeOp *op = vn->getDef();
+  while(op->code() == CPUI_COPY) {	// Follow a chain of COPYs back to its source
+    vn = op->getIn(0);
+    if (vn->isConstant()) return true;
+    if (!vn->isWritten()) return false;
+    op = vn->getDef();
+  }
+  switch(op->code()) {
+    case CPUI_INT_ADD:
+      if (addCount > 0) return false;	// Reject an INT_ADD nested beneath another INT_ADD
+      if (!isEventualConstant(op->getIn(0),addCount+1,loadCount))
+	return false;
+      return isEventualConstant(op->getIn(1),addCount+1,loadCount);
+    case CPUI_LOAD:
+      if (loadCount > 0) return false;	// Reject a LOAD nested beneath another LOAD
+      return isEventualConstant(op->getIn(1),0,loadCount+1);	// The INT_ADD count restarts for the pointer
+    case CPUI_INT_LEFT:
+    case CPUI_INT_RIGHT:
+    case CPUI_INT_SRIGHT:
+    case CPUI_INT_MULT:
+      if (!op->getIn(1)->isConstant())	// Second input (shift amount or multiplier) must already be constant
+	return false;
+      return isEventualConstant(op->getIn(0),addCount,loadCount);
+    case CPUI_INT_ZEXT:
+    case CPUI_INT_SEXT:
+      return isEventualConstant(op->getIn(0),addCount,loadCount);
+    default:
+      break;
+  }
+  return false;
+}
+
+/// \brief Check if there are any unconsumed LOADs that may be from volatile addresses.
+///
+/// It may be too early to remove certain LOAD operations even though their result isn't consumed, because the
+/// LOAD may be of a volatile address with side effects.  The output of such a LOAD is added to the worklist.
+/// \param data is the function being analyzed
+/// \param worklist is the container receiving the output Varnode of each LOAD that is held
+/// \return \b true if there was at least one LOAD added to the worklist
+bool ActionDeadCode::lastChanceLoad(Funcdata &data,vector<Varnode *> &worklist)
+
+{
+  if (data.getHeritagePass() > 1) return false;	// Only hold LOADs during heritage passes 0 and 1
+  if (data.isJumptableRecoveryOn()) return false;	// Never hold LOADs while jumptable recovery is on
+  list<PcodeOp *>::const_iterator iter = data.beginOp(CPUI_LOAD);
+  list<PcodeOp *>::const_iterator enditer = data.endOp(CPUI_LOAD);
+  bool res = false;
+  while(iter != enditer) {
+    PcodeOp *op = *iter;
+    ++iter;
+    if (op->isDead()) continue;
+    Varnode *vn = op->getOut();
+    if (vn->isConsumeVacuous()) continue;	// NOTE(review): presumably skips outputs already marked as consumed -- confirm isConsumeVacuous() semantics
+    if (isEventualConstant(op->getIn(1), 0, 0)) {	// Pointer may collapse to a constant address
+      pushConsumed(~(uintb)0, vn, worklist);	// Mark every bit of the output as consumed
+      vn->setAutoLiveHold();	// Flag is cleared again in ActionVarnodeProps::apply
+      res = true;
+    }
+  }
+  return res;
+}
+
 int4 ActionDeadCode::apply(Funcdata &data)

 {
@@ -3446,11 +3526,11 @@ int4 ActionDeadCode::apply(Funcdata &data)

    op->clearIndirectSource();
    if (op->isCall()) {
-      if (op->code() == CPUI_CALLOTHER) {
+      // Postpone setting consumption on CALL and CALLIND inputs
+      if (op->isCallWithoutSpec()) {
 	for(i=0;i<op->numInput();++i)
 	  pushConsumed(~((uintb)0),op->getIn(i),worklist);
      }
-      // Postpone setting consumption on CALL and CALLIND inputs
      if (!op->isAssignment())
 	continue;
    }
@@ -3497,6 +3577,11 @@ int4 ActionDeadCode::apply(Funcdata &data)
  while(!worklist.empty())
    propagateConsumed(worklist);

+  if (lastChanceLoad(data, worklist)) {
+    while(!worklist.empty())
+      propagateConsumed(worklist);
+  }
+
  for(i=0;i<manage->numSpaces();++i) {
    spc = manage->getSpace(i);
    if (spc == (AddrSpace *)0 || !spc->doesDeadcode()) continue;
@@ -4724,11 +4809,12 @@ void TermOrder::sortTerms(void)
  sort(sorter.begin(),sorter.end(),additiveCompare);
 }

-/// Build the default \e root Actions: decompile, jumptable, normalize, paramid, register, firstpass
-/// \param allacts is the database that will hold the \e root Actions
-void build_defaultactions(ActionDatabase &allacts)
+/// (Re)build the default \e root Actions: decompile, jumptable, normalize, paramid, register, firstpass
+void ActionDatabase::buildDefaultGroups(void)

 {
+  if (isDefaultGroups) return;
+  groupmap.clear();
  const char *members[] = { "base", "protorecovery", "protorecovery_a", "deindirect", "localrecovery",
 			    "deadcode", "typerecovery", "stackptrflow",
 			    "blockrecovery", "stackvars", "deadcontrolflow", "switchnorm",
@@ -4737,36 +4823,37 @@ void build_defaultactions(ActionDatabase &allacts)
 			    "segment", "returnsplit", "nodejoin", "doubleload", "doubleprecis",
 			    "unreachable", "subvar", "floatprecision", 
 			    "conditionalexe", "" };
-  allacts.setGroup("decompile",members);
+  setGroup("decompile",members);

  const char *jumptab[] = { "base", "noproto", "localrecovery", "deadcode", "stackptrflow",
 			    "stackvars", "analysis", "segment", "subvar", "conditionalexe", "" };
-  allacts.setGroup("jumptable",jumptab);
+  setGroup("jumptable",jumptab);

 const  char *normali[] = { "base", "protorecovery", "protorecovery_b", "deindirect", "localrecovery",
 			    "deadcode", "stackptrflow", "normalanalysis",
 			    "stackvars", "deadcontrolflow", "analysis", "fixateproto", "nodejoin",
 			    "unreachable", "subvar", "floatprecision", "normalizebranches",
 			    "conditionalexe", "" };
-  allacts.setGroup("normalize",normali);
+  setGroup("normalize",normali);

  const  char *paramid[] = { "base", "protorecovery", "protorecovery_b", "deindirect", "localrecovery",
                             "deadcode", "typerecovery", "stackptrflow", "siganalysis",
                             "stackvars", "deadcontrolflow", "analysis", "fixateproto",
                             "unreachable", "subvar", "floatprecision",
                             "conditionalexe", "" };
-  allacts.setGroup("paramid",paramid);
+  setGroup("paramid",paramid);

  const char *regmemb[] = { "base", "analysis", "subvar", "" };
-  allacts.setGroup("register",regmemb);
+  setGroup("register",regmemb);

  const char *firstmem[] = { "base", "" };
-  allacts.setGroup("firstpass",firstmem);
+  setGroup("firstpass",firstmem);
+  isDefaultGroups = true;
 }

 /// Construct the \b universal Action that contains all possible components
 /// \param conf is the Architecture that will use the Action
-void universal_action(Architecture *conf)
+void ActionDatabase::universalAction(Architecture *conf)

 {
  vector<Rule *>::iterator iter;
@@ -4778,9 +4865,8 @@ void universal_action(Architecture *conf)
  ActionGroup *actstackstall;
  AddrSpace *stackspace = conf->getStackSpace();

-  build_defaultactions(conf->allacts);
  act = new ActionRestartGroup(Action::rule_onceperfunc,"universal",1);
-  conf->allacts.registerUniversal(act);
+  registerAction(universalname,act);

  act->addAction( new ActionStart("base"));
  act->addAction( new ActionConstbase("base"));
@@ -211,9 +211,7 @@ public:
 ///   - Read-only Varnodes are converted to the underlying constant
 ///   - Volatile Varnodes are converted read/write functions
 ///   - Varnodes whose values are not consumed are replaced with constant 0 Varnodes
-///   - Large Varnodes are flagged for lane analysis
 class ActionVarnodeProps : public Action {
-  void markLanedVarnode(Funcdata &data,Varnode *vn);	///< Mark possible laned register storage
 public:
  ActionVarnodeProps(const string &g) : Action(0,"varnodeprops",g) {}	///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
@@ -543,6 +541,8 @@ class ActionDeadCode : public Action {
  static bool neverConsumed(Varnode *vn,Funcdata &data);
  static void markConsumedParameters(FuncCallSpecs *fc,vector<Varnode *> &worklist);
  static uintb gatherConsumedReturn(Funcdata &data);
+  static bool isEventualConstant(Varnode *vn,int4 addCount,int4 loadCount);
+  static bool lastChanceLoad(Funcdata &data,vector<Varnode *> &worklist);
 public:
  ActionDeadCode(const string &g) : Action(0,"deadcode",g) {}	///< Constructor
  virtual Action *clone(const ActionGroupList &grouplist) const {
@@ -553,7 +553,7 @@ void ParamListStandard::assignMap(const vector<Datatype *> &proto,bool isinput,T
 	  spc = typefactory.getArch()->getDefaultDataSpace();
 	int4 pointersize = spc->getAddrSize();
 	int4 wordsize = spc->getWordSize();
-	Datatype *pointertp = typefactory.getTypePointerAbsolute(pointersize,proto[i],wordsize);
+	Datatype *pointertp = typefactory.getTypePointer(pointersize,proto[i],wordsize);
 	res.back().addr = assignAddress(pointertp,status);
 	res.back().type = pointertp;
 	res.back().flags = Varnode::indirectstorage;
@@ -1102,7 +1102,7 @@ void ParamListStandardOut::assignMap(const vector<Datatype *> &proto,bool isinpu
      spc = typefactory.getArch()->getDefaultDataSpace();
    int4 pointersize = spc->getAddrSize();
    int4 wordsize = spc->getWordSize();
-    Datatype *pointertp = typefactory.getTypePointerAbsolute(pointersize, proto[0], wordsize);
+    Datatype *pointertp = typefactory.getTypePointer(pointersize, proto[0], wordsize);
    res.back().addr = assignAddress(pointertp,status);
    if (res.back().addr.isInvalid())
      throw ParamUnassignedError("Cannot assign return value as a pointer");
@@ -36,6 +36,7 @@ Funcdata::Funcdata(const string &nm,Scope *scope,const Address &addr,int4 sz)
  high_level_index = 0;
  cast_phase_index = 0;
  glb = scope->getArch();
+  minLanedSize = glb->getMinimumLanedRegisterSize();
  name = nm;

  size = sz;
@@ -69,6 +70,7 @@ void Funcdata::clear(void)
  clean_up_index = 0;
  high_level_index = 0;
  cast_phase_index = 0;
+  minLanedSize = glb->getMinimumLanedRegisterSize();

  localmap->clearUnlocked();	// Clear non-permanent stuff
  localmap->resetLocalWindow();
@@ -134,7 +136,7 @@ void Funcdata::startProcessing(void)
    warningHeader("This is an inlined function");
  Address baddr(baseaddr.getSpace(),0);
  Address eaddr(baseaddr.getSpace(),~((uintb)0));
-  followFlow(baddr,eaddr,0);
+  followFlow(baddr,eaddr);
  structureReset();
  sortCallSpecs();		// Must come after structure reset
  heritage.buildInfoList();
@@ -343,7 +345,7 @@ void Funcdata::spacebaseConstant(PcodeOp *op,int4 slot,SymbolEntry *entry,const

  Symbol *sym = entry->getSymbol();
  Datatype *entrytype = sym->getType();
-  Datatype *ptrentrytype = glb->types->getTypePointer(sz,entrytype,spaceid->getWordSize());
+  Datatype *ptrentrytype = glb->types->getTypePointerStripArray(sz,entrytype,spaceid->getWordSize());
  bool typelock = sym->isTypeLocked();
  if (typelock && (entrytype->getMetatype() == TYPE_UNKNOWN))
    typelock = false;
@@ -56,13 +56,13 @@ class Funcdata {
    restart_pending = 0x200,	///< Analysis must be restarted (because of new override info)
    unimplemented_present = 0x400,	///< Set if function contains unimplemented instructions
    baddata_present = 0x800,	///< Set if function flowed into bad data
-    double_precis_on = 0x1000,	///< Set if we are performing double precision recovery
-    big_varnodes_generated = 0x2000	///< Set when search for laned registers is complete
+    double_precis_on = 0x1000	///< Set if we are performing double precision recovery
  };
  uint4 flags;			///< Boolean properties associated with \b this function
  uint4 clean_up_index;		///< Creation index of first Varnode created after start of cleanup
  uint4 high_level_index;	///< Creation index of first Varnode created after HighVariables are created
  uint4 cast_phase_index;	///< Creation index of first Varnode created after ActionSetCasts
+  uint4 minLanedSize;		///< Minimum Varnode size to check as LanedRegister
  Architecture *glb;		///< Global configuration data
  string name;			///< Name of function
  int4 size;			///< Number of bytes of binary data in function body
@@ -132,8 +132,7 @@ public:
  bool isTypeRecoveryOn(void) const { return ((flags&typerecovery_on)!=0); }	///< Has data-type recovery processes started
  bool hasNoCode(void) const { return ((flags & no_code)!=0); }		///< Return \b true if \b this function has no code body
  void setNoCode(bool val) { if (val) flags |= no_code; else flags &= ~no_code; }	///< Toggle whether \b this has a body
-  bool isLanedRegComplete(void) const { return ((flags&big_varnodes_generated)!=0); }	///< Have potential laned registers been generated
-  void setLanedRegGenerated(void) { flags |= big_varnodes_generated; }	///< Mark that laned registers have been collected
+  void setLanedRegGenerated(void) { minLanedSize = 1000000; }	///< Mark that laned registers have been collected

  /// \brief Toggle whether \b this is being used for jump-table recovery
  ///
@@ -161,7 +160,7 @@ public:
  void startCleanUp(void) { clean_up_index = vbank.getCreateIndex(); }	///< Start \e clean-up phase
  uint4 getCleanUpIndex(void) const { return clean_up_index; }	///< Get creation index at the start of \b clean-up phase

-  void followFlow(const Address &baddr,const Address &eadddr,uint4 insn_max);
+  void followFlow(const Address &baddr,const Address &eadddr);
  void truncatedFlow(const Funcdata *fd,const FlowInfo *flow);
  bool inlineFlow(Funcdata *inlinefd,FlowInfo &flow,PcodeOp *callop);
  void overrideFlow(const Address &addr,uint4 type);
@@ -201,6 +200,8 @@ public:
  Varnode *findSpacebaseInput(AddrSpace *id) const;
  void spacebaseConstant(PcodeOp *op,int4 slot,SymbolEntry *entry,const Address &rampoint,uintb origval,int4 origsize);

+  int4 getHeritagePass(void) const { return heritage.getPass(); }	///< Get overall count of heritage passes
+
  /// \brief Get the number of heritage passes performed for the given address space
  ///
  /// \param spc is the address space
@@ -350,7 +351,7 @@ public:
  /// \brief End of (input or free) Varnodes at a given storage address
  VarnodeDefSet::const_iterator endDef(uint4 fl,const Address &addr) const { return vbank.endDef(fl,addr); }

-  void markLanedVarnode(Varnode *vn,const LanedRegister *lanedReg);	///< Mark Varnode as potential laned register
+  void checkForLanedRegister(int4 size,const Address &addr);	///< Check for a potential laned register
  map<VarnodeData,const LanedRegister *>::const_iterator beginLaneAccess(void) const { return lanedMap.begin(); }	///< Beginning iterator over laned accesses
  map<VarnodeData,const LanedRegister *>::const_iterator endLaneAccess(void) const { return lanedMap.end(); }	///< Ending iterator over laned accesses
  void clearLanedAccessMap(void) { lanedMap.clear(); }	///< Clear records from the laned access list
@@ -434,7 +435,7 @@ public:
  void opMarkNonPrinting(PcodeOp *op) { op->setFlag(PcodeOp::nonprinting); }	///< Mark PcodeOp as not being printed
  void opMarkSpecialPrint(PcodeOp *op) { op->setAdditionalFlag(PcodeOp::special_print); }	///< Mark PcodeOp as needing special printing
  void opMarkNoCollapse(PcodeOp *op) { op->setFlag(PcodeOp::nocollapse); }	///< Mark PcodeOp as not collapsible
-  void opMarkCpoolTransformed(PcodeOp *op) { op->setFlag(PcodeOp::is_cpool_transformed); }	///< Mark cpool record was visited
+  void opMarkCpoolTransformed(PcodeOp *op) { op->setAdditionalFlag(PcodeOp::is_cpool_transformed); }	///< Mark cpool record was visited
  void opMarkCalculatedBool(PcodeOp *op) { op->setFlag(PcodeOp::calculated_bool); }	///< Mark PcodeOp as having boolean output
  void opMarkSpacebasePtr(PcodeOp *op) { op->setFlag(PcodeOp::spacebase_ptr); }	///< Mark PcodeOp as LOAD/STORE from spacebase ptr
  void opClearSpacebasePtr(PcodeOp *op) { op->clearFlag(PcodeOp::spacebase_ptr); }	///< Unmark PcodeOp as using spacebase ptr
@@ -508,7 +509,9 @@ public:
  void switchEdge(FlowBlock *inblock,BlockBasic *outbefore,FlowBlock *outafter);
  void spliceBlockBasic(BlockBasic *bl);	///< Merge the given basic block with the block it flows into
  void installSwitchDefaults(void);		///< Make sure default switch cases are properly labeled
-  static bool replaceLessequal(Funcdata &data,PcodeOp *op);	///< Replace INT_LESSEQUAL and INT_SLESSEQUAL expressions
+  bool replaceLessequal(PcodeOp *op);		///< Replace INT_LESSEQUAL and INT_SLESSEQUAL expressions
+  bool distributeIntMultAdd(PcodeOp *op);	///< Distribute constant coefficient to additive input
+  bool collapseIntMultMult(Varnode *vn);	///< Collapse constant coefficients for two chained CPUI_INT_MULT
  static bool compareCallspecs(const FuncCallSpecs *a,const FuncCallSpecs *b);

 #ifdef OPACTION_DEBUG
@@ -793,7 +793,7 @@ void Funcdata::nodeSplitCloneVarnode(PcodeOp *op,PcodeOp *newop)
  uint4 vflags = opvn->getFlags();
  vflags &= (Varnode::externref | Varnode::volatil | Varnode::incidental_copy |
 	     Varnode::readonly | Varnode::persist |
-	     Varnode::addrtied | Varnode::addrforce | Varnode::auto_live);
+	     Varnode::addrtied | Varnode::addrforce);
  newvn->setFlags(vflags);
 }

@@ -700,12 +700,10 @@ void Funcdata::markIndirectCreation(PcodeOp *indop,bool possibleOutput)
 /// \brief Generate raw p-code for the function
 ///
 /// Follow flow from the entry point generating PcodeOps for each instruction encountered.
-/// The caller can provide a bounding range that constrains where control can flow to
-/// and can also provide a maximum number of instructions that will be followed.
+/// The caller can provide a bounding range that constrains where control can flow to.
 /// \param baddr is the beginning of the constraining range
 /// \param eaddr is the end of the constraining range
-/// \param insn_max is the maximum number of instructions to follow
-void Funcdata::followFlow(const Address &baddr,const Address &eaddr,uint4 insn_max)
+void Funcdata::followFlow(const Address &baddr,const Address &eaddr)

 {
  if (!obank.empty()) {
@@ -719,8 +717,7 @@ void Funcdata::followFlow(const Address &baddr,const Address &eaddr,uint4 insn_m
  FlowInfo flow(*this,obank,bblocks,qlst);
  flow.setRange(baddr,eaddr);
  flow.setFlags(fl);
-  if (insn_max != 0)
-    flow.setMaximumInstructions(insn_max);
+  flow.setMaximumInstructions(glb->max_instructions);
  flow.generateOps();
  size = flow.getSize();
  // Cannot keep track of function sizes in general because of non-contiguous functions
@@ -972,10 +969,9 @@ void Funcdata::overrideFlow(const Address &addr,uint4 type)
 ///   - `c <= x`   with  `c-1 < x`   OR
 ///   - `x <= c`   with  `x < c+1`
 ///
-/// \param data is the function being analyzed
 /// \param op is comparison PcodeOp
 /// \return true if a valid replacement was performed
-bool Funcdata::replaceLessequal(Funcdata &data,PcodeOp *op)
+bool Funcdata::replaceLessequal(PcodeOp *op)

 {
  Varnode *vn;
@@ -998,17 +994,105 @@ bool Funcdata::replaceLessequal(Funcdata &data,PcodeOp *op)
  if (op->code() == CPUI_INT_SLESSEQUAL) {
    if ((val<0)&&(val+diff>0)) return false; // Check for sign overflow
    if ((val>0)&&(val+diff<0)) return false;
-    data.opSetOpcode(op,CPUI_INT_SLESS);
+    opSetOpcode(op,CPUI_INT_SLESS);
  }
  else {			// Check for unsigned overflow
    if ((diff==-1)&&(val==0)) return false;
    if ((diff==1)&&(val==-1)) return false;
-    data.opSetOpcode(op,CPUI_INT_LESS);
+    opSetOpcode(op,CPUI_INT_LESS);
  }
  uintb res = (val+diff) & calc_mask(vn->getSize());
-  Varnode *newvn = data.newConstant(vn->getSize(),res);
+  Varnode *newvn = newConstant(vn->getSize(),res);
  newvn->copySymbol(vn);	// Preserve data-type (and any Symbol info)
-  data.opSetInput(op,newvn,i);
+  opSetInput(op,newvn,i);
+  return true;
+}
+
+/// If a term has a multiplicative coefficient, but the underlying term is still additive,
+/// in some situations we may need to distribute the coefficient before simplifying further.
+/// The given PcodeOp is an INT_MULT where the second input is a constant. We also
+/// know the first input is formed with INT_ADD. Distribute the coefficient to the INT_ADD inputs.
+/// \param op is the given PcodeOp
+/// \return \b true if the action was performed
+bool Funcdata::distributeIntMultAdd(PcodeOp *op)
+
+{
+  Varnode *newvn0,*newvn1;
+  PcodeOp *addop = op->getIn(0)->getDef();	// Assumed to be the INT_ADD; neither the def nor its opcode is re-checked here
+  Varnode *vn0 = addop->getIn(0);
+  Varnode *vn1 = addop->getIn(1);
+  if ((vn0->isFree())&&(!vn0->isConstant())) return false;	// A free input is only acceptable if it is a constant
+  if ((vn1->isFree())&&(!vn1->isConstant())) return false;
+  uintb coeff = op->getIn(1)->getOffset();
+  int4 size = op->getOut()->getSize();
+				// Do distribution
+  if (vn0->isConstant()) {
+    uintb val = coeff * vn0->getOffset();	// Constant term: fold the coefficient in directly
+    val &= calc_mask(size);	// Truncate the product to the output size
+    newvn0 = newConstant(size,val);
+  }
+  else {
+    PcodeOp *newop0 = newOp(2,op->getAddr());	// Non-constant term: build  vn0 * coeff  as a new op
+    opSetOpcode(newop0,CPUI_INT_MULT);
+    newvn0 = newUniqueOut(size,newop0);
+    opSetInput(newop0, vn0, 0); // To first input of original add
+    Varnode *newcvn = newConstant(size,coeff);
+    opSetInput(newop0, newcvn, 1);
+    opInsertBefore(newop0, op);	// The new multiply is placed ahead of the original op
+  }
+
+  if (vn1->isConstant()) {
+    uintb val = coeff * vn1->getOffset();	// Same treatment for the second term of the add
+    val &= calc_mask(size);
+    newvn1 = newConstant(size,val);
+  }
+  else {
+    PcodeOp *newop1 = newOp(2,op->getAddr());
+    opSetOpcode(newop1,CPUI_INT_MULT);
+    newvn1 = newUniqueOut(size,newop1);
+    opSetInput(newop1, vn1, 0); // To second input of original add
+    Varnode *newcvn = newConstant(size,coeff);
+    opSetInput(newop1, newcvn, 1);
+    opInsertBefore(newop1, op);
+  }
+
+  opSetInput( op, newvn0, 0); // new ADD's inputs are the distributed terms (new MULT outputs or folded constants)
+  opSetInput( op, newvn1, 1);
+  opSetOpcode(op, CPUI_INT_ADD);	// The original INT_MULT is converted in place into the INT_ADD
+
+  return true;
+}
+
+/// If:
+///   - The given Varnode is defined by a CPUI_INT_MULT,
+///   - The second input to the INT_MULT is a constant,
+///   - The first input is defined by another CPUI_INT_MULT, and
+///   - This multiply is also by a constant (and its first input is not a free Varnode),
+///
+/// the constants are combined into the outer multiply and \b true is returned.
+/// Otherwise no change is made and \b false is returned.
+/// \param vn is the given Varnode
+/// \return \b true if a change was made
+bool Funcdata::collapseIntMultMult(Varnode *vn)
+
+{
+  if (!vn->isWritten()) return false;
+  PcodeOp *op = vn->getDef();
+  if (op->code() != CPUI_INT_MULT) return false;
+  Varnode *constVnFirst = op->getIn(1);	// Coefficient of the outer multiply
+  if (!constVnFirst->isConstant()) return false;
+  if (!op->getIn(0)->isWritten()) return false;
+  PcodeOp *otherMultOp = op->getIn(0)->getDef();
+  if (otherMultOp->code() != CPUI_INT_MULT) return false;
+  Varnode *constVnSecond = otherMultOp->getIn(1);	// Coefficient of the inner multiply
+  if (!constVnSecond->isConstant()) return false;
+  Varnode *invn = otherMultOp->getIn(0);
+  if (invn->isFree()) return false;	// A free Varnode is not re-used as the new input
+  int4 size = invn->getSize();
+  uintb val = (constVnFirst->getOffset() * constVnSecond->getOffset()) & calc_mask(size);	// Combined coefficient, truncated to the input size
+  Varnode *newvn = newConstant(size,val);
+  opSetInput(op,newvn,1);	// Outer multiply now uses the combined coefficient ...
+  opSetInput(op,invn,0);	// ... and reads the inner multiply's input directly; the inner op itself is not modified here
  return true;
 }

@@ -1109,7 +1193,7 @@ void opFlipInPlaceExecute(Funcdata &data,vector<PcodeOp *> &fliplist)
 	data.opSwapInput(op,0,1);

 	if ((opc == CPUI_INT_LESSEQUAL)||(opc == CPUI_INT_SLESSEQUAL))
-	  Funcdata::replaceLessequal(data,op);
+	  data.replaceLessequal(op);
      }
    }
  }
@@ -85,6 +85,9 @@ Varnode *Funcdata::newUnique(int4 s,Datatype *ct)
    ct = glb->types->getBase(s,TYPE_UNKNOWN);
  Varnode *vn = vbank.createUnique(s,ct);
  assignHigh(vn);
+  if (s >= minLanedSize)
+    checkForLanedRegister(s, vn->getAddr());
+
 				// No chance of matching localmap
  return vn;
 }
@@ -104,6 +107,8 @@ Varnode *Funcdata::newVarnodeOut(int4 s,const Address &m,PcodeOp *op)
  op->setOutput(vn);
  assignHigh(vn);

+  if (s >= minLanedSize)
+    checkForLanedRegister(s,m);
  uint4 vflags = 0;
  SymbolEntry *entry = localmap->queryProperties(m,s,op->getAddr(),vflags);
  if (entry != (SymbolEntry *)0)
@@ -126,6 +131,8 @@ Varnode *Funcdata::newUniqueOut(int4 s,PcodeOp *op)
  Varnode *vn = vbank.createDefUnique(s,ct,op);
  op->setOutput(vn);
  assignHigh(vn);
+  if (s >= minLanedSize)
+    checkForLanedRegister(s, vn->getAddr());
  // No chance of matching localmap
  return vn;
 }
@@ -147,6 +154,8 @@ Varnode *Funcdata::newVarnode(int4 s,const Address &m,Datatype *ct)
  vn = vbank.create(s,m,ct);
  assignHigh(vn);

+  if (s >= minLanedSize)
+    checkForLanedRegister(s,m);
  uint4 vflags=0;
  SymbolEntry *entry = localmap->queryProperties(vn->getAddr(),vn->getSize(),Address(),vflags);
  if (entry != (SymbolEntry *)0)	// Let entry try to force type
@@ -248,7 +257,7 @@ Varnode *Funcdata::cloneVarnode(const Varnode *vn)
  // These are the flags we allow to be cloned
  vflags &= (Varnode::annotation | Varnode::externref |
 	     Varnode::readonly | Varnode::persist |
-	     Varnode::addrtied | Varnode::addrforce | Varnode::auto_live |
+	     Varnode::addrtied | Varnode::addrforce |
 	     Varnode::indirect_creation | Varnode::incidental_copy |
 	     Varnode::volatil | Varnode::mapped);
  newvn->setFlags(vflags);
@@ -280,19 +289,21 @@ void Funcdata::destroyVarnode(Varnode *vn)
  vbank.destroy(vn);
 }

-/// Record the given Varnode as a potential laned register access.
-/// The address and size of the Varnode is recorded, anticipating that new
-/// Varnodes at the same storage location may be created
-/// \param vn is the given Varnode to mark
-/// \param lanedReg is the laned register record to associate with the Varnode
-void Funcdata::markLanedVarnode(Varnode *vn,const LanedRegister *lanedReg)
+/// Check if the given storage range is a potential laned register.
+/// If so, record the storage with the matching laned register record.
+/// \param size is the size of the storage range in bytes
+/// \param addr is the starting address of the storage range
+void Funcdata::checkForLanedRegister(int4 size,const Address &addr)

 {
+  const LanedRegister *lanedRegister  = glb->getLanedRegister(addr,size);
+  if (lanedRegister == (const LanedRegister *)0)
+    return;
  VarnodeData storage;
-  storage.space = vn->getSpace();
-  storage.offset = vn->getOffset();
-  storage.size = vn->getSize();
-  lanedMap[storage] = lanedReg;
+  storage.space = addr.getSpace();
+  storage.offset = addr.getOffset();
+  storage.size = size;
+  lanedMap[storage] = lanedRegister;
 }

 /// Look up the Symbol visible in \b this function's Scope and return the HighVariable
@@ -492,7 +503,7 @@ void Funcdata::transferVarnodeProperties(Varnode *vn,Varnode *newVn,int4 lsbOffs
 {
  uintb newConsume = (vn->getConsume() >> 8*lsbOffset) & calc_mask(newVn->getSize());

-  uint4 vnFlags = vn->getFlags() & (Varnode::directwrite|Varnode::addrforce|Varnode::auto_live);
+  uint4 vnFlags = vn->getFlags() & (Varnode::directwrite|Varnode::addrforce);

  newVn->setFlags(vnFlags);	// Preserve addrforce setting
  newVn->setConsume(newConsume);
@@ -794,7 +805,7 @@ void Funcdata::calcNZMask(void)

 /// \brief Update Varnode properties based on (new) Symbol information
 ///
-/// Boolean properties \b addrtied, \b addrforce, \b auto_live, and \b nolocalalias
+/// Boolean properties \b addrtied, \b addrforce, and \b nolocalalias
 /// for Varnodes are updated based on new Symbol information they map to.
 /// The caller can elect to update data-type information as well, where Varnodes
 /// and their associated HighVariables have their data-type finalized based symbols.
@@ -863,7 +874,7 @@ bool Funcdata::syncVarnodesWithSymbols(const ScopeLocal *lm,bool typesyes)
 /// to point to the first Varnode after the affected set.
 ///
 /// The only properties that can be effectively changed with this
-/// routine are \b mapped, \b addrtied, \b addrforce, \b auto_live, and \b nolocalalias.
+/// routine are \b mapped, \b addrtied, \b addrforce, and \b nolocalalias.
 /// HighVariable splits must occur if \b addrtied is cleared.
 ///
 /// If the given data-type is non-null, an attempt is made to update all the Varnodes
@@ -884,13 +895,13 @@ bool Funcdata::syncVarnodesWithSymbol(VarnodeLocSet::const_iterator &iter,uint4
 				// We take special care with the addrtied flag
 				// as we cannot set it here if it is clear
 				// We can CLEAR but not SET the addrtied flag
-				// If addrtied is cleared, so should addrforce and auto_live
+				// If addrtied is cleared, so should addrforce
  if ((flags&Varnode::addrtied)==0) // Is the addrtied flags cleared
-    mask |= Varnode::addrtied | Varnode::addrforce | Varnode::auto_live;
+    mask |= Varnode::addrtied | Varnode::addrforce;
  // We can set the nolocalalias flag, but not clear it
  // If nolocalalias is set, then addrforce should be cleared
  if ((flags&Varnode::nolocalalias)!=0)
-    mask |= Varnode::nolocalalias | Varnode::addrforce | Varnode::auto_live;
+    mask |= Varnode::nolocalalias | Varnode::addrforce;
  flags &= mask;

  vn = *iter;
@@ -19,6 +19,7 @@
 #include "ghidra_translate.hh"
 #include "typegrp_ghidra.hh"
 #include "comment_ghidra.hh"
+#include "string_ghidra.hh"
 #include "cpool_ghidra.hh"
 #include "inject_ghidra.hh"

@@ -346,6 +347,12 @@ void ArchitectureGhidra::buildCommentDB(DocumentStorage &store)
  commentdb = new CommentDatabaseGhidra(this);
 }

+void ArchitectureGhidra::buildStringManager(DocumentStorage &store)
+
+{
+  stringManager = new GhidraStringManager(this,2048);	// store is unused; NOTE(review): 2048 is presumably a size limit for decoded strings -- confirm against GhidraStringManager
+}
+
 void ArchitectureGhidra::buildConstantPool(DocumentStorage &store)

 {
@@ -615,6 +622,49 @@ void ArchitectureGhidra::getBytes(uint1 *buf,int4 size,const Address &inaddr)
  readResponseEnd(sin);
 }

+void ArchitectureGhidra::getStringData(vector<uint1> &buffer,const Address &addr,Datatype *ct,int4 maxBytes,bool &isTrunc)
+
+{
+  sout.write("\000\000\001\004",4);
+  writeStringStream(sout,"getString");	// Name of the query being issued
+  sout.write("\000\000\001\016",4); // Beginning of string header
+  addr.saveXml(sout,maxBytes);	// Send the address, together with maxBytes
+  sout.write("\000\000\001\017",4);
+  writeStringStream(sout,ct->getName());	// The data-type is identified by its name ...
+  sout.write("\000\000\001\016",4); // Beginning of string header
+  sout << dec << (int8)ct->getId();	// ... and its id.  Pass as a signed integer
+  sout.write("\000\000\001\017",4);
+
+  sout.write("\000\000\001\005",4);
+  sout.flush();
+
+  readToResponse(sin);
+  int4 type = readToAnyBurst(sin);
+  if (type == 12) {	// Burst code 12 introduces an encoded byte payload
+    int4 c = sin.get();
+    uint4 size = (c-0x20);	// Size arrives as two characters, each offset by 0x20
+    c = sin.get();
+    size ^= ((c-0x20)<<6);	// Second character supplies the bits above the low 6
+    isTrunc = (sin.get() != 0);	// NOTE(review): isTrunc is only assigned on this path; otherwise the caller's value is left untouched
+    buffer.reserve(size);
+    uint1 *dblbuf = new uint1[size * 2];	// Each payload byte is transmitted as two characters
+    sin.read((char *)dblbuf,size*2);
+    for (int4 i=0; i < size; i++) {	// NOTE(review): i is int4 while size is uint4 (signed/unsigned comparison)
+      buffer.push_back(((dblbuf[i*2]-'A') << 4) | (dblbuf[i*2 + 1]-'A'));	// Characters are offset from 'A', high nibble first
+    }
+    delete [] dblbuf;
+    type = readToAnyBurst(sin);
+    if (type != 13)
+      throw JavaError("alignment","Expecting byte alignment end");
+    type = readToAnyBurst(sin);
+  }
+  if ((type&1)==1) {
+    // Odd burst code accepted as end of response; buffer keeps what was decoded above (untouched if no payload was sent)
+  }
+  else
+    throw JavaError("alignment","Expecting end of query response");
+}
+
 /// \brief Retrieve p-code to inject for a specific context
 ///
 /// The particular injection is named and is of one of the types:
@@ -74,6 +74,7 @@ class ArchitectureGhidra : public Architecture {
  virtual PcodeInjectLibrary *buildPcodeInjectLibrary(void);
  virtual void buildTypegrp(DocumentStorage &store);
  virtual void buildCommentDB(DocumentStorage &store);
+  virtual void buildStringManager(DocumentStorage &store);
  virtual void buildConstantPool(DocumentStorage &store);
  virtual void buildContext(DocumentStorage &store);
  virtual void buildSpecFile(DocumentStorage &store);
@@ -124,6 +125,7 @@ public:

  bool getSendParamMeasures(void) const { return sendParamMeasures; }	///< Get the current setting for emitting parameter info

+  virtual void getStringData(vector<uint1> &buffer,const Address &addr,Datatype *ct,int4 maxBytes,bool &isTrunc);
  virtual void printMessage(const string &message) const;

  static void segvHandler(int4 sig);				///< Handler for a segment violation (SIGSEGV) signal
@@ -17,44 +17,47 @@
 #include "flow.hh"
 #include "blockaction.hh"

-#ifdef OPACTION_DEBUG
+#ifdef __REMOTE_SOCKET__

 #include "ifacedecomp.hh"

 static IfaceStatus *ghidra_dcp = (IfaceStatus *)0;
+static RemoteSocket *remote = (RemoteSocket *)0;

-void turn_on_debugging(Funcdata *fd)
+/// \brief Establish a debug console for decompilation of the given function
+///
+/// Attempt to connect to a UNIX domain socket and direct the i/o streams to
+/// the decompiler console interface.  The socket must have been previously established
+/// by another process.
+/// From the command-line,  `nc -l -U /tmp/ghidrasocket` for example.
+void connect_to_console(Funcdata *fd)

 {
-  if (ghidra_dcp == (IfaceStatus *)0) {
-    ghidra_dcp = new IfaceStatus("[ghidradbg]> ",cin,cout);
-    ghidra_dcp->optr = (ostream *)0;
-    ghidra_dcp->fileoptr = (ostream *)0;
-    IfaceCapability::registerAllCommands(ghidra_dcp);
+  if (remote == (RemoteSocket *)0) {
+    remote = new RemoteSocket();
+    if (remote->open("/tmp/ghidrasocket")) {
+      ghidra_dcp = new IfaceStatus("[ghidradbg]> ",*remote->getInputStream(),*remote->getOutputStream());
+      IfaceCapability::registerAllCommands(ghidra_dcp);
+    }
  }
-  // Check if debug script exists
-  ifstream is("ghidracom.txt");
-  if (!is) return;
-  is.close();
-  
+  if (!remote->isSocketOpen())
+    return;
+
  IfaceDecompData *decomp_data = (IfaceDecompData *)ghidra_dcp->getData("decompile");
  decomp_data->fd = fd;
  decomp_data->conf = fd->getArch();
-  ghidra_dcp->pushScript("ghidracom.txt","ghidradbg> ");
-  ghidra_dcp->optr = new ofstream("ghidrares.txt");
-  ghidra_dcp->fileoptr = ghidra_dcp->optr;
-  decomp_data->conf->setDebugStream(ghidra_dcp->optr);
+  ostream *oldPrintStream = decomp_data->conf->print->getOutputStream();
+  bool emitXml = decomp_data->conf->print->emitsXml();
+  decomp_data->conf->setDebugStream(remote->getOutputStream());
+  decomp_data->conf->print->setOutputStream(remote->getOutputStream());
+  decomp_data->conf->print->setXML(false);
+  ghidra_dcp->reset();
  mainloop(ghidra_dcp);
-  ghidra_dcp->popScript();
-}
-
-void turn_off_debugging(Funcdata *fd)
-
-{
-  if (ghidra_dcp->optr != (ostream *)0) {
-    delete ghidra_dcp->optr;
-    ghidra_dcp->optr = (ostream *)0;
-  }
+  decomp_data->conf->clearAnalysis(fd);
+  decomp_data->conf->print->setOutputStream(oldPrintStream);
+  decomp_data->conf->print->setXML(emitXml);
+  fd->debugDisable();
+  decomp_data->conf->allacts.getCurrent()->clearBreakPoints();
 }

 #endif
@@ -213,9 +216,13 @@ void DeregisterProgram::loadParameters(void)
 void DeregisterProgram::rawAction(void)

 {
-#ifdef OPACTION_DEBUG
+#ifdef __REMOTE_SOCKET__
    if (ghidra_dcp != (IfaceStatus *)0)
      delete ghidra_dcp;
+    if (remote != (RemoteSocket *)0)
+      delete remote;
+    ghidra_dcp = (IfaceStatus *)0;
+    remote = (RemoteSocket *)0;
 #endif
  if (ghidra != (ArchitectureGhidra *)0) {
    res = 1;
@@ -245,6 +252,7 @@ void FlushNative::rawAction(void)
  ghidra->symboltab->deleteSubScopes(globscope); // Flush cached function and globals database
  ghidra->types->clearNoncore(); // Reset type information
  ghidra->commentdb->clear();	// Clear any comments
+  ghidra->stringManager->clear();	// Clear string decodings
  ghidra->cpool->clear();
  res = 0;
 }
@@ -283,14 +291,11 @@ void DecompileAt::rawAction(void)
    throw LowlevelError(s.str());
  }
  if (!fd->isProcStarted()) {
-#ifdef OPACTION_DEBUG
-    turn_on_debugging(fd);
+#ifdef __REMOTE_SOCKET__
+    connect_to_console(fd);
 #endif
    ghidra->allacts.getCurrent()->reset( *fd );
    ghidra->allacts.getCurrent()->perform( *fd );
-#ifdef OPACTION_DEBUG
-    turn_off_debugging(fd);
-#endif
  }

  sout.write("\000\000\001\016",4);
@@ -433,6 +438,7 @@ void SetOptions::rawAction(void)
 {
  res = false;

+  ghidra->resetDefaults();
  ghidra->options->restoreXml(doc->getRoot());
  delete doc;
  doc = (Document *)0;
@@ -230,9 +230,8 @@ public:
  virtual void rawAction(void);
 };

-#ifdef OPACTION_DEBUG
-extern void turn_on_debugging(Funcdata *fd);
-extern void turn_off_debugging(Funcdata *fd);
+#ifdef __REMOTE_SOCKET__
+extern void connect_to_console(Funcdata *fd);
 #endif

 #endif
@@ -654,7 +654,7 @@ Datatype *PointerModifier::modType(Datatype *base,const TypeDeclarator *decl,Arc
 {
  int4 addrsize = glb->getDefaultDataSpace()->getAddrSize();
  Datatype *restype;
-  restype = glb->types->getTypePointerAbsolute(addrsize,base,glb->getDefaultDataSpace()->getWordSize());
+  restype = glb->types->getTypePointer(addrsize,base,glb->getDefaultDataSpace()->getWordSize());
  return restype;
 }

@@ -1037,7 +1037,7 @@ Datatype *CParse::newStruct(const string &ident,vector<TypeDeclarator *> *declis
    sublist.back().offset = -1;	// Let typegrp figure out offset
  }

-  if (!glb->types->setFields(sublist,res,-1)) {
+  if (!glb->types->setFields(sublist,res,-1,0)) {
    setError("Bad structure definition");
    glb->types->destroyType(res);
    return (Datatype *)0;
@@ -800,10 +800,15 @@ bool Heritage::protectFreeStores(AddrSpace *spc,vector<PcodeOp *> &freeStores)
    ++iter;
    if (op->isDead()) continue;
    Varnode *vn = op->getIn(1);
-    if (vn->isWritten()) {
-      PcodeOp *copyOp = vn->getDef();
-      if (copyOp->code() == CPUI_COPY)
-	vn = copyOp->getIn(0);
+    while (vn->isWritten()) {
+      PcodeOp *defOp = vn->getDef();
+      OpCode opc = defOp->code();
+      if (opc == CPUI_COPY)
+	vn = defOp->getIn(0);
+      else if (opc == CPUI_INT_ADD && defOp->getIn(1)->isConstant())
+	vn = defOp->getIn(0);
+      else
+	break;
    }
    if (vn->isFree() && vn->getSpace() == spc) {
      fd->opMarkSpacebasePtr(op);	// Mark op as spacebase STORE, even though we're not sure
@@ -913,8 +918,17 @@ bool Heritage::discoverIndexedStackPointers(AddrSpace *spc,vector<PcodeOp *> &fr
 	}
 	case CPUI_STORE:
 	{
-	  if (curNode.traversals != 0) {
-	    generateStoreGuard(curNode, op, spc);
+	  if (op->getIn(1) == curNode.vn) {	// Make sure the STORE pointer comes from our path
+	    if (curNode.traversals != 0) {
+	      generateStoreGuard(curNode, op, spc);
+	    }
+	    else {
+	      // If there were no traversals (of non-constant ADD or MULTIEQUAL) then the
+	      // pointer is equal to the stackpointer plus a constant (through an indirect is possible)
+	      // This will likely get resolved in the next heritage pass, but we leave the
+	      // spacebaseptr mark on, so that the indirects don't get removed
+	      fd->opMarkSpacebasePtr(op);
+	    }
 	  }
 	  break;
 	}
@@ -2340,7 +2354,7 @@ const LoadGuard *Heritage::getStoreGuard(PcodeOp *op) const

 /// \brief Get the number times heritage was performed for the given address space
 ///
-/// A negative number indicates the number of passes to be wait before the first
+/// A negative number indicates the number of passes to wait before the first
 /// heritage will occur.
 /// \param spc is the given address space
 /// \return the number of heritage passes performed
@@ -2350,7 +2364,7 @@ int4 Heritage::numHeritagePasses(AddrSpace *spc) const
  const HeritageInfo *info = getInfo(spc);
  if (!info->isHeritaged())
    throw LowlevelError("Trying to calculate passes for non-heritaged space");
-  return (info->delay - pass);
+  return (pass - info->delay);
 }

 /// Record that Varnodes have been removed from the given space so that we can
@@ -219,7 +219,7 @@ class Heritage {
  /// \brief Get the heritage status for the given address space
  HeritageInfo *getInfo(AddrSpace *spc) { return &(infolist[spc->getIndex()]); }

-  /// \brief Get the heriage status for the given address space
+  /// \brief Get the heritage status for the given address space
  const HeritageInfo *getInfo(AddrSpace *spc) const { return &(infolist[spc->getIndex()]); }

  void splitJoinLevel(vector<Varnode *> &lastcombo,vector<Varnode *> &nextlev,JoinRecord *joinrec);
@@ -267,6 +267,8 @@ class Heritage {
 public:
  Heritage(Funcdata *data);	///< Constructor

+  int4 getPass(void) const { return pass; }	///< Get overall count of heritage passes
+
  /// \brief Get the pass number when the given address was heritaged
  ///
  /// \param addr is the given address
@@ -127,6 +127,7 @@ void IfaceDecompCapability::registerCommands(IfaceStatus *status)
  status->registerCom(new IfcCallFixup(),"fixup","call");
  status->registerCom(new IfcCallOtherFixup(),"fixup","callother");
  status->registerCom(new IfcVolatile(),"volatile");
+  status->registerCom(new IfcReadonly(),"readonly");
  status->registerCom(new IfcPreferSplit(),"prefersplit");
  status->registerCom(new IfcStructureBlocks(),"structure","blocks");
  status->registerCom(new IfcAnalyzeRange(), "analyze","range");
@@ -224,6 +225,14 @@ IfaceDecompData::~IfaceDecompData(void)
 // fd will get deleted with Database
 }

+void IfaceDecompData::allocateCallGraph(void)
+
+{
+  if (cgraph != (CallGraph *)0)
+    delete cgraph;
+  cgraph = new CallGraph(conf);
+}
+
 void IfaceDecompData::abortFunction(ostream &s)

 {				// Clear references to current function
@@ -360,10 +369,10 @@ static void IfcFollowFlow(ostream &s,IfaceDecompData *dcp,const Address &offset,
    if (size==0) {
      Address baddr(dcp->fd->getAddress().getSpace(),0);
      Address eaddr(dcp->fd->getAddress().getSpace(),dcp->fd->getAddress().getSpace()->getHighest());
-      dcp->fd->followFlow(baddr,eaddr,0);
+      dcp->fd->followFlow(baddr,eaddr);
    }
    else
-      dcp->fd->followFlow(offset,offset+size,0);
+      dcp->fd->followFlow(offset,offset+size);
    s << "Function " << dcp->fd->getName() << ": ";
    dcp->fd->getAddress().printRaw(s);
    s << endl;
@@ -2095,10 +2104,7 @@ void IfcDuplicateHash::iterationCallback(Funcdata *fd)
 void IfcCallGraphBuild::execute(istream &s)

 { // Build call graph from existing function starts
-  if (dcp->cgraph != (CallGraph *)0)
-    delete dcp->cgraph;
-
-  dcp->cgraph = new CallGraph(dcp->conf);
+  dcp->allocateCallGraph();

  dcp->cgraph->buildAllNodes();		// Build a node in the graph for existing symbols
  quick = false;
@@ -2145,11 +2151,7 @@ void IfcCallGraphBuild::iterationCallback(Funcdata *fd)
 void IfcCallGraphBuildQuick::execute(istream &s)

 { // Build call graph from existing function starts, do only disassembly
-  if (dcp->cgraph != (CallGraph *)0)
-    delete dcp->cgraph;
-
-  dcp->cgraph = new CallGraph(dcp->conf);
-
+  dcp->allocateCallGraph();
  dcp->cgraph->buildAllNodes();	// Build a node in the graph for existing symbols
  quick = true;
  iterateFunctionsAddrOrder();
@@ -2198,7 +2200,7 @@ void IfcCallGraphLoad::execute(istream &s)
  DocumentStorage store;
  Document *doc = store.parseDocument(is);

-  dcp->cgraph = new CallGraph(dcp->conf);
+  dcp->allocateCallGraph();
  dcp->cgraph->restoreXml(doc->getRoot());
  *status->optr << "Successfully read in callgraph" << endl;

@@ -2304,6 +2306,22 @@ void IfcVolatile::execute(istream &s)
  *status->optr << "Successfully marked range as volatile" << endl;
 }

+void IfcReadonly::execute(istream &s)
+
+{
+  int4 size = 0;
+  if (dcp->conf == (Architecture *)0)
+    throw IfaceExecutionError("No load image present");
+  Address addr = parse_machaddr(s,size,*dcp->conf->types); // Read required address
+
+  if (size == 0)
+    throw IfaceExecutionError("Must specify a size");
+  Range range( addr.getSpace(), addr.getOffset(), addr.getOffset() + (size-1));
+  dcp->conf->symboltab->setPropertyRange(Varnode::readonly,range);
+
+  *status->optr << "Successfully marked range as readonly" << endl;
+}
+
 void IfcPreferSplit::execute(istream &s)

 { // Mark a particular storage location as something we would prefer to split
@@ -2727,6 +2745,7 @@ void mainloop(IfaceStatus *status) {
  for(;;) {
    while(!status->isStreamFinished()) {
      status->writePrompt();
+      status->optr->flush();
      execute(status,dcp);
    }
    if (status->done) break;
@@ -54,6 +54,7 @@ public:
 #endif
  IfaceDecompData(void);
  virtual ~IfaceDecompData(void);
+  void allocateCallGraph(void);
  void abortFunction(ostream &s);
  void clearArchitecture(void);
 };
@@ -531,6 +532,11 @@ public:
  virtual void execute(istream &s);
 };

+class IfcReadonly : public IfaceDecompCommand {
+public:
+  virtual void execute(istream &s);
+};
+
 class IfcPreferSplit : public IfaceDecompCommand {
 public:
  virtual void execute(istream &s);
@@ -14,6 +14,12 @@
 * limitations under the License.
 */
 #include "interface.hh"
+#ifdef __REMOTE_SOCKET__
+#include "sys/socket.h"
+#include "sys/un.h"
+#include "unistd.h"
+#include "ext/stdio_filebuf.h"
+#endif

 vector<IfaceCapability *> IfaceCapability::thelist;

@@ -30,6 +36,84 @@ void IfaceCapability::registerAllCommands(IfaceStatus *status)
    thelist[i]->registerCommands(status);
 }

+#ifdef __REMOTE_SOCKET__
+
+RemoteSocket::RemoteSocket(void)
+
+{
+  fileDescriptor = 0;
+  inbuf = (basic_filebuf<char> *)0;
+  outbuf = (basic_filebuf<char> *)0;
+  inStream = (istream *)0;
+  outStream = (ostream *)0;
+  isOpen = false;
+}
+
+void RemoteSocket::close(void)
+
+{
+  if (inStream != (istream *)0) {
+    delete inStream;
+    inStream = (istream *)0;
+  }
+  if (outStream != (ostream *)0) {
+    delete outStream;
+    outStream = (ostream *)0;
+  }
+  if (inbuf != (basic_filebuf<char> *)0) {
+    // Destroying the buffer should automatically close the socket
+    delete inbuf;
+    inbuf = (basic_filebuf<char> *)0;
+  }
+  if (outbuf != (basic_filebuf<char> *)0) {
+    delete outbuf;
+    outbuf = (basic_filebuf<char> *)0;
+  }
+  isOpen = false;
+}
+
+bool RemoteSocket::open(const string &filename)
+
+{
+  if (isOpen) return false;
+  if ((fileDescriptor = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
+    throw IfaceError("Could not create socket");
+  struct sockaddr_un addr;
+  addr.sun_family = AF_UNIX;
+  int4 len = filename.length();
+  if (len >= sizeof(addr.sun_path))
+    throw IfaceError("Socket name too long");
+  memcpy(addr.sun_path,filename.c_str(),len);
+  addr.sun_path[len] = '\0';
+  len += sizeof(addr.sun_family);
+  if (connect(fileDescriptor, (struct sockaddr *)&addr, len) < 0) {
+    ::close(fileDescriptor);
+    return false;
+  }
+
+  fdopen(fileDescriptor, "r");
+  inbuf = new __gnu_cxx::stdio_filebuf<char>(fileDescriptor,ios::in);
+  fdopen(fileDescriptor, "w");
+  outbuf = new __gnu_cxx::stdio_filebuf<char>(fileDescriptor,ios::out);
+  inStream = new istream(inbuf);
+  outStream = new ostream(outbuf);
+  isOpen = true;
+  return true;
+}
+
+bool RemoteSocket::isSocketOpen(void)
+
+{
+  if (!isOpen) return false;
+  if (inStream->eof()) {
+    close();
+    return false;
+  }
+  return true;
+}
+
+#endif
+
 IfaceStatus::IfaceStatus(const string &prmpt,istream &is,ostream &os,int4 mxhist)

 {
@@ -75,6 +159,15 @@ void IfaceStatus::popScript(void)
  inerror = false;
 }

+void IfaceStatus::reset(void)
+
+{
+  while(!inputstack.empty())
+    popScript();
+  errorisdone = false;
+  done = false;
+}
+
 void IfaceStatus::saveHistory(const string &line)

 {				// Save line in circular history buffer
@@ -4,9 +4,9 @@
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
- * 
+ *
 *      http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -44,6 +44,32 @@

 using namespace std;

+#ifdef __REMOTE_SOCKET__
+
+/// \brief A wrapper around a UNIX domain socket
+///
+/// The open() command attempts to connect to the given socket name,
+/// which must have been previously established by an external process.
+/// The socket is bound to a C++ istream and ostream.
+class RemoteSocket {
+  int fileDescriptor;		///< Descriptor for the socket
+  basic_filebuf<char> *inbuf;	///< Input buffer associated with the socket
+  basic_filebuf<char> *outbuf;	///< Output buffer for the socket
+  istream *inStream;		///< The C++ input stream
+  ostream *outStream;		///< The C++ output stream
+  bool isOpen;			///< Has the socket been opened
+public:
+  RemoteSocket(void);				///< Constructor
+  ~RemoteSocket(void) { close(); }		///< Destructor
+  bool open(const string &filename);		///< Connect to the given socket
+  bool isSocketOpen(void);			///< Return \b true if the socket is ready to transfer data
+  istream *getInputStream(void) { return inStream; }	///< Get the input stream
+  ostream *getOutputStream(void) { return outStream; }	///< Get the output stream
+  void close(void);				///< Close the streams and socket
+};
+
+#endif
+
 struct IfaceError {
  string explain;		// Explanatory string
  IfaceError(const string &s) { explain = s; }
@@ -136,6 +162,7 @@ public:
  void setErrorIsDone(bool val) { errorisdone = val; }
  void pushScript(const string &filename,const string &newprompt);
  void popScript(void);
+  void reset(void);
  int4 getNumInputStreamSize(void) const { return inputstack.size(); }
  void writePrompt(void) { *optr << prompt; }
  void registerCom(IfaceCommand *fptr, const char *nm1,
@@ -627,6 +627,9 @@ void PcodeOpBank::addToCodeList(PcodeOp *op)
  case CPUI_STORE:
    op->codeiter = storelist.insert(storelist.end(),op);
    break;
+  case CPUI_LOAD:
+    op->codeiter = loadlist.insert(loadlist.end(), op);
+    break;
  case CPUI_RETURN:
    op->codeiter = returnlist.insert(returnlist.end(),op);
    break;
@@ -648,6 +651,9 @@ void PcodeOpBank::removeFromCodeList(PcodeOp *op)
  case CPUI_STORE:
    storelist.erase(op->codeiter);
    break;
+  case CPUI_LOAD:
+    loadlist.erase(op->codeiter);
+    break;
  case CPUI_RETURN:
    returnlist.erase(op->codeiter);
    break;
@@ -663,6 +669,7 @@ void PcodeOpBank::clearCodeLists(void)

 {
  storelist.clear();
+  loadlist.clear();
  returnlist.clear();
  useroplist.clear();
 }
@@ -896,6 +903,8 @@ list<PcodeOp *>::const_iterator PcodeOpBank::begin(OpCode opc) const
  switch(opc) {
  case CPUI_STORE:
    return storelist.begin();
+  case CPUI_LOAD:
+    return loadlist.begin();
  case CPUI_RETURN:
    return returnlist.begin();
  case CPUI_CALLOTHER:
@@ -912,6 +921,8 @@ list<PcodeOp *>::const_iterator PcodeOpBank::end(OpCode opc) const
  switch(opc) {
  case CPUI_STORE:
    return storelist.end();
+  case CPUI_LOAD:
+    return loadlist.end();
  case CPUI_RETURN:
    return returnlist.end();
  case CPUI_CALLOTHER:
@@ -95,7 +95,7 @@ public:
    spacebase_ptr = 0x4000000,	///< Loads or stores from a dynamic pointer into a spacebase
    indirect_creation = 0x8000000,  ///< Output varnode is created by indirect effect
    calculated_bool = 0x10000000, ///< Output has been determined to be a 1-bit boolean value
-    is_cpool_transformed = 0x20000000, ///< Have we checked for cpool transforms
+    has_callspec = 0x20000000,	///< Op has a call specification associated with it
    ptrflow = 0x40000000,	///< Op consumes or produces a ptr
    indirect_store = 0x80000000	///< CPUI_INDIRECT is caused by CPUI_STORE
  };
@@ -107,7 +107,8 @@ public:
    special_print = 0x10,	///< Op is marked for special printing
    modified = 0x20,		///< This op has been modified by the current action
    warning = 0x40,		///< Warning has been generated for this op
-    incidental_copy = 0x80	///< Treat this as \e incidental for parameter recovery algorithms
+    incidental_copy = 0x80,	///< Treat this as \e incidental for parameter recovery algorithms
+    is_cpool_transformed = 0x100 ///< Have we checked for cpool transforms
  };
 private:
  TypeOp *opcode;		///< Pointer to class providing behavioral details of the operation
@@ -164,6 +165,8 @@ public:
  bool isDead(void) const { return ((flags&PcodeOp::dead)!=0); } ///< Return \b true if this op is dead
  bool isAssignment(void) const { return (output!=(Varnode *)0); } ///< Return \b true is this op has an output
  bool isCall(void) const { return ((flags&PcodeOp::call)!=0); } ///< Return \b true if this op indicates call semantics
+  /// \brief Return \b true if this op acts as a call but does not have a full specification
+  bool isCallWithoutSpec(void) const { return ((flags&(PcodeOp::call|PcodeOp::has_callspec))==PcodeOp::call); }
  bool isMarker(void) const { return ((flags&PcodeOp::marker)!=0); } ///< Return \b true is a special SSA form op
  bool isIndirectCreation(void) const { return ((flags&PcodeOp::indirect_creation)!=0); } ///< Return \b true if op creates a varnode indirectly
  bool isIndirectStore(void) const { return ((flags&PcodeOp::indirect_store)!=0); }	///< Return \b true if \b this INDIRECT is caused by STORE
@@ -203,7 +206,7 @@ public:
  /// \brief Return \b true if output is 1-bit boolean
  bool isCalculatedBool(void) const { return ((flags&(PcodeOp::calculated_bool|PcodeOp::booloutput))!=0); }
  /// \brief Return \b true if we have already examined this cpool
-  bool isCpoolTransformed(void) const { return ((flags&PcodeOp::is_cpool_transformed)!=0); }
+  bool isCpoolTransformed(void) const { return ((addlflags&PcodeOp::is_cpool_transformed)!=0); }
  bool isCollapsible(void) const; ///< Return \b true if this can be collapsed to a COPY of a constant
  /// \brief Return \b true if this LOADs or STOREs from a dynamic \e spacebase pointer
  bool usesSpacebasePtr(void) const { return ((flags&PcodeOp::spacebase_ptr)!=0); }
@@ -247,6 +250,7 @@ class PcodeOpBank {
  list<PcodeOp *> deadlist;		///< List of \e dead PcodeOps
  list<PcodeOp *> alivelist;		///< List of \e alive PcodeOps
  list<PcodeOp *> storelist;		///< List of STORE PcodeOps
+  list<PcodeOp *> loadlist;		///< List of LOAD PcodeOps
  list<PcodeOp *> returnlist;		///< List of RETURN PcodeOps
  list<PcodeOp *> useroplist;		///< List of user-defined PcodeOps
  list<PcodeOp *> deadandgone;		///< List of retired PcodeOps
@@ -80,6 +80,7 @@ OptionDatabase::OptionDatabase(Architecture *g)
  registerOption(new OptionJumpLoad());
  registerOption(new OptionToggleRule());
  registerOption(new OptionAliasBlock());
+  registerOption(new OptionMaxInstruction());
 }

 OptionDatabase::~OptionDatabase(void)
@@ -816,3 +817,19 @@ string OptionAliasBlock::apply(Architecture *glb,const string &p1,const string &
    return "Alias block level unchanged";
  return "Alias block level set to " + p1;
 }
+
+string OptionMaxInstruction::apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const
+
+{
+  if (p1.size() == 0)
+    throw ParseError("Must specify number of instructions");
+
+  int4 newMax = -1;
+  istringstream s1(p1);
+  s1.unsetf(ios::dec | ios::hex | ios::oct); // Let user specify base
+  s1 >> newMax;
+  if (newMax < 0)
+    throw ParseError("Bad maxinstruction parameter");
+  glb->max_instructions = newMax;
+  return "Maximum instructions per function set";
+}
@@ -264,4 +264,10 @@ public:
  virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
 };

+class OptionMaxInstruction : public ArchOption {
+public:
+  OptionMaxInstruction(void) { name="maxinstruction"; }	///< Constructor
+  virtual string apply(Architecture *glb,const string &p1,const string &p2,const string &p3) const;
+};
+
 #endif
@@ -344,6 +344,12 @@ void EmitXml::spaces(int4 num,int4 bump)
  }
 }

+void EmitXml::resetDefaults(void)
+
+{
+  resetDefaultsInternal();
+}
+
 int4 TokenSplit::countbase = 0;

 /// Emit markup or content corresponding to \b this token on a low-level emitter.
@@ -536,15 +542,15 @@ void TokenSplit::printDebug(ostream &s) const
 }
 #endif

-EmitPrettyPrint::EmitPrettyPrint(int4 mls) 
-  : EmitXml(), scanqueue( 3*mls ), tokqueue( 3*mls )
+EmitPrettyPrint::EmitPrettyPrint(void)
+  : EmitXml(), scanqueue( 3*100 ), tokqueue( 3*100 )

 {
  lowlevel = new EmitNoXml();	// Do not emit xml by default
-  maxlinesize = mls;
  spaceremain = maxlinesize;
  needbreak = false;
  commentmode = false;
+  resetDefaultsPrettyPrint();
 }

 EmitPrettyPrint::~EmitPrettyPrint(void)
@@ -1213,3 +1219,11 @@ void EmitPrettyPrint::setMaxLineSize(int4 val)
  spaceremain = maxlinesize;
  clear();
 }
+
+void EmitPrettyPrint::resetDefaults(void)
+
+{
+  lowlevel->resetDefaults();
+  resetDefaultsInternal();
+  resetDefaultsPrettyPrint();
+}
@@ -80,8 +80,9 @@ protected:
  int4 indentlevel;			///< Current indent level (in fixed width characters)
  int4 parenlevel;			///< Current depth of parentheses
  int4 indentincrement;			///< Change in indentlevel per level of nesting
+  void resetDefaultsInternal(void) { indentincrement = 2; }	///< Set options to default values for EmitXml
 public:
-  EmitXml(void) { s = (ostream *)0; indentlevel=0; parenlevel=0; indentincrement=2; }	///< Constructor
+  EmitXml(void) { s = (ostream *)0; indentlevel=0; parenlevel=0; resetDefaultsInternal(); }	///< Constructor

  /// \brief Possible types of syntax highlighting
  enum syntax_highlight {
@@ -196,6 +197,9 @@ public:
  /// \return \b true if \b this produces an XML markup of its emitted source code
  virtual bool emitsXml(void) const { return true; }

+  /// \brief (Re)set the default emitting options
+  virtual void resetDefaults(void);
+
  /// \brief Get the current parentheses depth
  ///
  /// \return the current number of open parenthetical groups
@@ -649,9 +653,11 @@ template<typename _type>
 void circularqueue<_type>::setMax(int4 sz)

 {
-  delete [] cache;
-  max = sz;
-  cache = new _type [ sz ];
+  if (max != sz) {
+    delete [] cache;
+    max = sz;
+    cache = new _type [ sz ];
+  }
  left = 1;			// This operation empties queue
  right = 0;
 }
@@ -721,8 +727,9 @@ class EmitPrettyPrint : public EmitXml {
  void print(const TokenSplit &tok);	///< Output the given token to the low-level emitter
  void advanceleft(void);	///< Emit tokens that have been fully committed
  void scan(void);		///< Process a new token
+  void resetDefaultsPrettyPrint(void) { setMaxLineSize(100); }
 public:
-  EmitPrettyPrint(int4 mls);	///< Construct with an initial maximum line size
+  EmitPrettyPrint(void);	///< Construct with the default maximum line size
  virtual ~EmitPrettyPrint(void);
  virtual int4 beginDocument(void);
  virtual void endDocument(int4 id);
@@ -768,6 +775,7 @@ public:
  virtual int4 getMaxLineSize(void) const { return maxlinesize; }
  virtual void setCommentFill(const string &fill) { commentfill = fill; }
  virtual bool emitsXml(void) const { return lowlevel->emitsXml(); }
+  virtual void resetDefaults(void);
  void setXML(bool val);	///< Toggle whether the low-level emitter emits XML markup or not
 };

@@ -94,12 +94,6 @@ PrintLanguage *PrintCCapability::buildLanguage(Architecture *glb)
 PrintC::PrintC(Architecture *g,const string &nm) : PrintLanguage(g,nm)

 {
-  option_NULL = false;
-  option_inplace_ops = false;
-  option_convention = true;
-  option_nocasts = false;
-  option_unplaced = false;
-  option_hide_exts = true;
  nullToken = "NULL";
  
  // Set the flip tokens
@@ -111,7 +105,7 @@ PrintC::PrintC(Architecture *g,const string &nm) : PrintLanguage(g,nm)
  not_equal.negate = &equal;

  castStrategy = new CastStrategyC();
-  setCStyleComments();
+  resetDefaultsPrintC();
 }

 /// Push nested components of a data-type declaration onto a stack, so we can access it bottom up
@@ -651,6 +645,13 @@ void PrintC::opPtradd(const PcodeOp *op)
 {
  bool printval = isSet(print_load_value|print_store_value);
  uint4 m = mods & ~(print_load_value|print_store_value);
+  if (!printval) {
+    TypePointer *tp = (TypePointer *)op->getIn(0)->getHigh()->getType();
+    if (tp->getMetatype() == TYPE_PTR) {
+      if (tp->getPtrTo()->getMetatype() == TYPE_ARRAY)
+	printval = true;
+    }
+  }
  if (printval)			// Use array notation if we need value
    pushOp(&subscript,op);
  else				// just a '+'
@@ -1170,7 +1171,7 @@ void PrintC::printUnicode(ostream &s,int4 onechar) const
      s << "\\x" << setfill('0') << setw(8) << hex << onechar;
    return;
  }
-  writeUtf8(s, onechar);		// emit normally
+  StringManager::writeUtf8(s, onechar);		// emit normally
 }

 void PrintC::pushType(const Datatype *ct)
@@ -1210,32 +1211,6 @@ bool PrintC::doEmitWideCharPrefix(void) const
  return true;
 }

-/// \brief Check if the byte buffer has a (unicode) string terminator
-///
-/// \param buffer is the byte buffer
-/// \param size is the number of bytes in the buffer
-/// \param charsize is the presumed size (in bytes) of character elements
-/// \return \b true if a string terminator is found
-bool PrintC::hasCharTerminator(uint1 *buffer,int4 size,int4 charsize)
-
-{
-  for(int4 i=0;i<size;i+=charsize) {
-    bool isTerminator = true;
-    for(int4 j=0;j<charsize;++j) {
-      if (buffer[i+j] != 0) {	// Non-zero bytes means character can't be a null terminator
-	isTerminator = false;
-	break;
-      }
-    }
-    if (isTerminator) return true;
-  }
-  return false;
-}
-
-#define STR_LITERAL_BUFFER_MAXSIZE 2048
-#define STR_LITERAL_BUFFER_INCREMENT 32
-
-
 /// \brief Print a quoted (unicode) string at the given address.
 ///
 /// Data for the string is obtained directly from the LoadImage.  The bytes are checked
@@ -1243,43 +1218,40 @@ bool PrintC::hasCharTerminator(uint1 *buffer,int4 size,int4 charsize)
 /// pass, the string is emitted.
 /// \param s is the output stream to print to
 /// \param addr is the address of the string data within the LoadImage
-/// \param charsize is the number of bytes in an encoded element (i.e. UTF8, UTF16, or UTF32)
+/// \param charType is the underlying character data-type
 /// \return \b true if a proper string was found and printed to the stream
-bool PrintC::printCharacterConstant(ostream &s,const Address &addr,int4 charsize) const
+bool PrintC::printCharacterConstant(ostream &s,const Address &addr,Datatype *charType) const

 {
-  uint1 buffer[STR_LITERAL_BUFFER_MAXSIZE+4]; // Additional buffer for get_codepoint skip readahead
-  int4 curBufferSize = 0;
-  bool foundTerminator = false;
-  try {
-    do {
-      uint4 newBufferSize = curBufferSize + STR_LITERAL_BUFFER_INCREMENT;
-      glb->loader->loadFill(buffer+curBufferSize,STR_LITERAL_BUFFER_INCREMENT,addr + curBufferSize);
-      foundTerminator = hasCharTerminator(buffer+curBufferSize,STR_LITERAL_BUFFER_INCREMENT,charsize);
-      curBufferSize = newBufferSize;
-    } while ((curBufferSize < STR_LITERAL_BUFFER_MAXSIZE)&&(!foundTerminator));
-  } catch(DataUnavailError &err) {
+  StringManager *manager = glb->stringManager;
+
+  // Retrieve UTF8 version of string
+  bool isTrunc = false;
+  const vector<uint1> &buffer(manager->getStringData(addr, charType, isTrunc));
+  if (buffer.empty())
    return false;
-  }
-  buffer[curBufferSize] = 0;		// Make sure bytes for final codepoint read are initialized
-  buffer[curBufferSize+1] = 0;
-  buffer[curBufferSize+2] = 0;
-  buffer[curBufferSize+3] = 0;
-  bool bigend = glb->translate->isBigEndian();
-  bool res;
-  if (isCharacterConstant(buffer,curBufferSize,charsize)) {
-    if (doEmitWideCharPrefix() && charsize > 1)
-      s << 'L';			// Print symbol indicating wide character
-    s << '"';
-    if (!escapeCharacterData(s,buffer,curBufferSize,charsize,bigend))
-      s << "...\" /* TRUNCATED STRING LITERAL */";
-    else s << '"';
-     
-    res = true;
-  }
+  if (doEmitWideCharPrefix() && charType->getSize() > 1 && !charType->isOpaqueString())
+    s << 'L';			// Print symbol indicating wide character
+  s << '"';
+  escapeCharacterData(s,buffer.data(),buffer.size(),1,glb->translate->isBigEndian());
+  if (isTrunc)
+    s << "...\" /* TRUNCATED STRING LITERAL */";
  else
-    res = false;
-  return res;
+    s << '"';
+
+  return true;
+}
+
+void PrintC::resetDefaultsPrintC(void)
+
+{
+  option_convention = true;
+  option_hide_exts = true;
+  option_inplace_ops = false;
+  option_nocasts = false;
+  option_NULL = false;
+  option_unplaced = false;
+  setCStyleComments();
 }

 /// \brief Push a single character constant to the RPN stack
@@ -1367,7 +1339,7 @@ bool PrintC::pushPtrCharConstant(uintb val,const TypePointer *ct,const Varnode *

  ostringstream str;
  Datatype *subct = ct->getPtrTo();
-  if (!printCharacterConstant(str,stringaddr,subct->getSize()))
+  if (!printCharacterConstant(str,stringaddr,subct))
    return false;		// Can we get a nice ASCII string

  pushAtom(Atom(str.str(),vartoken,EmitXml::const_color,op,vn));
@@ -1571,7 +1543,7 @@ void PrintC::pushSymbol(const Symbol *sym,const Varnode *vn,const PcodeOp *op)
      SymbolEntry *entry = sym->getFirstWholeMap();
      if (entry != (SymbolEntry *)0) {
 	ostringstream s;
-	if (printCharacterConstant(s,entry->getAddr(),subct->getSize())) {
+	if (printCharacterConstant(s,entry->getAddr(),subct)) {
 	  pushAtom(Atom(s.str(),vartoken,EmitXml::const_color,op,vn));
 	  return;
 	}
@@ -1931,6 +1903,13 @@ void PrintC::emitGotoStatement(const FlowBlock *bl,const FlowBlock *exp_bl,
  emit->endStatement(id);
 }

+void PrintC::resetDefaults(void)
+
+{
+  PrintLanguage::resetDefaults();
+  resetDefaultsPrintC();
+}
+
 void PrintC::adjustTypeOperators(void)

 {
@@ -1950,25 +1929,6 @@ void PrintC::setCommentStyle(const string &nm)
    throw LowlevelError("Unknown comment style. Use \"c\" or \"cplusplus\"");
 }

-bool PrintC::isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const
-
-{
-  // Return true if this looks like a c-string
-  // If the string is encoded in UTF8 or ASCII, we get (on average) a bit of check
-  // per character.  For UTF16, the surrogate reserved area gives at least some check.
-  if (buf == (const uint1 *)0) return false;
-  bool bigend = glb->translate->isBigEndian();
-  int4 i=0;
-  int4 skip = charsize;
-  while(i<size) {
-    int4 codepoint = getCodepoint(buf+i,charsize,bigend,skip);
-    if (codepoint < 0) return false;
-    if (codepoint == 0) break;
-    i += skip;
-  }
-  return true;
-}
-
 /// \brief Emit the definition of the given data-type
 ///
 /// This is currently limited to a 'struct' or 'enum' definitions. The
@@ -157,8 +157,8 @@ protected:
  void opFunc(const PcodeOp *op);			///< Push a \e functional expression based on the given p-code op to the RPN stack
  void opTypeCast(const PcodeOp *op);			///< Push the given p-code op using type-cast syntax to the RPN stack
  void opHiddenFunc(const PcodeOp *op);			///< Push the given p-code op as a hidden token
-  static bool hasCharTerminator(uint1 *buffer,int4 size,int4 charsize);
-  bool printCharacterConstant(ostream &s,const Address &addr,int4 charsize) const;
+  bool printCharacterConstant(ostream &s,const Address &addr,Datatype *charType) const;
+  void resetDefaultsPrintC(void);			///< Set default values for options specific to PrintC
  virtual void pushConstant(uintb val,const Datatype *ct,
 			    const Varnode *vn,const PcodeOp *op);
  virtual bool pushEquate(uintb val,int4 sz,const EquateSymbol *sym,
@@ -200,9 +200,9 @@ public:
  void setDisplayUnplaced(bool val) { option_unplaced = val; }	///< Toggle whether \e unplaced comments are displayed in the header
  void setHideImpliedExts(bool val) { option_hide_exts = val; }	///< Toggle whether implied extensions are hidden
  virtual ~PrintC(void) {}
+  virtual void resetDefaults(void);
  virtual void adjustTypeOperators(void);
  virtual void setCommentStyle(const string &nm);
-  virtual bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const;
  virtual void docTypeDefinitions(const TypeFactory *typegrp);
  virtual void docAllGlobals(void);
  virtual void docSingleGlobal(const Symbol *sym);
@@ -190,7 +190,7 @@ void PrintJava::printUnicode(ostream &s,int4 onechar) const
      s << "\\ux" << setfill('0') << setw(8) << hex << onechar;
    return;
  }
-  writeUtf8(s, onechar);		// Emit normally
+  StringManager::writeUtf8(s, onechar);		// Emit normally
 }

 void PrintJava::opLoad(const PcodeOp *op)
@@ -61,13 +61,10 @@ PrintLanguage::PrintLanguage(Architecture *g,const string &nm)
  castStrategy = (CastStrategy *)0;
  name = nm;
  curscope = (Scope *)0;
-  emit = new EmitPrettyPrint(100);
+  emit = new EmitPrettyPrint();

-  mods = 0;
  pending = 0;
-  line_commentindent = 20;
-  instr_comment_type = Comment::user2 | Comment::warning;
-  head_comment_type = Comment::header | Comment::warningheader;
+  resetDefaultsInternal();
 }

 PrintLanguage::~PrintLanguage(void)
@@ -481,136 +478,6 @@ bool PrintLanguage::unicodeNeedsEscape(int4 codepoint)
  return false;
 }

-/// Encode the given unicode codepoint as UTF8 (1, 2, 3, or 4 bytes) and
-/// write the bytes to the stream.
-/// \param s is the output stream
-/// \param codepoint is the unicode codepoint
-void PrintLanguage::writeUtf8(ostream &s,int4 codepoint)
-
-{
-  uint1 bytes[4];
-  int4 size;
-
-  if (codepoint < 0)
-    throw LowlevelError("Negative unicode codepoint");
-  if (codepoint < 128) {
-    s.put((uint1)codepoint);
-    return;
-  }
-  int4 bits = mostsigbit_set(codepoint) + 1;
-  if (bits > 21)
-    throw LowlevelError("Bad unicode codepoint");
-  if (bits < 12) {	// Encode with two bytes
-    bytes[0] = 0xc0 ^ ((codepoint >> 6)&0x1f);
-    bytes[1] = 0x80 ^ (codepoint & 0x3f);
-    size = 2;
-  }
-  else if (bits < 17) {
-    bytes[0] = 0xe0 ^ ((codepoint >> 12)&0xf);
-    bytes[1] = 0x80 ^ ((codepoint >> 6)&0x3f);
-    bytes[2] = 0x80 ^ (codepoint & 0x3f);
-    size = 3;
-  }
-  else {
-    bytes[0] = 0xf0 ^ ((codepoint >> 18) & 7);
-    bytes[1] = 0x80 ^ ((codepoint >> 12) & 0x3f);
-    bytes[2] = 0x80 ^ ((codepoint >> 6) & 0x3f);
-    bytes[3] = 0x80 ^ (codepoint & 0x3f);
-    size = 4;
-  }
-  s.write((char *)bytes, size);
-}
-
-/// Pull the first two bytes from the byte array and combine them in the indicated endian order
-/// \param buf is the byte array
-/// \param bigend is \b true to request big endian encoding
-/// \return the decoded UTF16 element
-inline int4 PrintLanguage::readUtf16(const uint1 *buf,bool bigend)
-
-{
-  int4 codepoint;
-  if (bigend) {
-    codepoint = buf[0];
-    codepoint <<= 8;
-    codepoint += buf[1];
-  }
-  else {
-    codepoint = buf[1];
-    codepoint <<= 8;
-    codepoint += buf[0];
-  }
-  return codepoint;
-}
-
-/// \brief Extract the next \e unicode \e codepoint from an array of character data
-///
-/// One or more bytes is consumed from the array, and the number of bytes used is passed back.
-/// \param buf is a pointer to the bytes in the character array
-/// \param charsize is 1 for UTF8, 2 for UTF16, or 4 for UTF32
-/// \param bigend is \b true for big endian encoding of the UTF element
-/// \param skip is a reference for passing back the number of bytes consumed
-/// \return the codepoint or -1 if the encoding is invalid
-int4 PrintLanguage::getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip)
-
-{
-  int4 codepoint;
-  int4 sk = 0;
-  if (charsize==2) {		// UTF-16
-    codepoint = readUtf16(buf,bigend);
-    sk += 2;
-    if ((codepoint>=0xD800)&&(codepoint<=0xDBFF)) { // high surrogate
-      int4 trail=readUtf16(buf+2,bigend);
-      sk += 2;
-      if ((trail<0xDC00)||(trail>0xDFFF)) return -1; // Bad trail
-      codepoint = (codepoint<<10) + trail + (0x10000 - (0xD800 << 10) - 0xDC00);
-    }
-    else if ((codepoint>=0xDC00)&&(codepoint<=0xDFFF)) return -1; // trail before high
-  }
-  else if (charsize==1) {	// UTF-8
-    int4 val = buf[0];
-    if ((val&0x80)==0) {
-      codepoint = val;
-      sk = 1;
-    }
-    else if ((val&0xe0)==0xc0) {
-      int4 val2 = buf[1];
-      sk = 2;
-      if ((val2&0xc0)!=0x80) return -1; // Not a valid UTF8-encoding
-      codepoint = ((val&0x1f)<<6) | (val2 & 0x3f);
-    }
-    else if ((val&0xf0)==0xe0) {
-      int4 val2 = buf[1];
-      int4 val3 = buf[2];
-      sk = 3;
-      if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)) return -1; // invalid encoding
-      codepoint = ((val&0xf)<<12) | ((val2&0x3f)<<6) | (val3 & 0x3f);
-    }
-    else if ((val&0xf8)==0xf0) {
-      int4 val2 = buf[1];
-      int4 val3 = buf[2];
-      int4 val4 = buf[3];
-      sk = 4;
-      if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)||((val4&0xc0)!=0x80)) return -1;	// invalid encoding
-      codepoint = ((val&7)<<18) | ((val2&0x3f)<<12) | ((val3&0x3f)<<6) | (val4 & 0x3f);
-    }
-    else
-      return -1;
-  }
-  else if (charsize == 4) {	// UTF-32
-    sk = 4;
-    if (bigend)
-      codepoint = (buf[0]<<24) + (buf[1]<<16) + (buf[2]<<8) + buf[3];
-    else
-      codepoint = (buf[3]<<24) + (buf[2]<<16) + (buf[1]<<8) + buf[0];
-  }
-  else
-    return -1;
-  if (codepoint >= 0xd800 && codepoint <= 0xdfff)
-    return -1;		// Reserved for surrogates, invalid codepoints
-  skip = sk;
-  return codepoint;
-}
-
 /// \brief Emit a byte buffer to the stream as unicode characters.
 ///
 /// Characters are emitted until we reach a terminator character or \b count bytes is consumed.
@@ -627,7 +494,7 @@ bool PrintLanguage::escapeCharacterData(ostream &s,const uint1 *buf,int4 count,i
  int4 skip = charsize;
  int4 codepoint = 0;
  while(i<count) {
-    codepoint = getCodepoint(buf+i,charsize,bigend,skip);
+    codepoint = StringManager::getCodepoint(buf+i,charsize,bigend,skip);
    if (codepoint == 0 || codepoint == -1) break;
    printUnicode(s,codepoint);
    i += skip;
@@ -692,6 +559,15 @@ void PrintLanguage::opUnary(const OpToken *tok,const PcodeOp *op)
  pushVnImplied(op->getIn(0),op,mods);
 }

+/// Restore the options held directly by PrintLanguage to their defaults:
+/// all \b mods bits cleared, header comments limited to \e header and \e warningheader,
+/// instruction comments limited to \e user2 and \e warning, and \b line_commentindent set to 20.
+/// The low-level emitter is not touched by this method.
+void PrintLanguage::resetDefaultsInternal(void)
+
+{
+  mods = 0;
+  head_comment_type = Comment::header | Comment::warningheader;
+  line_commentindent = 20;
+  instr_comment_type = Comment::user2 | Comment::warning;
+}
+
 /// The comment will get emitted as a single line using the high-level language's
 /// delimiters with the given indent level
 /// \param indent is the number of characters to indent
@@ -767,6 +643,13 @@ void PrintLanguage::setFlat(bool val)
    mods &= ~flat;
 }

+/// Reset the low-level emitter's options first, then the options held by PrintLanguage itself.
+void PrintLanguage::resetDefaults(void)
+
+{
+  emit->resetDefaults();
+  resetDefaultsInternal();
+}
+
 void PrintLanguage::clear(void)

 {
@@ -267,14 +267,13 @@ protected:
  void emitOp(const ReversePolish &entry);				///< Send an operator token from the RPN to the emitter
  void emitAtom(const Atom &atom);					///< Send an variable token from the RPN to the emitter
  static bool unicodeNeedsEscape(int4 codepoint);			///< Determine if the given codepoint needs to be escaped
-  static void writeUtf8(ostream &s,int4 codepoint);			///< Write unicode character to stream in UTF8 encoding
-  static int4 readUtf16(const uint1 *buf,bool bigend);			///< Read a 2-byte UTF16 element from a byte array
-  static int4 getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip);
  bool escapeCharacterData(ostream &s,const uint1 *buf,int4 count,int4 charsize,bool bigend) const;
  void recurse(void);							///< Emit from the RPN stack as much as possible
  void opBinary(const OpToken *tok,const PcodeOp *op);			///< Push a binary operator onto the RPN stack
  void opUnary(const OpToken *tok,const PcodeOp *op);			///< Push a unary operator onto the RPN stack
  int4 getPending(void) const { return pending; }			///< Get the number of pending nodes yet to be put on the RPN stack
+  void resetDefaultsInternal(void);					///< Reset options to default for PrintLanguage
+

  /// \brief Print a single unicode character as a \e character \e constant for the high-level language
  ///
@@ -421,6 +420,7 @@ public:
  void setFlat(bool val);						///< Set whether nesting code structure should be emitted

  virtual void adjustTypeOperators(void)=0;				///< Set basic data-type information for p-code operators
+  virtual void resetDefaults(void);					///< Set printing options to their default value
  virtual void clear(void);						///< Clear the RPN stack and the low-level emitter
  virtual void setIntegerFormat(const string &nm);			///< Set the default integer format

@@ -431,14 +431,6 @@ public:
  /// \param nm is the configuration description
  virtual void setCommentStyle(const string &nm)=0;

-  /// \brief Decide is the given byte array looks like a character string
-  ///
-  /// This looks for encodings and/or a terminator that is appropriate for the high-level language
-  /// \param buf is a pointer to the byte array
-  /// \param size is the number of bytes in the array
-  /// \param charsize is the size in bytes of the encoding element (i.e. UTF8, UTF16, etc.) to assume
-  virtual bool isCharacterConstant(const uint1 *buf,int4 size,int4 charsize) const=0;
-
  /// \brief Emit definitions of data-types
  ///
  /// \param typegrp is the container for the data-types that should be defined
@@ -28,16 +28,51 @@
 #include "action.hh"

 /// \brief Structure for sorting out pointer expression trees
+///
+/// Given a base pointer of known data-type and an additive expression involving
+/// the pointer, group the terms of the expression into:
+///   - A constant multiple of the base data-type
+///   - Non-constant multiples of the base data-type
+///   - A constant offset to a sub-component of the base data-type
+///   - Any remaining terms
+///
+/// The \e multiple terms are rewritten using a CPUI_PTRADD. The constant offset
+/// is rewritten using a CPUI_PTRSUB.  Other terms are added back in.  Analysis may cause
+/// multiplication (CPUI_INT_MULT) by a constant to be distributed to its CPUI_INT_ADD input.
 class AddTreeState {
-public:
+  Funcdata &data;		///< The function containing the expression
+  PcodeOp *baseOp;		///< Base of the ADD tree
   Varnode *ptr;			///< The pointer varnode
-  int4 size;			///< Size of ptr type in question
+  const TypePointer *ct;	///< The pointer data-type
+  const Datatype *baseType;	///< The base data-type being pointed at
+  int4 ptrsize;			///< Size of the pointer
+  int4 size;			///< Size of data-type being pointed to (in address units)
+  uintb ptrmask;		///< Mask for modulo calculations in ptr space
+  uintb offset;			///< Number of bytes we dig into the base data-type
+  uintb correct;		///< Number of bytes being double counted
   vector<Varnode *> multiple;	///< Varnodes which are multiples of size
-  vector<uintb> coeff;		///< Associated constant multiple
+  vector<intb> coeff;		///< Associated constant multiple
   vector<Varnode *> nonmult;	///< Varnodes which are not multiples
+  PcodeOp *distributeOp;	///< A CPUI_INT_MULT op that needs to be distributed
   uintb multsum;		///< Sum of multiple constants
   uintb nonmultsum;		///< Sum of non-multiple constants
-  bool valid;			///< Full tree search was performed
+  bool preventDistribution;	///< Do not distribute "multiply by constant" operation
+  bool isDistributeUsed;	///< Are terms produced by distributing used
+  bool isSubtype;		///< Is there a sub-type (using CPUI_PTRSUB)
+  bool valid;			///< Set to \b true if the whole expression can be transformed
+  uint4 findArrayHint(void) const;	///< Look for evidence of an array in a sub-component
+  bool hasMatchingSubType(uintb off,uint4 arrayHint,uintb *newoff) const;
+  bool checkMultTerm(Varnode *vn,PcodeOp *op, uintb treeCoeff);	///< Accumulate details of INT_MULT term and continue traversal if appropriate
+  bool checkTerm(Varnode *vn, uintb treeCoeff);			///< Accumulate details of given term and continue tree traversal
+  bool spanAddTree(PcodeOp *op, uintb treeCoeff);		///< Walk the given sub-tree accumulating details
+  void calcSubtype(void);		///< Calculate final sub-type offset
+  Varnode *buildMultiples(void);	///< Build part of tree that is multiple of base size
+  Varnode *buildExtra(void);		///< Build part of tree not accounted for by multiples or \e offset
+  void buildTree(void);			///< Build the transformed ADD tree
+  void clear(void);			///< Reset for a new ADD tree traversal
+public:
+  AddTreeState(Funcdata &d,PcodeOp *op,int4 slot);	///< Construct given root of ADD tree and pointer
+  bool apply(void);		///< Attempt to transform the pointer expression
 };

 class RuleEarlyRemoval : public Rule {
@@ -58,7 +93,6 @@ public:
 // };
 class RuleCollectTerms : public Rule {
  static Varnode *getMultCoeff(Varnode *vn,uintb &coef);	///< Get the multiplicative coefficient
-  static int4 doDistribute(Funcdata &data,PcodeOp *op);		///< Distribute coefficient within one term
 public:
  RuleCollectTerms(const string &g) : Rule(g, 0, "collect_terms") {}	///< Constructor
  virtual Rule *clone(const ActionGroupList &grouplist) const {
@@ -993,9 +1027,7 @@ public:
  virtual int4 applyOp(PcodeOp *op,Funcdata &data);
 };
 class RulePtrArith : public Rule {
-  static bool checkTerm(Varnode *vn,AddTreeState *state);
-  static bool spanAddTree(PcodeOp *op,AddTreeState *state);
-  static int4 transformPtr(PcodeOp *bottom_op,PcodeOp *ptr_op,int4 slot,Funcdata &data);
+  static bool verifyAddTreeBottom(PcodeOp *op,int4 slot);
 public:
  RulePtrArith(const string &g) : Rule(g, 0, "ptrarith") {}	///< Constructor
  virtual Rule *clone(const ActionGroupList &grouplist) const {
@@ -0,0 +1,46 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "string_ghidra.hh"
+
+/// \param g is the ghidra client interface that string requests are forwarded to
+/// \param max is the truncation limit handed to the StringManager base class
+GhidraStringManager::GhidraStringManager(ArchitectureGhidra *g,int4 max)
+  : StringManager(max)
+{
+  glb = g;
+  testBuffer = new uint1[max];	// Array of \b max bytes, released by the destructor
+}
+
+/// Release the temporary byte buffer allocated by the constructor
+GhidraStringManager::~GhidraStringManager(void)
+
+{
+  delete [] testBuffer;
+}
+
+/// Return the cached string data for the address if it exists.  Otherwise create a cache
+/// entry, let the ghidra client fill in the bytes and the truncation flag, and return that.
+/// \param addr is the given address
+/// \param charType is a character data-type indicating the encoding
+/// \param isTrunc passes back whether the string is truncated
+/// \return the byte array of string data (as filled in by the client)
+const vector<uint1> &GhidraStringManager::getStringData(const Address &addr,Datatype *charType,bool &isTrunc)
+
+{
+  map<Address,StringData>::iterator cached = stringMap.find(addr);
+  if (cached == stringMap.end()) {
+    // Not seen before: allocate the entry and query the client
+    StringData &fresh = stringMap[addr];
+    fresh.isTruncated = false;
+    glb->getStringData(fresh.byteData, addr, charType, maximumChars,fresh.isTruncated);
+    isTrunc = fresh.isTruncated;
+    return fresh.byteData;
+  }
+
+  isTrunc = cached->second.isTruncated;
+  return cached->second.byteData;
+}
@@ -0,0 +1,39 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/// \file string_ghidra.hh
+/// \brief Implementation of the StringManager through the ghidra client
+
+#ifndef __STRING_GHIDRA__
+#define __STRING_GHIDRA__
+
+#include "ghidra_arch.hh"
+
+/// \brief Implementation of the StringManager that queries through the ghidra client
+///
+/// This acts as a front end to Ghidra's string formats and encodings.
+/// The client translates any type of string into a UTF8 representation, and this
+/// class stores it for final presentation.  Escaping the UTF8 string is left up
+/// to the PrintLanguage.
+class GhidraStringManager : public StringManager {
+  ArchitectureGhidra *glb;		///< The ghidra client interface
+  uint1 *testBuffer;			///< Temporary storage for storing bytes from client
+public:
+  GhidraStringManager(ArchitectureGhidra *g,int4 max);	///< Constructor
+  virtual ~GhidraStringManager(void);			///< Destructor
+  /// \brief Retrieve (and cache) string data at the given address by querying the ghidra client
+  virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType,bool &isTrunc);
+};
+
+#endif
@@ -0,0 +1,392 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "stringmanage.hh"
+#include "architecture.hh"
+
+/// Construct a manager with an empty string cache.
+/// \param max is the maximum number of characters to allow before truncating string
+StringManager::StringManager(int4 max)
+
+{
+  maximumChars = max;
+}
+
+/// Drop any cached strings
+StringManager::~StringManager(void)
+
+{
+  clear();
+}
+
+/// Emit a single unicode codepoint to the stream using the UTF8 encoding,
+/// which occupies 1, 2, 3, or 4 bytes depending on the magnitude of the codepoint.
+/// An exception is thrown for a negative codepoint or one needing more than 21 bits.
+/// \param s is the output stream
+/// \param codepoint is the unicode codepoint
+void StringManager::writeUtf8(ostream &s,int4 codepoint)
+
+{
+  if (codepoint < 0)
+    throw LowlevelError("Negative unicode codepoint");
+  if (codepoint < 128) {		// 7-bit values are emitted as a single byte
+    s.put((uint1)codepoint);
+    return;
+  }
+  int4 numBits = mostsigbit_set(codepoint) + 1;
+  if (numBits > 21)
+    throw LowlevelError("Bad unicode codepoint");
+
+  int4 length;			// Total number of bytes in the encoding
+  int4 leadTag;			// Fixed high bits of the leading byte
+  int4 leadMask;		// Payload bits carried by the leading byte
+  if (numBits < 12) {
+    length = 2;
+    leadTag = 0xc0;
+    leadMask = 0x1f;
+  }
+  else if (numBits < 17) {
+    length = 3;
+    leadTag = 0xe0;
+    leadMask = 0xf;
+  }
+  else {
+    length = 4;
+    leadTag = 0xf0;
+    leadMask = 7;
+  }
+
+  uint1 encoded[4];
+  int4 remaining = codepoint;
+  for(int4 pos=length-1;pos>0;--pos) {	// Continuation bytes carry 6 bits each, filled last to first
+    encoded[pos] = 0x80 | (remaining & 0x3f);
+    remaining >>= 6;
+  }
+  encoded[0] = leadTag | (remaining & leadMask);
+  s.write((char *)encoded, length);
+}
+
+/// Decide whether the data at the given address is some kind of complete string, using the
+/// given character data-type as a hint for the encoding.  The decision is made by fetching
+/// the string data (which may cache the decoding) and checking that it is non-empty.
+/// \param addr is the given address
+/// \param charType is the given character data-type
+/// \return \b true if the address represents string data
+bool StringManager::isString(const Address &addr,Datatype *charType)
+
+{
+  bool truncFlag;		// Required by getStringData, ignored here
+  return !getStringData(addr,charType,truncFlag).empty();
+}
+
+/// Write \<stringmanage> tag, with one \<string> sub-tag per cached string.
+/// Each \<string> holds the address tag followed by a \<bytes> tag, whose \e trunc attribute
+/// records truncation and whose content is the string data as two hex digits per byte.
+/// \param s is the stream to write to
+void StringManager::saveXml(ostream &s) const
+
+{
+  s << "<stringmanage>\n";
+
+  map<Address,StringData>::const_iterator iter1;
+  for(iter1=stringMap.begin();iter1!=stringMap.end();++iter1) {
+    s << "<string>\n";
+    (*iter1).first.saveXml(s);
+    const StringData &stringData( (*iter1).second );
+    s << " <bytes";
+    a_v_b(s, "trunc", stringData.isTruncated);
+    s << ">\n" << setfill('0');
+    int4 numBytes = (int4)stringData.byteData.size();
+    for(int4 i=0;i<numBytes;++i) {	// Stop after the last stored byte
+      s << hex << setw(2) << (int4)stringData.byteData[i];
+      if (i%20 == 19)		// Break the line after every 20 bytes
+	s << "\n  ";
+    }
+    s << "\n </bytes>\n";
+    s << "</string>\n";		// Close the tag so each string is a separate child of <stringmanage>
+  }
+  s << "</stringmanage>\n";
+}
+
+/// Read \<stringmanage> tag, with \<string> sub-tags.
+/// Each child of the root is expected to contain an address tag followed by a \<bytes> tag.
+/// The \e trunc attribute of \<bytes> restores the truncation flag, and its content is parsed
+/// as pairs of hex digits (whitespace between pairs is skipped), one pair per byte.
+/// \param el is the root tag element
+/// \param m is the manager for looking up AddressSpaces
+void StringManager::restoreXml(const Element *el, const AddrSpaceManager *m)
+
+{
+  const List &list(el->getChildren());
+  List::const_iterator iter1;
+  for (iter1 = list.begin(); iter1 != list.end(); ++iter1) {
+    List::const_iterator iter2 = (*iter1)->getChildren().begin();
+    Address addr = Address::restoreXml(*iter2, m);
+    ++iter2;
+    StringData &stringData(stringMap[addr]);
+    stringData.isTruncated = xml_readbool((*iter2)->getAttributeValue("trunc"));
+    istringstream is((*iter2)->getContent());
+    int4 val;
+    // istream::get() returns an int so that end-of-stream (a negative value) can be told apart
+    // from real characters.  Storing it in a plain char loses that where char is unsigned,
+    // and the loop below would then never terminate.
+    int4 c1, c2;
+    is >> ws;
+    c1 = is.get();
+    c2 = is.get();
+    while ((c1 > 0) && (c2 > 0)) {
+      if (c1 <= '9')
+	c1 = c1 - '0';
+      else if (c1 <= 'F')
+	c1 = c1 + 10 - 'A';
+      else
+	c1 = c1 + 10 - 'a';
+      if (c2 <= '9')
+	c2 = c2 - '0';
+      else if (c2 <= 'F')
+	c2 = c2 + 10 - 'A';
+      else
+	c2 = c2 + 10 - 'a';
+      val = c1 * 16 + c2;
+      stringData.byteData.push_back((uint1) val);
+      is >> ws;
+      c1 = is.get();
+      c2 = is.get();
+    }
+  }
+}
+
+/// Scan the buffer one character element at a time looking for an element whose bytes are
+/// all zero.  Only complete elements are examined: if \b size is not a multiple of
+/// \b charsize, the trailing partial element is ignored rather than read past the buffer.
+/// \param buffer is the byte buffer
+/// \param size is the number of bytes in the buffer
+/// \param charsize is the presumed size (in bytes) of character elements
+/// \return \b true if a string terminator is found
+bool StringManager::hasCharTerminator(const uint1 *buffer,int4 size,int4 charsize)
+
+{
+  for(int4 i=0;i<size;i+=charsize) {
+    if (i + charsize > size) break;	// Partial element at the end, don't read beyond the buffer
+    bool isTerminator = true;
+    for(int4 j=0;j<charsize;++j) {
+      if (buffer[i+j] != 0) {	// Non-zero bytes means character can't be a null terminator
+	isTerminator = false;
+	break;
+      }
+    }
+    if (isTerminator) return true;
+  }
+  return false;
+}
+
+/// Combine the first two bytes of the array into a single 16-bit UTF16 element, taking
+/// the most significant byte first for big endian and second for little endian.
+/// \param buf is the byte array
+/// \param bigend is \b true to request big endian encoding
+/// \return the decoded UTF16 element
+inline int4 StringManager::readUtf16(const uint1 *buf,bool bigend)
+
+{
+  int4 highByte = bigend ? buf[0] : buf[1];
+  int4 lowByte = bigend ? buf[1] : buf[0];
+  return (highByte << 8) + lowByte;
+}
+
+/// One or more bytes is consumed from the array, and the number of bytes used is passed back.
+/// The array carries no length, so the caller must make sure enough bytes are readable:
+/// up to 4 bytes starting at \b buf may be examined.  On every explicit error return (-1),
+/// \b skip is left unmodified.
+/// \param buf is a pointer to the bytes in the character array
+/// \param charsize is 1 for UTF8, 2 for UTF16, or 4 for UTF32
+/// \param bigend is \b true for big endian encoding of the UTF element
+/// \param skip is a reference for passing back the number of bytes consumed
+/// \return the codepoint or -1 if the encoding is invalid
+int4 StringManager::getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip)
+
+{
+  int4 codepoint;
+  int4 sk = 0;
+  if (charsize==2) {		// UTF-16
+    codepoint = readUtf16(buf,bigend);
+    sk += 2;
+    if ((codepoint>=0xD800)&&(codepoint<=0xDBFF)) { // high surrogate
+      int4 trail=readUtf16(buf+2,bigend);
+      sk += 2;
+      if ((trail<0xDC00)||(trail>0xDFFF)) return -1; // Bad trail
+      // Combine the surrogate pair: strip both surrogate bases and add the 0x10000 plane offset
+      codepoint = (codepoint<<10) + trail + (0x10000 - (0xD800 << 10) - 0xDC00);
+    }
+    else if ((codepoint>=0xDC00)&&(codepoint<=0xDFFF)) return -1; // trail before high
+  }
+  else if (charsize==1) {	// UTF-8
+    int4 val = buf[0];
+    // The high bits of the first byte select the length of the sequence
+    if ((val&0x80)==0) {
+      codepoint = val;
+      sk = 1;
+    }
+    else if ((val&0xe0)==0xc0) {
+      int4 val2 = buf[1];
+      sk = 2;
+      if ((val2&0xc0)!=0x80) return -1; // Not a valid UTF8-encoding
+      codepoint = ((val&0x1f)<<6) | (val2 & 0x3f);
+    }
+    else if ((val&0xf0)==0xe0) {
+      int4 val2 = buf[1];
+      int4 val3 = buf[2];
+      sk = 3;
+      if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)) return -1; // invalid encoding
+      codepoint = ((val&0xf)<<12) | ((val2&0x3f)<<6) | (val3 & 0x3f);
+    }
+    else if ((val&0xf8)==0xf0) {
+      int4 val2 = buf[1];
+      int4 val3 = buf[2];
+      int4 val4 = buf[3];
+      sk = 4;
+      if (((val2&0xc0)!=0x80)||((val3&0xc0)!=0x80)||((val4&0xc0)!=0x80)) return -1;	// invalid encoding
+      codepoint = ((val&7)<<18) | ((val2&0x3f)<<12) | ((val3&0x3f)<<6) | (val4 & 0x3f);
+    }
+    else
+      return -1;		// Not a valid leading byte (e.g. a stray continuation byte)
+  }
+  else if (charsize == 4) {	// UTF-32
+    sk = 4;
+    if (bigend)
+      codepoint = (buf[0]<<24) + (buf[1]<<16) + (buf[2]<<8) + buf[3];
+    else
+      codepoint = (buf[3]<<24) + (buf[2]<<16) + (buf[1]<<8) + buf[0];
+  }
+  else
+    return -1;			// Unsupported element size
+  if (codepoint >= 0xd800 && codepoint <= 0xdfff)
+    return -1;		// Reserved for surrogates, invalid codepoints
+  skip = sk;			// Only report bytes consumed for a successfully decoded codepoint
+  return codepoint;
+}
+
+/// \param g is the underlying architecture (and loadimage)
+/// \param max is the truncation limit passed to StringManager; it is also used as the
+///        size in bytes of the buffer for pulling in loadimage bytes
+StringManagerUnicode::StringManagerUnicode(Architecture *g,int4 max)
+  : StringManager(max)
+{
+  glb = g;
+  testBuffer = new uint1[max];	// Released by the destructor
+}
+
+/// Release the temporary loadimage buffer allocated by the constructor
+StringManagerUnicode::~StringManagerUnicode(void)
+
+{
+  delete [] testBuffer;
+}
+
+/// If the string at the given address is already cached, return the cached bytes. Otherwise
+/// pull bytes from the loadimage (32 at a time) until a terminator is seen, validate them as
+/// unicode, convert to UTF8 if necessary, and cache the result.  An empty vector is cached and
+/// returned if the data-type is an opaque string, the bytes are unavailable, no terminator
+/// is found within the maximum, or the encoding is invalid.
+/// \param addr is the given address
+/// \param charType is a character data-type, whose size selects the encoding
+/// \param isTrunc passes back whether the string is truncated
+/// \return the byte array of UTF8 data
+const vector<uint1> &StringManagerUnicode::getStringData(const Address &addr,Datatype *charType,bool &isTrunc)
+
+{
+  map<Address,StringData>::iterator iter;
+  iter = stringMap.find(addr);
+  if (iter != stringMap.end()) {
+    isTrunc = (*iter).second.isTruncated;
+    return (*iter).second.byteData;
+  }
+
+  StringData &stringData(stringMap[addr]);		// Allocate (initially empty) byte vector
+  stringData.isTruncated = false;
+  isTrunc = false;
+
+  if (charType->isOpaqueString())		// Cannot currently test for an opaque encoding
+    return stringData.byteData;			// Return the empty buffer
+
+  int4 curBufferSize = 0;
+  int4 charsize = charType->getSize();
+  bool foundTerminator = false;
+
+  try {
+    do {
+      int4 amount = 32;	// Grab 32 bytes of image at a time
+      int4 newBufferSize = curBufferSize + amount;	// Signed, to match maximumChars in the comparison
+      if (newBufferSize > maximumChars) {
+	newBufferSize = maximumChars;
+	amount = newBufferSize - curBufferSize;
+	if (amount == 0) {
+	  return stringData.byteData;		// Could not find terminator
+	}
+      }
+      glb->loader->loadFill(testBuffer + curBufferSize, amount,
+			    addr + curBufferSize);
+      foundTerminator = hasCharTerminator(testBuffer + curBufferSize, amount,
+					  charsize);
+      curBufferSize = newBufferSize;
+    } while (!foundTerminator);
+  } catch (DataUnavailError &err) {
+    return stringData.byteData;			// Return the empty buffer
+  }
+
+  int4 numChars = checkCharacters(testBuffer, curBufferSize, charsize);
+  if (numChars < 0)
+    return stringData.byteData;		// Return the empty buffer (invalid encoding)
+  if (charsize == 1 && numChars < maximumChars) {
+    // Already UTF8 and short enough, copy the raw bytes
+    stringData.byteData.reserve(curBufferSize);
+    stringData.byteData.assign(testBuffer,testBuffer+curBufferSize);
+  }
+  else {
+    // We need to translate to UTF8 and/or truncate
+    ostringstream s;
+    if (!writeUnicode(s, testBuffer, curBufferSize, charsize))
+      return stringData.byteData;		// Return the empty buffer
+    string resString = s.str();
+    int4 newSize = resString.size();
+    stringData.byteData.reserve(newSize + 1);
+    const uint1 *ptr = (const uint1 *)resString.c_str();
+    stringData.byteData.assign(ptr,ptr+newSize);
+    // Append the null terminator.  reserve() only sets capacity, so indexing element
+    // newSize directly would write beyond the end of the vector.
+    stringData.byteData.push_back(0);
+  }
+  stringData.isTruncated = (numChars >= maximumChars);
+  isTrunc = stringData.isTruncated;
+  return stringData.byteData;
+}
+
+/// Walk the buffer decoding one codepoint at a time, counting characters until a terminator
+/// (codepoint 0) or the end of the buffer is reached.  Any invalid encoding aborts the walk.
+/// For UTF8 or ASCII there is (on average) a bit of check per character.  For UTF16, the
+/// surrogate reserved area gives at least some check.
+/// \param buf is the byte array to check
+/// \param size is the size of the buffer in bytes
+/// \param charsize is the UTF encoding (1=UTF8, 2=UTF16, 4=UTF32)
+/// \return the number of characters or -1 if there is an invalid encoding
+int4 StringManagerUnicode::checkCharacters(const uint1 *buf,int4 size,int4 charsize) const
+
+{
+  if (buf == (const uint1 *)0) return -1;
+  bool isBigEndian = glb->translate->isBigEndian();
+  int4 numChars = 0;
+  int4 consumed = charsize;
+  for(int4 pos=0;pos<size;pos+=consumed) {
+    int4 cp = getCodepoint(buf+pos,charsize,isBigEndian,consumed);
+    if (cp < 0) return -1;		// Invalid encoding
+    if (cp == 0) return numChars;	// Terminator is not counted
+    numChars += 1;
+  }
+  return numChars;
+}
+
+/// Decode the buffer as a null terminated unicode string and re-emit each character to the
+/// stream as UTF8.  Output stops at the terminator, at the end of the buffer, or once
+/// the maximum number of characters has been written.
+/// \param s is the output stream
+/// \param buffer is the given byte buffer
+/// \param size is the number of bytes in the buffer
+/// \param charsize specifies the encoding (1=UTF8 2=UTF16 4=UTF32)
+/// \return \b true if the byte array contains valid unicode
+bool StringManagerUnicode::writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize)
+
+{
+  bool isBigEndian = glb->translate->isBigEndian();
+  int4 written = 0;
+  int4 consumed = charsize;
+  for(int4 pos=0;pos<size;pos+=consumed) {
+    int4 cp = getCodepoint(buffer+pos,charsize,isBigEndian,consumed);
+    if (cp < 0) return false;		// Invalid encoding
+    if (cp == 0) return true;		// Terminator
+    writeUtf8(s, cp);
+    written += 1;
+    if (written >= maximumChars)	// Truncate at the maximum
+      return true;
+  }
+  return true;
+}
@@ -0,0 +1,83 @@
+/* ###
+ * IP: GHIDRA
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/// \file stringmanage.hh
+/// \brief Classes for decoding and storing string data
+
+#ifndef __STRING_MANAGE__
+#define __STRING_MANAGE__
+
+#include "type.hh"
+
+class Architecture;
+
+/// \brief Storage for decoding and storing strings associated with an address
+///
+/// Looks at data in the loadimage to determine if it represents a "string".
+/// Decodes the string for presentation in the output.
+/// Stores the decoded string until it's needed for presentation.
+class StringManager {
+protected:
+  /// \brief String data (a sequence of bytes) stored by StringManager
+  class StringData {
+  public:
+    bool isTruncated;		///< \b true if the string is truncated
+    vector<uint1> byteData;	///< UTF8 encoded string data
+  };
+  map<Address,StringData> stringMap;	///< Map from address to string data
+  int4 maximumChars;			///< Maximum characters in a string before truncating
+public:
+  StringManager(int4 max);		///< Constructor
+  virtual ~StringManager(void);		///< Destructor
+
+  void clear(void) { stringMap.clear(); }			///< Clear out any cached strings
+
+  bool isString(const Address &addr,Datatype *charType);	///< Determine if data at the given address is a string
+
+  /// \brief Retrieve string data at the given address as a UTF8 byte array
+  ///
+  /// If the address does not represent string data, a zero length vector is returned. Otherwise,
+  /// the string data is fetched, converted to a UTF8 encoding, cached and returned.
+  /// \param addr is the given address
+  /// \param charType is a character data-type indicating the encoding
+  /// \param isTrunc passes back whether the string is truncated
+  /// \return the byte array of UTF8 data
+  virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType,bool &isTrunc)=0;
+
+  void saveXml(ostream &s) const;	///< Save cached strings to a stream as XML
+  void restoreXml(const Element *el,const AddrSpaceManager *m);	///< Restore string cache from XML
+
+  static bool hasCharTerminator(const uint1 *buffer,int4 size,int4 charsize);	///< Check for a unicode string terminator
+  static int4 readUtf16(const uint1 *buf,bool bigend);	///< Read a UTF16 code point from a byte array
+  static void writeUtf8(ostream &s,int4 codepoint);	///< Write unicode character to stream in UTF8 encoding
+  static int4 getCodepoint(const uint1 *buf,int4 charsize,bool bigend,int4 &skip);	///< Extract next \e unicode \e codepoint
+};
+
+/// \brief An implementation of StringManager that understands terminated unicode strings
+///
+/// This class understands UTF8, UTF16, and UTF32 encodings.  It reports a string if it
+/// sees a valid encoding that is null terminated.
+class StringManagerUnicode : public StringManager {
+  Architecture *glb;		///< Underlying architecture
+  uint1 *testBuffer;		///< Temporary buffer for pulling in loadimage bytes
+  int4 checkCharacters(const uint1 *buf,int4 size,int4 charsize) const;	///< Make sure buffer has valid bounded set of unicode
+public:
+  StringManagerUnicode(Architecture *g,int4 max);	///< Constructor
+  virtual ~StringManagerUnicode(void);			///< Destructor
+
+  /// \brief Retrieve (and cache) string data at the given address by decoding loadimage bytes
+  virtual const vector<uint1> &getStringData(const Address &addr,Datatype *charType,bool &isTrunc);
+  bool writeUnicode(ostream &s,uint1 *buffer,int4 size,int4 charsize);	///< Translate/copy unicode to UTF8
+};
+
+#endif
@@ -46,7 +46,7 @@ void print_data(ostream &s,uint1 *buffer,int4 size,const Address &baseaddr)
 	s << "   ";
      else
 	s << setfill('0') << setw(2) << hex << (uint4) buffer[start+i-addr] << ' ';
-      
+
    }
    s << "  ";
    for(i=0;i<16;++i)
@@ -63,6 +63,20 @@ void print_data(ostream &s,uint1 *buffer,int4 size,const Address &baseaddr)
  }
 }

+/// Two data-types share a variable base if both are variable length and their IDs,
+/// after mixing in their respective sizes with hashSize(), are equal.
+/// \param ct is the other given data-type to compare with \b this
+/// \return \b true if they are the same variable length data-type.
+bool Datatype::hasSameVariableBase(const Datatype *ct) const
+
+{
+  if (!isVariableLength() || !ct->isVariableLength())
+    return false;		// Both must be variable length
+  return hashSize(id, size) == hashSize(ct->id, ct->size);
+}
+
 /// Print a raw description of the type to stream. Intended for debugging.
 /// Not intended to produce parsable C.
 /// \param s is the output stream
@@ -90,7 +104,33 @@ Datatype *Datatype::getSubType(uintb off,uintb *newoff) const
  return (Datatype *)0;
 }

-/// Compare \b this with another data-type.
+/// Find the first component data-type after the given offset that is (or contains)
+/// an array, and pass back the difference between the component's start and the given offset.
+/// Return the component data-type or null if no array is found.
+/// This implementation always returns null and does not write to \b newoff or \b elSize.
+/// \param off is the given offset into \b this data-type
+/// \param newoff is used to pass back the offset difference
+/// \param elSize is used to pass back the array element size
+/// \return the component data-type or null
+Datatype *Datatype::nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const
+
+{
+  return (TypeArray *)0;
+}
+
+/// Find the first component data-type before the given offset that is (or contains)
+/// an array, and pass back the difference between the component's start and the given offset.
+/// Return the component data-type or null if no array is found.
+/// This implementation always returns null and does not write to \b newoff or \b elSize.
+/// \param off is the given offset into \b this data-type
+/// \param newoff is used to pass back the offset difference
+/// \param elSize is used to pass back the array element size
+/// \return the component data-type or null
+Datatype *Datatype::nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const
+
+{
+  return (TypeArray *)0;
+}
+
+/// Compare \b this with another data-type.
 /// 0 (equality) means the data-types are functionally equivalent (even if names differ)
 /// Smaller types come earlier. More specific types come earlier.
 /// \param op is the data-type to compare with \b this
@@ -240,8 +280,13 @@ void Datatype::saveXmlBasic(ostream &s) const

 {
  a_v(s,"name",name);
-  if (id != 0) {
-    s << " id=\"0x" << hex << id << '\"';
+  uint8 saveId;
+  if (isVariableLength())
+    saveId = hashSize(id, size);
+  else
+    saveId = id;
+  if (saveId != 0) {
+    s << " id=\"0x" << hex << saveId << '\"';
  }
  a_v_i(s,"size",size);
  string metastring;
@@ -249,6 +294,10 @@ void Datatype::saveXmlBasic(ostream &s) const
  a_v(s,"metatype",metastring);
  if ((flags & coretype)!=0)
    a_v_b(s,"core",true);
+  if (isVariableLength())
+    a_v_b(s,"varlength",true);
+  if ((flags & opaque_string)!=0)
+    a_v_b(s,"opaquestring",true);
 }

 /// Write a simple reference to \b this data-type as an XML \<typeref> tag,
@@ -283,18 +332,31 @@ void Datatype::restoreXmlBasic(const Element *el)
  metatype = string2metatype( el->getAttributeValue("metatype") );
  id = 0;
  for(int4 i=0;i<el->getNumAttributes();++i) {
-    if (el->getAttributeName(i) == "core") {
+    const string &attribName( el->getAttributeName(i) );
+    if (attribName == "core") {
      if (xml_readbool(el->getAttributeValue(i)))
 	flags |= coretype;
    }
-    else if (el->getAttributeName(i) == "id") {
+    else if (attribName == "id") {
      istringstream i1(el->getAttributeValue(i));
      i1.unsetf(ios::dec | ios::hex | ios::oct);
      i1 >> id;
    }
+    else if (attribName == "varlength") {
+      if (xml_readbool(el->getAttributeValue(i)))
+	flags |= variable_length;
+    }
+    else if (attribName == "opaquestring") {
+      if (xml_readbool(el->getAttributeValue(i)))
+	flags |= opaque_string;
+    }
  }
  if ((id==0)&&(name.size()>0))	// If there is a type name
    id = hashName(name);	// There must be some kind of id
+  if (isVariableLength()) {
+    // Id needs to be unique compared to another data-type with the same name
+    id = hashSize(id, size);
+  }
 }

 /// Restore a Datatype object from an XML element
@@ -326,6 +388,21 @@ uint8 Datatype::hashName(const string &nm)
  return res;
 }

+/// This allows IDs for variable length structures to be uniquified based on size.
+/// A base ID is given and a size of the specific instance. A unique ID is returned.
+/// The hashing is reversible by feeding the output ID back into this function with the same size.
+/// \param id is the given ID to (de)uniquify
+/// \param size is the instance size of the structure
+/// \return the (de)uniquified id
+uint8 Datatype::hashSize(uint8 id,int4 size)
+
+{
+  uint8 sizeHash = size;
+  sizeHash *= 0x98251033aecbabaf;	// Hash the size
+  id ^= sizeHash;	// XOR is its own inverse, so applying the same size a second time restores the original id
+  return id;
+}
+
 void TypeChar::saveXml(ostream &s) const

 {
@@ -510,7 +587,7 @@ void TypeArray::saveXml(ostream &s) const
  s << "<type";
  saveXmlBasic(s);
  a_v_i(s,"arraysize",arraysize);
-  s << '>'; 
+  s << '>';
  arrayof->saveXmlRef(s);
  s << "</type>";
 }
@@ -563,7 +640,7 @@ void TypeEnum::setNameMap(const map<uintb,string> &nmap)
    fieldisempty = true;
    while(curmask != lastmask) {	// Repeat until there is no change in the current mask
      lastmask = curmask;		// Note changes from last time through
-      
+
      for(iter=namemap.begin();iter!=namemap.end();++iter) { // For every named enumeration value
 	uintb val = (*iter).first;
 	if ((val & curmask) != 0) {	// If the value shares ANY bits in common with the current mask
@@ -577,7 +654,7 @@ void TypeEnum::setNameMap(const map<uintb,string> &nmap)
      int4 msb = mostsigbit_set(curmask);
      if (msb > curmaxbit)
 	curmaxbit = msb;
-      
+
      uintb mask1 = 1;
      mask1 = (mask1 << lsb) - 1;     // every bit below lsb is set to 1
      uintb mask2 = 1;
@@ -740,7 +817,7 @@ void TypeStruct::setFields(const vector<TypeField> &fd)
 /// \return the index into the field list or -1
 int4 TypeStruct::getFieldIter(int4 off) const

-{				// Find subfield of given offset
+{
  int4 min = 0;
  int4 max = field.size()-1;

@@ -758,6 +835,30 @@ int4 TypeStruct::getFieldIter(int4 off) const
  return -1;
 }

+/// Find the last field whose starting offset is less than or equal to the given offset.
+/// The field returned may or may not contain the offset.  If no field starts at or before the offset, return -1.
+/// \param off is the given offset
+/// \return the index of the nearest field or -1
+int4 TypeStruct::getLowerBoundField(int4 off) const
+
+{
+  if (field.empty()) return -1;
+  int4 min = 0;
+  int4 max = field.size()-1;
+
+  while(min < max) {	// Binary search; presumes the fields are ordered by increasing offset
+    int4 mid = (min + max + 1)/2;	// Round up so that the "min = mid" branch always shrinks the range
+    if (field[mid].offset > off)
+      max = mid - 1;
+    else {			// field[mid].offset <= off
+      min = mid;
+    }
+  }
+  if (min == max && field[min].offset <= off)	// The surviving candidate must still start at or before the offset
+    return min;
+  return -1;
+}
+
 /// Given a byte range within \b this data-type, determine the field it is contained in
 /// and pass back the renormalized offset.
 /// \param off is the byte offset into \b this
@@ -784,7 +885,7 @@ Datatype *TypeStruct::getSubType(uintb off,uintb *newoff) const

 {				// Go down one level to field that contains offset
  int4 i;
-  
+
  i = getFieldIter(off);
  if (i < 0) return Datatype::getSubType(off,newoff);
  const TypeField &curfield( field[i] );
@@ -792,6 +893,61 @@ Datatype *TypeStruct::getSubType(uintb off,uintb *newoff) const
  return curfield.type;
 }

+Datatype *TypeStruct::nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const
+
+{				// Walk fields backward from the given offset looking for an array, or a field that contains one
+  int4 i = getLowerBoundField(off);	// Last field starting at or before the offset (-1 if there is none)
+  while(i >= 0) {
+    const TypeField &subfield( field[i] );
+    int4 diff = (int4)off - subfield.offset;	// Distance from the start of this field up to the offset
+    if (diff > 128) break;	// Give up once fields start more than 128 bytes before the offset
+    Datatype *subtype = subfield.type;
+    if (subtype->getMetatype() == TYPE_ARRAY) {
+      *newoff = (intb)diff;	// Pass back the offset relative to the start of the array field
+      *elSize = ((TypeArray *)subtype)->getBase()->getSize();	// Pass back the size of one array element
+      return subtype;
+    }
+    else {
+      uintb suboff;	// Offset passed back by the nested search; not used here
+      Datatype *res = subtype->nearestArrayedComponentBackward(subtype->getSize(), &suboff, elSize);	// Search the component backward from its end
+      if (res != (Datatype *)0) {
+	*newoff = (intb)diff;
+	return subtype;	// Return the field's own data-type, not the nested result
+      }
+    }
+    i -= 1;
+  }
+  return (Datatype *)0;	// No arrayed component within range
+}
+
+Datatype *TypeStruct::nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const
+
+{				// Walk fields forward from the given offset looking for an array, or a field that contains one
+  int4 i = getLowerBoundField(off);
+  i += 1;	// Start with the field after the one at or before the offset (index 0 if there is none)
+  while(i<field.size()) {
+    const TypeField &subfield( field[i] );
+    int4 diff = subfield.offset - off;	// Distance from the offset forward to the start of this field
+    if (diff > 128) break;	// Give up once fields start more than 128 bytes past the offset
+    Datatype *subtype = subfield.type;
+    if (subtype->getMetatype() == TYPE_ARRAY) {
+      *newoff = (intb)-diff;	// Pass back the negated distance as the offset relative to the start of the array field
+      *elSize = ((TypeArray *)subtype)->getBase()->getSize();	// Pass back the size of one array element
+      return subtype;
+    }
+    else {
+      uintb suboff;	// Offset passed back by the nested search; not used here
+      Datatype *res = subtype->nearestArrayedComponentForward(0, &suboff, elSize);	// Search the component forward from its beginning
+      if (res != (Datatype *)0) {
+	*newoff = (intb)-diff;
+	return subtype;	// Return the field's own data-type, not the nested result
+      }
+    }
+    i += 1;
+  }
+  return (Datatype *)0;	// No arrayed component within range
+}
+
 int4 TypeStruct::compare(const Datatype &op,int4 level) const
 {
  if (size != op.getSize()) return (op.getSize()-size);
@@ -1135,7 +1291,7 @@ Datatype *TypeSpacebase::getSubType(uintb off,uintb *newoff) const
  // Assume symbol being referenced is address tied so we use a null point of context
  // FIXME: A valid point of context may be necessary in the future
  smallest = scope->queryContainer(addr,1,nullPoint);
-  
+
  if (smallest == (SymbolEntry *)0) {
    *newoff = 0;
    return glb->types->getBase(1,TYPE_UNKNOWN);
@@ -1144,6 +1300,74 @@ Datatype *TypeSpacebase::getSubType(uintb off,uintb *newoff) const
  return smallest->getSymbol()->getType();
 }

+Datatype *TypeSpacebase::nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const
+
+{				// Look for an arrayed symbol at, or shortly after, the address encoded by the given offset
+  Scope *scope = getMap();
+  off = AddrSpace::byteToAddress(off, spaceid->getWordSize());	// Convert from byte offset to address unit
+  // It should always be the case that the given offset represents a full encoding of the
+  // pointer, so the point of context is unused and the size is given as -1
+  Address nullPoint;
+  uintb fullEncoding;
+  Address addr = glb->resolveConstant(spaceid, off, -1, nullPoint, fullEncoding);
+  SymbolEntry *smallest = scope->queryContainer(addr,1,nullPoint);	// Presumably the entry containing addr, if any -- confirm against Scope
+  Address nextAddr;
+  Datatype *symbolType;
+  if (smallest == (SymbolEntry *)0 || smallest->getOffset() != 0)
+    nextAddr = addr + 32;	// No usable symbol here, so probe a fixed 32 address units ahead
+  else {
+    symbolType = smallest->getSymbol()->getType();
+    if (symbolType->getMetatype() == TYPE_STRUCT) {	// First look for an array further along inside the same structure
+      uintb structOff = addr.getOffset() - smallest->getAddr().getOffset();
+      uintb dummyOff;
+      Datatype *res = symbolType->nearestArrayedComponentForward(structOff, &dummyOff, elSize);
+      if (res != (Datatype *)0) {
+	*newoff = structOff;
+	return symbolType;
+      }
+    }
+    int4 size = AddrSpace::byteToAddressInt(smallest->getSize(), spaceid->getWordSize());
+    nextAddr = smallest->getAddr() + size;	// Otherwise probe the address just past the end of this symbol
+  }
+  if (nextAddr < addr)
+    return (Datatype *)0;		// Don't let the address wrap
+  smallest = scope->queryContainer(nextAddr,1,nullPoint);	// Repeat the query at the probe address
+  if (smallest == (SymbolEntry *)0 || smallest->getOffset() != 0)
+    return (Datatype *)0;
+  symbolType = smallest->getSymbol()->getType();
+  *newoff = addr.getOffset() - smallest->getAddr().getOffset();	// Offset of the original address relative to the start of the symbol found
+  if (symbolType->getMetatype() == TYPE_ARRAY) {
+    *elSize = ((TypeArray *)symbolType)->getBase()->getSize();	// Pass back the size of one array element
+    return symbolType;
+  }
+  if (symbolType->getMetatype() == TYPE_STRUCT) {
+    uintb dummyOff;
+    Datatype *res = symbolType->nearestArrayedComponentForward(0, &dummyOff, elSize);	// Search the structure forward from its beginning
+    if (res != (Datatype *)0)
+      return symbolType;	// Return the structure itself, not the nested result
+  }
+  return (Datatype *)0;	// No arrayed component found
+}
+
+Datatype *TypeSpacebase::nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const
+
+{				// Check whether the data-type at the given offset is an array, or a structure containing one
+  Datatype *subType = getSubType(off, newoff);	// Data-type at the offset; also fills in *newoff
+  if (subType == (Datatype *)0)
+    return (Datatype *)0;
+  if (subType->getMetatype() == TYPE_ARRAY) {
+    *elSize = ((TypeArray *)subType)->getBase()->getSize();	// Pass back the size of one array element
+    return subType;
+  }
+  if (subType->getMetatype() == TYPE_STRUCT) {
+    uintb dummyOff;	// Offset passed back by the nested search; not used here
+    Datatype *res = subType->nearestArrayedComponentBackward(*newoff,&dummyOff,elSize);	// Search the structure backward from the offset within it
+    if (res != (Datatype *)0)
+      return subType;	// Return the structure itself, not the nested result
+  }
+  return (Datatype *)0;	// No arrayed component found
+}
+
 int4 TypeSpacebase::compare(const Datatype &op,int4 level) const

 {
@@ -1427,7 +1651,7 @@ Datatype *TypeFactory::findAdd(Datatype &ct)

 {
  Datatype *newtype,*res;
-  
+
  if (ct.name.size()!=0) {	// If there is a name
    if (ct.id == 0)		// There must be an id
      throw LowlevelError("Datatype must have a valid id");
@@ -1458,7 +1682,7 @@ Datatype *TypeFactory::findAdd(Datatype &ct)
    nametree.insert(newtype);
  return newtype;
 }
-  
+
 /// This routine renames a Datatype object and fixes up cross-referencing
 /// \param ct is the data-type to rename
 /// \param n is the new name
@@ -1483,8 +1707,9 @@ Datatype *TypeFactory::setName(Datatype *ct,const string &n)
 /// \param fd is the list of fields to set
 /// \param ot is the TypeStruct object to modify
 /// \param fixedsize is 0 or the forced size of the structure
+/// \param flags are other flags to set on the structure
 /// \return true if modification was successful
-bool TypeFactory::setFields(vector<TypeField> &fd,TypeStruct *ot,int4 fixedsize)
+bool TypeFactory::setFields(vector<TypeField> &fd,TypeStruct *ot,int4 fixedsize,uint4 flags)

 {
  int4 offset,cursize,curalign;
@@ -1529,6 +1754,7 @@ bool TypeFactory::setFields(vector<TypeField> &fd,TypeStruct *ot,int4 fixedsize)

  tree.erase(ot);
  ot->setFields(fd);
+  ot->flags |= (flags & (Datatype::opaque_string | Datatype::variable_length));
  if (fixedsize > 0) {		// If the caller is trying to force a size
    if (fixedsize > ot->size)	// If the forced size is bigger than the size required for fields
      ot->size = fixedsize;	//     Force the bigger size
@@ -1746,22 +1972,18 @@ TypeCode *TypeFactory::getTypeCode(const string &nm)
  return (TypeCode *) findAdd(tmp);
 }

-/// This creates a pointer to a given data-type.  It doesn't allow
-/// a "pointer to array" to be created however and will drill-down to
-/// the first non-array data-type
+/// This creates a pointer to a given data-type.  If the given data-type is
+/// an array, the TYPE_ARRAY property is stripped off, and a pointer to
+/// the array element data-type is returned.
 /// \param s is the size of the pointer
 /// \param pt is the pointed-to data-type
 /// \param ws is the wordsize associated with the pointer
 /// \return the TypePointer object
-TypePointer *TypeFactory::getTypePointer(int4 s,Datatype *pt,uint4 ws)
+TypePointer *TypeFactory::getTypePointerStripArray(int4 s,Datatype *pt,uint4 ws)

-{				// Create pointer to type -pt-
-  if (pt->getMetatype() == TYPE_ARRAY) {
-    // Do no allow pointers to array
-    do {
-      pt = ((TypeArray *)pt)->getBase();
-    } while(pt->getMetatype() == TYPE_ARRAY);
-  }
+{
+  if (pt->getMetatype() == TYPE_ARRAY)
+    pt = ((TypeArray *)pt)->getBase();		// Strip the first ARRAY type
  TypePointer tmp(s,pt,ws);
  return (TypePointer *) findAdd(tmp);
 }
@@ -1771,7 +1993,7 @@ TypePointer *TypeFactory::getTypePointer(int4 s,Datatype *pt,uint4 ws)
 /// \param pt is the pointed-to data-type
 /// \param ws is the wordsize associated with the pointer
 /// \return the TypePointer object
-TypePointer *TypeFactory::getTypePointerAbsolute(int4 s,Datatype *pt,uint4 ws)
+TypePointer *TypeFactory::getTypePointer(int4 s,Datatype *pt,uint4 ws)

 {
  TypePointer tmp(s,pt,ws);
@@ -1890,9 +2112,13 @@ Datatype *TypeFactory::downChain(Datatype *ptrtype,uintb &off)
  if (ptrtype->metatype != TYPE_PTR) return (Datatype *)0;
  TypePointer *ptype = (TypePointer *)ptrtype;
  Datatype *pt = ptype->ptrto;
+  // If we know we have exactly one of an array, strip the array to get pointer to element
+  bool doStrip = (pt->getMetatype() != TYPE_ARRAY);
  pt = pt->getSubType(off,&off);
  if (pt == (Datatype *)0)
    return (Datatype *)0;
+  if (doStrip)
+    return getTypePointerStripArray(ptype->size, pt, ptype->getWordSize());
  return getTypePointer(ptype->size,pt,ptype->getWordSize());
 }

@@ -2073,20 +2299,27 @@ Datatype *TypeFactory::restoreXmlTypeNoRef(const Element *el,bool forcecore)
      int4 num = el->getNumAttributes();
      uint8 newid = 0;
      int4 structsize = 0;
+      bool isVarLength = false;
      for(int4 i=0;i<num;++i) {
-	if (el->getAttributeName(i) == "id") {
+	const string &attribName(el->getAttributeName(i));
+	if (attribName == "id") {
 	  istringstream s(el->getAttributeValue(i));
 	  s.unsetf(ios::dec | ios::hex | ios::oct);
 	  s >> newid;
 	}
-	else if (el->getAttributeName(i) == "size") {
+	else if (attribName == "size") {
 	  istringstream s(el->getAttributeValue(i));
 	  s.unsetf(ios::dec | ios::hex | ios::oct);
 	  s >> structsize;
 	}
+	else if (attribName == "varlength") {
+	  isVarLength = xml_readbool(el->getAttributeValue(i));
+	}
      }
      if (newid == 0)
 	newid = Datatype::hashName(structname);
+      if (isVarLength)
+	newid = Datatype::hashSize(newid, structsize);
      ct = findByIdLocal(structname,newid);
      bool stubfirst = false;
      if (ct == (Datatype *)0) {
@@ -2105,7 +2338,7 @@ Datatype *TypeFactory::restoreXmlTypeNoRef(const Element *el,bool forcecore)
 	  throw LowlevelError("Redefinition of structure: "+structname);
      }
      else			// If structure is a placeholder stub
-	if (!setFields(ts.field,(TypeStruct *)ct,ts.size)) // Define structure now by copying fields
+	if (!setFields(ts.field,(TypeStruct *)ct,ts.size,ts.flags)) // Define structure now by copying fields
 	  throw LowlevelError("Bad structure definition");
    }
    break;
@@ -73,7 +73,9 @@ protected:
    enumtype = 4,		///< An enumeration type (as well as an integer)
    poweroftwo = 8,		///< An enumeration type where all values are of 2^^n form
    utf16 = 16,			///< 16-bit wide chars in unicode UTF16
-    utf32 = 32			///< 32-bit wide chars in unicode UTF32
+    utf32 = 32,			///< 32-bit wide chars in unicode UTF32
+    opaque_string = 64,		///< Structure that should be treated as a string
+    variable_length = 128	///< May be other structures with same name different lengths
  };
  friend class TypeFactory;
  friend struct DatatypeCompare;
@@ -85,6 +87,7 @@ protected:
  void restoreXmlBasic(const Element *el);	///< Recover basic data-type properties
  virtual void restoreXml(const Element *el,TypeFactory &typegrp);	///< Restore data-type from XML
  static uint8 hashName(const string &nm);	///< Produce a data-type id by hashing the type name
+  static uint8 hashSize(uint8 id,int4 size);	///< Reversibly hash size into id
 public:
  /// Construct the base data-type copying low-level properties of another
  Datatype(const Datatype &op) { size = op.size; name=op.name; metatype=op.metatype; flags=op.flags; id=op.id; }
@@ -94,12 +97,15 @@ public:
  Datatype(int4 s,type_metatype m,const string &n) { name=n; size=s; metatype=m; flags=0; id=0; }
  virtual ~Datatype(void) {}	///< Destructor
  bool isCoreType(void) const { return ((flags&coretype)!=0); }	///< Is this a core data-type
-  bool isCharPrint(void) const { return ((flags&(chartype|utf16|utf32))!=0); }	///< Does this print as a 'char'
+  bool isCharPrint(void) const { return ((flags&(chartype|utf16|utf32|opaque_string))!=0); }	///< Does this print as a 'char'
  bool isEnumType(void) const { return ((flags&enumtype)!=0); }		///< Is this an enumerated type
  bool isPowerOfTwo(void) const { return ((flags&poweroftwo)!=0); }	///< Is this a flag-based enumeration
  bool isASCII(void) const { return ((flags&chartype)!=0); }	///< Does this print as an ASCII 'char'
  bool isUTF16(void) const { return ((flags&utf16)!=0); }	///< Does this print as UTF16 'wchar'
  bool isUTF32(void) const { return ((flags&utf32)!=0); }	///< Does this print as UTF32 'wchar'
+  bool isVariableLength(void) const { return ((flags&variable_length)!=0); }	///< Is \b this a variable length structure
+  bool hasSameVariableBase(const Datatype *ct) const;		///< Are these the same variable length data-type
+  bool isOpaqueString(void) const { return ((flags&opaque_string)!=0); }	///< Is \b this an opaquely encoded string
  uint4 getInheritable(void) const { return (flags & coretype); }	///< Get properties pointers inherit
  type_metatype getMetatype(void) const { return metatype; }	///< Get the type \b meta-type
  uint8 getId(void) const { return id; }			///< Get the type id
@@ -107,6 +113,8 @@ public:
  const string &getName(void) const { return name; }		///< Get the type name
  virtual void printRaw(ostream &s) const;			///< Print a description of the type to stream
  virtual Datatype *getSubType(uintb off,uintb *newoff) const; ///< Recover component data-type one-level down
+  virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
+  virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
  virtual int4 numDepend(void) const { return 0; }	///< Return number of component sub-types
  virtual Datatype *getDepend(int4 index) const { return (Datatype *)0; }	///< Return the i-th component sub-type
  virtual void printNameBase(ostream &s) const { if (!name.empty()) s<<name[0]; } ///< Print name as short prefix
@@ -304,6 +312,7 @@ protected:
  vector<TypeField> field;			///< The list of fields
  void setFields(const vector<TypeField> &fd);	///< Establish fields for \b this
  int4 getFieldIter(int4 off) const;		///< Get index into field list
+  int4 getLowerBoundField(int4 off) const;	///< Get index of last field before or equal to given offset
  virtual void restoreXml(const Element *el,TypeFactory &typegrp);
 public:
  TypeStruct(const TypeStruct &op);	///< Construct from another TypeStruct
@@ -312,6 +321,8 @@ public:
  vector<TypeField>::const_iterator endField(void) const { return field.end(); }	///< End of fields
  const TypeField *getField(int4 off,int4 sz,int4 *newoff) const;	///< Get field based on offset
  virtual Datatype *getSubType(uintb off,uintb *newoff) const;
+  virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
+  virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
  virtual int4 numDepend(void) const { return field.size(); }
  virtual Datatype *getDepend(int4 index) const { return field[index].type; }
  virtual int4 compare(const Datatype &op,int4 level) const; // For tree structure
@@ -370,6 +381,8 @@ public:
  Scope *getMap(void) const;	///< Get the symbol table indexed by \b this
  Address getAddress(uintb off,int4 sz,const Address &point) const;	///< Construct an Address given an offset
  virtual Datatype *getSubType(uintb off,uintb *newoff) const;
+  virtual Datatype *nearestArrayedComponentForward(uintb off,uintb *newoff,int4 *elSize) const;
+  virtual Datatype *nearestArrayedComponentBackward(uintb off,uintb *newoff,int4 *elSize) const;
  virtual int4 compare(const Datatype &op,int4 level) const;
  virtual int4 compareDependency(const Datatype &op) const; // For tree structure
  virtual Datatype *clone(void) const { return new TypeSpacebase(*this); }
@@ -412,7 +425,7 @@ public:
  Architecture *getArch(void) const { return glb; }	///< Get the Architecture object
  Datatype *findByName(const string &n);		///< Return type of given name
  Datatype *setName(Datatype *ct,const string &n); 	///< Set the given types name
-  bool setFields(vector<TypeField> &fd,TypeStruct *ot,int4 fixedsize);	///< Set fields on a TypeStruct
+  bool setFields(vector<TypeField> &fd,TypeStruct *ot,int4 fixedsize,uint4 flags);	///< Set fields on a TypeStruct
  bool setEnumValues(const vector<string> &namelist,
 		      const vector<uintb> &vallist,
 		      const vector<bool> &assignlist,
@@ -424,8 +437,8 @@ public:
  Datatype *getBase(int4 s,type_metatype m);			///< Get atomic type
  Datatype *getBase(int4 s,type_metatype m,const string &n);	///< Get named atomic type
  TypeCode *getTypeCode(void);					///< Get an "anonymous" function data-type
-  TypePointer *getTypePointer(int4 s,Datatype *pt,uint4 ws);	///< Construct a pointer data-type
-  TypePointer *getTypePointerAbsolute(int4 s,Datatype *pt,uint4 ws);	///< Construct an absolute pointer data-type
+  TypePointer *getTypePointerStripArray(int4 s,Datatype *pt,uint4 ws);	///< Construct a pointer data-type, stripping an ARRAY level
+  TypePointer *getTypePointer(int4 s,Datatype *pt,uint4 ws);	///< Construct an absolute pointer data-type
  TypePointer *getTypePointerNoDepth(int4 s,Datatype *pt,uint4 ws);	///< Construct a depth limited pointer data-type
  TypeArray *getTypeArray(int4 as,Datatype *ao);		///< Construct an array data-type
  TypeStruct *getTypeStruct(const string &n);			///< Create an (empty) structure
@@ -537,7 +537,7 @@ void TypeOpBranchind::printRaw(ostream &s,const PcodeOp *op)
 TypeOpCall::TypeOpCall(TypeFactory *t) : TypeOp(t,CPUI_CALL,"call")

 {
-  opflags = (PcodeOp::special|PcodeOp::call|PcodeOp::coderef|PcodeOp::nocollapse);
+  opflags = (PcodeOp::special|PcodeOp::call|PcodeOp::has_callspec|PcodeOp::coderef|PcodeOp::nocollapse);
  behave = new OpBehavior(CPUI_CALL,false,true); // Dummy behavior
 }

@@ -610,7 +610,7 @@ Datatype *TypeOpCall::getOutputLocal(const PcodeOp *op) const
 TypeOpCallind::TypeOpCallind(TypeFactory *t) : TypeOp(t,CPUI_CALLIND,"callind")

 {
-  opflags = PcodeOp::special|PcodeOp::call|PcodeOp::nocollapse;
+  opflags = PcodeOp::special|PcodeOp::call|PcodeOp::has_callspec|PcodeOp::nocollapse;
  behave = new OpBehavior(CPUI_CALLIND,false,true); // Dummy behavior
 }

--- a/Show More
+++ b/Show More