diff --git a/Ghidra/Features/Decompiler/buildNatives.gradle b/Ghidra/Features/Decompiler/buildNatives.gradle index e0dfb2fa3f..d2332bafa4 100644 --- a/Ghidra/Features/Decompiler/buildNatives.gradle +++ b/Ghidra/Features/Decompiler/buildNatives.gradle @@ -105,6 +105,7 @@ model { include "merge.cc" include "double.cc" include "constseq.cc" + include "bitfield.cc" include "coreaction.cc" include "condexe.cc" include "override.cc" diff --git a/Ghidra/Features/Decompiler/certification.manifest b/Ghidra/Features/Decompiler/certification.manifest index 3ff8179a61..7df6e9e364 100644 --- a/Ghidra/Features/Decompiler/certification.manifest +++ b/Ghidra/Features/Decompiler/certification.manifest @@ -12,6 +12,8 @@ src/decompile/.cproject||GHIDRA||||END| src/decompile/cpp/.gitignore||GHIDRA||||END| src/decompile/cpp/Doxyfile||GHIDRA|||Most of this file is autogenerated by doxygen which falls under the GPL - output from GPL products are NOT GPL! - mjbell4|END| src/decompile/cpp/Makefile||GHIDRA||||END| +src/decompile/datatests/bitfields.xml||GHIDRA||||END| +src/decompile/datatests/bitfields2.xml||GHIDRA||||END| src/decompile/datatests/boolless.xml||GHIDRA||||END| src/decompile/datatests/ccmp.xml||GHIDRA||||END| src/decompile/datatests/concat.xml||GHIDRA||||END| diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile b/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile index ca0719fa5f..61ebba3c72 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/Makefile @@ -83,7 +83,7 @@ DECCORE=capability architecture options graph cover block cast typeop database c variable varmap jumptable emulate emulateutil flow userop expression multiprecision \ funcdata funcdata_block funcdata_op funcdata_varnode unionresolve pcodeinject \ heritage prefersplit rangeutil ruleaction subflow blockaction merge double \ - transform constseq coreaction condexe override dynamic crc32 prettyprint \ + transform constseq bitfield 
coreaction condexe override dynamic crc32 prettyprint \
         printlanguage printc printjava memstate opbehavior paramid signature $(COREEXT_NAMES)
 # Files used for any project that use the sleigh decoder
 SLEIGH= sleigh pcodeparse pcodecompile sleighbase slghsymbol \
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc
index 9e8039d486..3a3d33a2a0 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/address.cc
@@ -627,6 +627,246 @@ void RangeList::decode(Decoder &decoder)
   decoder.closeElement(elemId);
 }
 
+/// The bit range of another BitRange is re-expressed relative to a new byte container.
+/// The number of bits and the endianness are copied; \b leastSigBit is recomputed via translateLSB().
+/// \param op2 is the range to copy
+/// \param off is the byte offset of the new container
+/// \param sz is the size in bytes of the new container
+BitRange::BitRange(const BitRange &op2,int4 off,int4 sz)
+
+{
+  byteOffset = off;
+  byteSize = sz;
+  numBits = op2.numBits;
+  isBigEndian = op2.isBigEndian;
+  leastSigBit = translateLSB(op2);      // Must come last: translateLSB reads the container fields set above
+}
+
+/// Both the byte container and the bit range are compared and must be equal to return 0.
+/// Fields are compared in order: \b byteOffset, \b byteSize, \b leastSigBit, then \b numBits.
+/// The \b isBigEndian flag does not participate in the comparison.
+/// \param op2 is the other bit range to compare with
+/// \return -1, 0, or 1 to establish ordering the two ranges
+int4 BitRange::compare(const BitRange &op2) const
+
+{
+  if (byteOffset != op2.byteOffset)
+    return (byteOffset < op2.byteOffset) ? -1:1;
+  if (byteSize != op2.byteSize)
+    return (byteSize < op2.byteSize) ? -1 : 1;
+  if (leastSigBit != op2.leastSigBit)
+    return (leastSigBit < op2.leastSigBit) ? -1:1;
+  if (numBits != op2.numBits)
+    return (numBits < op2.numBits) ? -1:1;
+  return 0;
+}
+
+/// The returned result is directly comparable with \b leastSigBit for determining order/overlap.
+/// Only the endianness of \b this is consulted; the flag on op2 is not examined.
+/// \param op2 is the other BitRange to translate into \b this frame
+/// \return the translated value of op2.leastSigBit
+int4 BitRange::translateLSB(const BitRange &op2) const
+
+{
+  int4 op2Sig = op2.leastSigBit;
+  if (isBigEndian) {
+    // Adjust by the distance between the ends of the two containers
+    int4 thisPos = byteOffset + byteSize;
+    int4 op2Pos = op2.byteOffset + op2.byteSize;
+    op2Sig += 8 * (thisPos - op2Pos);
+  }
+  else {
+    // Adjust by the distance between the starts of the two containers
+    op2Sig += 8 * (op2.byteOffset - byteOffset);
+  }
+  return op2Sig;
+}
+
+/// The other range is first translated into the frame of \b this, so the two ranges may
+/// have different byte containers.
+///
+/// Return:
+///   - -1 if \b this should come before (no intersection)
+///   - 0 if \b this and op2 are the same bitrange
+///   - 1 if \b this should come after (no intersection)
+///   - 2 if \b this is contained in op2
+///   - 3 if op2 is contained in \b this
+///   - 4 if partial overlap
+///
+/// \param op2 is the other range to compare
+/// \return the intersection code
+int4 BitRange::overlapTest(const BitRange &op2) const
+
+{
+  int4 op2Sig = translateLSB(op2);
+  int4 thisMost = leastSigBit + numBits;        // One past the most significant bit of this
+  int4 op2Most = op2Sig + op2.numBits;          // One past the most significant bit of op2
+  if (isBigEndian) {
+    // Big endian: the range holding the more significant bits orders first
+    if (leastSigBit >= op2Most) return -1;
+    if (op2Sig >= thisMost) return 1;
+  }
+  else {
+    // Little endian: the range holding the less significant bits orders first
+    if (thisMost <= op2Sig) return -1;
+    if (op2Most <= leastSigBit) return 1;
+  }
+  // Reaching here we have some kind of intersection
+  if (leastSigBit == op2Sig && thisMost == op2Most) return 0;
+  if (op2Sig <= leastSigBit && op2Most >= thisMost) return 2;   /// this contained in op2
+  if (leastSigBit <= op2Sig && thisMost >= op2Most) return 3;   /// op2 contained in this
+  return 4;
+}
+
+/// The byte container for \b this does not change only \b leastSigBit and \b numBits.
+/// If the intersection is empty, \b numBits is set to 0.
+/// \param op2 is the bit range to intersect with \b this.
+void BitRange::intersection(const BitRange &op2)
+
+{
+  int4 op2Sig = translateLSB(op2);      // Position of op2 in the frame of this
+  int4 op2Most = op2Sig + op2.numBits;
+  int4 thisMost = leastSigBit + numBits;
+  if (op2Sig > leastSigBit) {           // Trim bits below the start of op2
+    numBits -= (op2Sig - leastSigBit);
+    leastSigBit = op2Sig;
+  }
+  if (op2Most < thisMost) {             // Trim bits above the end of op2
+    numBits -= (thisMost - op2Most);
+  }
+  if (numBits < 0) {                    // Ranges were disjoint, normalize to the empty range
+    leastSigBit = 0;
+    numBits = 0;
+  }
+}
+
+/// The range of bits is intersected with the 1-bits of the mask.  The resulting
+/// range is the minimal cover of the bits in the intersection.
+/// If no 1-bit of the mask falls in the range, \b this becomes the empty range (0 bits at position 0).
+/// \param mask is the mask to intersect with
+void BitRange::intersectMask(uintb mask)
+
+{
+  mask &= getMask();                    // Only 1-bits inside the current range are relevant
+  if (mask == 0) {
+    leastSigBit = 0;
+    numBits = 0;
+    return;
+  }
+  int4 newLeastSig = leastsigbit_set(mask);
+  int4 newMostSig = mostsigbit_set(mask) + 1;   // One past the highest surviving bit
+  int4 thisMost = leastSigBit + numBits;
+  if (newLeastSig > leastSigBit) {
+    numBits -= (newLeastSig - leastSigBit);
+    leastSigBit = newLeastSig;
+  }
+  if (newMostSig < thisMost) {
+    numBits -= (thisMost - newMostSig);
+  }
+}
+
+/// The bit range is shifted to the left by the given amount.
+/// A negative amount shifts to the right. Bits moved past either end of the byte container
+/// are dropped from the range, so \b numBits may shrink (to 0 if nothing remains).
+/// \param leftShiftAmount is the amount to shift the range by
+void BitRange::shift(int4 leftShiftAmount)
+
+{
+  leastSigBit += leftShiftAmount;
+  int4 most = leastSigBit + numBits;
+  if (leastSigBit < 0) {                // Bits fell off the least significant end
+    numBits += leastSigBit;
+    leastSigBit = 0;
+  }
+  else if (most > byteSize * 8) {       // Bits fell off the most significant end
+    numBits -= (most - byteSize * 8);
+  }
+  if (numBits < 0) {
+    leastSigBit = 0;
+    numBits = 0;
+  }
+}
+
+/// The number of bits may be affected.
+/// \param num is the number of bytes to truncate
+void BitRange::truncateMostSigBytes(int4 num)
+
+{
+  if (isBigEndian) {
+    byteOffset += num;          // Most significant bytes come first in big endian
+  }
+  byteSize -= num;
+  int4 maxOffset = leastSigBit + numBits;
+  if (maxOffset > byteSize * 8)
+    numBits -= (maxOffset - byteSize * 8);      // Drop bits that no longer fit in the container
+  if (numBits < 0)
+    numBits = 0;
+}
+
+/// The least significant bit position moves down by 8 bits per byte removed.
+/// Bits of the range that fall in the removed bytes are dropped.
+/// \param num is the number of bytes to truncate
+void BitRange::truncateLeastSigBytes(int4 num)
+
+{
+  if (!isBigEndian)
+    byteOffset += num;          // Least significant bytes come first in little endian
+  byteSize -= num;
+  leastSigBit -= num * 8;
+  if (leastSigBit < 0) {
+    numBits = numBits + leastSigBit;
+    leastSigBit = 0;
+    if (numBits < 0)
+      numBits = 0;
+  }
+}
+
+/// Only the container is affected, the bit range itself does not change.
+/// \param num is the number of bytes to add
+void BitRange::extendBytes(int4 num)
+
+{
+  if (isBigEndian)
+    byteOffset -= num;
+  byteSize += num;
+}
+
+/// The bit-mask is aligned with the byte container.
+/// If \b numBits is at least the number of bits in a uintb, all bits at or above \b leastSigBit are set.
+/// \return the bit-mask describing \b this range
+uintb BitRange::getMask(void) const
+
+{
+  uintb res;
+  if (numBits >= sizeof(uintb)*8)
+    res = 0;                    // The decrement below wraps this to all 1-bits
+  else {
+    res = 1;
+    res <<= numBits;
+  }
+  res -= 1;
+  res <<= leastSigBit;
+  return res;
+}
+
+/// \return \b true if the beginning and end of the range fall on byte boundaries
+bool BitRange::isByteRange(void) const
+
+{
+  if ((numBits & 7) != 0) return false;
+  if ((leastSigBit & 7) != 0) return false;
+  return true;
+}
+
+/// \return \b true if the most significant bit of the field and the container are the same
+bool BitRange::isMostSignificant(void) const
+
+{
+  return 8*byteSize == leastSigBit + numBits;
+}
+
+/// Whole bytes below the least significant bit and above the most significant bit of the
+/// range are removed from the container. Afterward \b leastSigBit is less than 8.
+void BitRange::minimizeContainer(void)
+
+{
+  int4 trunc = leastSigBit / 8;         // Whole bytes below the range
+  if (isBigEndian)
+    byteSize -= trunc;
+  else
+    byteOffset += trunc;                // byteSize is reduced by the final adjustment below
+  leastSigBit &= 7;
+  int4 num = byteSize - ((leastSigBit + numBits + 7) / 8);      // Bytes beyond what the range needs
+  if (num > 0) {
+    if (isBigEndian)
+      byteOffset += num;
+    byteSize -= num;
+  }
+}
+
+/// The least significant bit of the range is unchanged.
+void BitRange::expandToMost(void)
+
+{
+  numBits = 8*byteSize - leastSigBit;   // Increase number of bits to maximum that still fits
+}
+
 #ifdef UINTB4
 uintb uintbmasks[9] = { 0, 0xff, 0xffff, 0xffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
 #else
@@ -675,6 +915,22 @@ uintb sign_extend(uintb in,int4 sizein,int4 sizeout)
   return res;
 }
 
+/// The bit at position (numbits-1) is treated as a sign bit and copied into all more
+/// significant bits of the integer. If \b numbits is not positive, or already fills the
+/// integer, the value is returned unchanged.
+/// \param val is the value to extend
+/// \param numbits is the number of bits in the value
+/// \param size is the integer size in bytes
+/// \return the extended value
+uintb extend_signbit(uintb val,int4 numbits,int4 size)
+
+{
+  if (numbits > 0 && numbits < size * 8) {      // numbits == 0 would shift by the full word width
+    int4 sa = 8*sizeof(intb) - numbits;
+    intb sval = (intb)(val << sa);      // Shift while unsigned: left shifting a negative signed value is undefined
+    val = (uintb)(sval >> sa);          // Signed right shift copies the sign bit back down
+    val &= calc_mask(size);
+  }
+  return val;
+}
+
 /// Swap the least significant \b size bytes in \b val
 /// \param val is a reference to the value to swap
 /// \param size is the number of bytes to swap
diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh
index dba288d5eb..510432e5a2 100644
--- a/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh
+++ b/Ghidra/Features/Decompiler/src/decompile/cpp/address.hh
@@ -253,6 +253,37 @@ public:
   void decode(Decoder &decoder);        ///< Decode \b this RangeList from a \<rangelist> element
 };
 
+/// \brief An endian aware range of bits contained in a contiguous set of bytes
+///
+/// The range is described by a byte container (\b byteOffset, \b byteSize) and a bit position
+/// and length within that container (\b leastSigBit, \b numBits).
+class BitRange {
+public:
+  int4 byteOffset;              ///< Byte offset of the region containing the range
+  int4 byteSize;                ///< Size of the region in bytes
+  int4 leastSigBit;             ///< Least significant bit of the bit-range within its region
+  int4 numBits;                 ///< Number of bits in the range
+  bool isBigEndian;             ///< Is the underlying encoding big endian
+  BitRange(void) { byteOffset = -1; byteSize = -1; leastSigBit = -1; numBits = -1; isBigEndian = false; }       ///< Construct \e undefined range
+  BitRange(int4 bOff,int4 bSize,bool bigEndian) {
+    byteOffset = bOff; byteSize = bSize; leastSigBit = 0; numBits = bSize * 8; isBigEndian = bigEndian; }       ///< Construct byte range
+  BitRange(const BitRange &op2,int4 off,int4 sz);       ///< Constructor, copy range into new container
+  BitRange(int4 bOff,int4 bSize,int4 least,int4 num,bool bigEndian) { byteOffset = bOff; byteSize = bSize; leastSigBit = least;
+    numBits = num; isBigEndian = bigEndian; }   ///< Constructor
+  bool empty(void) const { return (numBits <= 0); }     ///< Return \b true if \b this is an empty bit range (zero bits)
+  int4 compare(const BitRange &op2) const;      ///< Compare \b this with another as containers
+  int4 translateLSB(const BitRange &op2) const; ///< Translate the \b leastSigBit of the given range into \b this reference frame
+  int4 overlapTest(const BitRange &op2) const;  ///< Characterize the type of overlap between \b this and another range
+  void intersection(const BitRange &op2);       ///< Replace \b this with the intersection of \b this with another BitRange
+  void intersectMask(uintb mask);               ///< Restrict \b this with a mask that lines up with the container
+  void shift(int4 leftShiftAmount);             ///< Replace \b this with the shifted range
+  void truncateMostSigBytes(int4 num);          ///< Truncate the most significant bytes in the byte container
+  void truncateLeastSigBytes(int4 num);         ///< Truncate the least significant bytes in the byte container
+  void extendBytes(int4 num);                   ///< Add most significant bytes to the container
+  uintb getMask(void) const;                    ///< Get mask representing \b this range
+  bool isByteRange(void) const;                 ///< Return \b true if \b this bit range is also a byte range
+  bool isMostSignificant(void) const;           ///< Return \b true if the bit range occupies the most significant bits of the container
+  void minimizeContainer(void);                 ///< Shrink the container to fit the bit range
+  void expandToMost(void);                      ///< Expand the bitrange until it includes the most significant bits of the container
+};
+
 /// Precalculated masks indexed by size
 extern uintb uintbmasks[];
 
@@ -577,6 +608,7 @@ extern bool signbit_negative(uintb val,int4 size);        ///< Return true if the sign-
 extern uintb calc_mask(int4 size);                     ///< Calculate a mask for a
given byte size extern uintb uintb_negate(uintb in,int4 size); ///< Negate the \e sized value extern uintb sign_extend(uintb in,int4 sizein,int4 sizeout); ///< Sign-extend a value between two byte sizes +extern uintb extend_signbit(uintb val,int4 numbits,int4 size); ///< Extend a signed value of given number of bits to a full integer extern void byte_swap(intb &val,int4 size); ///< Swap bytes in the given value diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/bitfield.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/bitfield.cc new file mode 100644 index 0000000000..add61c741b --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/bitfield.cc @@ -0,0 +1,2392 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#include "bitfield.hh"
+#include "funcdata.hh"
+
+namespace ghidra {
+
+/// Construct a state that follows a specific bitfield. The bit range of the field is
+/// translated into the byte container of the given \e used range.
+/// The state is marked sign extended if the field's data-type is TYPE_INT and the field
+/// occupies the most significant bits of the container.
+/// \param used is the range of bits considered in use within the Varnode
+/// \param vn is the Varnode holding the field
+/// \param fld is the bitfield being followed
+BitFieldNodeState::BitFieldNodeState(const BitRange &used,Varnode *vn,const TypeBitField *fld)
+  : bitsUsed(used), bitsField(fld->bits,used.byteOffset,used.byteSize)
+{
+  node = vn;
+  field = fld;
+  origLeastSigBit = bitsField.leastSigBit;
+  isSignExtended = (field->type->getMetatype() == TYPE_INT) && bitsField.isMostSignificant();
+}
+
+/// Construct a state for a range of bits that is not associated with a formal bitfield.
+/// The \b field pointer is null and the state is never sign extended.
+/// \param used is the range of bits considered in use within the Varnode
+/// \param vn is the Varnode holding the range
+/// \param leastSig is the least significant bit of the range (within the container of \e used)
+/// \param numBits is the number of bits in the range
+BitFieldNodeState::BitFieldNodeState(const BitRange &used,Varnode *vn,int4 leastSig,int4 numBits)
+  : bitsUsed(used), bitsField(used.byteOffset,used.byteSize,leastSig,numBits,used.isBigEndian)
+{
+  node = vn;
+  field = (const TypeBitField *)0;
+  origLeastSigBit = bitsField.leastSigBit;
+  isSignExtended = false;
+}
+
+/// Copy another state, but replace \b bitsField
+/// \param copy is the state to copy
+/// \param newField is the new range for \b bitsField
+/// \param vn is the Varnode holding the new range
+/// \param sgnExt is the new state of sign extension
+BitFieldNodeState::BitFieldNodeState(const BitFieldNodeState &copy,const BitRange &newField,Varnode *vn,bool sgnExt)
+  : bitsUsed(copy.bitsUsed), bitsField(newField)
+{
+  node = vn;
+  field = copy.field;
+  origLeastSigBit = copy.origLeastSigBit;       // The original position is inherited, not recomputed
+  isSignExtended = sgnExt;
+}
+
+/// A BitFieldNodeState is constructed for each bitfield that the Varnode overlaps.
+/// Holes between bitfields can also have a BitFieldNodeState.
+/// Fields are visited in sorted order. A field that is not wholly contained in the Varnode
+/// is treated like a hole covering the portion that does overlap.
+/// \param vn is the given Varnode
+/// \param followHoles is \b true if a record for each hole should be created
+void BitFieldTransform::establishFields(Varnode *vn,bool followHoles)
+
+{
+  int4 vnBitSize = vn->getSize() * 8;
+  BitRange bitrange(initialOffset,vn->getSize(),0,vnBitSize,isBigEndian);
+  vector<BitFieldTriple> overlap;
+  parentStruct->collectBitFields(0, overlap, initialOffset, vn->getSize());
+  sort(overlap.begin(),overlap.end(),BitFieldTriple::compare);
+  int4 pos = 0;                 // Next bit position in vn not yet accounted for
+  // NOTE(review): the loop header and the first two statements of the body were damaged
+  // in this copy of the patch and are reconstructed from the surrounding code -- confirm against upstream
+  for(int4 i=0;i<overlap.size();++i) {
+    const BitFieldTriple &triple(overlap[i]);
+    int4 fieldPos = bitrange.translateLSB(triple.bitfield->bits);
+    int4 fieldEnd = fieldPos + triple.bitfield->bits.numBits;
+    if (fieldPos > vnBitSize)
+      fieldPos = vnBitSize;
+    if (fieldEnd > vnBitSize)
+      fieldEnd = vnBitSize;
+    if (fieldPos > pos) {               // We have a hole
+      if (followHoles)
+        workList.emplace_back(bitrange,vn,pos,(fieldPos - pos));
+      pos = fieldPos;
+    }
+    int4 code = bitrange.overlapTest(triple.bitfield->bits);
+    if (code == 0 || code == 3)
+      workList.emplace_back(bitrange,vn,triple.bitfield);       // Field is properly contained in vn
+    else {
+      if (followHoles)                  // Partially overlapping field is followed as if it were a hole
+        workList.emplace_back(bitrange,vn,pos,(fieldEnd-pos));
+    }
+    pos = fieldEnd;
+  }
+  if (pos < vnBitSize && followHoles) {
+    workList.emplace_back(bitrange,vn,pos,vnBitSize-pos);       // Final hole
+  }
+}
+
+/// If the data-type is a structure, or a partial structure whose parent is a structure,
+/// the parent structure and initial offset are recorded. Otherwise the parent structure
+/// is left null and the initial offset is left at -1.
+/// \param f is the containing function
+/// \param dt is the bitfield data-type
+/// \param off is any initial byte offset into the data-type for the root Varnode
+BitFieldTransform::BitFieldTransform(Funcdata *f,Datatype *dt,int4 off)
+
+{
+  func = f;
+  parentStruct = (TypeStruct *)0;
+  containerSize = -1;
+  initialOffset = -1;
+  if (dt->getMetatype() == TYPE_STRUCT) {
+    parentStruct = (TypeStruct *)dt;
+    initialOffset = off;
+  }
+  else if (dt->getMetatype() == TYPE_PARTIALSTRUCT) {
+    TypePartialStruct *part = (TypePartialStruct *)dt;
+    dt = part->getParent();
+    if (dt->getMetatype() == TYPE_STRUCT) {
+      parentStruct = (TypeStruct *)dt;
+      initialOffset = off + part->getOffset();  // Offset is relative to the parent structure
+    }
+  }
+  isBigEndian = f->getArch()->getDefaultDataSpace()->isBigEndian();
+}
+
+/// If the state is for a partial field whose storage location is overwritten
+/// later in the same basic block, return \b true
+/// \param state is the field
+/// \return \b true if a partial field has been overwritten
+bool BitFieldInsertTransform::isOverwrittenPartial(const BitFieldNodeState &state)
+
+{
+  if (state.field != (const TypeBitField *)0)
+    return false;               // Field is not partial
+  if (state.bitsField.byteSize > sizeof(uintb))
+    return false;
+  if (finalWriteOp->code() != CPUI_STORE) {
+    // Reconstruct the original bit range
+    BitRange curRange(initialOffset,mappedVn->getSize(),state.origLeastSigBit,state.bitsField.numBits,isBigEndian);
+    return findOverwrite(mappedVn,finalWriteOp->getParent(),curRange);
+  }
+  return false;
+}
+
+/// Check if the Varnode in the given state is produced by a ZPULL or SPULL of exactly the
+/// bits of the field being followed, pulled from the original value.
+/// \param state is the field being followed
+/// \return \b true if the Varnode is the field pulled from the original value
+bool BitFieldInsertTransform::checkPulledOriginalValue(BitFieldNodeState &state)
+
+{
+  if (!state.node->isWritten()) return false;
+  PcodeOp *op = state.node->getDef();
+  OpCode opc = op->code();
+  if (opc != CPUI_ZPULL && opc != CPUI_SPULL) return false;
+  int4 pos = (int4)op->getIn(1)->getOffset();
+  int4 numbits = (int4)op->getIn(2)->getOffset();
+  if (pos != state.bitsField.leastSigBit) return false;
+  if (numbits != state.bitsField.numBits) return false;
+  return checkOriginalBase(op->getIn(0));
+}
+
+/// If the Varnode is the initial value of the storage being inserted into, return \b true.
+/// This can be either the result of the initial LOAD or the mapped storage location being read directly.
+/// On success the Varnode is recorded as \b originalValue.
+/// \param vn is the given Varnode to check
+/// \return \b true if it is the original value
+bool BitFieldInsertTransform::checkOriginalBase(Varnode *vn)
+
+{
+  if (finalWriteOp->code() == CPUI_STORE) {
+    // Value must come from a LOAD through the same pointer, in the same block as the STORE
+    if (!vn->isWritten()) return false;
+    PcodeOp *loadOp = vn->getDef();
+    if (loadOp->code() != CPUI_LOAD) return false;
+    if (!pointerEquality(loadOp->getIn(1), finalWriteOp->getIn(1))) return false;
+    if (loadOp->getParent() != finalWriteOp->getParent()) return false;
+  }
+  else {
+    // Value must be a different, address tied, Varnode with the same storage as the mapped Varnode
+    if (mappedVn == vn) return false;
+    if (mappedVn->getAddr() != vn->getAddr() || mappedVn->getSize() != vn->getSize())
+      return false;
+    if (!vn->isAddrTied()) return false;
+  }
+  originalValue = vn;
+  return true;
+}
+
+/// The field must still be at its original bit position for the test to succeed.
+/// \param state is the given Varnode
+/// \return \b true if the Varnode contains the \e original \e value for the bitfield(s)
+bool BitFieldInsertTransform::isOriginalValue(BitFieldNodeState &state)
+
+{
+  if (state.bitsField.leastSigBit != state.origLeastSigBit) return false;
+  if (state.node == originalValue) return true;         // Matches a previously discovered original value
+  if (checkPulledOriginalValue(state))
+    return true;
+  return checkOriginalBase(state.node);
+}
+
+/// If the state is not following a specific field, \b false is returned.
+/// The state will no longer be followed.
+/// \param state gives the constant Varnode and field
+/// \return \b true if an InsertRecord was created
+bool BitFieldInsertTransform::addConstantWrite(BitFieldNodeState &state)
+
+{
+  uintb value = state.node->getOffset();
+  state.node = (Varnode *)0;
+  if (state.field == (const TypeBitField *)0) {
+    return false;
+  }
+  if (state.bitsField.byteSize > sizeof(uintb)) return false;
+  uintb mask = state.bitsField.getMask();
+  value = value & mask;                         // Isolate the bits of the field
+  value >>= state.bitsField.leastSigBit;        // and normalize them to bit position 0
+  if (state.field->type->getMetatype() == TYPE_INT) {
+    value = extend_signbit(value, state.bitsField.numBits, state.bitsField.byteSize);
+  }
+  insertList.emplace_back(value,state.field->type,state.origLeastSigBit,state.field->bits.numBits);
+  return true;
+}
+
+/// If the state is not following a specific field, \b false is returned.
+/// The state will no longer be followed.
+/// \param state describes the field
+/// \return \b true if an InsertRecord was created
+bool BitFieldInsertTransform::addZeroOut(BitFieldNodeState &state)
+
+{
+  state.node = (Varnode *)0;
+  if (state.field == (const TypeBitField *)0) {
+    return false;
+  }
+  insertList.emplace_back(0,state.field->type,state.origLeastSigBit,state.field->bits.numBits);
+  return true;
+}
+
+/// An InsertRecord is created for the Varnode currently being followed. The state must be
+/// following a specific field (\b field is dereferenced without a null check).
+/// The state will no longer be followed.
+/// \param state is the specific Varnode and field
+void BitFieldInsertTransform::addFieldWrite(BitFieldNodeState &state)
+
+{
+  Datatype *dt = state.field->type;
+  if (dt->getSize() != state.node->getSize())
+    dt = (Datatype *)0;         // Only pass on the field data-type if it matches the size of the Varnode
+  insertList.emplace_back(state.node,dt,state.origLeastSigBit,state.field->bits.numBits,state.bitsField.leastSigBit);
+  state.node = (Varnode *)0;
+}
+
+/// The second input must be a constant mask.
+/// If the mask zeroes out the field, create a zero InsertRecord.
+/// If the mask fully contains the field, follow the field through the first input.
+/// Otherwise return \b false.
+/// \param state is the field being followed
+/// \param op is the INT_AND
+/// \return \b true if the field is followed or zeroed out
+bool BitFieldInsertTransform::handleAndBack(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  Varnode *cvn = op->getIn(1);
+  if (!cvn->isConstant()) return false;
+  if (state.bitsField.byteSize > sizeof(uintb)) return false;
+  uintb val = state.bitsField.getMask();
+  uintb res = val & cvn->getOffset();
+  if (res == val) {
+    state.node = op->getIn(0);
+    state.bitsUsed.intersectMask(cvn->getOffset());     // Update bitsUsed to indicate a bit range was masked
+    return true;                // This field is contained in mask
+  }
+  if (res == 0) {               // The field is zeroed out
+    return addZeroOut(state);
+  }
+  return false;                 // Partial zeroing
+}
+
+/// Follow the field through the input that has not masked off its bitrange.
+/// If neither input has masked off the bitrange, or if both have, return \b false,
+/// unless the second input is a constant that sets every bit of the field, in which
+/// case the constant is followed.
+/// \param state is the field being followed
+/// \param op is the INT_OR
+/// \return \b true if the field is followed through a single input
+bool BitFieldInsertTransform::handleOrBack(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  if (state.bitsField.byteSize > sizeof(uintb)) return false;
+  uintb mask = state.bitsField.getMask();
+  Varnode *vn0 = op->getIn(0);
+  Varnode *vn1 = op->getIn(1);
+  bool isMasked0 = (vn0->getNZMask() & mask) == 0;
+  bool isMasked1 = (vn1->getNZMask() & mask) == 0;
+  if (isMasked0 == isMasked1) {
+    if (vn1->isConstant()) {
+      if ((vn1->getNZMask() & mask) == mask) {  // Or-ing constant that sets all bits of field to 1
+        state.node = vn1;               // Follow the constant
+        return true;
+      }
+    }
+    return false;       // Both inputs are unmasked (or both masked), can't follow field
+  }
+  state.node = isMasked0 ? vn1 : vn0;   // Follow the unmasked Varnode
+
+  return true;
+}
+
+/// An INT_ADD is treated like an INT_OR when the possibly non-zero bits of its two inputs
+/// do not overlap. The field is followed through the single input that has not masked off
+/// its bitrange. If neither input has masked off the bitrange, or if both have, return \b false.
+/// \param state is the field being followed
+/// \param op is the INT_ADD
+/// \return \b true if the field is followed through a single input
+bool BitFieldInsertTransform::handleAddBack(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  if (state.bitsField.byteSize > sizeof(uintb)) return false;
+  Varnode *vn0 = op->getIn(0);
+  Varnode *vn1 = op->getIn(1);
+  uintb mask0 = vn0->getNZMask();
+  uintb mask1 = vn1->getNZMask();
+  if ((mask0 & mask1) != 0)
+    return false;               // Inputs are mixed, can't follow
+  uintb mask = state.bitsField.getMask();
+  bool isMasked0 = (mask0 & mask) == 0;
+  bool isMasked1 = (mask1 & mask) == 0;
+  if (isMasked0 == isMasked1)   // If both unmasked (or both masked), can't follow field
+    return false;
+  state.node = isMasked0 ? vn1 : vn0;
+  return true;
+}
+
+/// Update the state to reflect the shift.  If the field has been completely filled with
+/// zeroes by the shift, create a zero InsertRecord.  If the field is only partially filled,
+/// return \b false.
+/// \param state is the field being followed
+/// \param op is the INT_LEFT
+/// \return \b true if the field is followed or been zeroed out
+bool BitFieldInsertTransform::handleLeftBack(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  Varnode *cvn = op->getIn(1);
+  if (!cvn->isConstant()) return false;
+  int4 sa = cvn->getOffset();
+  if (sa < 0 || sa >= sizeof(uintb)*8) return false;
+  BitRange newRange(state.bitsField);
+  newRange.shift(-sa);          // Going backward, the field sits \e sa bits lower in the input
+  if (state.bitsField.numBits == newRange.numBits) {    // All the bits are still present
+    state.bitsField = newRange;
+    state.bitsUsed.shift(-sa);
+    state.node = op->getIn(0);
+    return true;
+  }
+  else if (newRange.numBits == 0) {             // Zero bits shifted into field
+    return addZeroOut(state);
+  }
+
+  return false;
+}
+
+/// Update the state to reflect the shift.
+/// If any bit of the field does not come from the input, return \b false.
+/// \param state is the field being followed
+/// \param op is the INT_RIGHT
+/// \return \b true if the field is followed
+bool BitFieldInsertTransform::handleRightBack(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  Varnode *cvn = op->getIn(1);
+  if (!cvn->isConstant()) return false;
+  int4 sa = cvn->getOffset();
+  if (sa < 0 || sa >= sizeof(uintb)*8) return false;
+  BitRange newRange(state.bitsField);
+  newRange.shift(sa);           // Going backward, the field sits \e sa bits higher in the input
+  if (state.bitsField.numBits == newRange.numBits) {    // All the bits are still present
+    state.bitsField = newRange;
+    state.bitsUsed.shift(sa);
+    state.node = op->getIn(0);
+    return true;
+  }
+  return false;
+}
+
+/// Follow the field to the input, and update the state to reflect the smaller byte container.
+/// If the field lies entirely within the bits filled in by the extension, create a zero
+/// InsertRecord.  If the field is only partially filled by the extension, return \b false.
+/// \param state is the field being followed
+/// \param op is the INT_ZEXT
+/// \return \b true if field is followed or zeroed out
+bool BitFieldInsertTransform::handleZextBack(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  Varnode *vn = op->getIn(0);
+  int4 truncAmount = op->getOut()->getSize() - vn->getSize();
+  BitRange newRange(state.bitsField);
+  newRange.truncateMostSigBytes(truncAmount);
+  if (state.bitsField.numBits == newRange.numBits) {    // All the bits come from the input
+    state.bitsField = newRange;
+    state.bitsUsed.truncateMostSigBytes(truncAmount);
+    state.node = vn;
+    return true;
+  }
+  // Test the truncated range.  Testing the untruncated range here could never succeed,
+  // as an empty field always passes the equality test above.
+  if (newRange.numBits == 0)
+    return addZeroOut(state);   // Extended zeroes fill out the bitfield
+  return false;                 // Field is a mix of input bits and extension bits
+}
+
+/// Treat INT_MULT by a power of 2 like INT_LEFT.
+/// If the field has been completely filled with zeroes by the implied shift, create a
+/// zero InsertRecord.  If the field is only partially filled, return \b false.
+/// \param state is the field being followed
+/// \param op is the INT_MULT
+/// \return \b true if field is followed or zeroed out
+bool BitFieldInsertTransform::handleMultBack(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  Varnode *vn1 = op->getIn(1);
+  if (!vn1->isConstant()) return false;
+  uintb val = vn1->getOffset();
+  if (popcount(val) != 1) return false;         // Multiplier must be a power of 2
+  int4 sa = leastsigbit_set(val);               // Equivalent left shift amount
+  BitRange newRange(state.bitsField);
+  newRange.shift(-sa);
+  if (state.bitsField.numBits == newRange.numBits) {    // All the bits are still present
+    state.bitsField = newRange;
+    state.bitsUsed.shift(-sa);
+    state.node = op->getIn(0);
+    return true;
+  }
+  // Test the shifted range, as handleLeftBack does.  Testing the unshifted range here could
+  // never succeed, as an empty field always passes the equality test above.
+  else if (newRange.numBits == 0) {             // Zero bits shifted into field
+    return addZeroOut(state);
+  }
+  return false;
+}
+
+/// Follow the field into the input of the SUBPIECE, which may have shifted it
+/// \param state is the field being followed
+/// \param op is the SUBPIECE
+/// \return \b true if field is followed
+bool BitFieldInsertTransform::handleSubpieceBack(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  Varnode *inVn = op->getIn(0);
+  int4 extendAmount = inVn->getSize() - state.node->getSize();
+  int4 sa = (int4)op->getIn(1)->getOffset() * 8;        // Bits truncated from the least significant end
+  BitRange newRange(state.bitsField);
+  newRange.extendBytes(extendAmount);           // Container grows to the size of the input
+  newRange.shift(-sa);
+  if (state.bitsField.numBits == newRange.numBits) {    // All the bits are still present
+    state.bitsField = newRange;
+    state.bitsUsed.extendBytes(extendAmount);
+    state.bitsUsed.shift(-sa);
+    state.node = op->getIn(0);
+    return true;
+  }
+  return false;
+}
+
+/// If the call produces the bitfield structure directly, we can treat the return value
+/// as the original value, even though the storage is not address tied.
+/// On success the output of the call is recorded as \b originalValue.
+/// \param state is the field being followed
+/// \param op is the call
+/// \return \b true if the return value can be treated as the \e original \e value
+bool BitFieldInsertTransform::testCallOriginal(BitFieldNodeState &state,PcodeOp *op)
+
+{
+  if (!op->isCall()) return false;
+  if (finalWriteOp->code() == CPUI_STORE) return false; // If value is being STOREd, original value is not a call
+  if (state.bitsField.leastSigBit != state.origLeastSigBit) return false;
+  if (mappedVn->isAddrTied()) return false;     // If value is address tied, cannot have different storage
+  if (originalValue != (Varnode *)0) return false;      // Already found an original value
+  Datatype *dt = op->getOut()->getTypeDefFacing();
+  int4 off;
+  if (dt->getMetatype() == TYPE_STRUCT) {
+    off = 0;
+  }
+  else if (dt->getMetatype() == TYPE_PARTIALSTRUCT) {
+    TypePartialStruct *part = (TypePartialStruct *)dt;
+    off = part->getOffset();
+    dt = part->getParent();
+  }
+  else
+    return false;
+  if (dt != parentStruct) return false;         // Check if data-type matches
+  if (off != initialOffset) return false;
+  originalValue = op->getOut();
+  return true;
+}
+
+/// The field is traced backward through the ops defining its Varnode until it reaches a
+/// constant, the original value, a value that can be written directly to the field,
+/// or until the field is known to be zero.  If an op cannot be traced through, the non-zero
+/// mask of the current Varnode is used as a last attempt to resolve the field.
+/// \param state is the field to follow backward
+/// \return \b true if there was no conflicting information
+bool BitFieldInsertTransform::processBackward(BitFieldNodeState &state)
+
+{
+  while(state.node != (Varnode *)0) {
+    if (state.node->isConstant()) {
+      return addConstantWrite(state);
+    }
+    if (isOriginalValue(state)) {
+      state.node = (Varnode *)0;
+      return true;
+    }
+    if (state.field != (const TypeBitField *)0) {
+      if (state.isFieldAligned()) {
+        addFieldWrite(state);
+        return true;
+      }
+    }
+    if (!state.node->isWritten()) return false;
+    PcodeOp *op = state.node->getDef();
+    bool liftRes;
+    switch(op->code()) {
+      case CPUI_COPY:
+        state.node = op->getIn(0);
+        liftRes = true;
+        break;
+      case CPUI_INT_ADD:
+        liftRes = handleAddBack(state, op);
+        break;
+      case CPUI_INT_AND:
+        liftRes = handleAndBack(state, op);
+        break;
+      case CPUI_INT_LEFT:
+        liftRes = handleLeftBack(state, op);
+        break;
+      case CPUI_INT_ZEXT:
+        liftRes = handleZextBack(state, op);
+        break;
+      case CPUI_INT_OR:
+        liftRes = handleOrBack(state, op);
+        break;
+      case CPUI_INT_MULT:
+        liftRes = handleMultBack(state, op);
+        break;
+      case CPUI_SUBPIECE:
+        liftRes = handleSubpieceBack(state, op);
+        break;
+      // NOTE(review): handleRightBack is documented as taking an INT_RIGHT, but only
+      // INT_SRIGHT is dispatched to it here -- confirm whether CPUI_INT_RIGHT was intended (also or instead)
+      case CPUI_INT_SRIGHT:
+        liftRes = handleRightBack(state, op);
+        break;
+      case CPUI_CALL:
+      case CPUI_CALLIND:
+      case CPUI_CALLOTHER:
+        liftRes = testCallOriginal(state, op);
+        if (liftRes) {
+          state.node = (Varnode *)0;    // We can treat this as if it matched the original value
+          return true;
+        }
+        break;
+      default:
+        liftRes = false;
+        break;
+    }
+    if (!liftRes) {
+      // Could not trace through the op, fall back on what is known about zero bits
+      if (state.field == (const TypeBitField *)0)
+        return false;
+      if (state.bitsField.byteSize > sizeof(uintb))
+        return false;
+      BitRange nonZeroBits(state.bitsField);
+      nonZeroBits.intersectMask(state.node->getNZMask());       // Apply what we know about zero bits
+      if (nonZeroBits.numBits == 0)
+        return addZeroOut(state);               // All bits in the field are zero
+      state.bitsUsed.intersectMask(state.node->getNZMask());
+      if (nonZeroBits.numBits == state.bitsUsed.numBits) {      // Only used non-zero bits are in the field
+        addFieldWrite(state);
+        return true;
+      }
+      return false;
+    }
+  }
+  return true;
+}
+
+/// If the container covers the entire parent structure, the structure itself is returned.
+/// Otherwise a partial structure is built for the container's offset and size.
+/// \return the bitfield data-type
+Datatype *BitFieldTransform::buildPartialType(void)
+
+{
+  if (containerSize == parentStruct->getSize())
+    return parentStruct;
+  return func->getArch()->types->getTypePartialStruct(parentStruct, initialOffset, containerSize);
+}
+
+/// \brief Return \b true if specified bits in a Varnode are overwritten in the same basic block
+///
+/// This assumes other unspecified bits within the given Varnode are preserved at the point of overwrite.
+/// \param vn is the given Varnode +/// \param bl is the given block +/// \param range specifies the bits within the Varnode +/// \return \b true if the bits are used +bool BitFieldTransform::findOverwrite(Varnode *vn,BlockBasic *bl,const BitRange &range) + +{ + Varnode *cvn; + BitRange minRange = range; + minRange.minimizeContainer(); + Address addr = vn->getAddr() + (minRange.byteOffset - range.byteOffset); + list::const_iterator iter; + for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { + Varnode *curVn = vn; + PcodeOp *op = *iter; + BitRange curRange = range; + do { + if (op->getParent() != bl) { + if (curRange.numBits != 0) + return false; // Bits are used outside the block + break; + } + switch(op->code()) { + case CPUI_PIECE: + if (op->getIn(0) == curVn) { + int4 sz = op->getIn(1)->getSize(); + curRange.extendBytes(sz); + curRange.shift(sz * 8); + } + else { + curRange.extendBytes(op->getIn(0)->getSize()); + } + break; + case CPUI_INT_LEFT: + cvn = op->getIn(1); + if (cvn->isConstant()) + curRange.shift((int4)cvn->getOffset()); + else + return false; + break; + case CPUI_INT_RIGHT: + cvn = op->getIn(1); + if (cvn->isConstant()) + curRange.shift(-(int4)cvn->getOffset()); + else + return false; + break; + case CPUI_COPY: + case CPUI_INT_OR: + case CPUI_INT_XOR: + case CPUI_INT_NEGATE: + break; // Remaining range continues to be used + case CPUI_INT_AND: + cvn = op->getIn(1); + if (cvn->isConstant()) + curRange.intersectMask(cvn->getOffset()); + break; + case CPUI_INSERT: + curRange.intersectMask(~InsertExpression::getRangeMask(op)); + break; + case CPUI_INDIRECT: + curVn = op->getOut(); + if (addr.containedBy(minRange.byteSize, curVn->getAddr(), curVn->getSize())) + return (curRange.numBits == 0); + return false; + break; + default: + if (curRange.numBits != 0) + return false; // Bits are actively used, not overwritten + op = (PcodeOp *)0; // No overlap yet, but don't follow this path further + break; + } + if (op == (PcodeOp *)0) break; + curVn = 
op->getOut(); + if (addr.containedBy(minRange.byteSize, curVn->getAddr(), curVn->getSize())) { + if (curRange.numBits == 0) + return true; + } + if (curVn->hasNoDescend()) break; + op = curVn->loneDescend(); + } while(op != (PcodeOp *)0); + } + return false; +} + +/// If the given op is null, a new INSERT is created, otherwise, op is redefined to be an INSERT. +/// All the INSERT inputs are set based on the InsertRecord. The output is not set or modified. +/// \param op is a preexisting p-code op to reconfigure, or null +/// \param rec is the record describing the INSERT +/// \return the configured INSERT op +PcodeOp *BitFieldInsertTransform::setInsertInputs(PcodeOp *op,const InsertRecord &rec) + +{ + if (op == (PcodeOp *)0) { + op = func->newOp(4,finalWriteOp->getAddr()); + } + else { + while(op->numInput() < 4) + func->opInsertInput(op, (Varnode *)0, op->numInput()); + } + func->opSetOpcode(op, CPUI_INSERT); + func->opSetInput(op,originalValue,0); + Varnode *valVn = rec.vn; + if (valVn == (Varnode *)0) { + if (rec.dt != (Datatype *)0) { + valVn = func->newConstant(rec.dt->getSize(), rec.constVal); + valVn->updateType(rec.dt); + } + else { + valVn = func->newConstant(containerSize, rec.constVal); + } + } + func->opSetInput(op,valVn,1); + func->opSetInput(op,func->newConstant(4,rec.pos),2); + func->opSetInput(op,func->newConstant(4,rec.numBits),3); + func->opMarkSpecialPrint(op); // Not printed as normal operator with output + return op; +} + +/// If necessary, a INT_RIGHT is performed on the (insertion value) input to INSERT. 
+/// \param insertOp is the INSERT +/// \param rec is the given InsertRecord +void BitFieldInsertTransform::addFieldShift(PcodeOp *insertOp,const InsertRecord &rec) + +{ + if (rec.shiftAmount == 0) return; + Varnode *valVn = insertOp->getIn(1); + PcodeOp *shiftOp = func->newOp(2, insertOp->getAddr()); + func->opSetOpcode(shiftOp, CPUI_INT_RIGHT); + Varnode *newOut = func->newUniqueOut(valVn->getSize(),shiftOp); + func->opSetInput(insertOp, newOut, 1); + func->opSetInput(shiftOp, valVn, 0); + func->opSetInput(shiftOp, func->newConstant(4, rec.shiftAmount),1); + func->opInsertBefore(shiftOp, insertOp); +} + +/// Check that the output of the LOAD has only INSERT, ZPULL, SPULL, or the finalWriteOp as a descendant. +/// If so mark the LOAD as non-printing. +/// \param loadOp is the LOAD +/// \return \b true if the LOAD was marked as non-printing +bool BitFieldInsertTransform::foldLoad(PcodeOp *loadOp) const + +{ + Varnode *outvn = loadOp->getOut(); + list::const_iterator iter; + for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { + PcodeOp *op = *iter; + if (op == finalWriteOp) continue; + OpCode opc = op->code(); + if (opc != CPUI_INSERT && opc != CPUI_ZPULL && opc != CPUI_SPULL) + return false; + } + func->opMarkNonPrinting(loadOp); + return true; +} + +/// Check that the pointer into the given LOAD is defined by a PTRSUB and that all descendants of the pointer +/// are LOADs or STOREs that have been absorbed. If so mark the PTRSUB as non-printing. 
+/// \param loadOp is the LOAD +void BitFieldInsertTransform::foldPtrsub(PcodeOp *loadOp) const + +{ + Varnode *vn = loadOp->getIn(1); + if (!vn->isWritten()) return; + PcodeOp *ptrsub = vn->getDef(); + if (ptrsub->code() != CPUI_PTRSUB) return; + list::const_iterator iter; + for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { + PcodeOp *op = *iter; + if (op->code() == CPUI_STORE && op->doesSpecialPrinting()) continue; + if (op->code() == CPUI_LOAD && op->notPrinted()) continue; + return; + } + func->opMarkNonPrinting(ptrsub); +} + +/// Look for (first) two INSERT descendants of \e value being inserted. +/// If these exist and are of the same form and in the same basic block, delete the second one. +/// \param rec is the record referencing the INSERTed \e value +void BitFieldInsertTransform::checkRedundancy(const InsertRecord &rec) + +{ + if (rec.vn == (Varnode *)0) return; + PcodeOp *immedOp = (PcodeOp *)0; + list::const_iterator iter = rec.vn->beginDescend(); + for(;iter != rec.vn->endDescend();++iter) { + PcodeOp *op = *iter; + if (op->code() != CPUI_INSERT) { + if (op->code() != CPUI_INT_RIGHT) continue; + op = op->getOut()->loneDescend(); + if (op == (PcodeOp *)0 || op->code() != CPUI_INSERT) continue; + } + if (immedOp == (PcodeOp *)0) { + immedOp = op; + continue; + } + if (op->getIn(2)->getOffset() != immedOp->getIn(2)->getOffset()) continue; + if (op->getIn(3)->getOffset() != immedOp->getIn(3)->getOffset()) continue; + if (finalWriteOp->code() == CPUI_STORE) { + PcodeOp *store1 = op->getOut()->loneDescend(); + if (store1 == (PcodeOp *)0) continue; + if (store1->code() != CPUI_STORE) continue; + PcodeOp *store2 = immedOp->getOut()->loneDescend(); + if (store2 == (PcodeOp *)0) continue; + if (store2->code() != CPUI_STORE) continue; + if (store1->getParent() != store2->getParent()) continue; + if (!pointerEquality(store1->getIn(1), store2->getIn(1))) continue; + vector scratch; + if (store1->getSeqNum().getOrder() < store2->getSeqNum().getOrder()) + 
func->opDestroyRecursive(store2, scratch); + else + func->opDestroyRecursive(store1, scratch); + } + else { + + } + return; + } +} + +/// \param f is the function +/// \param op is the p-code terminating the putative bitfield expression +/// \param dt is the structure containing bitfields +/// \param off is the amount of offset +BitFieldInsertTransform::BitFieldInsertTransform(Funcdata *f,PcodeOp *op,Datatype *dt,int4 off) + : BitFieldTransform(f,dt,off) +{ + if (initialOffset == -1) + return; + finalWriteOp = op; + Varnode *outvn; + if (finalWriteOp->code() == CPUI_STORE) { + outvn = finalWriteOp->getIn(2); + } + else if (finalWriteOp->code() == CPUI_INDIRECT) { + mappedVn = finalWriteOp->getOut(); // Keep the storage location of the INDIRECT output + outvn = op->getIn(0); + if (!outvn->isWritten()) return; + finalWriteOp = outvn->getDef(); // But use the op feeding the INDIRECT as the finalWriteOp + } + else { + outvn = finalWriteOp->getOut(); + mappedVn = outvn; + } + containerSize = outvn->getSize(); + originalValue = (Varnode *)0; + establishFields(outvn,true); +} + +/// Verify that any STORE between the original value LOAD and the final STORE +/// does not affect any of the known original value bits. 
+/// \param mask is the set of bits that must come from the putative \e original \e value +/// \return \b true if there is no interference +bool BitFieldInsertTransform::verifyLoadStoreOriginalValue(uintb mask) const + +{ + PcodeOp *loadOp = originalValue->getDef(); + list::const_iterator iter = finalWriteOp->getBasicIter(); + list::const_iterator biter = finalWriteOp->getParent()->beginOp(); + uintb off; + Varnode *basePtr = rootPointer(finalWriteOp->getIn(1), off); + while(iter != biter) { + --iter; + PcodeOp *op = *iter; + if (op == loadOp) return true; + if (op->isCall()) return false; + if (op->code() != CPUI_STORE) continue; + if (op->getIn(0)->getOffset() != loadOp->getIn(0)->getOffset()) + continue; // LOAD and STORE not to same address space + uintb otherOff; + if (basePtr != rootPointer(op->getIn(1),otherOff)) + return false; // Unrelated pointer (potential alias) + if (otherOff != off) + continue; + Varnode *vn = op->getIn(2); + if (!vn->isWritten()) return false; // Unknown value + PcodeOp *insertOp = vn->getDef(); + if (insertOp->code() != CPUI_INSERT) return false; // Unknown value + uintb insertMask = InsertExpression::getRangeMask(insertOp); + if ((insertMask & mask) != 0) return false; // Writing bits that are supposed to be original value + } + return true; +} + +/// Verify that any write to the mapped storage location between the original value LOAD and the STORE +/// does not affect any of the known original value bits +/// \param mask is the set of bits that must come from the putative \e original \e value +/// \return \b true if there is no interference +bool BitFieldInsertTransform::verifyMappedOriginalValue(uintb mask) const + +{ + list::const_iterator iter = finalWriteOp->getBasicIter(); + list::const_iterator biter = finalWriteOp->getParent()->beginOp(); + while(iter != biter) { + --iter; + PcodeOp *op = *iter; + Varnode *vn = op->getOut(); + if (vn == originalValue) return true; + if (vn == (Varnode *)0) continue; + if (op->isCall()) 
return false; // Mapped location in unknown state + if (vn->getAddr() != originalValue->getAddr()) continue; + if (vn->getSize() != originalValue->getSize()) continue; + if (!vn->isWritten()) return false; // Unknown value + PcodeOp *insertOp = vn->getDef(); + if (insertOp->code() != CPUI_INSERT) return false; // Unknown value + uintb insertMask = InsertExpression::getRangeMask(insertOp); + if ((insertMask & mask) != 0) return false; // Writing bits that are supposed to be original value + } + return true; +} + +/// Collect all bits which are \b not being INSERTed to by \b this transform. +/// These must be from the \e original \e value of the storage location. +/// \return a mask representing any bits coming from the original value +uintb BitFieldInsertTransform::constructOriginalValueMask(void) const + +{ + uintb mask = 0; + for(list::const_iterator iter=insertList.begin();iter!=insertList.end();++iter) { + const InsertRecord &rec(*iter); + uintb val = 0; + if (rec.numBits < 8*sizeof(uintb)) { + val = 1; + val <<= rec.numBits; + } + val -= 1; + val <<= rec.pos; + mask |= val; + } + mask = ~mask & calc_mask(originalValue->getSize()); + return mask; +} + +/// \return \b true if putative original value bits are unaffected +bool BitFieldInsertTransform::verifyOriginalValueBits(void) const + +{ + if (originalValue == (Varnode *)0) return true; // Not using original value bits + uintb mask = constructOriginalValueMask(); + if (mask == 0) return true; + if (finalWriteOp->code() == CPUI_STORE) + return verifyLoadStoreOriginalValue(mask); + return verifyMappedOriginalValue(mask); +} + +/// Follow all field in the \b workList back and try to match \e insert expressions. 
+/// \return \b true if all fields match +bool BitFieldInsertTransform::doTrace(void) + +{ + if (workList.empty()) + return false; // Nothing to follow + while(!workList.empty()) { + BitFieldNodeState &node( workList.front() ); + if (!processBackward(node) && !isOverwrittenPartial(node)) + return false; + workList.pop_front(); + } + if (insertList.empty()) return false; + return verifyOriginalValueBits(); +} + +void BitFieldInsertTransform::apply(void) + +{ + list::const_iterator iter; + Datatype *partialType = buildPartialType(); + if (finalWriteOp->code() == CPUI_STORE) { + Varnode *deadPoint = finalWriteOp->getIn(2); // Root of expression that may be dead + PcodeOp *currentStore = finalWriteOp; // Original STORE is modified for first INSERT + PcodeOp *loadModel = (PcodeOp *)0; + Datatype *loadType = (Datatype *)0; + if (originalValue == (Varnode *)0) { + originalValue = func->newConstant(containerSize, 0); + } + else { + loadModel = originalValue->getDef(); + loadType = originalValue->getTypeDefFacing(); + } + for(iter=insertList.begin();iter!=insertList.end();++iter) { + const InsertRecord &rec(*iter); + if (currentStore == (PcodeOp *)0) { + currentStore = func->newOp(3, finalWriteOp->getAddr()); // Create new STORE for each additional INSERT + func->opSetOpcode(currentStore, CPUI_STORE); + func->opSetInput(currentStore, finalWriteOp->getIn(0), 0); + func->opSetInput(currentStore, finalWriteOp->getIn(1), 1); + func->opInsertAfter(currentStore, finalWriteOp); + if (loadModel != (PcodeOp *)0) { + PcodeOp *loadOp = func->newOp(2, loadModel->getAddr()); // Create new LOAD for each additional INSERT + func->opSetOpcode(loadOp, CPUI_LOAD); + func->opSetInput(loadOp, loadModel->getIn(0),0); + func->opSetInput(loadOp, loadModel->getIn(1),1); + originalValue = func->newUniqueOut(containerSize, loadOp); + originalValue->updateType(loadType); + func->opInsertBefore(loadOp,currentStore); + func->opMarkNonPrinting(loadOp); // Don't print LOAD, prevent CAST ops + } + } + 
PcodeOp *insertOp = setInsertInputs((PcodeOp *)0,rec); + Varnode *newOut = func->newUniqueOut(containerSize, insertOp); + newOut->updateType(partialType); + func->opSetInput(currentStore,insertOp->getOut(),2); + func->opInsertBefore(insertOp,currentStore); + func->opMarkSpecialPrint(currentStore); // Mark special bitfield printing on STORE + addFieldShift(insertOp, rec); + currentStore = (PcodeOp *)0; + } + func->destroyVarnodeRecursive(deadPoint); + if (loadModel != (PcodeOp *)0 && loadModel->code() == CPUI_LOAD) { + if (foldLoad(loadModel)) { + foldPtrsub(loadModel); + } + } + } + else { // Mapped variable + vector deadPoints; + for(int4 i=0;inumInput();++i) + deadPoints.push_back(finalWriteOp->getIn(i)); // Roots of expressions that may be dead + if (originalValue == (Varnode *)0) { + originalValue = func->newConstant(containerSize, 0); + } + iter = insertList.begin(); + PcodeOp *insertOp = setInsertInputs(finalWriteOp, *iter); // Redefine finalWriteOp as INSERT, preserving original output + insertOp->getOut()->updateType(partialType); + addFieldShift(insertOp, *iter); + ++iter; + + for(;iter!=insertList.end();++iter) { + PcodeOp *lastOp = insertOp; + func->opUnsetInput(lastOp,0); // Unset originalValue as input, so it can go to new INSERT + insertOp = setInsertInputs((PcodeOp *)0, *iter); // New INSERT + Varnode *newOut = func->newVarnodeOut(containerSize, mappedVn->getAddr(), insertOp); + newOut->updateType(partialType); + func->opSetInput(lastOp,newOut,0); + func->opInsertBefore(insertOp, lastOp); + addFieldShift(insertOp, *iter); + } + for(int4 i=0;idestroyVarnodeRecursive(deadPoints[i]); + } + + for(iter=insertList.begin();iter!=insertList.end();++iter) + checkRedundancy(*iter); +} + +/// If a PcodeOp is given, the specific read of the state.readVn will be replaced with a new \e unique Varnode +/// holding the effective extraction. Otherwise the state.readVn will be redefined as an extraction for all reads. 
+/// \param state holds the extracted Varnode and the position of the bitfield at the point of extraction +/// \param op is the specific PcodeOp reading the extracted field or null +BitFieldPullTransform::PullRecord::PullRecord(const BitFieldNodeState &state,PcodeOp *op) + +{ + type = normal; + readVn = state.node; + readOp = op; + dt = state.field->type; + pos = state.origLeastSigBit; + numBits = state.field->bits.numBits; + leftShift = state.bitsField.leastSigBit; + mask = 0; +} + +/// \param state holds the extracted Varnode and the position of the bitfield at the point of extraction +/// \param op is the specific INT_EQUAL or INT_NOTEQUAL PcodeOp reading the extracted field +/// \param val is a mask representing the bitfield within the Varnode +BitFieldPullTransform::PullRecord::PullRecord(const BitFieldNodeState &state,PcodeOp *op,uintb val) + +{ + type = equal; + readVn = state.node; + readOp = op; + dt = state.field->type; + pos = state.origLeastSigBit; + numBits = state.field->bits.numBits; + leftShift = state.bitsField.leastSigBit; + mask = val; +} + +/// \param op is the PcodeOp whose input pull is being aborted +BitFieldPullTransform::PullRecord::PullRecord(PcodeOp *op) + +{ + type = aborted; + readVn = (Varnode *)0; + readOp = op; + dt = (Datatype *)0; + pos = 0; + numBits = 0; + leftShift = 0; + mask = 0; +} + +/// Sort based on the PcodeOp whose input is being pulled +/// \param op2 is the record to compare with \b this +/// \return \b true if \b this comes before \b op2 +bool BitFieldPullTransform::PullRecord::operator<(const PullRecord &op2) const + +{ + if (readOp != (PcodeOp *)0 && op2.readOp != (PcodeOp *)0) { + if (readOp != op2.readOp) + return (readOp->getSeqNum() < op2.readOp->getSeqNum()); + } + else if (readOp == (PcodeOp *)0) + return true; + else if (op2.readOp == (PcodeOp *)0) + return false; + return false; +} + +/// \param vn is the Varnode being read +/// \param bitField is the bitfield being followed +/// \return \b true if all 
consumed bits are in the bitfield +bool BitFieldPullTransform::testConsumed(Varnode *vn,const BitRange &bitField) + +{ + if (bitField.byteSize > sizeof(uintb)) return false; + uintb mask = bitField.getMask(); + uintb intersect = mask & vn->getConsume(); + return (intersect == vn->getConsume()); +} + +/// If the \e bitfield is moved into the output Varnode without losing bits, +/// add the output as a new \e bitfield state and update usage information for original \b root bits. +/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the INT_LEFT reading the \e bitfield Varnode +void BitFieldPullTransform::handleLeftForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + if (op->getIn(0) != state.node) return; + Varnode *cvn = op->getIn(1); + if (!cvn->isConstant()) return; + int4 sa = (int4)cvn->getOffset(); + BitRange newRange(state.bitsField); + newRange.shift(sa); + if (newRange.numBits == 0) + return; + if (state.bitsField.numBits == newRange.numBits) { + bool newSignExt = state.isSignExtended || newRange.isMostSignificant(); + workList.emplace_back(state,newRange,op->getOut(),newSignExt); + workList.back().bitsUsed.shift(sa); + } + else if (testConsumed(op->getOut(),newRange)) { + pullList.emplace_back(state,op); + } +} + +/// If the \e bitfield is moved into the output Varnode without losing bits, +/// add the output as a new \e bitfield state and update usage information for original \b root bits. 
+/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the INT_RIGHT or INT_SRIGHT reading the \e bitfield Varnode +void BitFieldPullTransform::handleRightForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + if (op->getIn(0) != state.node) return; + Varnode *cvn = op->getIn(1); + if (!cvn->isConstant()) return; + int4 sa = (int4)cvn->getOffset(); + BitRange newRange(state.bitsField); + newRange.shift(-sa); + if (newRange.numBits == 0) + return; + if (state.bitsField.numBits == newRange.numBits) { + bool newSignExt = (op->code() == CPUI_INT_SRIGHT) ? state.isSignExtended : false; + workList.emplace_back(state,newRange,op->getOut(),newSignExt); + workList.back().bitsUsed.shift(-sa); + if (op->code() == CPUI_INT_SRIGHT && !state.isSignExtended) { + workList.back().bitsUsed.expandToMost(); // Sign extending bits not in the field + } + } + else if (testConsumed(op->getOut(),newRange)) { + pullList.emplace_back(state,op); + } +} + +/// If the \e bitfield is masked into the output Varnode without losing bits, +/// add the output as a new \e bitfield state and update usage information for original \b root bits. +/// If every bit outside the \e bitfield is zeroed plus additional bits in the \e bitfield, +/// create a PullRecord for this particular read. 
+/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the INT_AND reading the \e bitfield Varnode +void BitFieldPullTransform::handleAndForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + if (op->getIn(0) != state.node) return; + if (state.bitsField.byteSize > sizeof(uintb)) return; + Varnode *cvn = op->getIn(1); + if (!cvn->isConstant()) return; + uintb andVal = cvn->getOffset(); + uintb mask = state.bitsField.getMask(); + uintb intersect = andVal & mask; + if (intersect == 0) return; // Field is completely masked away + if (intersect == mask) { // Nothing is masked away, follow the whole field + bool newSignExt = state.bitsField.isMostSignificant(); + workList.emplace_back(state,state.bitsField,op->getOut(),newSignExt); + workList.back().bitsUsed.intersectMask(andVal); + } + else if (testConsumed(op->getOut(),state.bitsField)) { + pullList.emplace_back(state,op); + } +} + +/// Add the output Varnode as a new \e bitfield state and update usage information for original \b root bits. +/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the INT_ZEXT or INT_SEXT reading the \e bitfield Varnode +void BitFieldPullTransform::handleExtForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + Varnode *outvn = op->getOut(); + int4 diff = outvn->getSize() - state.node->getSize(); + bool newSignExt = (op->code() == CPUI_INT_SEXT) ? 
state.isSignExtended : false; + workList.emplace_back(state,state.bitsField,outvn,newSignExt); + workList.back().bitsField.extendBytes(diff); + workList.back().bitsUsed.extendBytes(diff); + if (op->code() == CPUI_INT_SEXT && !state.isSignExtended) { + workList.back().bitsUsed.expandToMost(); // Sign extending bits not in the field + } +} + +/// If the INT_MULT can be viewed as a left shift, and If the \e bitfield is moved into the output +/// Varnode without losing bits, add the output as a new \e bitfield state and update usage information +/// for original \b root bits. +/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the INT_MULT reading the \e bitfield Varnode +void BitFieldPullTransform::handleMultForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + if (op->getIn(0) != state.node) return; + Varnode *vn1 = op->getIn(1); + if (!vn1->isConstant()) return; + uintb val = vn1->getOffset(); + if (popcount(val) != 1) { + handleLeastSigOp(state, op); + return; + } + int4 sa = leastsigbit_set(val); + BitRange newRange(state.bitsField); + newRange.shift(sa); + if (newRange.numBits == 0) + return; + if (state.bitsField.numBits == newRange.numBits) { + bool newSignExt = state.isSignExtended || newRange.isMostSignificant(); + workList.emplace_back(state,newRange,op->getOut(),newSignExt); + workList.back().bitsUsed.shift(sa); + } +} + +/// If the \e bitfield is truncated without losing bits, add the output as a new +/// \e bitfield state and update usage information for original \b root bits. 
+/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the SUBPIECE reading the \e bitfield Varnode +void BitFieldPullTransform::handleSubpieceForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + if (op->getIn(0) != state.node) return; + int4 leastTrunc = (int4)op->getIn(1)->getOffset(); + int4 mostTrunc = (state.bitsField.byteSize - leastTrunc) - op->getOut()->getSize(); + BitRange newRange(state.bitsField); + + newRange.truncateLeastSigBytes(leastTrunc); + newRange.truncateMostSigBytes(mostTrunc); + if (newRange.numBits == 0) + return; + if (state.bitsField.numBits == newRange.numBits) { + bool newSignExt = state.isSignExtended; // Any sign extension is preserved, since we only truncate and whole field is present + workList.emplace_back(state,newRange,op->getOut(),newSignExt); + workList.back().bitsUsed.truncateLeastSigBytes(leastTrunc); + workList.back().bitsUsed.truncateMostSigBytes(mostTrunc); + } + else if (testConsumed(op->getOut(),newRange)) { + pullList.emplace_back(state,op); + } +} + +/// Test if we can treat the value being INSERTed as a PULL of the current bitfield. +/// The INSERT must only be inserting bits from the bitfield, in which case we create a PullRecord directly. 
+/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the INSERT reading the \e bitfield Varnode +void BitFieldPullTransform::handleInsertForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + if (op->getIn(1) != state.node) return; // Handle only if pull is value being inserted + if (state.bitsField.leastSigBit != 0) return; // Test if least sig bits of inserted value are in the bitfield + int4 sz = (int4)op->getIn(3)->getOffset(); + if (sz > state.bitsField.numBits) return; // Test if more bits are getting INSERTed than are in bitfield + + pullList.emplace_back(state,op); // Can treat input to INSERT as pull of current bitfield +} + +/// If the \b bitfield is the most significant bits being compared, and the +/// constant being compared to has 1 bits in the least significant positions, +/// create a PullRecord indicating the comparison acts on the pulled bits. +/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the comparison reading the \e bitfield Varnode +void BitFieldPullTransform::handleLessForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + if (!state.bitsField.isMostSignificant()) + return; + int4 slot = op->getSlot(state.node); + Varnode *cvn = op->getIn(1-slot); + if (!cvn->isConstant()) return; + uintb val = cvn->getOffset(); + bool leastSigZeroBits = (val & 1) == 0; + int4 numExtremalBits; + if (leastSigZeroBits) + numExtremalBits = leastsigbit_set(val); // Check how many least significant 0 bits + else + numExtremalBits = leastsigbit_set(~val); // Check how many least significant 1 bits + if (numExtremalBits < 0) + numExtremalBits = sizeof(uintb) * 8; + bool needMaskCheck = false; + OpCode opc = op->code(); + if (opc == CPUI_INT_SLESS || opc == CPUI_INT_LESS) { + if (leastSigZeroBits && slot != 0) return; + if (!leastSigZeroBits && slot == 0) + needMaskCheck = true; + } + else if (opc == CPUI_INT_SLESSEQUAL || opc == CPUI_INT_LESSEQUAL) { + 
if (leastSigZeroBits && slot != 1) return; + if (!leastSigZeroBits && slot == 1) + needMaskCheck = true; + } + if (needMaskCheck) { + uintb mask; + if (numExtremalBits >= 8*sizeof(uintb)) + mask = 0; + else { + mask = 1; + mask <<= numExtremalBits; + } + mask -= 1; + if ((mask & state.node->getNZMask()) == mask) return; // Must be at least one 0 bit + } + if (state.bitsField.leastSigBit <= numExtremalBits) { // If the field extends into the extremal bits + // The comparison is only affected by field bits. View field as pulled and then shifted. + pullList.emplace_back(state,op); + } +} + +/// This handles arithmetic/logical ops where the result on least significant bits doesn't change +/// if the more significant bits are truncated from the inputs. +/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the arithmetic/logical op +void BitFieldPullTransform::handleLeastSigOp(const BitFieldNodeState &state,PcodeOp *op) + +{ + if (state.bitsField.leastSigBit != 0) return; // Field must be in least significant bits + if (testConsumed(op->getOut(),state.bitsField)) { + pullList.emplace_back(state,op); + } +} + +/// \param state is the current state of the \e bitfield and the Varnode holding it +/// \param op is the INT_EQUAL or INT_NOTEQUAL comparison reading the \e bitfield Varnode +void BitFieldPullTransform::handleEqualForward(const BitFieldNodeState &state,PcodeOp *op) + +{ + Varnode *cvn = op->getIn(1); + if (state.bitsField.byteSize > sizeof(uintb)) return; + if (!cvn->isConstant()) return; + if (state.field != (const TypeBitField *)0 && state.field->bits.numBits == state.bitsField.numBits) { + uintb val = state.bitsField.getMask(); + pullList.emplace_back(state,op,val); + } + else { + pullList.emplace_back(op); // Abort any pulls into this op + } +} + +/// \param state is the current state of the \e bitfield and the Varnode holding it +void BitFieldPullTransform::processForward(BitFieldNodeState &state) + +{ + 
list::const_iterator iter; + if (state.isFieldAligned() && state.doesSignExtensionMatch()) { + pullList.emplace_back(state,(PcodeOp *)0); + return; + } + for(iter=state.node->beginDescend();iter!=state.node->endDescend();++iter) { + PcodeOp *op = *iter; + switch(op->code()) { + case CPUI_INT_LEFT: + handleLeftForward(state, op); + break; + case CPUI_INT_MULT: + handleMultForward(state, op); + break; + case CPUI_INT_RIGHT: + case CPUI_INT_SRIGHT: + handleRightForward(state, op); + break; + case CPUI_INT_AND: + handleAndForward(state, op); + break; + case CPUI_INT_ZEXT: + case CPUI_INT_SEXT: + handleExtForward(state, op); + break; + case CPUI_INT_LESS: + case CPUI_INT_LESSEQUAL: + case CPUI_INT_SLESS: + case CPUI_INT_SLESSEQUAL: + handleLessForward(state, op); + break; + case CPUI_INT_EQUAL: + case CPUI_INT_NOTEQUAL: + handleEqualForward(state, op); + break; + case CPUI_INT_ADD: + case CPUI_INT_OR: + case CPUI_INT_XOR: + case CPUI_INT_2COMP: + case CPUI_INT_NEGATE: + handleLeastSigOp(state, op); + break; + case CPUI_SUBPIECE: + handleSubpieceForward(state, op); + break; + case CPUI_INSERT: + handleInsertForward(state, op); + break; + default: + break; + } + } +} + +/// \brief Determine if pulls at a specific INT_EQUAL or INT_NOTEQUAL are consistent as a whole +/// +/// Run through PullRecords for a single INT_EQUAL or INT_NOTEQUAL. 
These records are deleted if: +/// - An \e aborted record is present, indicating a partial field or hole is being compared +/// - Other unrelated bits are being compared +/// +/// \param iter points to the first PullRecord for the op +/// \return an iterator pointing after all PullRecords for the op +list::iterator BitFieldPullTransform::testCompareGroup(list::iterator iter) + +{ + list::iterator curiter = iter; + bool isAborted = false; + uintb collectMask = 0; // Collect mask of all fields being tested + Varnode *vn = (*iter).readVn; + PcodeOp *op = (*iter).readOp; + uintb val = op->getIn(1)->getOffset(); + do { + PullRecord &rec( *curiter ); + if (rec.readOp != op) break; + ++curiter; + if (rec.type == PullRecord::aborted) + isAborted = true; + collectMask |= rec.mask; + } while(curiter != pullList.end()); + if (isAborted || (~collectMask & val) != 0 || (~collectMask & vn->getNZMask()) != 0) { + curiter = pullList.erase(iter,curiter); + } + return curiter; +} + +/// Create the ZPULL or SPULL op. Duplicate the LOAD if necessary. Add an INT_LEFT if needed. 
/// \param rec is the given PullRecord
/// \param state is state maintained across all transforms
void BitFieldPullTransform::applyRecord(PullRecord &rec,TransformState &state)

{
  // modOp is the op the new pull is positioned relative to
  PcodeOp *modOp;
  if (rec.readOp == (PcodeOp *)0) {	// null here indicates readVn contains a complete pull
    modOp = rec.readVn->getDef();	// readVn always has a defining op
    func->opUnsetOutput(modOp);		// set up to modify the definition of readVn
  }
  else {			// Otherwise modify the single read of readVn, readOp
    if (rec.readVn != root)
      modOp = rec.readVn->getDef();
    else
      modOp = rec.readOp;
    int4 slot = rec.readOp->getSlot(rec.readVn);
    rec.readVn = func->newUnique(rec.readVn->getSize());	// New Varnode to hold the complete pull
    func->opSetInput(rec.readOp,rec.readVn,slot);
  }
  Varnode *inVn = root;
  // Every pull after the first (state.count > 0) reads from its own copy of the LOAD
  if (loadOp != (PcodeOp *)0 && state.count > 0) {
    PcodeOp *newLoad = func->newOp(2, loadOp->getAddr());
    func->opSetOpcode(newLoad, CPUI_LOAD);		// Make copy of original LOAD
    func->opSetInput(newLoad,loadOp->getIn(0),0);
    func->opSetInput(newLoad,loadOp->getIn(1),1);
    inVn = func->newUniqueOut(containerSize, newLoad);
    func->opInsertAfter(newLoad,loadOp);
    func->opMarkNonPrinting(newLoad);
  }
  inVn->updateType(state.partialType);
  // Build the pull: input 0 is the container, input 1 the bit position, input 2 the number of bits
  PcodeOp *pullOp = func->newOp(3, modOp->getAddr());
  func->opSetOpcode(pullOp, (rec.dt->getMetatype() == TYPE_INT) ? CPUI_SPULL : CPUI_ZPULL);
  func->opSetInput(pullOp,inVn,0);
  func->opSetInput(pullOp,func->newConstant(4,rec.pos),1);
  func->opSetInput(pullOp,func->newConstant(4,rec.numBits),2);
  // When modOp is the reading op itself the pull must precede it, otherwise it follows modOp
  if (modOp != rec.readOp)
    func->opInsertAfter(pullOp, modOp);
  else
    func->opInsertBefore(pullOp, modOp);
  if (rec.leftShift != 0) {
    // The pulled value is shifted left by rec.leftShift before being written to readVn
    Varnode *shiftVn = func->newUniqueOut(containerSize, pullOp);
    PcodeOp *shiftOp = func->newOp(2, modOp->getAddr());
    func->opSetOpcode(shiftOp, CPUI_INT_LEFT);
    func->opSetInput(shiftOp,shiftVn,0);
    func->opSetInput(shiftOp,func->newConstant(4,rec.leftShift),1);
    func->opInsertAfter(shiftOp,pullOp);
    func->opSetOutput(shiftOp, rec.readVn);
  }
  else {
    func->opSetOutput(pullOp, rec.readVn);
  }

  // Assign a data-type to the direct output of the pull
  Varnode *pullOut = pullOp->getOut();
  if (pullOut->getType()->getMetatype() == TYPE_UNKNOWN) {
    Datatype *dt = func->getArch()->types->resizeInteger(rec.dt,pullOut->getSize());
    pullOut->updateType(dt);
  }
  else if (rec.dt->getMetatype() == TYPE_BOOL && pullOut->getSize() == 1 && rec.numBits == 1) {
    pullOut->updateType(rec.dt);
  }
  // Remove the op previously feeding readVn if its output is now missing or unread
  if (modOp != rec.readOp) {
    Varnode *outvn = modOp->getOut();
    if (outvn == (Varnode *)0 || outvn->hasNoDescend())
      func->opDestroyRecursive(modOp, state.deadScratch);
  }
  state.count += 1;
}

/// The first PullRecord at least must be for a comparison op.  If there are more than one,
/// the op is converted into a boolean expression with comparison for each record.
/// Then the constant value for each comparison is adjusted to match the PullRecord bitfield.
/// The PullRecords are \e not removed, but are converted to \e normal records so that
/// the applyRecord() method can create the ZPULL or SPULL ops.
+/// \param rec must be the first PullRecord in \b pullList +void BitFieldPullTransform::applyCompareRecord(const PullRecord &rec) + +{ + uintb origVal = rec.readOp->getIn(1)->getOffset(); + int4 num = 0; + list::iterator iter,enditer; + enditer = pullList.begin(); + while(enditer != pullList.end()) { // Gather pulls to the same compare + if ((*enditer).readOp != rec.readOp) + break; + ++enditer; + num += 1; + } + if (num > 1) { + OpCode opc = rec.readOp->code(); + OpCode combineCode = (opc == CPUI_INT_EQUAL) ? CPUI_BOOL_AND : CPUI_BOOL_OR; + Varnode *vn = rec.readOp->getIn(0); + PcodeOp *curCombine = rec.readOp; + func->opSetOpcode(curCombine,combineCode); + iter = pullList.begin(); + for(int4 i=0;inewOp(2,curCombine->getAddr()); + func->opSetOpcode(op, opc); + Varnode *boolVn = func->newUniqueOut(1, op); + func->opSetInput(op,vn,0); + func->opInsertBefore(op,curCombine); + if (i == 0) { + func->opSetInput(curCombine, boolVn, 0); + } + else if (i < num-1) { + PcodeOp *combineOp = func->newOp(2,curCombine->getAddr()); + func->opSetOpcode(combineOp,combineCode); + Varnode *bool2Vn = func->newUniqueOut(1,combineOp); + func->opSetInput(curCombine,bool2Vn,1); + func->opSetInput(combineOp,boolVn,0); + func->opInsertBefore(combineOp,curCombine); + curCombine = combineOp; + } + else { + func->opSetInput(curCombine,boolVn,1); + } + (*iter).readOp = op; + ++iter; + } + } + iter = pullList.begin(); + while(iter != enditer) { + PullRecord &subrec(*iter); + uintb val = origVal & subrec.mask; + val >>= subrec.leftShift; + if (subrec.dt->getMetatype() == TYPE_INT) + val = extend_signbit(val, subrec.numBits, subrec.readVn->getSize()); + Varnode *vn = func->newConstant(subrec.readVn->getSize(),val); + Datatype *dt = func->getArch()->types->resizeInteger(subrec.dt, subrec.readVn->getSize()); + vn->updateType(dt); + func->opSetInput(subrec.readOp,vn,1); // Adjust compare value + subrec.type = PullRecord::normal; // Convert to normal pull + subrec.leftShift = 0; // left shift has 
been accounted for + ++iter; + } +} + +/// Check that the output of the LOAD has only ZPULL, SPULL, or INSERT as a descendant. +/// If so mark the LOAD as non-printing. +/// \param loadOp is the LOAD +/// \return \b true if the LOAD was marked as non-printing +bool BitFieldPullTransform::foldLoad(PcodeOp *loadOp) const + +{ + Varnode *outvn = loadOp->getOut(); + list::const_iterator iter; + for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { + OpCode opc = (*iter)->code(); + if (opc != CPUI_ZPULL && opc != CPUI_SPULL && opc != CPUI_INSERT) + return false; + } + func->opMarkNonPrinting(loadOp); + return true; +} + +/// Check that the pointer into the given LOAD is defined by a PTRSUB and that all descendants of the pointer +/// are LOADs that have been absorbed. If so mark the PTRSUB as non-printing. +/// \param loadOp is the LOAD +void BitFieldPullTransform::foldPtrsub(PcodeOp *loadOp) const + +{ + Varnode *vn = loadOp->getIn(1); + if (!vn->isWritten()) return; + PcodeOp *ptrsub = vn->getDef(); + if (ptrsub->code() != CPUI_PTRSUB) return; + list::const_iterator iter; + for(iter=vn->beginDescend();iter!=vn->endDescend();++iter) { + PcodeOp *op = *iter; + if (op->code() != CPUI_LOAD) return; + if (!op->notPrinted()) return; // Check if LOAD has been absorbed + } + func->opMarkNonPrinting(ptrsub); +} + +/// \param f is the function +/// \param r is the root Varnode with a \e bitfield data-type +/// \param dt is the data-type containing bitfields (may be partial) +/// \param off is the byte offset into the data-type to associate with \b root +BitFieldPullTransform::BitFieldPullTransform(Funcdata *f,Varnode *r,Datatype *dt,int4 off) + : BitFieldTransform(f,dt,off) +{ + if (initialOffset == -1) + return; + root = r; + containerSize = root->getSize(); + if (root->isWritten() && root->getDef()->code() == CPUI_LOAD) + loadOp = root->getDef(); + else + loadOp = (PcodeOp *)0; + establishFields(root,false); // Don't follow holes +} + +/// Create a PullRecord at 
each pull point. +/// \return \b true if any PullRecords were created +bool BitFieldPullTransform::doTrace(void) + +{ + while(!workList.empty()) { + processForward(workList.front()); + workList.pop_front(); + } + if (pullList.empty()) + return false; + pullList.sort(); + list::iterator iter = pullList.begin(); + while(iter != pullList.end()) { + if ((*iter).type != PullRecord::normal) + iter = testCompareGroup(iter); + else + ++iter; + } + return !pullList.empty(); +} + +/// For each pull record, either: +/// - Redefine \b readVn with a ZPULL or SPULL. Delete the original op defining \b readVn +/// - Create a Varnode for a specific \b readOp that effectively holds the pulled value +void BitFieldPullTransform::apply(void) + +{ + TransformState state; + state.count = 0; + state.partialType = buildPartialType(); + while(!pullList.empty()) { + PullRecord &rec(pullList.front()); + if (rec.type == PullRecord::equal) { + applyCompareRecord(rec); + } + else { + applyRecord(rec,state); + pullList.pop_front(); + } + } + if (loadOp != (PcodeOp *)0) { + if (foldLoad(loadOp)) + foldPtrsub(loadOp); + } +} + +void RuleBitFieldStore::getOpList(vector &oplist) const + +{ + oplist.push_back(CPUI_STORE); +} + +int4 RuleBitFieldStore::applyOp(PcodeOp *op,Funcdata &data) + +{ + Datatype *ptr = op->getIn(1)->getTypeReadFacing(op); + int4 off; + Datatype *dt = ptr->getPtrInto(off); + if (dt == (Datatype *)0) return 0; + if (!dt->hasBitfields()) return 0; + Varnode *vn = op->getIn(2); + if (vn->isWritten() && vn->getDef()->code() == CPUI_INSERT) return 0; + BitFieldInsertTransform transform(&data,op,dt,off); + if (!transform.doTrace()) + return 0; + transform.apply(); + return 1; +} + +void RuleBitFieldOut::getOpList(vector &oplist) const + +{ + uint4 list[]={ CPUI_COPY, CPUI_INT_EQUAL, CPUI_INT_NOTEQUAL, CPUI_INT_SLESS, CPUI_INT_SLESSEQUAL, + CPUI_INT_LESS, CPUI_INT_LESSEQUAL, CPUI_INT_ZEXT, CPUI_INT_SEXT, CPUI_INT_ADD, CPUI_INT_CARRY, + CPUI_INT_SCARRY, CPUI_INT_XOR, CPUI_INT_AND, 
CPUI_INT_OR, CPUI_INT_LEFT, CPUI_INT_RIGHT, + CPUI_INT_SRIGHT, CPUI_INT_MULT, CPUI_BOOL_NEGATE, CPUI_BOOL_XOR, CPUI_BOOL_AND, CPUI_BOOL_OR, + CPUI_FLOAT_EQUAL, CPUI_FLOAT_NOTEQUAL, CPUI_FLOAT_LESS, CPUI_FLOAT_LESSEQUAL, CPUI_FLOAT_NAN, + CPUI_INDIRECT, CPUI_SUBPIECE }; + oplist.insert(oplist.end(),list,list+30); +} + +int4 RuleBitFieldOut::applyOp(PcodeOp *op,Funcdata &data) + +{ + Varnode *outvn = op->getOut(); + Datatype *dt = outvn->getTypeDefFacing(); + if (!dt->hasBitfields()) return 0; + BitFieldInsertTransform transform(&data,op,dt,0); + if (!transform.doTrace()) + return 0; + transform.apply(); + return 1; +} + +void RuleBitFieldLoad::getOpList(vector &oplist) const + +{ + oplist.push_back(CPUI_LOAD); +} + +int4 RuleBitFieldLoad::applyOp(PcodeOp *op,Funcdata &data) + +{ + Datatype *ptr = op->getIn(1)->getTypeReadFacing(op); + int4 off; + Datatype *dt = ptr->getPtrInto(off); + if (dt == (Datatype *)0) return 0; + if (!dt->hasBitfields()) return 0; + if (op->notPrinted()) return 0; // LOAD visited before + BitFieldPullTransform transform(&data,op->getOut(),dt,off); + if (!transform.doTrace()) + return 0; + transform.apply(); + return 1; +} + +void RuleBitFieldIn::getOpList(vector &oplist) const + +{ + uint4 list[]={ CPUI_COPY, + CPUI_INT_EQUAL, CPUI_INT_NOTEQUAL, CPUI_INT_SLESS, CPUI_INT_SLESSEQUAL, CPUI_INT_LESS, CPUI_INT_LESSEQUAL, + CPUI_INT_ZEXT, CPUI_INT_SEXT, + CPUI_INT_ADD, CPUI_INT_NEGATE, + CPUI_INT_AND, CPUI_INT_LEFT, CPUI_INT_RIGHT, CPUI_INT_SRIGHT, CPUI_INT_MULT, + CPUI_SUBPIECE }; + oplist.insert(oplist.end(),list,list+17); +} + +int4 RuleBitFieldIn::applyOp(PcodeOp *op,Funcdata &data) + +{ + Varnode *invn = op->getIn(0); + Datatype *dt = invn->getTypeReadFacing(op); + if (!dt->hasBitfields()) return 0; + BitFieldPullTransform transform(&data,invn,dt,0); + if (!transform.doTrace()) + return 0; + transform.apply(); + return 1; +} + +/// \brief Perform transforms involving the expression: `field >> #c` +/// +/// \param data is the function +/// 
\param rightOp is the INT_RIGHT or INT_SRIGHT op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if a transform was performed, 0 otherwise +int4 RulePullAbsorb::absorbRight(Funcdata &data,PcodeOp *rightOp,PcodeOp *pullOp) + +{ + list::const_iterator iter; + Varnode *outvn = rightOp->getOut(); + for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { + PcodeOp *readOp = *iter; + if (readOp->code() == CPUI_INT_AND) { + int4 res = absorbRightAndCompZero(data,rightOp,readOp,pullOp); + if (res != 0) return res; + } + } + return 0; +} + +/// \brief Perform transform: `((sfield >> #n) & #1) == #0 => #0 <= sfield` +/// +/// Perform the variant: `((sfield >> #n) & #1) != #0 => sfield < #0` +/// \param data is the function +/// \param rightOp is the INT_RIGHT or INT_SRIGHT op +/// \param andOp is the INT_AND +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if a transform was performed, 0 otherwise +int4 RulePullAbsorb::absorbRightAndCompZero(Funcdata &data,PcodeOp *rightOp,PcodeOp *andOp,PcodeOp *pullOp) + +{ + if (pullOp->code() != CPUI_SPULL) return 0; + Varnode *cvn = rightOp->getIn(1); + if (!cvn->isConstant()) return 0; + int4 sa = cvn->getOffset(); + int4 numbits = pullOp->getIn(2)->getOffset(); + if (numbits -1 != sa) return 0; // Check that shift puts sign bit into least sig position + if (!andOp->getIn(1)->constantMatch(1)) return 0; + list::const_iterator iter; + Varnode *outvn = andOp->getOut(); + for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { + PcodeOp *readOp = *iter; + OpCode opc = readOp->code(); + if (opc != CPUI_INT_EQUAL && opc != CPUI_INT_NOTEQUAL) continue; + if (!readOp->getIn(1)->constantMatch(0)) continue; + Varnode *vn = pullOp->getOut(); + if (opc == CPUI_INT_EQUAL) { + data.opSetOpcode(readOp, CPUI_INT_LESSEQUAL); + Varnode *zvn = readOp->getIn(1); + data.opSetInput(readOp,vn,1); + data.opSetInput(readOp,zvn,0); + } + else { + data.opSetOpcode(readOp,CPUI_INT_SLESS); + data.opSetInput(readOp,vn,0); 
+ } + data.destroyVarnodeRecursive(outvn); + return 1; + } + return 0; +} + +/// \brief Perform transforms involving the expression: `field << #c` +/// +/// \param data is the function +/// \param leftOp is the INT_LEFT op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if a transform was performed, 0 otherwise +int4 RulePullAbsorb::absorbLeft(Funcdata &data,PcodeOp *leftOp,PcodeOp *pullOp) + +{ + list::const_iterator iter; + Varnode *outvn = leftOp->getOut(); + for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { + PcodeOp *readOp = *iter; + int4 res = 0; + OpCode opc = readOp->code(); + if (opc == CPUI_INT_SLESS) + res = absorbCompare(data,readOp,leftOp,pullOp); + else if (opc == CPUI_INT_RIGHT) + res = absorbLeftRight(data,readOp,leftOp,pullOp); + else if (opc == CPUI_INT_AND) + res = absorbLeftAnd(data,readOp,leftOp,pullOp); + if (res != 0) return res; + } + return 0; +} + +/// \brief Perform the transform: `(field << #c) >> #d => field >> (#d-#c)` +/// +/// \param data is the function +/// \param rightOp is the INT_RIGHT op +/// \param leftOp is the INT_LEFT op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if a transform was performed, 0 otherwise +int4 RulePullAbsorb::absorbLeftRight(Funcdata &data,PcodeOp *rightOp,PcodeOp *leftOp,PcodeOp *pullOp) + +{ + Varnode *leftcvn = leftOp->getIn(1); + if (!leftcvn->isConstant()) return 0; + Varnode *rightcvn = rightOp->getIn(1); + if (!rightcvn->isConstant()) return 0; + int4 bitsize = pullOp->getIn(2)->getOffset(); + Varnode *invn = pullOp->getIn(0); + int4 containerSize = invn->getSize() * 8; + int4 leftshift = leftcvn->getOffset(); + int4 rightshift = rightcvn->getOffset(); + if (leftshift + bitsize > containerSize) return 0; // Check if left shift destroys field data + int4 sa = rightshift - leftshift; + if (sa == 0) { + data.totalReplace(rightOp->getOut(),pullOp->getOut()); + data.destroyVarnodeRecursive(rightOp->getOut()); + } + else if (sa > 0) { // Right shift is bigger 
than left + data.opSetInput(rightOp, data.newConstant(rightcvn->getSize(),sa), 1); + data.opSetInput(rightOp, pullOp->getOut(),0); + data.destroyVarnodeRecursive(leftOp->getOut()); + } + else { // Left shift is bigger than right + data.opSetOpcode(rightOp, CPUI_INT_LEFT); + data.opSetInput(rightOp, data.newConstant(rightcvn->getSize(),-sa), 1); + data.opSetInput(rightOp, pullOp->getOut(),0); + data.destroyVarnodeRecursive(leftOp->getOut()); + } + return 1; +} + +/// \brief Perform the transform: `((field << #c) & #b) == #d => (field & #b>>c) == #d>>c` +/// +/// \param data is the function +/// \param andOp is the INT_AND op +/// \param leftOp is the INT_LEFT op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if a transform was performed, 0 otherwise +int4 RulePullAbsorb::absorbLeftAnd(Funcdata &data,PcodeOp *andOp,PcodeOp *leftOp,PcodeOp *pullOp) + +{ + Varnode *shiftAmount = leftOp->getIn(1); + if (!shiftAmount->isConstant()) return 0; + int4 sa = shiftAmount->getOffset(); + if (sa < 0 || sa >= sizeof(uintb)*8) return 0; + Varnode *maskVn = andOp->getIn(1); + if (!maskVn->isConstant()) return 0; + uintb mask = maskVn->getOffset(); + list::const_iterator iter; + Varnode *outvn = andOp->getOut(); + for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { + PcodeOp *readOp = *iter; + OpCode opc = readOp->code(); + if (opc == CPUI_INT_EQUAL || opc == CPUI_INT_NOTEQUAL) { + Varnode *compVal = readOp->getIn(1); + if (!compVal->isConstant()) continue; + uintb val = compVal->getOffset() >> sa; + if (val << sa != compVal->getOffset()) continue; + mask >>= sa; + Varnode *newAnd = data.newConstant(maskVn->getSize(), mask); + newAnd->updateType(maskVn->getType()); + data.opSetInput(andOp,newAnd,1); + if (val != compVal->getOffset()) { + Varnode *newVal = data.newConstant(compVal->getSize(),val); + newVal->updateType(compVal->getType()); + data.opSetInput(readOp,newVal,1); + } + data.opSetInput(andOp,leftOp->getIn(0),0); + 
data.destroyVarnodeRecursive(leftOp->getOut()); + return 1; + } + } + return 0; +} + +/// \brief Perform transform: `field & #signbit == #0 => field < 0` +/// +/// \param data is the function +/// \param andOp is the INT_AND op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if a transform was performed, 0 otherwise +int4 RulePullAbsorb::absorbAnd(Funcdata &data,PcodeOp *andOp,PcodeOp *pullOp) + +{ + Varnode *maskVn = andOp->getIn(1); + if (!maskVn->isConstant()) return 0; + Varnode *vn = pullOp->getOut(); + if (pullOp->code() != CPUI_SPULL) return 0; // Not signed + int4 bitsize = (int4)pullOp->getIn(2)->getOffset(); + uintb matchVal = 1; + matchVal <<= (bitsize-1); // Mask for sign-bit + if (matchVal != maskVn->getOffset()) return 0; + list::const_iterator iter; + Varnode *outvn = andOp->getOut(); + for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { + PcodeOp *readOp = *iter; + OpCode opc = readOp->code(); + if (opc == CPUI_INT_EQUAL || opc == CPUI_INT_NOTEQUAL) { + if (!readOp->getIn(1)->constantMatch(0)) continue; + Varnode *newZero = data.newConstant(vn->getSize(),0); + Datatype *dt = data.getArch()->types->resizeInteger(vn->getType(),vn->getSize()); + newZero->updateType(dt); + if (opc == CPUI_INT_EQUAL) { + data.opSetOpcode(readOp, CPUI_INT_SLESSEQUAL); + data.opSetInput(readOp,newZero,0); + data.opSetInput(readOp,vn,1); + } + else { + data.opSetOpcode(readOp, CPUI_INT_SLESS); + data.opSetInput(readOp,vn,0); + data.opSetInput(readOp,newZero,1); + } + data.destroyVarnodeRecursive(andOp->getOut()); + return 1; + } + } + return 0; +} + +/// \brief Perform transforms involving comparisons: INT_LESS, INT_SLESS +/// +/// Perform transforms: +/// - `(boolfield << #c) s< #0 => boolfield` +/// - `#0 s< (boolfield << #c) => !boolfield` +/// - `(field << #c) < (#d<<#c) => field < #d` +/// - `(#d<<#c) < (field << #c) => #d < field` +/// +/// \param data is the function +/// \param compOp is the INT_LESS or INT_SLESS op +/// \param leftOp is 
the INT_LEFT op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if transform performed, 0 otherwise +int4 RulePullAbsorb::absorbCompare(Funcdata &data,PcodeOp *compOp,PcodeOp *leftOp,PcodeOp *pullOp) + +{ + int4 sa = 0; + if (leftOp != (PcodeOp *)0) { + Varnode *cvn = leftOp->getIn(1); + if (!cvn->isConstant()) return 0; + sa = cvn->getOffset(); + } + int4 numbits = pullOp->getIn(2)->getOffset(); + Varnode *invn = pullOp->getIn(0); + int4 sz = invn->getSize() * 8; + if (numbits + sa != sz) // Verify that high bit of field is shifted into sign-bit + return 0; + Varnode *inVn = (leftOp == (PcodeOp *)0) ? pullOp->getOut() : leftOp->getOut(); + Varnode *lessVn0 = compOp->getIn(0); + Varnode *lessVn1 = compOp->getIn(1); + if (compOp->code() == CPUI_INT_SLESS) { + if (numbits == 1 && lessVn0 == inVn && lessVn1->isConstant() && lessVn1->getOffset() == 0) { + Varnode *oldVn = compOp->getOut(); + data.totalReplace(oldVn,pullOp->getOut()); + data.destroyVarnodeRecursive(oldVn); + return 1; + } + if (numbits == 1 && lessVn1 == inVn && lessVn0->isConstant() + && lessVn0->getOffset() == calc_mask(inVn->getSize())) { + data.opRemoveInput(compOp,0); + data.opSetOpcode(compOp,CPUI_BOOL_NEGATE); + data.opSetInput(compOp,pullOp->getOut(),0); + data.destroyVarnodeRecursive(inVn); + return 1; + } + } + uintb mask = 1; + mask = (mask << sa) -1; + if (sa > 0 && sa < 8*sizeof(uintb) && inVn == lessVn0 && lessVn1->isConstant()) { + uintb origVal = lessVn1->getOffset(); + uintb lowBits = mask & origVal; + if (lowBits == 0 || lowBits == 1) { + uintb newVal; + if (lowBits == 1) { + newVal = (origVal - 1) >> sa; // Convert to constant for LESSEQUAL + newVal = (newVal + 1) & calc_mask(inVn->getSize()); // Convert back to LESS after shift + } + else + newVal = origVal >> sa; + data.opSetInput(compOp,pullOp->getOut(),0); + data.opSetInput(compOp,data.newConstant(inVn->getSize(), newVal),1); + data.destroyVarnodeRecursive(inVn); + return 1; + } + } + if (sa > 0 && sa < 8*sizeof(uintb) 
&& inVn == lessVn1 && lessVn0->isConstant()) { + uintb origVal = lessVn0->getOffset(); + uintb lowBits = mask & origVal; + if (lowBits == 0 || lowBits == mask) { + uintb newVal; + if (lowBits == mask) { + newVal = (origVal + 1) >> sa; // Convert to constant for LESSEQUAL + newVal = (newVal - 1) & calc_mask(inVn->getSize()); // Convert back to LESS after shift + } + else + newVal = origVal >> sa; + data.opSetInput(compOp,pullOp->getOut(),1); + data.opSetInput(compOp,data.newConstant(inVn->getSize(), newVal),0); + data.destroyVarnodeRecursive(inVn); + return 1; + } + } + return 0; +} + +/// \brief Perform transform: `y = SEXT( SPULL( x, #p, #n ) ) => y = SPULL( x, #p, #n )` +/// +/// Also transform: `y = ZEXT( ZPULL( x, #p, #n ) ) => y = ZPULL( x, #p, #n )` +/// +/// \param data is the function +/// \param extOp is the INT_SEXT or INT_ZEXT op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if transform performed, 0 otherwise +int4 RulePullAbsorb::absorbExt(Funcdata &data,PcodeOp *extOp,PcodeOp *pullOp) + +{ + bool pullSigned = pullOp->code() == CPUI_SPULL; + bool extSigned = extOp->code() == CPUI_INT_SEXT; + if (extSigned != pullSigned) return 0; + Varnode *vn = extOp->getIn(0); + if (vn->loneDescend() != extOp) return 0; + data.opSetOpcode(extOp, pullOp->code()); + data.opSetInput(extOp,pullOp->getIn(0),0); + Varnode *posVn = pullOp->getIn(1); + Varnode *numVn = pullOp->getIn(2); + data.opInsertInput(extOp,posVn,1); + data.opInsertInput(extOp,numVn,2); + data.destroyVarnodeRecursive(vn); + return 1; +} + +/// \brief Perform transform: `y = SUB( PULL( x, #p, #n ) ) => y = PULL( x, #p, #n )` +/// +/// \param data is the function +/// \param subOp is the SUBPIECE op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if transform performed, 0 otherwise +int4 RulePullAbsorb::absorbSubpiece(Funcdata &data,PcodeOp *subOp,PcodeOp *pullOp) + +{ + if (subOp->getIn(1)->getOffset() != 0) return 0; + int4 bitsize = (int4)pullOp->getIn(2)->getOffset(); + 
Varnode *outvn = subOp->getOut(); + if (bitsize > 8*outvn->getSize()) return 0; + Varnode *vn = subOp->getIn(0); + if (vn->loneDescend() != subOp) return 0; + data.opSetOpcode(subOp, pullOp->code()); + data.opSetInput(subOp, pullOp->getIn(0), 0); + Varnode *posVn = pullOp->getIn(1); + Varnode *numVn = pullOp->getIn(2); + data.opSetInput(subOp,posVn,1); + data.opInsertInput(subOp,numVn,2); + data.destroyVarnodeRecursive(vn); + return 1; +} + +/// \brief Perform transform: `ZPULL( x, #p, #1) != #0 => ZPULL(x, #p, #1)` +/// +/// Also transform the variant `ZPULL( x, #p, #1) == #0 => !ZPULL(x, #p, #1)` +/// \param data is the function +/// \param compOp is the INT_EQUAL or INT_NOTEQUAL op +/// \param pullOp is the ZPULL or SPULL op +/// \return 1 if transform performed, 0 otherwise +int4 RulePullAbsorb::absorbCompZero(Funcdata &data,PcodeOp *compOp,PcodeOp *pullOp) + +{ + Varnode *zvn = compOp->getIn(1); + if (!zvn->constantMatch(0)) return 0; + int4 bitsize = (int4)pullOp->getIn(2)->getOffset(); + if (bitsize != 1) return 0; + Varnode *vn = compOp->getIn(0); + if (vn->loneDescend() != compOp) return 0; + if (vn->isAddrTied()) return 0; + if (pullOp->code() == CPUI_SPULL) return 0; + const TypeBitField *field = BitFieldExpression::getPullField(pullOp); + if (field == (const TypeBitField *)0 || field->type->getMetatype() != TYPE_BOOL) + return 0; + if (compOp->code() == CPUI_INT_EQUAL) { + if (vn->getSize() > 1) { + Address smalladdr = vn->getAddr(); + if (vn->getSpace()->isBigEndian()) + smalladdr = smalladdr + (vn->getSize() -1); + data.opUnsetOutput(pullOp); + Varnode *newVn = data.newVarnodeOut(1, smalladdr, pullOp); + Datatype *dt = data.getArch()->types->getBase(1,TYPE_BOOL); + newVn->updateType(dt); + data.opSetInput(compOp,newVn,0); + data.deleteVarnode(vn); + } + data.opSetOpcode(compOp,CPUI_BOOL_NEGATE); + data.opRemoveInput(compOp, 1); + } + else { + data.opSetOpcode(compOp,pullOp->code()); + data.opSetInput(compOp, pullOp->getIn(0), 0); + Varnode *posVn = 
pullOp->getIn(1); + Varnode *numVn = pullOp->getIn(2); + data.opSetInput(compOp,posVn,1); + data.opInsertInput(compOp,numVn,2); + data.destroyVarnodeRecursive(vn); + } + return 1; +} + +void RulePullAbsorb::getOpList(vector &oplist) const + +{ + oplist.push_back(CPUI_ZPULL); + oplist.push_back(CPUI_SPULL); +} + +int4 RulePullAbsorb::applyOp(PcodeOp *op,Funcdata &data) + +{ + list::const_iterator iter; + Varnode *outvn = op->getOut(); + for(iter=outvn->beginDescend();iter!=outvn->endDescend();++iter) { + PcodeOp *readOp = *iter; + int4 res = 0; + switch(readOp->code()) { + case CPUI_INT_RIGHT: + case CPUI_INT_SRIGHT: + res = absorbRight(data,readOp,op); + break; + case CPUI_INT_LEFT: + res = absorbLeft(data,readOp,op); + break; + case CPUI_INT_AND: + res = absorbAnd(data,readOp,op); + break; + case CPUI_INT_SLESS: + case CPUI_INT_LESS: + res = absorbCompare(data,readOp,(PcodeOp *)0,op); + break; + case CPUI_INT_ZEXT: + case CPUI_INT_SEXT: + res = absorbExt(data, readOp, op); + break; + case CPUI_SUBPIECE: + res = absorbSubpiece(data, readOp, op); + break; + case CPUI_INT_EQUAL: + case CPUI_INT_NOTEQUAL: + res = absorbCompZero(data, readOp, op); + break; + default: + break; + } + if (res != 0) return res; + } + return 0; +} + +/// If the Varnode is shifted, return the Varnode stripped of the shift. 
+/// \param vn is the Varnode to test +/// \param sa is the given shift amount +/// \return the stripped Varnode or null +Varnode *RuleInsertAbsorb::leftShiftVarnode(Varnode *vn,int sa) + +{ + if (!vn->isWritten()) return (Varnode *)0; + PcodeOp *multOp = vn->getDef(); + Varnode *multVal = multOp->getIn(1); + if (!multVal->isConstant()) return (Varnode *)0; + uintb matchVal; + if (multOp->code() == CPUI_INT_MULT) { + matchVal = 1; + matchVal <<= sa; + } + else if (multOp->code() == CPUI_INT_LEFT) { + matchVal = sa; + } + else + return (Varnode *)0; + if (multVal->getOffset() != matchVal) return (Varnode *)0; + return multOp->getIn(0); +} + +/// \brief Perform the transform: `INSERT( x & #mask, #p, #n ) => INSERT( x, #p, #n )` +/// +/// \param data is the function +/// \param andOp is the INT_AND op +/// \param insertOp is the INSERT op +/// \return 1 if transform performed, 0 otherwise +int4 RuleInsertAbsorb::absorbAnd(Funcdata &data,PcodeOp *andOp,PcodeOp *insertOp) + +{ + Varnode *cvn = andOp->getIn(1); + if (!cvn->isConstant()) return 0; + uintb val = cvn->getOffset(); + uintb mask = InsertExpression::getLSBMask(insertOp); + if ((mask & val) != mask) return 0; // AND mask must be of least significant bits that get INSERTed + data.opSetInput(insertOp,andOp->getIn(0),1); + data.destroyVarnodeRecursive(andOp->getOut()); + return 1; +} + +/// \brief Perform the transform: `INSERT( (x << #c) >> #c, #p, #n ) => INSERT(x, #p, #n )` +/// +/// Also transform the variant: `INSERT( SUB( x << #c, #0) >> #c, #p, #n ) => INSERT(x, #p, #n)` +int4 RuleInsertAbsorb::absorbRightLeft(Funcdata &data,PcodeOp *nextOp,PcodeOp *rightOp,PcodeOp *insertOp) + +{ + PcodeOp *leftOp; + if (nextOp->code() == CPUI_INT_LEFT) + leftOp = nextOp; + else if (nextOp->code() == CPUI_SUBPIECE) { + if (nextOp->getIn(1)->getOffset() != 0) + return 0; + Varnode *subin = nextOp->getIn(0); + if (!subin->isWritten()) return 0; + leftOp = subin->getDef(); + if (leftOp->code() != CPUI_INT_LEFT) return 0; + } 
+ else + return 0; + Varnode *lvn = leftOp->getIn(1); + if (!lvn->isConstant()) return 0; + Varnode *rvn = rightOp->getIn(1); + if (!rvn->isConstant()) return 0; + int4 lsa = (int4)lvn->getOffset(); + int4 rsa = (int4)rvn->getOffset(); + if (lsa != rsa) return 0; + int4 bitsize = (int4)insertOp->getIn(3)->getOffset(); + if (bitsize > insertOp->getIn(1)->getSize() * 8 - lsa) // Shifts cancel unless bitsize exceeds number of bits preserved + return 0; + data.opSetInput(insertOp,leftOp->getIn(0),1); + data.destroyVarnodeRecursive(rightOp->getOut()); + return 1; +} + +/// \brief Perform the transform: `field = (a * #c + b * #c) >> #n => field = a + b` +/// +/// \param data is the function +/// \param rightOp is the INT_RIGHT or INT_SRIGHT op +/// \param addOp is the INT_ADD op +/// \param insertOp is the INSERT op +/// \return 1 if transform performed, 0 otherwise +int4 RuleInsertAbsorb::absorbShiftAdd(Funcdata &data,PcodeOp *rightOp,PcodeOp *addOp,PcodeOp *insertOp) + +{ + int4 sa = (int4)rightOp->getIn(1)->getOffset(); + if (sa <=0 || sa >= 8*sizeof(uintb)) + return 0; + Varnode *vn0 = leftShiftVarnode(addOp->getIn(0),sa); + if (vn0 == (Varnode *)0) return 0; + Varnode *vn1; + Varnode *addVn1 = addOp->getIn(1); + if (addVn1->isConstant()) { + uintb addVal = addVn1->getOffset(); + addVal >>= sa; + if ((addVal << sa) != addVn1->getOffset()) return 0; + vn1 = data.newConstant(vn0->getSize(), addVal); + vn1->updateType(addVn1->getType()); + } + else { + vn1 = leftShiftVarnode(addVn1,sa); + if (vn1 == (Varnode *)0) return 0; + } + int4 bitsize = (int4)insertOp->getIn(3)->getOffset(); + if (bitsize > vn0->getSize() * 8 - sa) // Check that none of the carry bits make it into field + return 0; + data.opSetOpcode(rightOp, CPUI_INT_ADD); + data.opSetInput(rightOp, vn0, 0); + data.opSetInput(rightOp, vn1, 1); + data.destroyVarnodeRecursive(addOp->getOut()); + return 1; +} + +/// \brief Perform transforms like: `INSERT( (x & #0xff) + y ) => INSERT( x + y )` +/// +/// The op 
feeding the INSERT can be any operation where more significant bits of the input do not affect the less significant bits. +/// \param data is the function +/// \param baseOp is one of INT_ADD, INT_AND, INT_OR, or INT_XOR. +/// \param insertOp is the INSERT op +/// \return 1 if transform performed, 0 otherwise +int4 RuleInsertAbsorb::absorbNestedAnd(Funcdata &data,PcodeOp *baseOp,PcodeOp *insertOp) + +{ + if (baseOp->getOut()->loneDescend() != insertOp) return 0; // Result only used by INSERT + for(int4 slot=0;slot<2;++slot) { + Varnode *vn = baseOp->getIn(slot); + if (!vn->isWritten()) continue; + PcodeOp *andOp = vn->getDef(); + if (andOp->code() != CPUI_INT_AND) continue; + Varnode *cvn = andOp->getIn(1); + if (!cvn->isConstant()) continue; + uintb mask = coveringmask(cvn->getOffset()); + if (mask != cvn->getOffset()) continue; + if ((mask & 1)==0) continue; // Masking off least significant bits + int4 count = popcount(mask); + int4 bitsize = (int4)insertOp->getIn(3)->getOffset(); + if (count < bitsize) continue; // INSERT masks off fewer bits, so AND still has an effect + data.opSetInput(baseOp,andOp->getIn(0),slot); + data.destroyVarnodeRecursive(andOp->getOut()); + return 1; + } + return 0; +} + +void RuleInsertAbsorb::getOpList(vector &oplist) const + +{ + oplist.push_back(CPUI_INSERT); +} + +int4 RuleInsertAbsorb::applyOp(PcodeOp *op,Funcdata &data) + +{ + Varnode *inVn = op->getIn(1); + if (!inVn->isWritten()) return 0; + PcodeOp *inOp = inVn->getDef(); + Varnode *vn; + PcodeOp *nextOp; + OpCode opc; + switch(inOp->code()) { + case CPUI_SUBPIECE: + if (inOp->getIn(1)->getOffset() != 0) return 0; + data.opSetInput(op,inOp->getIn(0),1); + data.destroyVarnodeRecursive(inVn); + return 1; + case CPUI_INT_RIGHT: + case CPUI_INT_SRIGHT: + if (!inOp->getIn(1)->isConstant()) return 0; + vn = inOp->getIn(0); + if (!vn->isWritten()) return 0; + nextOp = vn->getDef(); + opc = nextOp->code(); + if (opc == CPUI_INT_ADD) + return absorbShiftAdd(data, inOp, nextOp, op); + 
else if (opc == CPUI_INT_LEFT || opc == CPUI_SUBPIECE) + return absorbRightLeft(data, nextOp, inOp, op); + break; + case CPUI_INT_AND: + return absorbAnd(data,inOp,op); + case CPUI_INT_ADD: + case CPUI_INT_OR: + case CPUI_INT_XOR: + case CPUI_INT_MULT: + return absorbNestedAnd(data, inOp, op); + default: + break; + } + return 0; +} + +} // End namespace ghidra diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/bitfield.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/bitfield.hh new file mode 100644 index 0000000000..4905fc5f93 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/bitfield.hh @@ -0,0 +1,266 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/// \file bitfield.hh +/// \brief Classes for transforming bitfield expressions + +#ifndef __BITFIELD_HH__ +#define __BITFIELD_HH__ + +#include "ruleaction.hh" + +namespace ghidra { + +/// \brief Description of the bitfields covered by a Varnode +class BitFieldNodeState { +public: + BitRange bitsUsed; ///< Bits being used from \b this Varnode + BitRange bitsField; ///< Bits from bit-field being followed + Varnode *node; ///< Varnode holding bitfields + const TypeBitField *field; ///< Bit-field being followed + int4 origLeastSigBit; ///< Original position of least significant bit + bool isSignExtended; ///< Bitfield has been sign-extended into node + BitFieldNodeState(const BitRange &used,Varnode *vn,const TypeBitField *fld); ///< Constructor to follow a field + BitFieldNodeState(const BitRange &used,Varnode *vn,int4 leastSig,int4 numBits); ///< Constructor for a hole + BitFieldNodeState(const BitFieldNodeState &copy,const BitRange &newField,Varnode *vn,bool sgnExt); ///< Copy constructor with new \b bitsField + /// \brief Can the current Varnode be treated as the isolated bitfield + bool isFieldAligned(void) const { return (bitsField.leastSigBit == 0 && bitsField.numBits == bitsUsed.numBits); } + /// \brief Return \b true if the signedness of the field matches the extension used to extract it + bool doesSignExtensionMatch(void) const { return isSignExtended == (field->type->getMetatype() == TYPE_INT); } +}; + +/// \brief Class for transforming bitfield expressions +/// +/// For both insertion and extraction, establish the bitfields that need to be traced. 
+class BitFieldTransform { +protected: + Funcdata *func; ///< The containing function + TypeStruct *parentStruct; ///< Structure owning the bitfields + list workList; ///< Fields that are being followed + int4 initialOffset; ///< Byte offset into parent structure + int4 containerSize; ///< Size of Varnode containing bitfields + bool isBigEndian; ///< Endianness associated with bitfields + void establishFields(Varnode *vn,bool followHoles); ///< Build worklist for each bitfield overlapped by given Varnode + Datatype *buildPartialType(void); ///< Build the (partial) data-type associated with the root bitfield container + static bool findOverwrite(Varnode *vn,BlockBasic *bl,const BitRange &range); +public: + BitFieldTransform(Funcdata *f,Datatype *dt,int4 off); ///< Constructor setting up basic info about bitfield data-type +}; + +/// \brief Class that converts bitfield insertion expressions into explicit INSERT operations +/// +/// The doTrace() method traces backward from a root Varnode that contains bitfields to find points that +/// can be treated as a value written to an individual bitfield, creating an InsertRecord at each point. +/// If all bits of the Varnode are accounted for, the apply() method transforms expressions based on any InsertRecord. 
+class BitFieldInsertTransform : public BitFieldTransform { + /// \brief Info about a Varnode that can be treated as a write to a single bitfield + class InsertRecord { + friend class BitFieldInsertTransform; + Varnode *vn; ///< Value being inserted (or null) + uintb constVal; ///< Constant value being inserted + Datatype *dt; ///< Data-type associated with value + int4 pos; ///< Position being inserted to + int4 numBits; ///< Number of bits being inserted + int4 shiftAmount; ///< Amount that value needs to be right shifted + public: + InsertRecord(Varnode *v,Datatype *d,int4 p,int4 sz,int4 sa) { vn = v; dt = d; constVal = 0; pos = p; numBits = sz; shiftAmount = sa; } ///< Constructor for Varnode + InsertRecord(uintb val,Datatype *d,int4 p,int4 sz) { vn = (Varnode *)0; dt = d; constVal = val; pos = p; numBits = sz; shiftAmount = 0; } ///< Constructor for constant + }; + PcodeOp *finalWriteOp; ///< STORE to bitfields or op outputting to bitfields + Varnode *originalValue; ///< Value prior to insertion + Varnode *mappedVn; ///< Bitfield container written to + list insertList; ///< Insertion actions + bool verifyLoadStoreOriginalValue(uintb mask) const; ///< Test for other STORE ops interfering with the \e original \e value + bool verifyMappedOriginalValue(uintb mask) const; ///< Test for other ops interfering with the mapped \e original \e value + uintb constructOriginalValueMask(void) const; ///< Calculate mask where 1 bits represent all the bits being preserved + bool verifyOriginalValueBits(void) const; ///< Do final check that unINSERTed bits come from the \e original \e value + bool isOverwrittenPartial(const BitFieldNodeState &state); ///< Is given state a partial field that is overwritten later + bool checkPulledOriginalValue(BitFieldNodeState &state); ///< Is this an original value defined by ZPULL or SPULL + bool checkOriginalBase(Varnode *vn); ///< Check if the given Varnode is the original LOAD or mapped value + bool isOriginalValue(BitFieldNodeState 
&state); ///< Is the given Varnode a (partial) copy of the original value being INSERTed into + bool addConstantWrite(BitFieldNodeState &state); ///< Create InsertRecord writing a constant into the field + bool addZeroOut(BitFieldNodeState &state); ///< Create InsertRecord writing 0 into the field + void addFieldWrite(BitFieldNodeState &state); ///< Create InsertRecord writing Varnode into the field + bool handleAndBack(BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield back through INT_AND with a mask + bool handleOrBack(BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield back through one branch of INT_OR + bool handleAddBack(BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield back through one branch of INT_ADD + bool handleLeftBack(BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield back through INT_LEFT by a constant + bool handleRightBack(BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield back through INT_SRIGHT by a constant + bool handleZextBack(BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield back through INT_ZEXT + bool handleMultBack(BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield back through INT_MULT + bool handleSubpieceBack(BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield back through SUBPIECE + bool testCallOriginal(BitFieldNodeState &state,PcodeOp *op); ///< Test if a call is producing the \e original \e value + bool processBackward(BitFieldNodeState &state); ///< Follow field back, creating an InsertRecord if possible + PcodeOp *setInsertInputs(PcodeOp *op,const InsertRecord &rec); ///< Fill-in INSERT inputs based on given InsertRecord + void addFieldShift(PcodeOp *insertOp,const InsertRecord &rec); ///< Create any shift p-code op specified by given InsertRecord + bool foldLoad(PcodeOp *loadOp) const; ///< Try to mark LOAD as part of INSERT + void foldPtrsub(PcodeOp *loadOp) const; ///< Try to mark PTRSUB as part of INSERT + void checkRedundancy(const InsertRecord &rec); 
///< Check if value is getting INSERTed twice and remove second +public: + BitFieldInsertTransform(Funcdata *f,PcodeOp *op,Datatype *dt,int4 off); ///< Construct from a terminating op + bool doTrace(void); ///< Trace bitfields backward from the terminating op + void apply(void); ///< Transform recovered expressions into INSERT operations +}; + +/// \brief Class that converts bitfield pull expressions into explicit ZPULL and SPULL operations +/// +/// The doTrace() method traces forward from a root Varnode that contains bitfields to find points where +/// an individual bitfield has been fully isolated, creating a PullRecord at each point. +/// If all bits of the Varnode are accounted for, the apply() method transforms expressions based on any PullRecord. +class BitFieldPullTransform : public BitFieldTransform { + /// \brief During final transformation, this is the state maintained between processing individual PullRecords + class TransformState { + friend class BitFieldPullTransform; + vector deadScratch; ///< Scratch space for opDestroyRecursive method + Datatype *partialType; ///< Partial data-type of the root container + int4 count; ///< Number of PullRecords processed + }; + /// \brief Info about a single read by a PcodeOp that can be treated as a \e pull of 1 or more bitfields + class PullRecord { + enum { + normal = 0, ///< A single field pull + equal = 1, ///< Pull for INT_EQUAL or INT_NOTEQUAL + aborted = 2 ///< Code to indicate that the pull for the entire PcodeOp should be aborted + }; + friend class BitFieldPullTransform; + Varnode *readVn; ///< Varnode holding pulled value + PcodeOp *readOp; ///< Op reading the pulled value, or null if readVn itself is redefined + Datatype *dt; ///< Data-type associated with the pulled value + int4 type; ///< Type of pull + int4 pos; ///< Bit position of field being pulled + int4 numBits; ///< Number of bits in field being pulled + int4 leftShift; ///< Amount final field is left shifted + uintb mask; ///< Mask 
representing the bitfield within the Varnode + public: + PullRecord(const BitFieldNodeState &state,PcodeOp *op); ///< Construct pull record for a specific PcodeOp read + PullRecord(const BitFieldNodeState &state,PcodeOp *op,uintb val); ///< Construct record for a pull into an INT_EQUAL or INT_NOTEQUAL + PullRecord(PcodeOp *op); ///< Construct record representing an abort + bool operator<(const PullRecord &op2) const; ///< Compare records + }; + Varnode *root; ///< Value being pulled from + PcodeOp *loadOp; ///< LOAD op producing root (if non-null) + list pullList; ///< Pull actions + static bool testConsumed(Varnode *vn,const BitRange &bitField); ///< Test if all consumed bits are in the given bitfield + void handleLeftForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield forward through INT_LEFT + void handleRightForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield forward through INT_RIGHT + void handleAndForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield forward through INT_AND + void handleExtForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield forward through INT_ZEXT + void handleMultForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield forward through INT_MULT + void handleSubpieceForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield forward through SUBPIECE + void handleInsertForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield forward into INSERT + void handleLessForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield forward through INT_LESS, INT_SLESS + void handleLeastSigOp(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield into INT_ADD, INT_MULT, INT_OR, INT_XOR + void handleEqualForward(const BitFieldNodeState &state,PcodeOp *op); ///< Follow bitfield into INT_EQUAL or INT_NOTEQUAL + void processForward(BitFieldNodeState &state); ///< Follow bitfield forward one level through 
all its descendants + list::iterator testCompareGroup(list::iterator iter); + void applyRecord(PullRecord &rec,TransformState &state); ///< Perform transform corresponding to the given PullRecord + void applyCompareRecord(const PullRecord &rec); ///< Perform transform on an INT_EQUAL or INT_NOTEQUAL + bool foldLoad(PcodeOp *loadOp) const; ///< Try to mark LOAD as part of ZPULL or SPULL + void foldPtrsub(PcodeOp *loadOp) const; ///< Try to mark PTRSUB as part of ZPULL or SPULL +public: + BitFieldPullTransform(Funcdata *f,Varnode *r,Datatype *dt,int4 off); ///< Construct from Varnode containing bitfields + bool doTrace(void); ///< Trace bitfields from \b root to points where they are pulled + void apply(void); ///< Transform recovered expressions into ZPULL or SPULL operations +}; + +/// \brief Collapse bitfield insertion ending in a CPUI_STORE +class RuleBitFieldStore : public Rule { +public: + RuleBitFieldStore(const string &g) : Rule( g, 0, "bitfield_store") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RuleBitFieldStore(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + +/// \brief Collapse bitfield insertion ending in a write to a mapped Varnode +class RuleBitFieldOut : public Rule { +public: + RuleBitFieldOut(const string &g) : Rule( g, 0, "bitfield_out") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RuleBitFieldOut(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + +/// \brief Collapse bitfield pulls starting with a CPUI_LOAD +class RuleBitFieldLoad : public Rule { +public: + RuleBitFieldLoad(const string &g) : Rule( g, 0, "bitfield_load") {} ///< Constructor + virtual Rule *clone(const ActionGroupList 
&grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RuleBitFieldLoad(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + +/// \brief Collapse bitfield pulls starting with mapped Varnodes +class RuleBitFieldIn : public Rule { +public: + RuleBitFieldIn(const string &g) : Rule( g, 0, "bitfield_in") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RuleBitFieldIn(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + +/// \brief Simplify expressions explicitly using ZPULL and SPULL p-code ops +class RulePullAbsorb : public Rule { + int4 absorbRight(Funcdata &data,PcodeOp *rightOp,PcodeOp *pullOp); + int4 absorbRightAndCompZero(Funcdata &data,PcodeOp *rightOp,PcodeOp *andOp,PcodeOp *pullOp); + int4 absorbLeft(Funcdata &data,PcodeOp *leftOp,PcodeOp *pullOp); + int4 absorbLeftRight(Funcdata &data,PcodeOp *rightOp,PcodeOp *leftOp,PcodeOp *pullOp); + int4 absorbLeftAnd(Funcdata &data,PcodeOp *andOp,PcodeOp *leftOp,PcodeOp *pullOp); + int4 absorbAnd(Funcdata &data,PcodeOp *andOp,PcodeOp *pullOp); + int4 absorbCompare(Funcdata &data,PcodeOp *compOp,PcodeOp *leftOp,PcodeOp *pullOp); + int4 absorbExt(Funcdata &data,PcodeOp *extOp,PcodeOp *pullOp); + int4 absorbSubpiece(Funcdata &data,PcodeOp *subOp,PcodeOp *pullOp); + int4 absorbCompZero(Funcdata &data,PcodeOp *compOp,PcodeOp *pullOp); +public: + RulePullAbsorb(const string &g) : Rule( g, 0, "pull_absorb") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RulePullAbsorb(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + +/// \brief Simplify expressions explicitly using the INSERT 
p-code op +class RuleInsertAbsorb : public Rule { + static Varnode *leftShiftVarnode(Varnode *vn,int4 sa); ///< Test if a Varnode is left-shifted by the given amount + int4 absorbAnd(Funcdata &data,PcodeOp *andOp,PcodeOp *insertOp); + int4 absorbRightLeft(Funcdata &data,PcodeOp *nextOp,PcodeOp *rightOp,PcodeOp *insertOp); + int4 absorbShiftAdd(Funcdata &data,PcodeOp *rightOp,PcodeOp *addOp,PcodeOp *insertOp); + int4 absorbNestedAnd(Funcdata &data,PcodeOp *baseOp,PcodeOp *insertOp); +public: + RuleInsertAbsorb(const string &g) : Rule( g, 0, "insert_absorb") {} ///< Constructor + virtual Rule *clone(const ActionGroupList &grouplist) const { + if (!grouplist.contains(getGroup())) return (Rule *)0; + return new RuleInsertAbsorb(getGroup()); + } + virtual void getOpList(vector &oplist) const; + virtual int4 applyOp(PcodeOp *op,Funcdata &data); +}; + +} // End namespace ghidra +#endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc index b5e31405eb..27304436d4 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/constseq.cc @@ -507,7 +507,7 @@ void HeapSequence::findDuplicateBases(vector &duplist) if (!copyRoot->isWritten()) break; op = copyRoot->getDef(); opc = op->code(); - if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRSUB) + if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD) break; } while(op->getIn(1)->isConstant()); @@ -523,7 +523,7 @@ void HeapSequence::findDuplicateBases(vector &duplist) op = *iter; ++iter; opc = op->code(); - if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRSUB) + if (opc != CPUI_PTRSUB && opc != CPUI_INT_ADD && opc != CPUI_PTRADD) continue; if (op->getIn(0) != vn || !op->getIn(1)->isConstant()) continue; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc index 
a392076ad2..f9e147e628 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/coreaction.cc @@ -18,6 +18,7 @@ #include "double.hh" #include "subflow.hh" #include "constseq.hh" +#include "bitfield.hh" namespace ghidra { @@ -1540,7 +1541,7 @@ void ActionFuncLink::funcLinkOutput(FuncCallSpecs *fc,Funcdata &data) Datatype *outtype = outparam->getType(); if (outtype->getMetatype() != TYPE_VOID) { int4 sz = outparam->getSize(); - if (sz == 1 && outtype->getMetatype() == TYPE_BOOL && data.isTypeRecoveryOn()) + if (outtype->getMetatype() == TYPE_BOOL && data.isTypeRecoveryOn()) data.opMarkCalculatedBool(callop); Address addr = outparam->getAddress(); if (addr.getSpace()->getType() == IPTR_SPACEBASE) { @@ -2349,6 +2350,7 @@ int4 ActionDefaultParams::apply(Funcdata &data) void ActionSetCasts::checkPointerIssues(PcodeOp *op,Varnode *vn,Funcdata &data) { + if (op->doesSpecialPrinting()) return; Datatype *ptrtype = op->getIn(1)->getHighTypeReadFacing(op); int4 valsize = vn->getSize(); if ((ptrtype->getMetatype()!=TYPE_PTR)|| (((TypePointer *)ptrtype)->getPtrTo()->getSize() != valsize)) { @@ -3062,6 +3064,11 @@ int4 ActionMarkExplicit::baseExplicit(Varnode *vn,int4 maxref) return -1; } if (vn->hasNoDescend()) return -1; // Must have at least one descendant + if (def->code() == CPUI_INSERT) { + PcodeOp *storeOp = def->getOut()->loneDescend(); + if (storeOp == (PcodeOp *)0 || storeOp->code() != CPUI_STORE) + return -1; // INSERT output is explicit unless it is immediately used by STORE + } if (def->code() == CPUI_PTRSUB) { // A dereference Varnode *basevn = def->getIn(0); @@ -3703,6 +3710,9 @@ void ActionDeadCode::propagateConsumed(vector &worklist) if (sz > sizeof(uintb)) { // If there exists bits beyond the precision of the consume field if (sa >= 8*sizeof(uintb)) a = ~((uintb)0); // Make sure we assume one bits where we shift in unrepresented bits + else if (sa == 0) { + a = outc; + } else a = (outc >> sa) ^ ( 
(~((uintb)0)) << (8*sizeof(uintb)-sa)); sz = 8*sz -sa; @@ -3763,10 +3773,11 @@ void ActionDeadCode::propagateConsumed(vector &worklist) pushConsumed(b,op->getIn(2), worklist); pushConsumed(b,op->getIn(3), worklist); break; - case CPUI_EXTRACT: + case CPUI_ZPULL: + case CPUI_SPULL: a = 1; a <<= (int4)op->getIn(2)->getOffset(); - a -= 1; // Extract mask + a -= 1; // Pull mask a &= outc; // Consumed bits of mask a <<= (int4)op->getIn(1)->getOffset(); pushConsumed(a,op->getIn(0),worklist); @@ -5425,7 +5436,7 @@ void ActionDatabase::buildDefaultGroups(void) "deadcode", "typerecovery", "stackptrflow", "blockrecovery", "stackvars", "deadcontrolflow", "switchnorm", "cleanup", "splitcopy", "splitpointer", "merge", "dynamic", "casts", "analysis", - "fixateglobals", "fixateproto", "constsequence", + "fixateglobals", "fixateproto", "constsequence", "bitfields", "segment", "returnsplit", "nodejoin", "doubleload", "doubleprecis", "unreachable", "subvar", "floatprecision", "conditionalexe", "" }; @@ -5708,6 +5719,12 @@ void ActionDatabase::universalAction(Architecture *conf) actcleanup->addRule( new RuleSplitStore("splitpointer") ); actcleanup->addRule( new RuleStringCopy("constsequence")); actcleanup->addRule( new RuleStringStore("constsequence")); + actcleanup->addRule( new RuleBitFieldStore("bitfields")); + actcleanup->addRule( new RuleBitFieldOut("bitfields")); + actcleanup->addRule( new RuleBitFieldLoad("bitfields")); + actcleanup->addRule( new RuleBitFieldIn("bitfields")); + actcleanup->addRule( new RulePullAbsorb("bitfields")); + actcleanup->addRule( new RuleInsertAbsorb("bitfields")); } act->addAction( actcleanup ); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc index 05658e854e..5ba36b30eb 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/dynamic.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the 
"License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -57,8 +57,8 @@ const uint4 DynamicHash::transtable[] = { 0, // CAST is skipped CPUI_INT_ADD, CPUI_INT_ADD, // PTRADD and PTRSUB hash same as INT_ADD - CPUI_SEGMENTOP, CPUI_CPOOLREF, CPUI_NEW, CPUI_INSERT, CPUI_EXTRACT, - CPUI_POPCOUNT, CPUI_LZCOUNT + CPUI_SEGMENTOP, CPUI_CPOOLREF, CPUI_NEW, CPUI_INSERT, CPUI_ZPULL, + CPUI_POPCOUNT, CPUI_LZCOUNT, CPUI_SPULL }; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/expression.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/expression.cc index 0ac7a61ccf..796b4104b1 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/expression.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/expression.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "expression.hh" +#include "database.hh" namespace ghidra { @@ -392,6 +393,198 @@ void AddExpression::gatherTwoTermsRoot(Varnode *root) gather(root,(uintb)1,1); } +/// Find the structure immediately containing the byte range described by the given data-type. +/// Then, given the least significant bit of the bitfield, find the structure immediately containing the bitfield, +/// which may be a different structure. 
+/// \param dt is the data-type describing the byte range +/// \param initByteOff is the initial offset of the byte range within the data-type (may be -1) +/// \param leastBitOff is the least significant bit of the bitfield within the byte range +/// \param isBigEndian is \b true if the data-type is stored in big endian memory +void BitFieldExpression::getStructures(Datatype *dt,int4 initByteOff,int4 leastBitOff,bool isBigEndian) + +{ + TypeStruct *encompassStruct; + theStruct = (TypeStruct *)0; + byteRangeOffset = (initByteOff < 0) ? 0 : initByteOff; + if (dt->getMetatype() == TYPE_PARTIALSTRUCT) { + TypePartialStruct *partial = (TypePartialStruct *)dt; + Datatype *structTmp = partial->getParent(); + if (structTmp->getMetatype() != TYPE_STRUCT) return; + byteRangeOffset += partial->getOffset(); + encompassStruct = (TypeStruct *)structTmp; + } + else if (dt->getMetatype() == TYPE_STRUCT) { + encompassStruct = (TypeStruct *)dt; + } + else + return; + int8 offset = byteRangeOffset; + leastBitOff /= 8; + if (isBigEndian) + offset += (dt->getSize() - leastBitOff - 1); + else + offset += leastBitOff; + theStruct = encompassStruct; + offsetToBitStruct = 0; + for(;;) { + int8 newoff; + Datatype *tmpDt = theStruct->getSubType(offset, &newoff); + if (tmpDt == (Datatype *)0) break; + if (tmpDt->getMetatype() != TYPE_STRUCT) break; + theStruct = (TypeStruct *)tmpDt; + offsetToBitStruct += (offset-newoff); + offset = newoff; + } +} + +const Varnode *BitFieldExpression::recoverStructurePointer(const Varnode *vn,int4 offset) + +{ + if (offset == 0 && theStruct->getSize() == vn->getSize()) + return vn; + else if (vn->isWritten()) { + const PcodeOp *ptrSub = vn->getDef(); + if (ptrSub->code() == CPUI_PTRSUB) { + if ((int4)ptrSub->getIn(1)->getOffset() == offset) { + return ptrSub->getIn(0); + } + } + } + + if (offset != 0) + return (const Varnode *)0; + return vn; +} + +/// \param pull is the given ZPULL or SPULL +/// \return the corresponding bitfield description or null +const 
TypeBitField *BitFieldExpression::getPullField(const PcodeOp *pull) + +{ + BitFieldExpression expr; + const Varnode *inVn = pull->getIn(0); + int4 leastBitOff = (int4)pull->getIn(1)->getOffset(); + int4 bitSize = (int4)pull->getIn(2)->getOffset(); + bool isBig = inVn->getSpace()->isBigEndian(); + Datatype *dt = inVn->getTypeReadFacing(pull); + expr.getStructures(dt, 0, leastBitOff, isBig); + if (expr.theStruct == (TypeStruct *)0) + return (const TypeBitField *)0; + BitRange range(expr.byteRangeOffset-expr.offsetToBitStruct,inVn->getSize(),leastBitOff,bitSize,isBig); + return expr.theStruct->findMatchingBitField(range); +} + +InsertExpression::InsertExpression(const PcodeOp *insert) + +{ + insertOp = insert; + bitfield = (const TypeBitField *)0; theStruct = (TypeStruct *)0; // Null the structure too, so an early return leaves no indeterminate pointer + const Varnode *value = insertOp->getOut(); + symbol = value->getHigh()->getSymbol(); + if (symbol == (const Symbol *)0) return; + AddrSpace *spc = value->getSpace(); + int4 leastBitOff = (int4)insertOp->getIn(2)->getOffset(); + int4 bitSize = (int4)insertOp->getIn(3)->getOffset(); + getStructures(symbol->getType(),value->getHigh()->getSymbolOffset(),leastBitOff,spc->isBigEndian()); + if (theStruct == (TypeStruct *)0) return; + BitRange range(byteRangeOffset - offsetToBitStruct,value->getSize(),leastBitOff,bitSize,spc->isBigEndian()); + bitfield = theStruct->findMatchingBitField(range); +} + +/// \param insert is the CPUI_INSERT op +/// \return a mask with a 1 wherever bits are inserted by the op +uintb InsertExpression::getRangeMask(const PcodeOp *insert) + +{ + int4 leastBitOff = (int4)insert->getIn(2)->getOffset(); + int4 bitSize = (int4)insert->getIn(3)->getOffset(); + uintb res = 0; + res = ~res; + if (bitSize < 8 * sizeof(uintb)) + res = ~(res << bitSize); + res <<= leastBitOff; + return res; +} + +/// \param insert is the CPUI_INSERT op +/// \return mask of the least significant bits, one bit for each bit INSERTed +uintb InsertExpression::getLSBMask(const PcodeOp *insert) + +{ + int4 bitSize = (int4)insert->getIn(3)->getOffset(); + uintb res 
= 0; + res = ~res; + if (bitSize < 8 * sizeof(uintb)) + res = ~(res << bitSize); + return res; +} + +InsertStoreExpression::InsertStoreExpression(const PcodeOp *store) + +{ + bitfield = (const TypeBitField *)0; + structPtr = (Varnode *)0; + theStruct = (TypeStruct *)0; + loadOp = (PcodeOp *)0; insertOp = (const PcodeOp *)0; // insertOp stays null unless the stored value has a defining op + const Varnode *value = store->getIn(2); + if (!value->isWritten()) return; + insertOp = value->getDef(); + if (insertOp->code() != CPUI_INSERT) return; + const Varnode *dest = insertOp->getIn(0); // dest can either be a constant or LOAD + if (dest->isWritten()) { + loadOp = dest->getDef(); + if (loadOp->code() != CPUI_LOAD) return; + } + else if (!dest->isConstant()) + return; + AddrSpace *spc = store->getIn(0)->getSpaceFromConst(); + int4 leastBitOff = (int4)insertOp->getIn(2)->getOffset(); + int4 bitSize = (int4)insertOp->getIn(3)->getOffset(); + getStructures(value->getTypeDefFacing(),0,leastBitOff,spc->isBigEndian()); + if (theStruct == (TypeStruct *)0) return; + structPtr = recoverStructurePointer(store->getIn(1), byteRangeOffset); + if (structPtr == (const Varnode *)0) return; + BitRange range(byteRangeOffset - offsetToBitStruct,value->getSize(),leastBitOff,bitSize,spc->isBigEndian()); + bitfield = theStruct->findMatchingBitField(range); +} + +PullExpression::PullExpression(const PcodeOp *pull) + +{ + pullOp = pull; + bitfield = (const TypeBitField *)0; + theStruct = (TypeStruct *)0; + loadOp = (const PcodeOp *)0; + structPtr = (const Varnode *)0; symbol = (const Symbol *)0; // symbol stays null when the pulled value comes from a LOAD + const Varnode *inVn = pullOp->getIn(0); + AddrSpace *spc; + Datatype *dt; + int4 offset; + if (inVn->isWritten() && inVn->getDef()->code() == CPUI_LOAD) { + loadOp = inVn->getDef(); + spc = loadOp->getIn(0)->getSpaceFromConst(); + dt = inVn->getTypeReadFacing(pullOp); + offset = 0; + } + else { + symbol = inVn->getHigh()->getSymbol(); + if (symbol == (const Symbol *)0) return; + spc = inVn->getSpace(); + dt = symbol->getType(); + offset = inVn->getHigh()->getSymbolOffset(); + } + int4 leastBitOff = 
(int4)pullOp->getIn(1)->getOffset(); + int4 bitSize = (int4)pullOp->getIn(2)->getOffset(); + getStructures(dt,offset,leastBitOff,spc->isBigEndian()); + if (theStruct == (TypeStruct *)0) return; + if (loadOp != (const PcodeOp *)0) { + structPtr = recoverStructurePointer(loadOp->getIn(1), byteRangeOffset); + if (structPtr == (const Varnode *)0) return; + } + BitRange range(byteRangeOffset-offsetToBitStruct,inVn->getSize(),leastBitOff,bitSize,spc->isBigEndian()); + bitfield = theStruct->findMatchingBitField(range); +} + /// \brief Perform basic comparison of two given Varnodes /// /// Return @@ -559,4 +752,53 @@ bool functionalDifference(Varnode *vn1,Varnode *vn2,int4 depth) return false; } +/// \brief Back-track as far as possible from a pointer Varnode thru PTRSUB, INT_ADD, and COPY collecting offsets +/// +/// The pointer that is reached by back-tracking is returned, and any accumulated offset is passed back. +/// \param vn is the pointer Varnode +/// \param offset passes back the accumulated offset +/// \return the reached Varnode pointer +Varnode *rootPointer(Varnode *vn,uintb &offset) + +{ + offset = 0; + for(;;) { + if (!vn->isWritten()) break; + PcodeOp *op = vn->getDef(); + OpCode opc = op->code(); + if (opc == CPUI_PTRSUB) { + offset += op->getIn(1)->getOffset(); + vn = op->getIn(0); + } + else if (opc == CPUI_INT_ADD) { + Varnode *cvn = op->getIn(1); + if (!cvn->isConstant()) break; + offset += cvn->getOffset(); + vn = op->getIn(0); + } + else if (opc == CPUI_COPY) { + vn = op->getIn(0); + } + else + break; + } + return vn; +} + +/// \brief Determine if two pointer Varnodes always hold the same value +/// +/// \param vn1 is the first pointer to compare +/// \param vn2 is the second pointer to compare +bool pointerEquality(Varnode *vn1,Varnode *vn2) + +{ + uintb off1,off2; + + if (vn1 == vn2) return true; + vn1 = rootPointer(vn1,off1); + vn2 = rootPointer(vn2,off2); + if (off1 != off2) return false; + return (vn1 == vn2); +} + } // End namespace ghidra diff 
--git a/Ghidra/Features/Decompiler/src/decompile/cpp/expression.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/expression.hh index fedf4d8b8a..e476310255 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/expression.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/expression.hh @@ -134,6 +134,17 @@ public: const vector &getSort(void) { return sorter; } ///< Get the sorted list of references }; +/// \brief A comparison operator for ordering terms in a sum +/// +/// This is based on Varnode::termOrder which groups constants terms and +/// ignores multiplicative coefficients. +/// \param op1 is the first term to compare +/// \param op2 is the second term +/// \return \b true if the first term is less than the second +inline bool TermOrder::additiveCompare(const AdditiveEdge *op1,const AdditiveEdge *op2) { + return (-1 == op1->getVarnode()->termOrder(op2->getVarnode())); +} + /// \brief Class for lightweight matching of two additive expressions /// /// Collect (up to 2) terms along with any constants and coefficients. @@ -161,20 +172,67 @@ public: bool isEquivalent(const AddExpression &op2) const; ///< Determine if 2 expressions are equivalent }; -/// \brief A comparison operator for ordering terms in a sum +/// \brief A container for an expression manipulating a bitfield /// -/// This is based on Varnode::termOrder which groups constants terms and -/// ignores multiplicative coefficients. -/// \param op1 is the first term to compare -/// \param op2 is the second term -/// \return \b true if the first term is less than the second -inline bool TermOrder::additiveCompare(const AdditiveEdge *op1,const AdditiveEdge *op2) { - return (-1 == op1->getVarnode()->termOrder(op2->getVarnode())); -} +/// The expression centers around either a CPUI_INSERT or a CPUI_EXTRACT op but encompasses multiple p-code ops +/// that represent either a single read of or single write to a bitfield within a structure. This class recovers +/// the expected elements of the expression. 
The method isValid() returns \b true if the expression has the +/// expected form and can be interpreted as a single read or write. +class BitFieldExpression { +protected: + void getStructures(Datatype *dt,int4 initByteOff,int4 leastBitOff,bool isBigEndian); ///< Recover the structure(s) holding the bitfield +public: + TypeStruct *theStruct; ///< Parent structure containing the bitfield + const TypeBitField *bitfield; ///< Formal bitfield description + int4 byteRangeOffset; ///< Offset of byte range into encompassStruct (NOTE(review): no encompassStruct member is declared in this class -- confirm which structure is meant) + int4 offsetToBitStruct; ///< Offset of structure containing bitfield in encompassStruct + const Varnode *recoverStructurePointer(const Varnode *vn,int4 offset); ///< Recover the Varnode holding the pointer to the parent structure + bool isValid(void) const { return (bitfield != (const TypeBitField *)0); } ///< Is \b this a valid bitfield expression + static const TypeBitField *getPullField(const PcodeOp *pull); ///< Get field description corresponding to given ZPULL or SPULL +}; + +/// \brief A write to a bitfield stored in an explicit Varnode +/// +/// The INSERT output is expected to be associated with a (partial) symbol and +/// hold data-type information. +class InsertExpression : public BitFieldExpression { +public: + const PcodeOp *insertOp; ///< INSERT op + const Symbol *symbol; ///< Structure symbol represented by the INSERT output + InsertExpression(const PcodeOp *insert); ///< Construct from an INSERT + static uintb getRangeMask(const PcodeOp *insert); ///< Get a mask representing the INSERTed range of bits + static uintb getLSBMask(const PcodeOp *insert); ///< Get mask of least significant bits matching INSERT size +}; + +/// \brief A write to a bitfield through a STORE op +/// +/// The INSERT output is expected to hold data-type info. A final STORE must be +/// present and a LOAD, recovering parts of the structure that are unaffected, may be present. 
+class InsertStoreExpression : public BitFieldExpression { +public: + const PcodeOp *insertOp; ///< INSERT op + const PcodeOp *loadOp; ///< LOAD op (may be null) + const Varnode *structPtr; ///< Varnode holding pointer to the parent structure + InsertStoreExpression(const PcodeOp *store); ///< Construct from a STORE +}; + +/// \brief A read of a bitfield via a ZPULL or SPULL operator +/// +/// The first input to the operator must either be a (partial) symbol or be written by a LOAD. +class PullExpression : public BitFieldExpression { +public: + const PcodeOp *pullOp; ///< ZPULL or SPULL op + const Symbol *symbol; ///< Symbol holding the structure and bitfield (may be null) + const PcodeOp *loadOp; ///< LOAD reading bytes containing the bitfield (may be null) + const Varnode *structPtr; ///< Varnode holding pointer to the parent structure + PullExpression(const PcodeOp *pull); ///< Construct from a ZPULL or SPULL +}; extern int4 functionalEqualityLevel(Varnode *vn1,Varnode *vn2,Varnode **res1,Varnode **res2); extern bool functionalEquality(Varnode *vn1,Varnode *vn2); extern bool functionalDifference(Varnode *vn1,Varnode *vn2,int4 depth); +extern Varnode *rootPointer(Varnode *vn,uintb &offset); +extern bool pointerEquality(Varnode *vn1,Varnode *vn2); } // End namespace ghidra #endif diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh index 083056d7f3..db85597a12 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata.hh @@ -292,6 +292,7 @@ public: Varnode *newExtendedConstant(int4 s,uint8 *val,PcodeOp *op); ///< Create extended precision constant void adjustInputVarnodes(const Address &addr,int4 sz); void deleteVarnode(Varnode *vn) { vbank.destroy(vn); } ///< Delete the given varnode + void destroyVarnodeRecursive(Varnode *vn); ///< Destroy Varnode (if unused) and any PcodeOp that produced it Address 
findDisjointCover(Varnode *vn,int4 &sz); ///< Find range covering given Varnode and any intersecting Varnodes diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc index fdaf94603a..2232a823d5 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/funcdata_varnode.cc @@ -536,6 +536,22 @@ void Funcdata::adjustInputVarnodes(const Address &addr,int4 sz) } } +/// If the Varnode has descendants or is auto-live (isAutoLive(), e.g. address forced), this method does nothing. +/// Otherwise, the Varnode is destroyed as is its defining PcodeOp. Any dead inputs to the PcodeOp are +/// then destroyed recursively. A Varnode with no defining PcodeOp is simply destroyed by itself. +/// \param vn is the Varnode to destroy +void Funcdata::destroyVarnodeRecursive(Varnode *vn) + +{ + if (vn->isAutoLive() || !vn->hasNoDescend()) return; + if (!vn->isWritten()) { + vbank.destroy(vn); + return; + } + vector<PcodeOp *> scratch; + opDestroyRecursive(vn->getDef(), scratch); +} + /// All p-code ops that read the Varnode are transformed so that they read /// a special constant instead (associate with unreachable block removal). /// \param vn is the given Varnode @@ -888,6 +904,9 @@ void Funcdata::calcNZMask(void) if (!vn->isWritten()) { if (vn->isConstant()) vn->nzm = vn->getOffset(); + else if (vn->isTypeLock() && vn->getType()->getMetatype() == TYPE_BOOL) { + vn->nzm = 1; + } else { vn->nzm = calc_mask(vn->getSize()); if (vn->isSpacebase()) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc index 1077bada52..d93a492062 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.cc @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -/* A Bison parser, made by GNU Bison 3.5.1. 
*/ +/* A Bison parser, made by GNU Bison 3.7.4. */ /* Bison implementation for Yacc-like parsers in C @@ -49,6 +49,10 @@ /* C LALR(1) parser skeleton written by Richard Stallman, by simplifying the original so-called "semantic" parser. */ +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + /* All symbols defined below should begin with yy or YY, to avoid infringing on user name space. This should be done even for local variables, as they might otherwise be expanded by user macros. @@ -56,14 +60,11 @@ define necessary library symbols; they are noted "INFRINGES ON USER NAME SPACE" below. */ -/* Undocumented macros, especially those whose name start with YY_, - are private implementation details. Do not rely on them. */ +/* Identify Bison output, and Bison version. */ +#define YYBISON 30704 -/* Identify Bison output. */ -#define YYBISON 1 - -/* Bison version. */ -#define YYBISON_VERSION "3.5.1" +/* Bison version string. */ +#define YYBISON_VERSION "3.7.4" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" @@ -120,14 +121,6 @@ static CParse *parse; # endif # endif -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - /* Debug traces. */ #ifndef GRAMMARDEBUG @@ -145,25 +138,31 @@ static CParse *parse; extern int grammardebug; #endif -/* Token type. */ +/* Token kinds. 
*/ #ifndef GRAMMARTOKENTYPE # define GRAMMARTOKENTYPE enum grammartokentype { - DOTDOTDOT = 258, - BADTOKEN = 259, - STRUCT = 260, - UNION = 261, - ENUM = 262, - DECLARATION_RESULT = 263, - PARAM_RESULT = 264, - NUMBER = 265, - IDENTIFIER = 266, - STORAGE_CLASS_SPECIFIER = 267, - TYPE_QUALIFIER = 268, - FUNCTION_SPECIFIER = 269, - TYPE_NAME = 270 + GRAMMAREMPTY = -2, + GRAMMAREOF = 0, /* "end of file" */ + GRAMMARerror = 256, /* error */ + GRAMMARUNDEF = 257, /* "invalid token" */ + DOTDOTDOT = 258, /* DOTDOTDOT */ + BADTOKEN = 259, /* BADTOKEN */ + STRUCT = 260, /* STRUCT */ + UNION = 261, /* UNION */ + ENUM = 262, /* ENUM */ + DECLARATION_RESULT = 263, /* DECLARATION_RESULT */ + PARAM_RESULT = 264, /* PARAM_RESULT */ + SCOPERES = 265, /* SCOPERES */ + NUMBER = 266, /* NUMBER */ + IDENTIFIER = 267, /* IDENTIFIER */ + STORAGE_CLASS_SPECIFIER = 268, /* STORAGE_CLASS_SPECIFIER */ + TYPE_QUALIFIER = 269, /* TYPE_QUALIFIER */ + FUNCTION_SPECIFIER = 270, /* FUNCTION_SPECIFIER */ + TYPE_NAME = 271 /* TYPE_NAME */ }; + typedef enum grammartokentype grammartoken_kind_t; #endif /* Value type. */ @@ -195,6 +194,68 @@ extern GRAMMARSTYPE grammarlval; int grammarparse (void); +/* Symbol kind. 
*/ +enum yysymbol_kind_t +{ + YYSYMBOL_YYEMPTY = -2, + YYSYMBOL_YYEOF = 0, /* "end of file" */ + YYSYMBOL_YYerror = 1, /* error */ + YYSYMBOL_YYUNDEF = 2, /* "invalid token" */ + YYSYMBOL_DOTDOTDOT = 3, /* DOTDOTDOT */ + YYSYMBOL_BADTOKEN = 4, /* BADTOKEN */ + YYSYMBOL_STRUCT = 5, /* STRUCT */ + YYSYMBOL_UNION = 6, /* UNION */ + YYSYMBOL_ENUM = 7, /* ENUM */ + YYSYMBOL_DECLARATION_RESULT = 8, /* DECLARATION_RESULT */ + YYSYMBOL_PARAM_RESULT = 9, /* PARAM_RESULT */ + YYSYMBOL_SCOPERES = 10, /* SCOPERES */ + YYSYMBOL_NUMBER = 11, /* NUMBER */ + YYSYMBOL_IDENTIFIER = 12, /* IDENTIFIER */ + YYSYMBOL_STORAGE_CLASS_SPECIFIER = 13, /* STORAGE_CLASS_SPECIFIER */ + YYSYMBOL_TYPE_QUALIFIER = 14, /* TYPE_QUALIFIER */ + YYSYMBOL_FUNCTION_SPECIFIER = 15, /* FUNCTION_SPECIFIER */ + YYSYMBOL_TYPE_NAME = 16, /* TYPE_NAME */ + YYSYMBOL_17_ = 17, /* ';' */ + YYSYMBOL_18_ = 18, /* ',' */ + YYSYMBOL_19_ = 19, /* '{' */ + YYSYMBOL_20_ = 20, /* '}' */ + YYSYMBOL_21_ = 21, /* ':' */ + YYSYMBOL_22_ = 22, /* '=' */ + YYSYMBOL_23_ = 23, /* '(' */ + YYSYMBOL_24_ = 24, /* ')' */ + YYSYMBOL_25_ = 25, /* '[' */ + YYSYMBOL_26_ = 26, /* ']' */ + YYSYMBOL_27_ = 27, /* '*' */ + YYSYMBOL_YYACCEPT = 28, /* $accept */ + YYSYMBOL_document = 29, /* document */ + YYSYMBOL_declaration = 30, /* declaration */ + YYSYMBOL_declaration_specifiers = 31, /* declaration_specifiers */ + YYSYMBOL_init_declarator_list = 32, /* init_declarator_list */ + YYSYMBOL_init_declarator = 33, /* init_declarator */ + YYSYMBOL_type_specifier = 34, /* type_specifier */ + YYSYMBOL_struct_or_union_specifier = 35, /* struct_or_union_specifier */ + YYSYMBOL_struct_declaration_list = 36, /* struct_declaration_list */ + YYSYMBOL_struct_declaration = 37, /* struct_declaration */ + YYSYMBOL_specifier_qualifier_list = 38, /* specifier_qualifier_list */ + YYSYMBOL_struct_declarator_list = 39, /* struct_declarator_list */ + YYSYMBOL_struct_declarator = 40, /* struct_declarator */ + YYSYMBOL_enum_specifier = 41, /* enum_specifier */ + 
YYSYMBOL_enumerator_list = 42, /* enumerator_list */ + YYSYMBOL_enumerator = 43, /* enumerator */ + YYSYMBOL_declarator = 44, /* declarator */ + YYSYMBOL_var_identifier = 45, /* var_identifier */ + YYSYMBOL_direct_declarator = 46, /* direct_declarator */ + YYSYMBOL_pointer = 47, /* pointer */ + YYSYMBOL_type_qualifier_list = 48, /* type_qualifier_list */ + YYSYMBOL_parameter_type_list = 49, /* parameter_type_list */ + YYSYMBOL_parameter_list = 50, /* parameter_list */ + YYSYMBOL_parameter_declaration = 51, /* parameter_declaration */ + YYSYMBOL_abstract_declarator = 52, /* abstract_declarator */ + YYSYMBOL_direct_abstract_declarator = 53, /* direct_abstract_declarator */ + YYSYMBOL_assignment_expression = 54 /* assignment_expression */ +}; +typedef enum yysymbol_kind_t yysymbol_kind_t; + @@ -294,6 +355,7 @@ typedef int yytype_uint16; #define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X)) + /* Stored state numbers (used for stacks). */ typedef yytype_int8 yy_state_t; @@ -312,6 +374,7 @@ typedef int yy_state_fast_t; # endif #endif + #ifndef YY_ATTRIBUTE_PURE # if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__) # define YY_ATTRIBUTE_PURE __attribute__ ((__pure__)) @@ -369,7 +432,7 @@ typedef int yy_state_fast_t; #define YY_ASSERT(E) ((void) (0 && (E))) -#if ! defined yyoverflow || YYERROR_VERBOSE +#if !defined yyoverflow /* The parser invokes alloca or malloc; define the necessary symbols. */ @@ -434,8 +497,7 @@ void free (void *); /* INFRINGES ON USER NAME SPACE */ # endif # endif # endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - +#endif /* !defined yyoverflow */ #if (! defined yyoverflow \ && (! defined __cplusplus \ @@ -500,25 +562,27 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 18 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 155 +#define YYLAST 157 /* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 26 +#define YYNTOKENS 28 /* YYNNTS -- Number of nonterminals. 
*/ -#define YYNNTS 26 +#define YYNNTS 27 /* YYNRULES -- Number of rules. */ -#define YYNRULES 71 +#define YYNRULES 74 /* YYNSTATES -- Number of states. */ -#define YYNSTATES 115 +#define YYNSTATES 120 -#define YYUNDEFTOK 2 -#define YYMAXUTOK 270 +/* YYMAXUTOK -- Last valid token kind. */ +#define YYMAXUTOK 271 /* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM as returned by yylex, with out-of-bounds checking. */ -#define YYTRANSLATE(YYX) \ - (0 <= (YYX) && (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) +#define YYTRANSLATE(YYX) \ + (0 <= (YYX) && (YYX) <= YYMAXUTOK \ + ? YY_CAST (yysymbol_kind_t, yytranslate[YYX]) \ + : YYSYMBOL_YYUNDEF) /* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM as returned by yylex. */ @@ -528,15 +592,15 @@ static const yytype_int8 yytranslate[] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 21, 22, 25, 2, 17, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, - 2, 20, 2, 2, 2, 2, 2, 2, 2, 2, + 23, 24, 27, 2, 18, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 21, 17, + 2, 22, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 23, 2, 24, 2, 2, 2, 2, 2, 2, + 2, 25, 2, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 18, 2, 19, 2, 2, 2, 2, + 2, 2, 2, 19, 2, 20, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -551,57 +615,72 @@ static const yytype_int8 yytranslate[] = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15 + 15, 16 }; #if GRAMMARDEBUG /* YYRLINE[YYN] -- Source line where rule number YYN was defined. 
*/ static const yytype_uint8 yyrline[] = { - 0, 62, 62, 63, 67, 68, 72, 73, 74, 75, - 76, 77, 78, 79, 83, 84, 88, 93, 94, 95, - 99, 100, 101, 102, 103, 104, 108, 109, 113, 117, - 118, 119, 120, 124, 125, 129, 134, 135, 136, 137, - 138, 142, 143, 147, 148, 152, 153, 157, 158, 159, - 160, 162, 167, 168, 169, 170, 174, 175, 179, 180, - 184, 185, 189, 190, 191, 195, 196, 197, 201, 203, - 205, 209 + 0, 63, 63, 64, 68, 69, 73, 74, 75, 76, + 77, 78, 79, 80, 84, 85, 89, 94, 95, 96, + 100, 101, 102, 103, 104, 105, 109, 110, 114, 118, + 119, 120, 121, 125, 126, 130, 131, 135, 136, 137, + 138, 139, 143, 144, 148, 149, 153, 154, 158, 159, + 163, 164, 165, 166, 168, 173, 174, 175, 176, 180, + 181, 185, 186, 190, 191, 195, 196, 197, 201, 202, + 203, 207, 209, 211, 215 }; #endif -#if GRAMMARDEBUG || YYERROR_VERBOSE || 0 +/** Accessing symbol of state STATE. */ +#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State]) + +#if GRAMMARDEBUG || 0 +/* The user-facing name of the symbol whose (internal) number is + YYSYMBOL. No bounds checking. */ +static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED; + /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ static const char *const yytname[] = { - "$end", "error", "$undefined", "DOTDOTDOT", "BADTOKEN", "STRUCT", - "UNION", "ENUM", "DECLARATION_RESULT", "PARAM_RESULT", "NUMBER", - "IDENTIFIER", "STORAGE_CLASS_SPECIFIER", "TYPE_QUALIFIER", - "FUNCTION_SPECIFIER", "TYPE_NAME", "';'", "','", "'{'", "'}'", "'='", - "'('", "')'", "'['", "']'", "'*'", "$accept", "document", "declaration", + "\"end of file\"", "error", "\"invalid token\"", "DOTDOTDOT", + "BADTOKEN", "STRUCT", "UNION", "ENUM", "DECLARATION_RESULT", + "PARAM_RESULT", "SCOPERES", "NUMBER", "IDENTIFIER", + "STORAGE_CLASS_SPECIFIER", "TYPE_QUALIFIER", "FUNCTION_SPECIFIER", + "TYPE_NAME", "';'", "','", "'{'", "'}'", "':'", "'='", "'('", "')'", + "'['", "']'", "'*'", "$accept", "document", "declaration", "declaration_specifiers", "init_declarator_list", "init_declarator", "type_specifier", "struct_or_union_specifier", "struct_declaration_list", "struct_declaration", "specifier_qualifier_list", "struct_declarator_list", "struct_declarator", "enum_specifier", - "enumerator_list", "enumerator", "declarator", "direct_declarator", - "pointer", "type_qualifier_list", "parameter_type_list", - "parameter_list", "parameter_declaration", "abstract_declarator", - "direct_abstract_declarator", "assignment_expression", YY_NULLPTR + "enumerator_list", "enumerator", "declarator", "var_identifier", + "direct_declarator", "pointer", "type_qualifier_list", + "parameter_type_list", "parameter_list", "parameter_declaration", + "abstract_declarator", "direct_abstract_declarator", + "assignment_expression", YY_NULLPTR }; + +static const char * +yysymbol_name (yysymbol_kind_t yysymbol) +{ + return yytname[yysymbol]; +} #endif -# ifdef YYPRINT +#ifdef YYPRINT /* YYTOKNUM[NUM] -- (External) token number corresponding to the (internal) symbol number NUM (which must be that of a token). 
*/ static const yytype_int16 yytoknum[] = { 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 59, 44, 123, 125, - 61, 40, 41, 91, 93, 42 + 265, 266, 267, 268, 269, 270, 271, 59, 44, 123, + 125, 58, 61, 40, 41, 91, 93, 42 }; -# endif +#endif -#define YYPACT_NINF (-71) +#define YYPACT_NINF (-73) #define yypact_value_is_default(Yyn) \ ((Yyn) == YYPACT_NINF) @@ -615,18 +694,18 @@ static const yytype_int16 yytoknum[] = STATE-NUM. */ static const yytype_int16 yypact[] = { - 114, 102, 102, 16, 41, 59, 108, 102, 102, 102, - -71, -71, 62, 102, -71, -71, 9, -71, -71, 31, - 105, 46, 105, 54, 68, -71, -71, -71, -71, -71, - 12, 2, 116, -71, -71, 104, 70, -71, 9, -71, - 71, -71, 107, 105, 105, 105, 35, -71, 12, 105, - 38, 68, 65, 39, -71, 91, -71, -71, 11, -71, - 12, 102, 8, 104, 117, 107, 102, 128, 56, -71, - -71, -71, -71, 118, -71, -71, 61, -71, 112, 130, - 3, -71, -71, -71, -71, -71, 119, 76, -71, -71, - 111, 120, -71, 121, 122, -71, -71, 12, -71, 36, - -71, -71, -71, -71, -71, 83, 123, -71, -71, -71, - -71, -71, -71, -71, -71 + 119, 106, 106, 21, 42, 78, 105, 106, 106, 106, + -73, -73, 11, 106, -73, -73, 48, -73, -73, 29, + 109, 32, 109, 62, 18, -73, -73, -73, -73, -73, + 61, 8, 77, -73, -73, 66, 57, 2, -73, 48, + -73, 73, -73, 107, 109, 109, 109, 36, -73, 61, + 109, 39, 18, 64, 113, -73, 79, -73, -73, 10, + -73, 61, 95, 106, 4, 57, 111, 107, 106, 130, + 58, -73, -73, -73, -73, 120, -73, 121, 63, -73, + 116, 132, 37, -73, -73, -73, -73, -73, -73, 122, + 100, -73, -73, 115, 118, -73, 123, 124, -73, -73, + 61, 134, -73, 46, -73, -73, -73, -73, -73, 86, + 125, -73, -73, -73, -73, -73, -73, -73, -73, -73 }; /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. 
@@ -635,33 +714,33 @@ static const yytype_int16 yypact[] = static const yytype_int8 yydefact[] = { 0, 0, 0, 0, 0, 0, 0, 6, 8, 9, - 17, 2, 0, 7, 18, 19, 63, 3, 1, 22, - 0, 25, 0, 40, 0, 10, 12, 13, 47, 4, - 0, 52, 0, 14, 16, 45, 0, 11, 0, 62, - 65, 64, 66, 0, 31, 29, 0, 26, 0, 0, - 0, 0, 43, 0, 41, 0, 56, 54, 53, 5, - 0, 0, 0, 46, 0, 67, 0, 0, 0, 32, - 30, 20, 27, 0, 33, 35, 0, 23, 0, 0, - 0, 37, 48, 57, 55, 15, 0, 58, 60, 71, - 0, 0, 68, 0, 0, 21, 28, 0, 24, 0, - 36, 44, 39, 42, 51, 0, 0, 50, 70, 69, - 34, 38, 59, 61, 49 + 17, 2, 0, 7, 18, 19, 66, 3, 1, 22, + 0, 25, 0, 41, 0, 10, 12, 13, 48, 4, + 0, 55, 0, 14, 16, 50, 46, 0, 11, 0, + 65, 68, 67, 69, 0, 31, 29, 0, 26, 0, + 0, 0, 0, 44, 0, 42, 0, 59, 57, 56, + 5, 0, 0, 0, 0, 47, 0, 70, 0, 0, + 0, 32, 30, 20, 27, 0, 33, 35, 0, 23, + 0, 0, 0, 38, 51, 60, 58, 15, 49, 0, + 61, 63, 74, 0, 0, 71, 0, 0, 21, 28, + 0, 0, 24, 0, 37, 45, 40, 43, 54, 0, + 0, 53, 73, 72, 34, 36, 39, 62, 64, 52 }; /* YYPGOTO[NTERM-NUM]. */ static const yytype_int8 yypgoto[] = { - -71, -71, -71, 93, -71, 82, -1, -71, -11, -37, - 92, -71, 48, -71, 97, -70, -13, 63, -12, 87, - 84, -71, 0, 113, 115, -62 + -73, -73, -73, 97, -73, 87, -1, -73, -11, -38, + 94, -73, 49, -73, 101, -72, -13, -73, -21, -12, + 88, 89, -73, 0, 117, 114, -64 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int8 yydefgoto[] = { - -1, 3, 11, 16, 32, 33, 45, 14, 46, 47, - 48, 73, 74, 15, 53, 54, 34, 35, 36, 58, - 86, 87, 88, 41, 42, 91 + -1, 3, 11, 16, 32, 33, 46, 14, 47, 48, + 49, 75, 76, 15, 54, 55, 34, 35, 36, 37, + 59, 89, 90, 91, 42, 43, 94 }; /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If @@ -669,73 +748,73 @@ static const yytype_int8 yydefgoto[] = number is the opposite. If YYTABLE_NINF, syntax error. 
*/ static const yytype_int8 yytable[] = { - 13, 13, 17, 39, 40, 94, 13, 13, 13, 72, - 103, 50, 13, 72, 52, 56, 18, 55, 89, 57, - 28, 56, 102, 28, 83, 55, 40, 31, 106, 103, - 38, 72, 68, 30, 31, 75, 31, 31, 76, 72, - 4, 5, 6, 4, 5, 6, 84, 52, 44, 43, - 10, 44, 19, 10, 71, 111, 80, 77, 81, 20, - 13, 4, 5, 6, 49, 13, 4, 5, 6, 44, - 21, 10, 51, 28, 44, 95, 10, 22, 29, 52, - 98, 28, 28, 30, 75, 79, 112, 31, 4, 5, - 6, 30, 38, 105, 12, 7, 8, 9, 10, 63, - 25, 26, 27, 63, 13, 113, 37, 4, 5, 6, - 4, 5, 6, 82, 7, 8, 9, 10, 44, 23, - 10, 89, 1, 2, 83, 61, 24, 62, 66, 99, - 67, 100, 59, 60, 96, 97, 69, 70, 89, 92, - 101, 104, 85, 108, 107, 110, 109, 114, 78, 90, - 93, 64, 0, 0, 0, 65 + 13, 13, 17, 40, 41, 97, 13, 13, 13, 74, + 107, 51, 13, 74, 28, 92, 65, 56, 57, 58, + 65, 18, 57, 28, 85, 30, 56, 41, 29, 110, + 53, 107, 74, 70, 30, 31, 77, 31, 31, 78, + 74, 4, 5, 6, 4, 5, 6, 86, 44, 53, + 45, 50, 10, 45, 19, 10, 73, 106, 53, 79, + 28, 20, 13, 4, 5, 6, 116, 13, 4, 5, + 6, 39, 45, 28, 10, 31, 62, 45, 98, 10, + 63, 52, 64, 102, 30, 28, 81, 77, 31, 117, + 21, 4, 5, 6, 60, 61, 39, 22, 12, 7, + 8, 9, 10, 84, 25, 26, 27, 88, 13, 118, + 38, 4, 5, 6, 4, 5, 6, 23, 109, 7, + 8, 9, 10, 45, 24, 10, 92, 1, 2, 85, + 68, 82, 69, 83, 103, 95, 104, 99, 100, 71, + 72, 92, 101, 105, 111, 115, 108, 112, 87, 114, + 113, 119, 93, 80, 0, 67, 66, 96 }; static const yytype_int8 yycheck[] = { - 1, 2, 2, 16, 16, 67, 7, 8, 9, 46, - 80, 22, 13, 50, 11, 13, 0, 30, 10, 31, - 11, 13, 19, 11, 13, 38, 38, 25, 90, 99, - 21, 68, 43, 21, 25, 48, 25, 25, 49, 76, - 5, 6, 7, 5, 6, 7, 58, 11, 13, 18, - 15, 13, 11, 15, 19, 19, 17, 19, 19, 18, - 61, 5, 6, 7, 18, 66, 5, 6, 7, 13, - 11, 15, 18, 11, 13, 19, 15, 18, 16, 11, - 19, 11, 11, 21, 97, 20, 3, 25, 5, 6, - 7, 21, 21, 17, 1, 12, 13, 14, 15, 36, - 7, 8, 9, 40, 105, 105, 13, 5, 6, 7, - 5, 6, 7, 22, 12, 13, 14, 15, 13, 11, - 15, 10, 8, 9, 13, 21, 18, 23, 21, 17, - 23, 19, 16, 17, 16, 17, 44, 45, 10, 22, - 10, 22, 60, 22, 24, 97, 24, 24, 51, 62, - 66, 38, -1, -1, -1, 40 
+ 1, 2, 2, 16, 16, 69, 7, 8, 9, 47, + 82, 22, 13, 51, 12, 11, 37, 30, 14, 31, + 41, 0, 14, 12, 14, 23, 39, 39, 17, 93, + 12, 103, 70, 44, 23, 27, 49, 27, 27, 50, + 78, 5, 6, 7, 5, 6, 7, 59, 19, 12, + 14, 19, 16, 14, 12, 16, 20, 20, 12, 20, + 12, 19, 63, 5, 6, 7, 20, 68, 5, 6, + 7, 23, 14, 12, 16, 27, 10, 14, 20, 16, + 23, 19, 25, 20, 23, 12, 22, 100, 27, 3, + 12, 5, 6, 7, 17, 18, 23, 19, 1, 13, + 14, 15, 16, 24, 7, 8, 9, 12, 109, 109, + 13, 5, 6, 7, 5, 6, 7, 12, 18, 13, + 14, 15, 16, 14, 19, 16, 11, 8, 9, 14, + 23, 18, 25, 20, 18, 24, 20, 17, 18, 45, + 46, 11, 21, 11, 26, 11, 24, 24, 61, 100, + 26, 26, 64, 52, -1, 41, 39, 68 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing symbol of state STATE-NUM. */ static const yytype_int8 yystos[] = { - 0, 8, 9, 27, 5, 6, 7, 12, 13, 14, - 15, 28, 29, 32, 33, 39, 29, 48, 0, 11, - 18, 11, 18, 11, 18, 29, 29, 29, 11, 16, - 21, 25, 30, 31, 42, 43, 44, 29, 21, 42, - 44, 49, 50, 18, 13, 32, 34, 35, 36, 18, - 34, 18, 11, 40, 41, 42, 13, 44, 45, 16, - 17, 21, 23, 43, 49, 50, 21, 23, 34, 36, - 36, 19, 35, 37, 38, 42, 34, 19, 40, 20, - 17, 19, 22, 13, 44, 31, 46, 47, 48, 10, - 45, 51, 22, 46, 51, 19, 16, 17, 19, 17, - 19, 10, 19, 41, 22, 17, 51, 24, 22, 24, - 38, 19, 3, 48, 24 + 0, 8, 9, 29, 5, 6, 7, 13, 14, 15, + 16, 30, 31, 34, 35, 41, 31, 51, 0, 12, + 19, 12, 19, 12, 19, 31, 31, 31, 12, 17, + 23, 27, 32, 33, 44, 45, 46, 47, 31, 23, + 44, 47, 52, 53, 19, 14, 34, 36, 37, 38, + 19, 36, 19, 12, 42, 43, 44, 14, 47, 48, + 17, 18, 10, 23, 25, 46, 52, 53, 23, 25, + 36, 38, 38, 20, 37, 39, 40, 44, 36, 20, + 42, 22, 18, 20, 24, 14, 47, 33, 12, 49, + 50, 51, 11, 48, 54, 24, 49, 54, 20, 17, + 18, 21, 20, 18, 20, 11, 20, 43, 24, 18, + 54, 26, 24, 26, 40, 11, 20, 3, 51, 26 }; /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. 
*/ static const yytype_int8 yyr1[] = { - 0, 26, 27, 27, 28, 28, 29, 29, 29, 29, - 29, 29, 29, 29, 30, 30, 31, 32, 32, 32, - 33, 33, 33, 33, 33, 33, 34, 34, 35, 36, - 36, 36, 36, 37, 37, 38, 39, 39, 39, 39, - 39, 40, 40, 41, 41, 42, 42, 43, 43, 43, - 43, 43, 44, 44, 44, 44, 45, 45, 46, 46, - 47, 47, 48, 48, 48, 49, 49, 49, 50, 50, - 50, 51 + 0, 28, 29, 29, 30, 30, 31, 31, 31, 31, + 31, 31, 31, 31, 32, 32, 33, 34, 34, 34, + 35, 35, 35, 35, 35, 35, 36, 36, 37, 38, + 38, 38, 38, 39, 39, 40, 40, 41, 41, 41, + 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, + 46, 46, 46, 46, 46, 47, 47, 47, 47, 48, + 48, 49, 49, 50, 50, 51, 51, 51, 52, 52, + 52, 53, 53, 53, 54 }; /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ @@ -744,18 +823,18 @@ static const yytype_int8 yyr2[] = 0, 2, 2, 2, 2, 3, 1, 1, 1, 1, 2, 2, 2, 2, 1, 3, 1, 1, 1, 1, 4, 5, 2, 4, 5, 2, 1, 2, 3, 1, - 2, 1, 2, 1, 3, 1, 5, 4, 6, 5, - 2, 1, 3, 1, 3, 1, 2, 1, 3, 5, - 4, 4, 1, 2, 2, 3, 1, 2, 1, 3, - 1, 3, 2, 1, 2, 1, 1, 2, 3, 4, - 4, 1 + 2, 1, 2, 1, 3, 1, 3, 5, 4, 6, + 5, 2, 1, 3, 1, 3, 1, 2, 1, 3, + 1, 3, 5, 4, 4, 1, 2, 2, 3, 1, + 2, 1, 3, 1, 3, 2, 1, 2, 1, 1, + 2, 3, 4, 4, 1 }; +enum { YYENOMEM = -2 }; + #define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 +#define yyclearin (yychar = GRAMMAREMPTY) #define YYACCEPT goto yyacceptlab #define YYABORT goto yyabortlab @@ -766,7 +845,7 @@ static const yytype_int8 yyr2[] = #define YYBACKUP(Token, Value) \ do \ - if (yychar == YYEMPTY) \ + if (yychar == GRAMMAREMPTY) \ { \ yychar = (Token); \ yylval = (Value); \ @@ -781,10 +860,9 @@ static const yytype_int8 yyr2[] = } \ while (0) -/* Error token number */ -#define YYTERROR 1 -#define YYERRCODE 256 - +/* Backward compatibility with an undocumented macro. + Use GRAMMARerror or GRAMMARUNDEF. */ +#define YYERRCODE GRAMMARUNDEF /* Enable debugging if requested. */ @@ -802,18 +880,18 @@ do { \ } while (0) /* This macro is provided for backward compatibility. 
*/ -#ifndef YY_LOCATION_PRINT -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -#endif +# ifndef YY_LOCATION_PRINT +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +# endif -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) \ do { \ if (yydebug) \ { \ YYFPRINTF (stderr, "%s ", Title); \ yy_symbol_print (stderr, \ - Type, Value); \ + Kind, Value); \ YYFPRINTF (stderr, "\n"); \ } \ } while (0) @@ -824,18 +902,19 @@ do { \ `-----------------------------------*/ static void -yy_symbol_value_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep) +yy_symbol_value_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep) { FILE *yyoutput = yyo; YYUSE (yyoutput); if (!yyvaluep) return; # ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyo, yytoknum[yytype], *yyvaluep); + if (yykind < YYNTOKENS) + YYPRINT (yyo, yytoknum[yykind], *yyvaluep); # endif YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN - YYUSE (yytype); + YYUSE (yykind); YY_IGNORE_MAYBE_UNINITIALIZED_END } @@ -845,12 +924,13 @@ yy_symbol_value_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep) `---------------------------*/ static void -yy_symbol_print (FILE *yyo, int yytype, YYSTYPE const * const yyvaluep) +yy_symbol_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep) { YYFPRINTF (yyo, "%s %s (", - yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]); + yykind < YYNTOKENS ? 
"token" : "nterm", yysymbol_name (yykind)); - yy_symbol_value_print (yyo, yytype, yyvaluep); + yy_symbol_value_print (yyo, yykind, yyvaluep); YYFPRINTF (yyo, ")"); } @@ -883,7 +963,8 @@ do { \ `------------------------------------------------*/ static void -yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, int yyrule) +yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, + int yyrule) { int yylno = yyrline[yyrule]; int yynrhs = yyr2[yyrule]; @@ -895,9 +976,8 @@ yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, int yyrule) { YYFPRINTF (stderr, " $%d = ", yyi + 1); yy_symbol_print (stderr, - yystos[+yyssp[yyi + 1 - yynrhs]], - &yyvsp[(yyi + 1) - (yynrhs)] - ); + YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]), + &yyvsp[(yyi + 1) - (yynrhs)]); YYFPRINTF (stderr, "\n"); } } @@ -912,8 +992,8 @@ do { \ multiple parsers can coexist. */ int yydebug; #else /* !GRAMMARDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YYDPRINTF(Args) ((void) 0) +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) # define YY_STACK_PRINT(Bottom, Top) # define YY_REDUCE_PRINT(Rule) #endif /* !GRAMMARDEBUG */ @@ -936,259 +1016,30 @@ int yydebug; #endif -#if YYERROR_VERBOSE -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen(S) (YY_CAST (YYPTRDIFF_T, strlen (S))) -# else -/* Return the length of YYSTR. */ -static YYPTRDIFF_T -yystrlen (const char *yystr) -{ - YYPTRDIFF_T yylen; - for (yylen = 0; yystr[yylen]; yylen++) - continue; - return yylen; -} -# endif -# endif -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. 
*/ -static char * -yystpcpy (char *yydest, const char *yysrc) -{ - char *yyd = yydest; - const char *yys = yysrc; - while ((*yyd++ = *yys++) != '\0') - continue; - - return yyd - 1; -} -# endif -# endif - -# ifndef yytnamerr -/* Copy to YYRES the contents of YYSTR after stripping away unnecessary - quotes and backslashes, so that it's suitable for yyerror. The - heuristic is that double-quoting is unnecessary unless the string - contains an apostrophe, a comma, or backslash (other than - backslash-backslash). YYSTR is taken from yytname. If YYRES is - null, do not copy; instead, return the length of what the result - would have been. */ -static YYPTRDIFF_T -yytnamerr (char *yyres, const char *yystr) -{ - if (*yystr == '"') - { - YYPTRDIFF_T yyn = 0; - char const *yyp = yystr; - - for (;;) - switch (*++yyp) - { - case '\'': - case ',': - goto do_not_strip_quotes; - - case '\\': - if (*++yyp != '\\') - goto do_not_strip_quotes; - else - goto append; - - append: - default: - if (yyres) - yyres[yyn] = *yyp; - yyn++; - break; - - case '"': - if (yyres) - yyres[yyn] = '\0'; - return yyn; - } - do_not_strip_quotes: ; - } - - if (yyres) - return yystpcpy (yyres, yystr) - yyres; - else - return yystrlen (yystr); -} -# endif - -/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message - about the unexpected token YYTOKEN for the state stack whose top is - YYSSP. - - Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is - not large enough to hold the message. In that case, also set - *YYMSG_ALLOC to the required number of bytes. Return 2 if the - required number of bytes is too large to store. */ -static int -yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, - yy_state_t *yyssp, int yytoken) -{ - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - /* Internationalized format string. */ - const char *yyformat = YY_NULLPTR; - /* Arguments of yyformat: reported tokens (one for the "unexpected", - one per "expected"). 
*/ - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; - /* Actual size of YYARG. */ - int yycount = 0; - /* Cumulated lengths of YYARG. */ - YYPTRDIFF_T yysize = 0; - - /* There are many possibilities here to consider: - - If this state is a consistent state with a default action, then - the only way this function was invoked is if the default action - is an error action. In that case, don't check for expected - tokens because there are none. - - The only way there can be no lookahead present (in yychar) is if - this state is a consistent state with a default action. Thus, - detecting the absence of a lookahead is sufficient to determine - that there is no unexpected or expected token to report. In that - case, just report a simple "syntax error". - - Don't assume there isn't a lookahead just because this state is a - consistent state with a default action. There might have been a - previous inconsistent state, consistent state with a non-default - action, or user semantic action that manipulated yychar. - - Of course, the expected token list depends on states to have - correct lookahead information, and it depends on the parser not - to perform extra reductions after fetching a lookahead from the - scanner and before detecting a syntax error. Thus, state merging - (from LALR or IELR) and default reductions corrupt the expected - token list. However, the list is correct for canonical LR with - one exception: it will still contain any token that will not be - accepted due to an error action in a later state. - */ - if (yytoken != YYEMPTY) - { - int yyn = yypact[+*yyssp]; - YYPTRDIFF_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]); - yysize = yysize0; - yyarg[yycount++] = yytname[yytoken]; - if (!yypact_value_is_default (yyn)) - { - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. In other words, skip the first -YYN actions for - this state because they are default actions. */ - int yyxbegin = yyn < 0 ? 
-yyn : 0; - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; - int yyx; - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR - && !yytable_value_is_error (yytable[yyx + yyn])) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - break; - } - yyarg[yycount++] = yytname[yyx]; - { - YYPTRDIFF_T yysize1 - = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]); - if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM) - yysize = yysize1; - else - return 2; - } - } - } - } - - switch (yycount) - { -# define YYCASE_(N, S) \ - case N: \ - yyformat = S; \ - break - default: /* Avoid compiler warnings. */ - YYCASE_(0, YY_("syntax error")); - YYCASE_(1, YY_("syntax error, unexpected %s")); - YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); - YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); - YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); - YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); -# undef YYCASE_ - } - - { - /* Don't count the "%s"s in the final size, but reserve room for - the terminator. */ - YYPTRDIFF_T yysize1 = yysize + (yystrlen (yyformat) - 2 * yycount) + 1; - if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM) - yysize = yysize1; - else - return 2; - } - - if (*yymsg_alloc < yysize) - { - *yymsg_alloc = 2 * yysize; - if (! (yysize <= *yymsg_alloc - && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) - *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; - return 1; - } - - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. 
*/ - { - char *yyp = *yymsg; - int yyi = 0; - while ((*yyp = *yyformat) != '\0') - if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyformat += 2; - } - else - { - ++yyp; - ++yyformat; - } - } - return 0; -} -#endif /* YYERROR_VERBOSE */ /*-----------------------------------------------. | Release the memory associated to this symbol. | `-----------------------------------------------*/ static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep) +yydestruct (const char *yymsg, + yysymbol_kind_t yykind, YYSTYPE *yyvaluep) { YYUSE (yyvaluep); if (!yymsg) yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp); YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN - YYUSE (yytype); + YYUSE (yykind); YY_IGNORE_MAYBE_UNINITIALIZED_END } - - -/* The lookahead symbol. */ +/* Lookahead token kind. */ int yychar; /* The semantic value of the lookahead symbol. */ @@ -1197,6 +1048,8 @@ YYSTYPE yylval; int yynerrs; + + /*----------. | yyparse. | `----------*/ @@ -1204,43 +1057,36 @@ int yynerrs; int yyparse (void) { - yy_state_fast_t yystate; + yy_state_fast_t yystate = 0; /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; + int yyerrstatus = 0; - /* The stacks and their tools: - 'yyss': related to states. - 'yyvs': related to semantic values. - - Refer to the stacks through separate pointers, to allow yyoverflow + /* Refer to the stacks through separate pointers, to allow yyoverflow to reallocate them elsewhere. */ - /* The state stack. */ + /* Their size. */ + YYPTRDIFF_T yystacksize = YYINITDEPTH; + + /* The state stack: array, bottom, top. */ yy_state_t yyssa[YYINITDEPTH]; - yy_state_t *yyss; - yy_state_t *yyssp; + yy_state_t *yyss = yyssa; + yy_state_t *yyssp = yyss; - /* The semantic value stack. */ + /* The semantic value stack: array, bottom, top. 
*/ YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs; - YYSTYPE *yyvsp; - - YYPTRDIFF_T yystacksize; + YYSTYPE *yyvs = yyvsa; + YYSTYPE *yyvsp = yyvs; int yyn; + /* The return value of yyparse. */ int yyresult; - /* Lookahead token as an internal (translated) token number. */ - int yytoken = 0; + /* Lookahead symbol kind. */ + yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY; /* The variables used to return semantic value and location from the action routines. */ YYSTYPE yyval; -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYPTRDIFF_T yymsg_alloc = sizeof yymsgbuf; -#endif + #define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) @@ -1248,16 +1094,9 @@ yyparse (void) Keep to zero when no symbol should be popped. */ int yylen = 0; - yyssp = yyss = yyssa; - yyvsp = yyvs = yyvsa; - yystacksize = YYINITDEPTH; - YYDPRINTF ((stderr, "Starting parse\n")); - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ + yychar = GRAMMAREMPTY; /* Cause a token to be read. */ goto yysetstate; @@ -1279,6 +1118,7 @@ yysetstate: YY_IGNORE_USELESS_CAST_BEGIN *yyssp = YY_CAST (yy_state_t, yystate); YY_IGNORE_USELESS_CAST_END + YY_STACK_PRINT (yyss, yyssp); if (yyss + yystacksize - 1 <= yyssp) #if !defined yyoverflow && !defined YYSTACK_RELOCATE @@ -1324,7 +1164,7 @@ yysetstate: goto yyexhaustedlab; YYSTACK_RELOCATE (yyss_alloc, yyss); YYSTACK_RELOCATE (yyvs_alloc, yyvs); -# undef YYSTACK_RELOCATE +# undef YYSTACK_RELOCATE if (yyss1 != yyssa) YYSTACK_FREE (yyss1); } @@ -1363,18 +1203,29 @@ yybackup: /* Not known => get a lookahead token if don't already have one. */ - /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ - if (yychar == YYEMPTY) + /* YYCHAR is either empty, or end-of-input, or a valid lookahead. 
*/ + if (yychar == GRAMMAREMPTY) { - YYDPRINTF ((stderr, "Reading a token: ")); + YYDPRINTF ((stderr, "Reading a token\n")); yychar = yylex (); } - if (yychar <= YYEOF) + if (yychar <= GRAMMAREOF) { - yychar = yytoken = YYEOF; + yychar = GRAMMAREOF; + yytoken = YYSYMBOL_YYEOF; YYDPRINTF ((stderr, "Now at end of input.\n")); } + else if (yychar == GRAMMARerror) + { + /* The scanner already issued an error message, process directly + to error recovery. But do not keep the error token as + lookahead, it is too special and may lead us to an endless + loop in error recovery. */ + yychar = GRAMMARUNDEF; + yytoken = YYSYMBOL_YYerror; + goto yyerrlab1; + } else { yytoken = YYTRANSLATE (yychar); @@ -1408,7 +1259,7 @@ yybackup: YY_IGNORE_MAYBE_UNINITIALIZED_END /* Discard the shifted token. */ - yychar = YYEMPTY; + yychar = GRAMMAREMPTY; goto yynewstate; @@ -1443,283 +1294,295 @@ yyreduce: YY_REDUCE_PRINT (yyn); switch (yyn) { - case 2: + case 2: /* document: DECLARATION_RESULT declaration */ { parse->setResultDeclarations((yyvsp[0].declist)); } break; - case 3: + case 3: /* document: PARAM_RESULT parameter_declaration */ { vector *res = parse->newVecDeclarator(); res->push_back((yyvsp[0].dec)); parse->setResultDeclarations(res); } break; - case 4: + case 4: /* declaration: declaration_specifiers ';' */ { (yyval.declist) = parse->mergeSpecDecVec((yyvsp[-1].spec)); } break; - case 5: + case 5: /* declaration: declaration_specifiers init_declarator_list ';' */ { (yyval.declist) = parse->mergeSpecDecVec((yyvsp[-2].spec),(yyvsp[-1].declist)); } break; - case 6: + case 6: /* declaration_specifiers: STORAGE_CLASS_SPECIFIER */ { (yyval.spec) = parse->newSpecifier(); parse->addSpecifier((yyval.spec),(yyvsp[0].str)); } break; - case 7: + case 7: /* declaration_specifiers: type_specifier */ { (yyval.spec) = parse->newSpecifier(); parse->addTypeSpecifier((yyval.spec),(yyvsp[0].type)); } break; - case 8: + case 8: /* declaration_specifiers: TYPE_QUALIFIER */ { (yyval.spec) = 
parse->newSpecifier(); parse->addSpecifier((yyval.spec),(yyvsp[0].str)); } break; - case 9: + case 9: /* declaration_specifiers: FUNCTION_SPECIFIER */ { (yyval.spec) = parse->newSpecifier(); parse->addFuncSpecifier((yyval.spec),(yyvsp[0].str)); } break; - case 10: + case 10: /* declaration_specifiers: STORAGE_CLASS_SPECIFIER declaration_specifiers */ { (yyval.spec) = parse->addSpecifier((yyvsp[0].spec),(yyvsp[-1].str)); } break; - case 11: + case 11: /* declaration_specifiers: type_specifier declaration_specifiers */ { (yyval.spec) = parse->addTypeSpecifier((yyvsp[0].spec),(yyvsp[-1].type)); } break; - case 12: + case 12: /* declaration_specifiers: TYPE_QUALIFIER declaration_specifiers */ { (yyval.spec) = parse->addSpecifier((yyvsp[0].spec),(yyvsp[-1].str)); } break; - case 13: + case 13: /* declaration_specifiers: FUNCTION_SPECIFIER declaration_specifiers */ { (yyval.spec) = parse->addFuncSpecifier((yyvsp[0].spec),(yyvsp[-1].str)); } break; - case 14: + case 14: /* init_declarator_list: init_declarator */ { (yyval.declist) = parse->newVecDeclarator(); (yyval.declist)->push_back((yyvsp[0].dec)); } break; - case 15: + case 15: /* init_declarator_list: init_declarator_list ',' init_declarator */ { (yyval.declist) = (yyvsp[-2].declist); (yyval.declist)->push_back((yyvsp[0].dec)); } break; - case 16: + case 16: /* init_declarator: declarator */ { (yyval.dec) = (yyvsp[0].dec); } break; - case 17: + case 17: /* type_specifier: TYPE_NAME */ { (yyval.type) = (yyvsp[0].type); } break; - case 18: + case 18: /* type_specifier: struct_or_union_specifier */ { (yyval.type) = (yyvsp[0].type); } break; - case 19: + case 19: /* type_specifier: enum_specifier */ { (yyval.type) = (yyvsp[0].type); } break; - case 20: + case 20: /* struct_or_union_specifier: STRUCT '{' struct_declaration_list '}' */ { (yyval.type) = parse->newStruct("",(yyvsp[-1].declist)); } break; - case 21: + case 21: /* struct_or_union_specifier: STRUCT IDENTIFIER '{' struct_declaration_list '}' */ { (yyval.type) = 
parse->newStruct(*(yyvsp[-3].str),(yyvsp[-1].declist)); } break; - case 22: + case 22: /* struct_or_union_specifier: STRUCT IDENTIFIER */ { (yyval.type) = parse->oldStruct(*(yyvsp[0].str)); } break; - case 23: + case 23: /* struct_or_union_specifier: UNION '{' struct_declaration_list '}' */ { (yyval.type) = parse->newUnion("",(yyvsp[-1].declist)); } break; - case 24: + case 24: /* struct_or_union_specifier: UNION IDENTIFIER '{' struct_declaration_list '}' */ { (yyval.type) = parse->newUnion(*(yyvsp[-3].str),(yyvsp[-1].declist)); } break; - case 25: + case 25: /* struct_or_union_specifier: UNION IDENTIFIER */ { (yyval.type) = parse->oldUnion(*(yyvsp[0].str)); } break; - case 26: + case 26: /* struct_declaration_list: struct_declaration */ { (yyval.declist) = (yyvsp[0].declist); } break; - case 27: + case 27: /* struct_declaration_list: struct_declaration_list struct_declaration */ { (yyval.declist) = (yyvsp[-1].declist); (yyval.declist)->insert((yyval.declist)->end(),(yyvsp[0].declist)->begin(),(yyvsp[0].declist)->end()); } break; - case 28: + case 28: /* struct_declaration: specifier_qualifier_list struct_declarator_list ';' */ { (yyval.declist) = parse->mergeSpecDecVec((yyvsp[-2].spec),(yyvsp[-1].declist)); } break; - case 29: + case 29: /* specifier_qualifier_list: type_specifier */ { (yyval.spec) = parse->newSpecifier(); parse->addTypeSpecifier((yyval.spec),(yyvsp[0].type)); } break; - case 30: + case 30: /* specifier_qualifier_list: type_specifier specifier_qualifier_list */ { (yyval.spec) = parse->addTypeSpecifier((yyvsp[0].spec),(yyvsp[-1].type)); } break; - case 31: + case 31: /* specifier_qualifier_list: TYPE_QUALIFIER */ { (yyval.spec) = parse->newSpecifier(); parse->addSpecifier((yyval.spec),(yyvsp[0].str)); } break; - case 32: + case 32: /* specifier_qualifier_list: TYPE_QUALIFIER specifier_qualifier_list */ { (yyval.spec) = parse->addSpecifier((yyvsp[0].spec),(yyvsp[-1].str)); } break; - case 33: + case 33: /* struct_declarator_list: struct_declarator 
*/ { (yyval.declist) = parse->newVecDeclarator(); (yyval.declist)->push_back((yyvsp[0].dec)); } break; - case 34: + case 34: /* struct_declarator_list: struct_declarator_list ',' struct_declarator */ { (yyval.declist) = (yyvsp[-2].declist); (yyval.declist)->push_back((yyvsp[0].dec)); } break; - case 35: + case 35: /* struct_declarator: declarator */ { (yyval.dec) = (yyvsp[0].dec); } break; - case 36: + case 36: /* struct_declarator: declarator ':' NUMBER */ + { (yyval.dec) = (yyvsp[-2].dec); (yyvsp[-2].dec)->setNumBits((int4)*(yyvsp[0].i)); } + break; + + case 37: /* enum_specifier: ENUM IDENTIFIER '{' enumerator_list '}' */ { (yyval.type) = parse->newEnum(*(yyvsp[-3].str),(yyvsp[-1].vecenum)); } break; - case 37: + case 38: /* enum_specifier: ENUM '{' enumerator_list '}' */ { (yyval.type) = parse->newEnum("",(yyvsp[-1].vecenum)); } break; - case 38: + case 39: /* enum_specifier: ENUM IDENTIFIER '{' enumerator_list ',' '}' */ { (yyval.type) = parse->newEnum(*(yyvsp[-4].str),(yyvsp[-2].vecenum)); } break; - case 39: + case 40: /* enum_specifier: ENUM '{' enumerator_list ',' '}' */ { (yyval.type) = parse->newEnum("",(yyvsp[-2].vecenum)); } break; - case 40: + case 41: /* enum_specifier: ENUM IDENTIFIER */ { (yyval.type) = parse->oldEnum(*(yyvsp[0].str)); } break; - case 41: + case 42: /* enumerator_list: enumerator */ { (yyval.vecenum) = parse->newVecEnumerator(); (yyval.vecenum)->push_back((yyvsp[0].enumer)); } break; - case 42: + case 43: /* enumerator_list: enumerator_list ',' enumerator */ { (yyval.vecenum) = (yyvsp[-2].vecenum); (yyval.vecenum)->push_back((yyvsp[0].enumer)); } break; - case 43: + case 44: /* enumerator: IDENTIFIER */ { (yyval.enumer) = parse->newEnumerator(*(yyvsp[0].str)); } break; - case 44: + case 45: /* enumerator: IDENTIFIER '=' NUMBER */ { (yyval.enumer) = parse->newEnumerator(*(yyvsp[-2].str),*(yyvsp[0].i)); } break; - case 45: + case 46: /* declarator: direct_declarator */ { (yyval.dec) = (yyvsp[0].dec); } break; - case 46: + case 47: /* 
declarator: pointer direct_declarator */ { (yyval.dec) = parse->mergePointer((yyvsp[-1].ptrspec),(yyvsp[0].dec)); } break; - case 47: - { (yyval.dec) = parse->newDeclarator((yyvsp[0].str)); } + case 48: /* var_identifier: IDENTIFIER */ + { (yyval.str) = (yyvsp[0].str); } break; - case 48: + case 49: /* var_identifier: var_identifier SCOPERES IDENTIFIER */ + { (yyval.str) = (yyvsp[-2].str); (yyval.str)->append("::"); (yyval.str)->append(*(yyvsp[0].str)); } + break; + + case 50: /* direct_declarator: var_identifier */ + { (yyval.dec) = parse->newDeclarator((yyvsp[0].str)); } + break; + + case 51: /* direct_declarator: '(' declarator ')' */ { (yyval.dec) = (yyvsp[-1].dec); } break; - case 49: + case 52: /* direct_declarator: direct_declarator '[' type_qualifier_list assignment_expression ']' */ { (yyval.dec) = parse->newArray((yyvsp[-4].dec),(yyvsp[-2].flags),(yyvsp[-1].i)); } break; - case 50: + case 53: /* direct_declarator: direct_declarator '[' assignment_expression ']' */ { (yyval.dec) = parse->newArray((yyvsp[-3].dec),0,(yyvsp[-1].i)); } break; - case 51: + case 54: /* direct_declarator: direct_declarator '(' parameter_type_list ')' */ { (yyval.dec) = parse->newFunc((yyvsp[-3].dec),(yyvsp[-1].declist)); } break; - case 52: + case 55: /* pointer: '*' */ { (yyval.ptrspec) = parse->newPointer(); (yyval.ptrspec)->push_back(0); } break; - case 53: + case 56: /* pointer: '*' type_qualifier_list */ { (yyval.ptrspec) = parse->newPointer(); (yyval.ptrspec)->push_back((yyvsp[0].flags)); } break; - case 54: + case 57: /* pointer: '*' pointer */ { (yyval.ptrspec) = (yyvsp[0].ptrspec); (yyval.ptrspec)->push_back(0); } break; - case 55: + case 58: /* pointer: '*' type_qualifier_list pointer */ { (yyval.ptrspec) = (yyvsp[0].ptrspec); (yyval.ptrspec)->push_back((yyvsp[-1].flags)); } break; - case 56: + case 59: /* type_qualifier_list: TYPE_QUALIFIER */ { (yyval.flags) = parse->convertFlag((yyvsp[0].str)); } break; - case 57: + case 60: /* type_qualifier_list: 
type_qualifier_list TYPE_QUALIFIER */ { (yyval.flags) = (yyvsp[-1].flags); (yyval.flags) |= parse->convertFlag((yyvsp[0].str)); } break; - case 58: + case 61: /* parameter_type_list: parameter_list */ { (yyval.declist) = (yyvsp[0].declist); } break; - case 59: + case 62: /* parameter_type_list: parameter_list ',' DOTDOTDOT */ { (yyval.declist) = (yyvsp[-2].declist); (yyval.declist)->push_back((TypeDeclarator *)0); } break; - case 60: + case 63: /* parameter_list: parameter_declaration */ { (yyval.declist) = parse->newVecDeclarator(); (yyval.declist)->push_back((yyvsp[0].dec)); } break; - case 61: + case 64: /* parameter_list: parameter_list ',' parameter_declaration */ { (yyval.declist) = (yyvsp[-2].declist); (yyval.declist)->push_back((yyvsp[0].dec)); } break; - case 62: + case 65: /* parameter_declaration: declaration_specifiers declarator */ { (yyval.dec) = parse->mergeSpecDec((yyvsp[-1].spec),(yyvsp[0].dec)); } break; - case 63: + case 66: /* parameter_declaration: declaration_specifiers */ { (yyval.dec) = parse->mergeSpecDec((yyvsp[0].spec)); } break; - case 64: + case 67: /* parameter_declaration: declaration_specifiers abstract_declarator */ { (yyval.dec) = parse->mergeSpecDec((yyvsp[-1].spec),(yyvsp[0].dec)); } break; - case 65: + case 68: /* abstract_declarator: pointer */ { (yyval.dec) = parse->newDeclarator(); parse->mergePointer((yyvsp[0].ptrspec),(yyval.dec)); } break; - case 66: + case 69: /* abstract_declarator: direct_abstract_declarator */ { (yyval.dec) = (yyvsp[0].dec); } break; - case 67: + case 70: /* abstract_declarator: pointer direct_abstract_declarator */ { (yyval.dec) = parse->mergePointer((yyvsp[-1].ptrspec),(yyvsp[0].dec)); } break; - case 68: + case 71: /* direct_abstract_declarator: '(' abstract_declarator ')' */ { (yyval.dec) = (yyvsp[-1].dec); } break; - case 69: + case 72: /* direct_abstract_declarator: direct_abstract_declarator '[' assignment_expression ']' */ { (yyval.dec) = parse->newArray((yyvsp[-3].dec),0,(yyvsp[-1].i)); } 
break; - case 70: + case 73: /* direct_abstract_declarator: direct_abstract_declarator '(' parameter_type_list ')' */ { (yyval.dec) = parse->newFunc((yyvsp[-3].dec),(yyvsp[-1].declist)); } break; - case 71: + case 74: /* assignment_expression: NUMBER */ { (yyval.i) = (yyvsp[0].i); } break; @@ -1738,11 +1601,10 @@ yyreduce: case of YYERROR or YYBACKUP, subsequent parser actions might lead to an incorrect destructor call or verbose syntax error message before the lookahead is translated. */ - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc); YYPOPSTACK (yylen); yylen = 0; - YY_STACK_PRINT (yyss, yyssp); *++yyvsp = yyval; @@ -1766,66 +1628,30 @@ yyreduce: yyerrlab: /* Make sure we have latest lookahead translation. See comments at user semantic actions for why this is necessary. */ - yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); - + yytoken = yychar == GRAMMAREMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar); /* If not already recovering from an error, report this error. */ if (!yyerrstatus) { ++yynerrs; -#if ! 
YYERROR_VERBOSE yyerror (YY_("syntax error")); -#else -# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ - yyssp, yytoken) - { - char const *yymsgp = YY_("syntax error"); - int yysyntax_error_status; - yysyntax_error_status = YYSYNTAX_ERROR; - if (yysyntax_error_status == 0) - yymsgp = yymsg; - else if (yysyntax_error_status == 1) - { - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = YY_CAST (char *, YYSTACK_ALLOC (YY_CAST (YYSIZE_T, yymsg_alloc))); - if (!yymsg) - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - yysyntax_error_status = 2; - } - else - { - yysyntax_error_status = YYSYNTAX_ERROR; - yymsgp = yymsg; - } - } - yyerror (yymsgp); - if (yysyntax_error_status == 2) - goto yyexhaustedlab; - } -# undef YYSYNTAX_ERROR -#endif } - - if (yyerrstatus == 3) { /* If just tried and failed to reuse lookahead token after an error, discard it. */ - if (yychar <= YYEOF) + if (yychar <= GRAMMAREOF) { /* Return failure if at end of input. */ - if (yychar == YYEOF) + if (yychar == GRAMMAREOF) YYABORT; } else { yydestruct ("Error: discarding", yytoken, &yylval); - yychar = YYEMPTY; + yychar = GRAMMAREMPTY; } } @@ -1858,13 +1684,14 @@ yyerrorlab: yyerrlab1: yyerrstatus = 3; /* Each real token shifted decrements this. */ + /* Pop stack until we find a state that shifts the error token. */ for (;;) { yyn = yypact[yystate]; if (!yypact_value_is_default (yyn)) { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + yyn += YYSYMBOL_YYerror; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror) { yyn = yytable[yyn]; if (0 < yyn) @@ -1878,7 +1705,7 @@ yyerrlab1: yydestruct ("Error: popping", - yystos[yystate], yyvsp); + YY_ACCESSING_SYMBOL (yystate), yyvsp); YYPOPSTACK (1); yystate = *yyssp; YY_STACK_PRINT (yyss, yyssp); @@ -1890,7 +1717,7 @@ yyerrlab1: /* Shift the error token. 
*/ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp); yystate = yyn; goto yynewstate; @@ -1912,22 +1739,22 @@ yyabortlab: goto yyreturn; -#if !defined yyoverflow || YYERROR_VERBOSE +#if !defined yyoverflow /*-------------------------------------------------. | yyexhaustedlab -- memory exhaustion comes here. | `-------------------------------------------------*/ yyexhaustedlab: yyerror (YY_("memory exhausted")); yyresult = 2; - /* Fall through. */ + goto yyreturn; #endif -/*-----------------------------------------------------. -| yyreturn -- parsing is finished, return the result. | -`-----------------------------------------------------*/ +/*-------------------------------------------------------. +| yyreturn -- parsing is finished, clean up and return. | +`-------------------------------------------------------*/ yyreturn: - if (yychar != YYEMPTY) + if (yychar != GRAMMAREMPTY) { /* Make sure we have latest lookahead translation. See comments at user semantic actions for why this is necessary. 
*/ @@ -1942,21 +1769,19 @@ yyreturn: while (yyssp != yyss) { yydestruct ("Cleanup: popping", - yystos[+*yyssp], yyvsp); + YY_ACCESSING_SYMBOL (+*yyssp), yyvsp); YYPOPSTACK (1); } #ifndef yyoverflow if (yyss != yyssa) YYSTACK_FREE (yyss); #endif -#if YYERROR_VERBOSE - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); -#endif + return yyresult; } + void GrammarToken::set(uint4 tp) { @@ -2107,6 +1932,9 @@ uint4 GrammarLexer::moveState(char lookahead) state = punctuation; bufstart = bufend-1; break; + case ':': + state = scoperes1; + break; case '-': case '0': case '1': @@ -2215,6 +2043,19 @@ uint4 GrammarLexer::moveState(char lookahead) state = start; res = GrammarToken::dotdotdot; break; + case scoperes1: + if (lookahead == ':') { + state = scoperes2; + } + else { + state = start; + res = ':'; + } + break; + case scoperes2: + state = start; + res = GrammarToken::scoperes; + break; case punctuation: state = start; res = (uint4)buffer[bufstart]; @@ -2275,7 +2116,7 @@ uint4 GrammarLexer::moveState(char lookahead) } else if ((lookahead>='a')&&(lookahead<='z')) { } - else if (lookahead == '_' || lookahead == ':') { + else if (lookahead == '_') { } else { state = start; @@ -2781,6 +2622,8 @@ Datatype *CParse::newStruct(const string &ident,vector *declis { // Build a new structure TypeStruct *res = glb->types->getTypeStruct(ident); // Create stub (for recursion) vector sublist; + vector bitlist; + bool isBigEndian = glb->getDefaultDataSpace()->isBigEndian(); for(uint4 i=0;isize();++i) { TypeDeclarator *decl = (*declist)[i]; @@ -2789,14 +2632,14 @@ Datatype *CParse::newStruct(const string &ident,vector *declis glb->types->destroyType(res); return (Datatype *)0; } - sublist.emplace_back(0,-1,decl->getIdentifier(),decl->buildType(glb)); + if (decl->getNumBits() != 0) + bitlist.emplace_back(sublist.size(),decl->getNumBits(),isBigEndian,decl->getIdentifier(),decl->buildType(glb)); + else + sublist.emplace_back(0,-1,decl->getIdentifier(),decl->buildType(glb)); } try { - int4 
newSize; - int4 newAlign; - TypeStruct::assignFieldOffsets(sublist,newSize,newAlign); - glb->types->setFields(sublist,res,newSize,newAlign,0); + glb->types->assignRawFields(res,sublist,bitlist); } catch (LowlevelError &err) { setError(err.explain); @@ -2832,10 +2675,7 @@ Datatype *CParse::newUnion(const string &ident,vector *declist } try { - int4 newSize; - int4 newAlign; - TypeUnion::assignFieldOffsets(sublist,newSize,newAlign,res); - glb->types->setFields(sublist,res,newSize,newAlign,0); + glb->types->assignRawFields(res,sublist); } catch (LowlevelError &err) { setError(err.explain); @@ -3028,6 +2868,8 @@ int4 CParse::lex(void) return BADTOKEN; case GrammarToken::dotdotdot: return DOTDOTDOT; + case GrammarToken::scoperes: + return SCOPERES; case GrammarToken::badtoken: setError(lexer.getError()); // Error from lexer return BADTOKEN; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.hh index 7bbdc0aaf3..ca63db4aa2 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.hh @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -37,11 +37,12 @@ public: badtoken = 0x100, endoffile = 0x101, dotdotdot = 0x102, + scoperes = 0x103, - integer = 0x103, - charconstant = 0x104, - identifier = 0x105, - stringval = 0x106, + integer = 0x104, + charconstant = 0x105, + identifier = 0x106, + stringval = 0x107, }; private: uint4 type; @@ -85,6 +86,8 @@ class GrammarLexer { dot1, dot2, dot3, + scoperes1, + scoperes2, punctuation, endofline_comment, c_comment, @@ -169,12 +172,15 @@ class TypeDeclarator { string ident; // variable identifier associated with type string model; // name of model associated with function pointer uint4 flags; // Specifiers qualifiers + int4 numBits; // Number of bits associated with declaration (0=unspecified) public: - TypeDeclarator(void) { basetype=(Datatype *)0; flags=0; } - TypeDeclarator(const string &nm) { ident=nm; basetype=(Datatype *)0; flags=0; } + TypeDeclarator(void) { basetype=(Datatype *)0; flags=0; numBits=0; } + TypeDeclarator(const string &nm) { ident=nm; basetype=(Datatype *)0; flags=0; numBits=0; } ~TypeDeclarator(void); Datatype *getBaseType(void) const { return basetype; } int4 numModifiers(void) const { return mods.size(); } + int4 getNumBits(void) const { return numBits; } + void setNumBits(int4 val) { numBits = val; } const string &getIdentifier(void) const { return ident; } ProtoModel *getModel(Architecture *glb) const; bool getPrototype(PrototypePieces &pieces,Architecture *glb) const; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y index 298843cc1b..059993e004 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/grammar.y @@ -39,7 +39,7 @@ static CParse *parse; // Grammar taken from ISO/IEC 9899 -%token DOTDOTDOT BADTOKEN STRUCT UNION ENUM DECLARATION_RESULT PARAM_RESULT +%token DOTDOTDOT BADTOKEN STRUCT UNION ENUM DECLARATION_RESULT PARAM_RESULT SCOPERES %token NUMBER %token IDENTIFIER %token 
STORAGE_CLASS_SPECIFIER TYPE_QUALIFIER FUNCTION_SPECIFIER @@ -56,6 +56,7 @@ static CParse *parse; %type type_specifier struct_or_union_specifier enum_specifier %type enumerator %type enumerator_list +%type var_identifier %% document: @@ -127,7 +128,7 @@ struct_declarator_list: struct_declarator: declarator { $$ = $1; } -// declarator ':' NUMBER + | declarator ':' NUMBER { $$ = $1; $1->setNumBits((int4)*$3); } ; enum_specifier: @@ -153,8 +154,13 @@ declarator: | pointer direct_declarator { $$ = parse->mergePointer($1,$2); } ; +var_identifier: + IDENTIFIER { $$ = $1; } + | var_identifier SCOPERES IDENTIFIER { $$ = $1; $$->append("::"); $$->append(*$3); } +; + direct_declarator: - IDENTIFIER { $$ = parse->newDeclarator($1); } + var_identifier { $$ = parse->newDeclarator($1); } | '(' declarator ')' { $$ = $2; } | direct_declarator '[' type_qualifier_list assignment_expression ']' { $$ = parse->newArray($1,$3,$4); } | direct_declarator '[' assignment_expression ']' { $$ = parse->newArray($1,0,$3); } @@ -361,6 +367,9 @@ uint4 GrammarLexer::moveState(char lookahead) state = punctuation; bufstart = bufend-1; break; + case ':': + state = scoperes1; + break; case '-': case '0': case '1': @@ -469,6 +478,19 @@ uint4 GrammarLexer::moveState(char lookahead) state = start; res = GrammarToken::dotdotdot; break; + case scoperes1: + if (lookahead == ':') { + state = scoperes2; + } + else { + state = start; + res = ':'; + } + break; + case scoperes2: + state = start; + res = GrammarToken::scoperes; + break; case punctuation: state = start; res = (uint4)buffer[bufstart]; @@ -529,7 +551,7 @@ uint4 GrammarLexer::moveState(char lookahead) } else if ((lookahead>='a')&&(lookahead<='z')) { } - else if (lookahead == '_' || lookahead == ':') { + else if (lookahead == '_') { } else { state = start; @@ -1035,6 +1057,8 @@ Datatype *CParse::newStruct(const string &ident,vector *declis { // Build a new structure TypeStruct *res = glb->types->getTypeStruct(ident); // Create stub (for recursion) 
vector sublist; + vector bitlist; + bool isBigEndian = glb->getDefaultDataSpace()->isBigEndian(); for(uint4 i=0;isize();++i) { TypeDeclarator *decl = (*declist)[i]; @@ -1043,14 +1067,14 @@ Datatype *CParse::newStruct(const string &ident,vector *declis glb->types->destroyType(res); return (Datatype *)0; } - sublist.emplace_back(0,-1,decl->getIdentifier(),decl->buildType(glb)); + if (decl->getNumBits() != 0) + bitlist.emplace_back(sublist.size(),decl->getNumBits(),isBigEndian,decl->getIdentifier(),decl->buildType(glb)); + else + sublist.emplace_back(0,-1,decl->getIdentifier(),decl->buildType(glb)); } try { - int4 newSize; - int4 newAlign; - TypeStruct::assignFieldOffsets(sublist,newSize,newAlign); - glb->types->setFields(sublist,res,newSize,newAlign,0); + glb->types->assignRawFields(res,sublist,bitlist); } catch (LowlevelError &err) { setError(err.explain); @@ -1086,10 +1110,7 @@ Datatype *CParse::newUnion(const string &ident,vector *declist } try { - int4 newSize; - int4 newAlign; - TypeUnion::assignFieldOffsets(sublist,newSize,newAlign,res); - glb->types->setFields(sublist,res,newSize,newAlign,0); + glb->types->assignRawFields(res,sublist); } catch (LowlevelError &err) { setError(err.explain); @@ -1282,6 +1303,8 @@ int4 CParse::lex(void) return BADTOKEN; case GrammarToken::dotdotdot: return DOTDOTDOT; + case GrammarToken::scoperes: + return SCOPERES; case GrammarToken::badtoken: setError(lexer.getError()); // Error from lexer return BADTOKEN; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc index 1c138774ac..f1a43d865a 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/marshal.cc @@ -1268,6 +1268,6 @@ ElementId ELEM_VAL = ElementId("val",8); ElementId ELEM_VALUE = ElementId("value",9); ElementId ELEM_VOID = ElementId("void",10); -ElementId ELEM_UNKNOWN = ElementId("XMLunknown",289); // Number serves as next open index 
+ElementId ELEM_UNKNOWN = ElementId("XMLunknown",290); // Number serves as next open index } // End namespace ghidra diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc index dcf7553fd3..c60abf92e8 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/op.cc @@ -474,6 +474,7 @@ uintb PcodeOp::collapse(bool &markedInput) const { /// The p-code op must be \e special, or an exception is thrown. The operation is performed /// and if there is no evaluation error, the result is returned and \b evalError is set to \b false. /// \param in is an array of input values +/// \param evalError passes back \b false if there is no evaluation error /// \return the result of applying \b this operation to the input values uintb PcodeOp::executeSimple(uintb *in,bool &evalError) const @@ -732,8 +733,14 @@ uintb PcodeOp::getNZMaskLocal(bool cliploop) const case CPUI_INT_ADD: resmask = getIn(0)->getNZMask(); if (resmask!=fullmask) { - resmask |= getIn(1)->getNZMask(); - resmask |= (resmask<<1); // Account for possible carries + uintb othermask = getIn(1)->getNZMask(); + if ((othermask & resmask) == 0) { + resmask |= othermask; + } + else { + resmask |= othermask; + resmask |= (resmask << 1); // Account for possible carries + } resmask &= fullmask; } break; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/opbehavior.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/opbehavior.cc index ab23e94b83..2d989384d4 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/opbehavior.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/opbehavior.cc @@ -102,9 +102,10 @@ void OpBehavior::registerInstructions(vector &inst,const Translate inst[CPUI_CPOOLREF] = new OpBehavior(CPUI_CPOOLREF,false,true); inst[CPUI_NEW] = new OpBehavior(CPUI_NEW,false,true); inst[CPUI_INSERT] = new OpBehavior(CPUI_INSERT,false); - inst[CPUI_EXTRACT] = new OpBehavior(CPUI_EXTRACT,false); + 
inst[CPUI_ZPULL] = new OpBehavior(CPUI_ZPULL,false); inst[CPUI_POPCOUNT] = new OpBehaviorPopcount(); inst[CPUI_LZCOUNT] = new OpBehaviorLzcount(); + inst[CPUI_SPULL] = new OpBehavior(CPUI_SPULL,false); } /// \param sizeout is the size of the output in bytes diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/opcodes.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/opcodes.hh index 5cb4482737..ba5812135e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/opcodes.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/opcodes.hh @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -123,11 +123,12 @@ enum OpCode { CPUI_CPOOLREF = 68, ///< Recover a value from the \e constant \e pool CPUI_NEW = 69, ///< Allocate a new object (new) CPUI_INSERT = 70, ///< Insert a bit-range - CPUI_EXTRACT = 71, ///< Extract a bit-range + CPUI_ZPULL = 71, ///< Extract an unsigned bit-range CPUI_POPCOUNT = 72, ///< Count the 1-bits CPUI_LZCOUNT = 73, ///< Count the leading 0-bits + CPUI_SPULL = 74, ///< Extract a signed bit-range - CPUI_MAX = 74 ///< Value indicating the end of the op-code values + CPUI_MAX = 75 ///< Value indicating the end of the op-code values }; extern const char *get_opname(OpCode opc); ///< Convert an OpCode to the name as a string diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.cc index ad8a6ded10..f85f01854f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.cc @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -252,6 +252,25 @@ void EmitMarkup::tagField(const string &name,syntax_highlight hl,const Datatype encoder->closeElement(ELEM_FIELD); } +void EmitMarkup::tagBitField(const string &name,syntax_highlight hl,const Datatype *ct,int4 id,const PcodeOp *op) + +{ + encoder->openElement(ELEM_BITFIELD); + if (hl != no_color) + encoder->writeUnsignedInteger(ATTRIB_COLOR,hl); + + encoder->writeString(ATTRIB_NAME,ct->getName()); + uint8 typeId = ct->getUnsizedId(); + if (typeId != 0) { + encoder->writeUnsignedInteger(ATTRIB_ID, typeId); + } + encoder->writeSignedInteger(ATTRIB_OFF, id); + if (op != (const PcodeOp *)0) + encoder->writeUnsignedInteger(ATTRIB_OPREF, op->getTime()); + encoder->writeString(ATTRIB_CONTENT,name); + encoder->closeElement(ELEM_BITFIELD); +} + void EmitMarkup::tagComment(const string &name,syntax_highlight hl,const AddrSpace *spc,uintb off) { @@ -407,6 +426,9 @@ void TokenSplit::print(Emit *emit) const case field_t: // tagField emit->tagField(tok,hl,ptr_second.ct,(int4)off,op); break; + case bitfield_t: // tagBitField + emit->tagBitField(tok,hl,ptr_second.ct,(int4)off,op); + break; case comm_t: // tagComment emit->tagComment(tok,hl,ptr_second.spc,off); break; @@ -501,6 +523,9 @@ void TokenSplit::printDebug(ostream &s) const case field_t: // tagField s << "field_t"; break; + case bitfield_t: // tagBitField + s << "bitfield_t"; + break; case comm_t: // tagComment s << "comm_t"; break; @@ -1055,6 +1080,15 @@ void EmitPrettyPrint::tagField(const string &name,syntax_highlight hl,const Data scan(); } +void EmitPrettyPrint::tagBitField(const string &name,syntax_highlight hl,const Datatype *ct,int4 id,const PcodeOp *op) + +{ + checkstring(); + TokenSplit &tok( tokqueue.push() ); + tok.tagBitField(name,hl,ct,id,op); + scan(); +} + void EmitPrettyPrint::tagComment(const string &name,syntax_highlight hl,const AddrSpace *spc,uintb off) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.hh 
b/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.hh index 9f6ac1b999..bd65f4e292 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/prettyprint.hh @@ -273,11 +273,22 @@ public: /// possibly with additional markup. /// \param name is the character data for the identifier /// \param hl indicates how the identifier should be highlighted - /// \param ct is the data-type associated with the field + /// \param ct is the structured data-type containing the field /// \param off is the (byte) offset of the field within its structured data-type /// \param op is the PcodeOp associated with the field (usually PTRSUB or SUBPIECE) virtual void tagField(const string &name,syntax_highlight hl,const Datatype *ct,int4 off,const PcodeOp *op)=0; + /// \brief Emit an identifier for a bitfield within a structured data-type + /// + /// A string representing an individual component of a structured data-type is emitted, + /// possibly with additional markup. 
+ /// \param name is the character data for the identifier + /// \param hl indicates how the identifier should be highlighted + /// \param ct is the structured data-type containing the field + /// \param id is an identifier for the field within its structured data-type + /// \param op is the PcodeOp associated with the field (usually PTRSUB or SUBPIECE) + virtual void tagBitField(const string &name,syntax_highlight hl,const Datatype *ct,int4 id,const PcodeOp *op)=0; + /// \brief Emit a comment string as part of the generated source code /// /// Individual comments can be broken up and emitted using multiple calls to this method, @@ -527,6 +538,7 @@ public: virtual void tagFuncName(const string &name,syntax_highlight hl,const Funcdata *fd,const PcodeOp *op); virtual void tagType(const string &name,syntax_highlight hl,const Datatype *ct); virtual void tagField(const string &name,syntax_highlight hl,const Datatype *ct,int4 off,const PcodeOp *op); + virtual void tagBitField(const string &name,syntax_highlight hl,const Datatype *ct,int4 id,const PcodeOp *op); virtual void tagComment(const string &name,syntax_highlight hl,const AddrSpace *spc,uintb off); virtual void tagLabel(const string &name,syntax_highlight hl,const AddrSpace *spc,uintb off); virtual void tagCaseLabel(const string &name,syntax_highlight hl,const PcodeOp *op,uintb value); @@ -576,6 +588,8 @@ public: *s << name; } virtual void tagField(const string &name,syntax_highlight hl,const Datatype *ct,int4 off,const PcodeOp *op) { *s << name; } + virtual void tagBitField(const string &name,syntax_highlight hl,const Datatype *ct,int4 id,const PcodeOp *op) { + *s << name; } virtual void tagComment(const string &name,syntax_highlight hl,const AddrSpace *spc,uintb off) { *s << name; } virtual void tagLabel(const string &name,syntax_highlight hl,const AddrSpace *spc,uintb off) { @@ -642,6 +656,7 @@ public: fnam_t, ///< A function identifier type_t, ///< A data-type identifier field_t, ///< A field name for a 
structured data-type + bitfield_t, ///< A bitfield name in a structured data-type comm_t, ///< Part of a comment block label_t, ///< A code label case_t, ///< A case label @@ -808,13 +823,24 @@ public: /// /// \param name is the character data for the identifier /// \param h indicates how the identifier should be highlighted - /// \param ct is the data-type associated with the field + /// \param ct is the structured data-type containing the field /// \param o is the (byte) offset of the field within its structured data-type /// \param inOp is the PcodeOp associated with the field (usually PTRSUB or SUBPIECE) void tagField(const string &name,EmitMarkup::syntax_highlight h,const Datatype *ct,int4 o,const PcodeOp *inOp) { tok = name; size = tok.size(); tagtype=field_t; delimtype=tokenstring; hl=h; ptr_second.ct=ct; off=(uintb)o; op=inOp; } + /// \brief Create an identifier for a bitfield within a structured data-type + /// + /// \param name is the character data for the identifier + /// \param h indicates how the identifier should be highlighted + /// \param ct is the structured data-type containing the field + /// \param id is an identifier for the field within its structured data-type + /// \param inOp is the PcodeOp associated with the field (usually PTRSUB or SUBPIECE) + void tagBitField(const string &name,EmitMarkup::syntax_highlight h,const Datatype *ct,int4 id,const PcodeOp *inOp) { + tok = name; size = tok.size(); + tagtype=bitfield_t; delimtype=tokenstring; hl=h; ptr_second.ct=ct; off=(uintb)id; op=inOp; } + /// \brief Create a comment string in the generated source code /// /// \param name is the character data for the comment @@ -1088,6 +1114,7 @@ public: virtual void tagFuncName(const string &name,syntax_highlight hl,const Funcdata *fd,const PcodeOp *op); virtual void tagType(const string &name,syntax_highlight hl,const Datatype *ct); virtual void tagField(const string &name,syntax_highlight hl,const Datatype *ct,int4 off,const PcodeOp *op); + virtual void 
tagBitField(const string &name,syntax_highlight hl,const Datatype *ct,int4 id,const PcodeOp *op); virtual void tagComment(const string &name,syntax_highlight hl,const AddrSpace *spc,uintb off); virtual void tagLabel(const string &name,syntax_highlight hl,const AddrSpace *spc,uintb off); virtual void tagCaseLabel(const string &name,syntax_highlight hl,const PcodeOp *op,uintb value); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc index dfdc6b4bed..bc9148e455 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.cc @@ -368,6 +368,25 @@ bool PrintC::checkArrayDeref(const Varnode *vn) const return true; } +/// Bitfield accesses through a LOAD or STORE may have a PTRSUB accessing the +/// bitfield storage range. But with any additional PTRSUB or PTRADD, we can use +/// member syntax. +/// \param vn is the root of the pointer expression (the input into LOAD or STORE) +/// \param field is the bitfield being displayed +/// \return \b true if member syntax ('.') should be used or \b false for pointer syntax ('->') +bool PrintC::checkBitFieldMember(const Varnode *vn,const TypeBitField *field) const + +{ + if (field->bits.byteOffset != 0) { // Bitfield not at offset 0, a PTRSUB should be present + const PcodeOp *op; + if (!vn->isWritten()) return false; + op = vn->getDef(); + if (op->code() != CPUI_PTRSUB) return false; + vn = op->getIn(0); // Skip this PTRSUB + } + return checkArrayDeref(vn); +} + /// Check that the output data-type is a pointer to an array and then that /// the second data-type is a pointer to the element type (of the array). 
/// If this holds and the input variable represents a symbol with an \e array data-type, @@ -1264,13 +1283,59 @@ void PrintC::opNewOp(const PcodeOp *op) void PrintC::opInsertOp(const PcodeOp *op) { - opFunc(op); // If no other way to print it, print as functional operator + opFunc(op); } -void PrintC::opExtractOp(const PcodeOp *op) +void PrintC::opZpullOp(const PcodeOp *op) { - opFunc(op); // If no other way to print it, print as functional operator + PullExpression expr(op); + if (!expr.isValid()) { + opFunc(op); // If no other way to print it, print as functional operator + return; + } + if (expr.loadOp != (const PcodeOp *)0) { + uint4 m = mods; + if (checkBitFieldMember(expr.loadOp->getIn(1),expr.bitfield)) { + m |= print_load_value; + pushOp(&object_member,op); + } + else + pushOp(&pointer_member,op); + pushVn(expr.structPtr,expr.loadOp,m); + pushAtom(Atom(expr.bitfield->name,bitfieldtoken,EmitMarkup::no_color,expr.theStruct,expr.bitfield->ident,op)); + } + else { + pushOp(&object_member,op); + pushSymbolDetail(op->getIn(0),op,true); + pushAtom(Atom(expr.bitfield->name,bitfieldtoken,EmitMarkup::no_color,expr.theStruct,expr.bitfield->ident,op)); + } +} + +void PrintC::opSpullOp(const PcodeOp *op) + +{ + PullExpression expr(op); + if (!expr.isValid()) { + opFunc(op); // If no other way to print it, print as functional operator + return; + } + if (expr.loadOp != (const PcodeOp *)0) { + uint4 m = mods; + if (checkBitFieldMember(expr.loadOp->getIn(1),expr.bitfield)) { + m |= print_load_value; + pushOp(&object_member,op); + } + else + pushOp(&pointer_member,op); + pushVn(expr.structPtr,expr.loadOp,m); + pushAtom(Atom(expr.bitfield->name,bitfieldtoken,EmitMarkup::no_color,expr.theStruct,expr.bitfield->ident,op)); + } + else { + pushOp(&object_member,op); + pushSymbolDetail(op->getIn(0),op,true); + pushAtom(Atom(expr.bitfield->name,bitfieldtoken,EmitMarkup::no_color,expr.theStruct,expr.bitfield->ident,op)); + } } /// \brief Push a constant with an integer data-type to 
the RPN stack @@ -1982,6 +2047,10 @@ void PrintC::pushPartialSymbol(const Symbol *sym,int4 off,int4 sz, ct = field->type; succeeded = true; } + else if (op->code() == CPUI_ZPULL || op->code() == CPUI_SPULL) { + // Cannot resolve final byte field because it is a bit field + break; // But we have fully resolved the Varnode + } } else if (ct->getMetatype() == TYPE_ARRAY) { int4 el; @@ -2468,21 +2537,31 @@ bool PrintC::emitInplaceOp(const PcodeOp *op) void PrintC::emitExpression(const PcodeOp *op) { + if (op->doesSpecialPrinting()) { + if (op->isCall()) { + emitConstructor(op); + return; + } + OpCode opc = op->code(); + if (opc == CPUI_STORE) { + emitBitFieldStore(op); + return; + } + else if (opc == CPUI_INSERT) { + emitBitFieldExpression(op); + return; + } + else if (opc == CPUI_SUBPIECE) { + // Don't modify printing here + } + else + throw LowlevelError("Unsupported special printing"); + } const Varnode *outvn = op->getOut(); if (outvn != (Varnode *)0) { if (option_inplace_ops && emitInplaceOp(op)) return; pushOp(&assignment,op); pushSymbolDetail(outvn,op,false); - } - else if (op->doesSpecialPrinting()) { - // Printing of constructor syntax - const PcodeOp *newop = op->getIn(1)->getDef(); - outvn = newop->getOut(); - pushOp(&assignment,newop); - pushSymbolDetail(outvn,newop,false); - opConstructor(op,true); - recurse(); - return; } // If STORE, print *( ) = ( ) // If BRANCH, print nothing @@ -2494,6 +2573,62 @@ void PrintC::emitExpression(const PcodeOp *op) recurse(); } +void PrintC::emitConstructor(const PcodeOp *op) + +{ + // Printing of constructor syntax + const PcodeOp *newop = op->getIn(1)->getDef(); + const Varnode *outvn = newop->getOut(); + pushOp(&assignment,newop); + pushSymbolDetail(outvn,newop,false); + opConstructor(op,true); + recurse(); +} + +void PrintC::emitBitFieldStore(const PcodeOp *op) + +{ + InsertStoreExpression expr(op); + if (!expr.isValid()) { + op->getOpcode()->push(this,op,(PcodeOp *)0); + recurse(); + return; + } + + // We assume the 
STORE is a statement + pushOp(&assignment,op); // This is an assignment + uint4 m = mods; + if (checkBitFieldMember(op->getIn(1),expr.bitfield)) { + m |= print_store_value; + pushOp(&object_member,expr.insertOp); + } + else + pushOp(&pointer_member,expr.insertOp); + pushVn(expr.structPtr,op,m); + pushAtom(Atom(expr.bitfield->name,bitfieldtoken,EmitMarkup::no_color,expr.theStruct,expr.bitfield->ident,op)); + // implied vn's pushed on in reverse order for efficiency + // see PrintLanguage::pushVnImplied + pushVn(expr.insertOp->getIn(1),op,mods); + recurse(); +} + +void PrintC::emitBitFieldExpression(const PcodeOp *op) + +{ + InsertExpression expr(op); + if (!expr.isValid()) { + opFunc(op); // If no other way to print it, print as functional operator + recurse(); + return; + } + pushOp(&assignment,op); // This is an assignment + pushOp(&object_member,expr.insertOp); + pushPartialSymbol(expr.symbol, expr.offsetToBitStruct, expr.theStruct->getSize(), op->getOut(), op, -1, false); + pushAtom(Atom(expr.bitfield->name,bitfieldtoken,EmitMarkup::no_color,expr.theStruct,expr.bitfield->ident,op)); + pushVn(op->getIn(1),op,mods); + recurse(); +} + void PrintC::emitVarDecl(const Symbol *sym) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh index e1fa73c224..3a0bd6a402 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printc.hh @@ -177,6 +177,7 @@ protected: virtual bool doEmitWideCharPrefix(void) const; bool checkArrayDeref(const Varnode *vn) const; ///< Determine whether a LOAD/STORE expression requires pointer '*' syntax + bool checkBitFieldMember(const Varnode *vn,const TypeBitField *field) const; ///< Determine whether a ZPULL/SPULL/INSERT should use '->' or '.' 
notation bool checkAddressOfCast(const PcodeOp *op) const; ///< Check if CAST can be printed as an '&' void emitStructDefinition(const TypeStruct *ct); ///< Emit the definition of a \e structure data-type void emitEnumDefinition(const TypeEnum *ct); ///< Emit the definition of an \e enumeration data-type @@ -226,6 +227,9 @@ protected: virtual string genericTypeName(const Datatype *ct); virtual void emitExpression(const PcodeOp *op); + virtual void emitConstructor(const PcodeOp *op); + virtual void emitBitFieldStore(const PcodeOp *op); + virtual void emitBitFieldExpression(const PcodeOp *op); virtual void emitVarDecl(const Symbol *sym); virtual void emitVarDeclStatement(const Symbol *sym); virtual bool emitScopeVarDecls(const Scope *symScope,int4 cat); @@ -339,7 +343,8 @@ public: virtual void opCpoolRefOp(const PcodeOp *op); virtual void opNewOp(const PcodeOp *op); virtual void opInsertOp(const PcodeOp *op); - virtual void opExtractOp(const PcodeOp *op); + virtual void opZpullOp(const PcodeOp *op); + virtual void opSpullOp(const PcodeOp *op); virtual void opPopcountOp(const PcodeOp *op) { opFunc(op); } virtual void opLzcountOp(const PcodeOp *op) { opFunc(op); } }; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc index 79642ef234..6346b889de 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.cc @@ -394,6 +394,9 @@ void PrintLanguage::emitAtom(const Atom &atom) case fieldtoken: emit->tagField(atom.name,atom.highlight,atom.ptr_second.ct,atom.offset,atom.op); break; + case bitfieldtoken: + emit->tagBitField(atom.name,atom.highlight,atom.ptr_second.ct,atom.offset,atom.op); + break; case casetoken: emit->tagCaseLabel(atom.name, atom.highlight, atom.op, atom.ptr_second.intValue); break; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh 
b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh index af6e87cb04..ea151b76ed 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/printlanguage.hh @@ -167,6 +167,7 @@ public: optoken, ///< Emit atom as operator typetoken, ///< Emit atom as operator fieldtoken, ///< Emit atom as structure field + bitfieldtoken, ///< Emit atom as structure bitfield casetoken, ///< Emit atom as a \e case label blanktoken ///< For anonymous types }; @@ -429,6 +430,24 @@ protected: /// \param op is the given PcodeOp performing the final operation of the expression virtual void emitExpression(const PcodeOp *op)=0; + /// \brief Emit a call as a \e constructor expression + /// + /// Use language specific constructor syntax to represent the CALL. + /// \param op is the CALL op + virtual void emitConstructor(const PcodeOp *op)=0; + + /// \brief Emit STORE to a bit field + /// + /// Printing for the sequence: `STORE( ptr, INSERT( LOAD(ptr), val, #pos, #sz ) )` + /// \param op is the STORE + virtual void emitBitFieldStore(const PcodeOp *op)=0; + + /// \brief Emit expression writing to a bitfield + /// + /// Printing for an expression rooted at INSERT + /// \param op is the INSERT + virtual void emitBitFieldExpression(const PcodeOp *op)=0; + /// \brief Emit a function declaration /// /// This prints the formal defining prototype for a function. 
@@ -577,9 +596,10 @@ public: virtual void opCpoolRefOp(const PcodeOp *op)=0; ///< Emit a CPOOLREF operator virtual void opNewOp(const PcodeOp *op)=0; ///< Emit a NEW operator virtual void opInsertOp(const PcodeOp *op)=0; ///< Emit an INSERT operator - virtual void opExtractOp(const PcodeOp *op)=0; ///< Emit an EXTRACT operator + virtual void opZpullOp(const PcodeOp *op)=0; ///< Emit a ZPULL operator virtual void opPopcountOp(const PcodeOp *op)=0; ///< Emit a POPCOUNT operator virtual void opLzcountOp(const PcodeOp *op)=0; ///< Emit a LZCOUNT operator + virtual void opSpullOp(const PcodeOp *op)=0; ///< Emit an SPULL operator virtual string unnamedField(int4 off,int4 size); ///< Generate an artificial field name static int4 mostNaturalBase(uintb val); ///< Determine the most natural base for an integer diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc index 023d87c933..c894e310b2 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/ruleaction.cc @@ -10945,7 +10945,8 @@ int4 RuleExpandLoad::applyOp(PcodeOp *op,Funcdata &data) if (elType->getSize() < outSize + offset) return 0; type_metatype meta = elType->getMetatype(); - if (meta == TYPE_UNKNOWN) return 0; + if (meta == TYPE_UNKNOWN || meta == TYPE_STRUCT || meta == TYPE_ARRAY || meta == TYPE_UNION + || meta == TYPE_PARTIALSTRUCT || meta == TYPE_PARTIALUNION) return 0; bool addForm = checkAndComparison(outVn); AddrSpace *spc = op->getIn(0)->getSpaceFromConst(); int4 lsbCut = 0; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc index 260313c576..b191cdbd6b 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/subflow.cc @@ -2764,6 +2764,7 @@ bool SplitDatatype::splitLoad(PcodeOp *loadOp,Datatype *inType) if (copyOp != (PcodeOp 
*)0) { OpCode opc = copyOp->code(); if (opc == CPUI_STORE) return false; // Handled by RuleSplitStore + if (opc == CPUI_ZPULL || opc == CPUI_SPULL) return false; if (opc != CPUI_COPY) copyOp = (PcodeOp *)0; } diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc index 6dc548520b..2d48650e70 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.cc @@ -71,6 +71,7 @@ ElementId ELEM_TYPEREF = ElementId("typeref",63); //ElementId ELEM_USE_MS_CONVENTION = ElementId("use_MS_convention", 64); ElementId ELEM_WCHAR_SIZE = ElementId("wchar_size", 65); //ElementId ELEM_ZERO_LENGTH_BOUNDARY = ElementId("zero_length_boundary", 66); +ElementId ELEM_BITFIELD = ElementId("bitfield", 289); // Some default routines for displaying data @@ -793,6 +794,36 @@ TypeField::TypeField(Decoder &decoder,TypeFactory &typegrp) decoder.closeElement(elemId); } +/// Compare meta-data of the two fields for use with TypeStruct::compare +/// \param op2 is the other TypeField to compare with \b this +/// \return -1, 0, or 1 for less than, equal, or greater than ordering +int4 TypeField::compare(const TypeField &op2) const + +{ + if (offset != op2.offset) + return (offset < op2.offset) ? -1:1; + if (name != op2.name) + return (name < op2.name) ? -1:1; + if (type->getMetatype() != op2.type->getMetatype()) + return (type->getMetatype() < op2.type->getMetatype()) ? -1 : 1; + return 0; +} + +/// Compare structure of the two fields for use with TypeStruct::compareDependency. +/// \param op2 is the other TypeField to compare with \b this +/// \return -1, 0, or 1 for less than, equal, or greater than ordering +int4 TypeField::compareDependency(const TypeField &op2) const + +{ + if (offset != op2.offset) + return (offset < op2.offset) ? -1:1; + if (name != op2.name) + return (name < op2.name) ? -1:1; + if (type != op2.type) + return (type < op2.type) ? 
-1 : 1; // compare the pointers directly + return 0; +} + /// Encode a formal description of \b this as a \ element. /// \param encoder is the stream encoder void TypeField::encode(Encoder &encoder) const @@ -807,6 +838,113 @@ void TypeField::encode(Encoder &encoder) const encoder.closeElement(ELEM_FIELD); } +TypeBitField::TypeBitField(Decoder &decoder,TypeFactory &typegrp) + +{ + ident = -1; + uint4 elemId = decoder.openElement(ELEM_BITFIELD); + for(;;) { + uint4 attrib = decoder.getNextAttributeId(); + if (attrib == 0) break; + if (attrib == ATTRIB_NAME) + name = decoder.readString(); + else if (attrib == ATTRIB_ID) + ident = decoder.readSignedInteger(); + else if (attrib == ATTRIB_OFFSET) + bits.byteOffset = decoder.readSignedInteger(); + else if (attrib == ATTRIB_SIZE) + bits.numBits = decoder.readSignedInteger(); + else if (attrib == ATTRIB_FIRST) + bits.leastSigBit = decoder.readSignedInteger(); + } + type = typegrp.decodeType( decoder ); + if (name.size()==0) + throw LowlevelError(" name attribute must not be empty"); + if (ident < 0) + throw LowlevelError(" id attribute must not be empty"); + if (bits.byteOffset < 0 || bits.leastSigBit < 0 || bits.numBits < 0) + throw LowlevelError(" missing offset/size/first attributes"); + bits.byteSize = (bits.leastSigBit + bits.numBits + 7) / 8; + + decoder.closeElement(elemId); + bits.isBigEndian = typegrp.getArch()->getDefaultDataSpace()->isBigEndian(); +} + +/// Create the bitfield only knowing the number of bits and the position of the field within the declaration. 
+/// The bit offset and byte offset must be calculated later via TypeStruct::assignFieldOffsets +/// \param id is the position of the bitfield within the declaration, where contiguous bitfields share the same position +/// \param numBits is the number of bits in the bitfield +/// \param isBigEndian is \b true if the bitfield is stored in a big endian container +/// \param nm is the name of the bitfield +/// \param ct is the integer data-type associated with the bitfield +TypeBitField::TypeBitField(int4 id,int4 numBits,bool isBigEndian,const string &nm,Datatype *ct) + : name(nm), bits(0,(numBits+7)/8,0,numBits,isBigEndian) +{ + type = ct; + ident = id; +} + +/// Compare meta-data of the two bit-fields for use with TypeStruct::compare +/// \param op2 is the other TypeBitField to compare with \b this +/// \return -1, 0, or 1 for less than, equal, or greater than ordering +int4 TypeBitField::compare(const TypeBitField &op2) const + +{ + int4 res = bits.compare(op2.bits); + if (res != 0) return res; + if (name != op2.name) + return (name < op2.name) ? -1:1; + if (type->getMetatype() != op2.type->getMetatype()) + return (type->getMetatype() < op2.type->getMetatype()) ? -1 : 1; + return 0; +} + +/// Compare structure of the two bit-fields for use with TypeStruct::compareDependency. +/// \param op2 is the other TypeBitField to compare with \b this +/// \return -1, 0, or 1 for less than, equal, or greater than ordering +int4 TypeBitField::compareDependency(const TypeBitField &op2) const + +{ + int4 res = bits.compare(op2.bits); + if (res != 0) return res; + if (name != op2.name) + return (name < op2.name) ? -1:1; + if (type != op2.type) + return (type < op2.type) ? 
-1 : 1; // compare the pointers directly + return 0; +} + +void TypeBitField::encode(Encoder &encoder) const + +{ + encoder.openElement(ELEM_BITFIELD); + encoder.writeString(ATTRIB_NAME,name); + encoder.writeSignedInteger(ATTRIB_OFFSET, bits.byteOffset); + encoder.writeSignedInteger(ATTRIB_SIZE, bits.numBits); + encoder.writeSignedInteger(ATTRIB_FIRST, bits.leastSigBit); + type->encodeRef(encoder); + encoder.closeElement(ELEM_BITFIELD); +} + +bool BitFieldTriple::compare(const BitFieldTriple &op1,const BitFieldTriple &op2) + +{ + bool isBigEndian = op1.bitfield->bits.isBigEndian; + int4 byteOff1 = op1.offset + op1.bitfield->bits.byteOffset; + int4 byteOff2 = op2.offset + op2.bitfield->bits.byteOffset; + if (byteOff1 != byteOff2) { + if (isBigEndian) // Return least significant container + return (byteOff1 > byteOff2); // Bigger byte offset is less significant + return (byteOff1 < byteOff2); // Smaller byte offset is less significant + } + int4 lsb1 = op1.bitfield->bits.leastSigBit; + int4 lsb2 = op2.bitfield->bits.leastSigBit; + if (lsb1 != lsb2) { + return (lsb1 < lsb2); + } + return false; // fields start at the same bit +} + /// Parse a \ element for attributes of the character data-type /// \param decoder is the stream decoder /// \param typegrp is the factory owning \b this data-type @@ -1551,19 +1689,21 @@ void TypeEnum::assignValues(map &nmap,const vector &nameli TypeStruct::TypeStruct(const TypeStruct &op) : Datatype(op) { - setFields(op.field,op.size,op.alignment); + setFields(op.field,op.bitfield,op.size,op.alignment); alignSize = op.alignSize; } /// Copy a list of fields into this structure, establishing its size and alignment. /// Should only be called once when constructing the type. 
/// \param fd is the list of fields to copy in +/// \param bit is the list of fields, not aligned/sized to byte boundaries, to copy in /// \param newSize is the final size of the structure in bytes /// \param newAlign is the final alignment of the structure -void TypeStruct::setFields(const vector &fd,int4 newSize,int4 newAlign) +void TypeStruct::setFields(const vector &fd,const vector &bit,int4 newSize,int4 newAlign) { field = fd; + bitfield = bit; size = newSize; alignment = newAlign; if (field.size() == 1) { // A single field @@ -1597,6 +1737,86 @@ int4 TypeStruct::getFieldIter(int4 off) const return -1; } +/// If the bitfield matches the given range exactly it is returned, otherwise null is returned. +/// \param range is the given range to match +/// \return the matching bitfield or null +const TypeBitField *TypeStruct::findMatchingBitField(const BitRange &range) const + +{ + int4 min = 0; + int4 max = bitfield.size()-1; + + while(min <= max) { + int4 mid = (min + max)/2; + const TypeBitField &curfield( bitfield[mid] ); + int4 code = range.overlapTest(curfield.bits); + if (code == 0) + return &curfield; + if (code == -1) + max = mid - 1; + else if (code == 1) + min = mid + 1; + else + break; // Partial overlap + } + return (const TypeBitField *)0; +} + +/// The bitfields passed back may not be in order. 
+/// \param baseOffset is the byte offset of \b this structure in the root structure +/// \param res stores references to the overlapping bitfields +/// \param offset is the byte offset of the given range to find overlaps in +/// \param sz is the number of bytes in the given range +void TypeStruct::collectBitFields(int4 baseOffset,vector &res,int4 offset,int4 sz) const + +{ + vector::const_iterator iter = upper_bound(bitfield.begin(),bitfield.end(),offset,TypeBitField::compareMaxByte); + if (iter != bitfield.end()) { + BitRange range(offset,sz,(*iter).bits.isBigEndian); + for(;iter!=bitfield.end();++iter) { + const TypeBitField &curBitField(*iter); + int4 code = curBitField.bits.overlapTest(range); + if (code == 1) break; + if (code == -1) continue; + res.emplace_back(this,&curBitField,baseOffset); + } + } + vector::const_iterator fiter = upper_bound(field.begin(),field.end(),offset,TypeField::compareMaxByte); + for(;fiter!=field.end();++fiter) { + const TypeField &curField(*fiter); + if (curField.offset >= offset + sz) break; + if (curField.type->getMetatype() != TYPE_STRUCT) continue; + if (!curField.type->hasBitfields()) continue; + ((TypeStruct *)curField.type)->collectBitFields(baseOffset + curField.offset,res,offset-curField.offset,sz); // Recurse into nested structure + } +} + +bool TypeStruct::hasBitFieldsInRange(int4 offset,int4 sz) const + +{ + vector::const_iterator iter = upper_bound(bitfield.begin(),bitfield.end(),offset,TypeBitField::compareMaxByte); + if (iter != bitfield.end()) { + BitRange range(offset,sz,(*iter).bits.isBigEndian); + for(;iter!=bitfield.end();++iter) { + const TypeBitField &curBitField(*iter); + int4 code = curBitField.bits.overlapTest(range); + if (code == 1) break; + if (code == -1) continue; + return true; + } + } + vector::const_iterator fiter = upper_bound(field.begin(),field.end(),offset,TypeField::compareMaxByte); + for(;fiter!=field.end();++fiter) { + const TypeField &curField(*fiter); + if (curField.offset >= offset + sz) 
break; + if (curField.type->getMetatype() != TYPE_STRUCT) continue; + if (!curField.type->hasBitfields()) continue; + if (((const TypeStruct *)curField.type)->hasBitFieldsInRange(offset - curField.offset, sz)) // Recurse into nested structure + return true; + } + return false; +} + /// The field returned may or may not contain the offset. If there are no fields /// that occur earlier than the offset, return -1. /// \param off is the given offset @@ -1751,15 +1971,21 @@ int4 TypeStruct::compare(const Datatype &op,int4 level) const iter2 = ts->field.begin(); // Test only the name and first level metatype first while(iter1 != field.end()) { - if ((*iter1).offset != (*iter2).offset) - return ((*iter1).offset < (*iter2).offset) ? -1:1; - if ((*iter1).name != (*iter2).name) - return ((*iter1).name < (*iter2).name) ? -1:1; - if ((*iter1).type->getMetatype() != (*iter2).type->getMetatype()) - return ((*iter1).type->getMetatype() < (*iter2).type->getMetatype()) ? -1 : 1; + int4 cmp = (*iter1).compare(*iter2); + if (cmp != 0) return cmp; ++iter1; ++iter2; } + if (bitfield.size() != ts->bitfield.size()) return (ts->bitfield.size()-bitfield.size()); + vector::const_iterator iter3,iter4; + iter3 = bitfield.begin(); + iter4 = ts->bitfield.begin(); + while(iter3 != bitfield.end()) { + int4 cmp = (*iter3).compare(*iter4); + if (cmp != 0) return cmp; + ++iter3; + ++iter4; + } level -= 1; if (level < 0) { if (id == op.getId()) return 0; @@ -1776,6 +2002,16 @@ int4 TypeStruct::compare(const Datatype &op,int4 level) const ++iter1; ++iter2; } + iter3 = bitfield.begin(); + iter4 = ts->bitfield.begin(); + while(iter3 != bitfield.end()) { + if ((*iter3).type != (*iter4).type) { + int4 c = (*iter3).type->compare( *(*iter4).type, level ); + if (c != 0) return c; + } + ++iter3; + ++iter4; + } return 0; } @@ -1792,17 +2028,21 @@ int4 TypeStruct::compareDependency(const Datatype &op) const iter2 = ts->field.begin(); // Test only the name and first level metatype first while(iter1 != 
field.end()) { - if ((*iter1).offset != (*iter2).offset) - return ((*iter1).offset < (*iter2).offset) ? -1:1; - if ((*iter1).name != (*iter2).name) - return ((*iter1).name < (*iter2).name) ? -1:1; - Datatype *fld1 = (*iter1).type; - Datatype *fld2 = (*iter2).type; - if (fld1 != fld2) - return (fld1 < fld2) ? -1 : 1; // compare the pointers directly + int4 cmp = (*iter1).compareDependency(*iter2); + if (cmp != 0) return cmp; ++iter1; ++iter2; } + if (bitfield.size() != ts->bitfield.size()) return (ts->bitfield.size()-bitfield.size()); + vector::const_iterator iter3,iter4; + iter3 = bitfield.begin(); + iter4 = ts->bitfield.begin(); + while(iter3 != bitfield.end()) { + int4 cmp = (*iter3).compareDependency(*iter4); + if (cmp != 0) return cmp; + ++iter3; + ++iter4; + } return 0; } @@ -1815,13 +2055,123 @@ void TypeStruct::encode(Encoder &encoder) const } encoder.openElement(ELEM_TYPE); encodeBasic(metatype,alignment,encoder); - vector::const_iterator iter; - for(iter=field.begin();iter!=field.end();++iter) { - (*iter).encode(encoder); + vector::const_iterator iter1 = field.begin(); + vector::const_iterator iter2 = bitfield.begin(); + while(iter1 != field.end() && iter2 != bitfield.end()) { + if ((*iter1).offset < (*iter2).bits.byteOffset) { + (*iter1).encode(encoder); + ++iter1; + } + else { + (*iter2).encode(encoder); + ++iter2; + } } + for(;iter1!=field.end();++iter1) + (*iter1).encode(encoder); + for(;iter2!=bitfield.end();++iter2) + (*iter2).encode(encoder); encoder.closeElement(ELEM_TYPE); } +/// \brief Decode a single field, check for errors, and accumulate size and alignment +/// +/// Make sure the field fits, doesn't overlap other fields, and has a sensible name and data-type. 
+/// \param decoder is the stream decoder +/// \param typegrp is the TypeFactory to decode field data-types +/// \param accum contains the accumulated size and alignment seen over multiple fields +void TypeStruct::decodeField(Decoder &decoder,TypeFactory &typegrp,FieldAccum &accum) + +{ + field.emplace_back(decoder,typegrp); + TypeField &curField(field.back()); + if (curField.type == (Datatype *)0 || curField.type->getMetatype() == TYPE_VOID) + throw LowlevelError("Bad field data-type for structure: "+getName()); + if (curField.name.size() == 0) + throw LowlevelError("Bad field name for structure: "+getName()); + if (curField.offset < accum.lastOff) + throw LowlevelError("Fields are out of order"); + if (curField.offset < accum.calcSize) { + ostringstream s; + if (accum.warning.empty()) { + s << "Struct \"" << name << "\": ignoring overlapping field \"" << curField.name << "\""; + } + else { + s << "Struct \"" << name << "\": ignoring multiple overlapping fields"; + } + accum.warning = s.str(); + field.pop_back(); // Throw out the overlapping field + return; + } + if (curField.type->hasBitfields()) + flags |= has_bitfields; + accum.lastOff = curField.offset; + accum.calcSize = curField.offset + curField.type->getSize(); + if (accum.calcSize > size) { + ostringstream s; + s << "Field " << curField.name << " does not fit in structure " + name; + throw LowlevelError(s.str()); + } + int4 curAlign = curField.type->getAlignment(); + if (curAlign > accum.calcAlign) + accum.calcAlign = curAlign; +} + +/// \brief Decode a single bit-field, check for errors, and accumulate size and alignment +/// +/// Make sure the bit-field fits, doesn't overlap other fields or bit-fields, and has a sensible name and data-type. 
+/// \param decoder is the stream decoder +/// \param typegrp is the TypeFactory to decode field data-types +/// \param accum contains the accumulated size and alignment seen over multiple fields +void TypeStruct::decodeBitField(Decoder &decoder,TypeFactory &typegrp,FieldAccum &accum) + +{ + bitfield.emplace_back(decoder,typegrp); + TypeBitField &curBitField(bitfield.back()); + if (curBitField.name.size() == 0) + throw LowlevelError("Bad bitfield name for structure: "+getName()); + if (curBitField.type == (Datatype *)0) + throw LowlevelError("Bad bitfield data-type for bitfield \""+bitfield.back().name+"\" in structure: "+getName()); + type_metatype meta = curBitField.type->getMetatype(); + if (meta != TYPE_INT && meta != TYPE_UINT && meta != TYPE_BOOL && meta != TYPE_ENUM_INT && meta != TYPE_ENUM_UINT) + throw LowlevelError("Non integer data-type for bitfield \""+bitfield.back().name+"\" in structure: "+getName()); + if (curBitField.bits.byteOffset < accum.lastOff) + throw LowlevelError("Bitfields are out of order in structure: "+getName()); + if (curBitField.bits.byteOffset < accum.calcSize) { + if (bitfield.size() < 2 || bitfield[bitfield.size()-2].bits.overlapTest(curBitField.bits) != -1) { + ostringstream s; + if (accum.warning.empty()) { + s << "Struct \"" << name << "\": ignoring overlapping bit field \"" << curBitField.name << "\""; + } + else { + s << "Struct \"" << name << "\": ignoring multiple overlapping fields"; + } + accum.warning = s.str(); + bitfield.pop_back(); // Throw out the overlapping field + return; + } + } + accum.lastOff = curBitField.bits.byteOffset; + accum.calcSize = curBitField.bits.byteOffset + curBitField.bits.byteSize; + if (accum.calcSize > size) { + ostringstream s; + s << "Bitfield " << curBitField.name << " does not fit in structure " + name; + throw LowlevelError(s.str()); + } + if (curBitField.bits.isByteRange()) { + curBitField.bits.minimizeContainer(); + Datatype *dt = curBitField.type; + if (dt->getSize() != 
curBitField.bits.byteSize) { + type_metatype meta = dt->getMetatype(); + if (meta != TYPE_INT && meta != TYPE_UINT) + meta = TYPE_UNKNOWN; + dt = typegrp.getBase(curBitField.bits.byteSize, meta); + } + field.emplace_back(curBitField.bits.byteOffset,curBitField.bits.byteOffset,curBitField.name,dt); + bitfield.pop_back(); // Remove from bitfield list + } +} + /// Read children of the structure element describing each field. Alignment is calculated from fields unless /// the \b alignment field is already >0. The fields must be in order, fit within the \b size field, have a /// valid name, and have a valid data-type, or an exception is thrown. Any fields that overlap their previous @@ -1832,54 +2182,34 @@ void TypeStruct::encode(Encoder &encoder) const string TypeStruct::decodeFields(Decoder &decoder,TypeFactory &typegrp) { - int4 calcAlign = 1; - int4 calcSize = 0; - int4 lastOff = -1; - string warning; - while(decoder.peekElement() != 0) { - field.emplace_back(decoder,typegrp); - TypeField &curField(field.back()); - if (curField.type == (Datatype *)0 || curField.type->getMetatype() == TYPE_VOID) - throw LowlevelError("Bad field data-type for structure: "+getName()); - if (curField.name.size() == 0) - throw LowlevelError("Bad field name for structure: "+getName()); - if (curField.offset < lastOff) - throw LowlevelError("Fields are out of order"); - lastOff = curField.offset; - if (curField.offset < calcSize) { - ostringstream s; - if (warning.empty()) { - s << "Struct \"" << name << "\": ignoring overlapping field \"" << curField.name << "\""; - } - else { - s << "Struct \"" << name << "\": ignoring multiple overlapping fields"; - } - warning = s.str(); - field.pop_back(); // Throw out the overlapping field - continue; - } - calcSize = curField.offset + curField.type->getSize(); - if (calcSize > size) { - ostringstream s; - s << "Field " << curField.name << " does not fit in structure " + name; - throw LowlevelError(s.str()); - } - int4 curAlign = 
curField.type->getAlignment(); - if (curAlign > calcAlign) - calcAlign = curAlign; + FieldAccum accum; + accum.calcAlign = 1; + accum.calcSize = 0; + accum.lastOff = -1; + for(;;) { + uint4 el = decoder.peekElement(); + if (el == 0) break; + if (el == ELEM_FIELD) + decodeField(decoder,typegrp,accum); + else if (el == ELEM_BITFIELD) + decodeBitField(decoder,typegrp,accum); + else + throw DecoderError("Expecting or "); } if (size == 0) // Old way to indicate an incomplete structure flags |= type_incomplete; - if (field.size() > 0) + if (field.size() > 0 || bitfield.size() > 0) markComplete(); // If we have fields, mark as complete + if (bitfield.size() > 0) + flags |= has_bitfields; if (field.size() == 1) { // A single field if (field[0].type->getSize() == size) // that fills the whole structure flags |= needs_resolution; // needs special resolution } if (alignment < 1) - alignment = calcAlign; + alignment = accum.calcAlign; alignSize = calcAlignSize(size, alignment); - return warning; + return accum.warning; } /// If this method is called, the given data-type has a single component that fills it entirely @@ -1964,31 +2294,101 @@ int4 TypeStruct::findCompatibleResolve(Datatype *ct) const return -1; } -/// Assign an offset to fields in order so that each field starts at an aligned offset within the structure -/// \param list is the list of fields -/// \param newSize passes back the calculated size of the structure -/// \param newAlign passes back the calculated alignment -void TypeStruct::assignFieldOffsets(vector &list,int4 &newSize,int4 &newAlign) +/// \brief For a subset of bitfields, assign specific positions within \b this structure +/// +/// The name, data-type, and number of bits must already filled in the bitfield records. +/// This method fills in the byte offset, byte size, and starting bit. +/// The subset is determined by the bitfield \b ident, labels fields that are grouped together. 
+/// \param bitlist is the list of all bitfield records +/// \param pos is the first unassigned bitfield and is updated when the subset gets assigned positions +/// \param offset is the starting byte offset assigned to the subset and is updated when the subset gets assigned +/// \param newAlign is the alignment for the structure up to the current offset and is updated +void TypeStruct::assignContiguousBitfields(vector &bitlist,int4 &pos,int4 &offset,int4 &newAlign) { + int4 totalSize = 0; + int4 startInd = pos; + int4 nextBitPos = bitlist[pos].ident; + // Calculate total number of bits in contiguous bitfields + while(pos < bitlist.size() && bitlist[pos].ident == nextBitPos) { + totalSize += bitlist[pos].bits.numBits; + pos += 1; + } + // Align the offset for bitfields + int4 align = bitlist[startInd].type->getAlignment(); + if (align > newAlign) + newAlign = align; + align -= 1; + if (align > 0 && (offset & align)!=0) + offset = (offset-(offset & align) + (align+1)); + totalSize = (totalSize + 7) / 8; // Calculate number of bytes for this set of bitfields + int4 lsb = 0; + for(int4 i=startInd;i 1) { + // Big-endian bitfields are assigned least significant bit to most significant, but the data-type still + // expects the fields to be in order from most to least, so after assignment is complete, we reverse the order. + std::reverse(bitlist.begin()+startInd,bitlist.begin()+pos); + } +} + +/// \brief Assign offsets to a list of fields and bitfields that define a structure +/// +/// Assign an offset to fields in order so that each field starts at an aligned offset within the structure. 
+/// \param list is the list of fields +/// \param bitlist is the list of bitfields +/// \param newSize passes back the calculated size of the structure +/// \param newAlign passes back the calculated alignment +/// \param flags passes back any additional flags that should be set on the structure +void TypeStruct::assignFieldOffsets(vector &list,vector &bitlist,int4 &newSize,int4 &newAlign,uint4 &flags) + +{ + int4 nextBitPos = -1; + int4 curBitInd = -1; + if (!bitlist.empty()) { + curBitInd = 0; + nextBitPos = bitlist[curBitInd].ident; + } int4 offset = 0; newAlign = 1; - vector::iterator iter; - for(iter=list.begin();iter!=list.end();++iter) { - if ((*iter).type->getMetatype() == TYPE_VOID) + flags = 0; + for(int4 pos=0;posgetMetatype() == TYPE_VOID) throw LowlevelError("Illegal field data-type: void"); - if ((*iter).offset != -1) continue; - int4 cursize = (*iter).type->getAlignSize(); - int4 align = (*iter).type->getAlignment(); + if (curField.offset != -1) continue; + int4 cursize = curField.type->getAlignSize(); + int4 align = curField.type->getAlignment(); if (align > newAlign) newAlign = align; align -= 1; if (align > 0 && (offset & align)!=0) offset = (offset-(offset & align) + (align+1)); - (*iter).offset = offset; - (*iter).ident = offset; + curField.offset = offset; + curField.ident = offset; offset += cursize; + if (curField.type->hasBitfields()) + flags |= Datatype::has_bitfields; } + if (list.size() == nextBitPos) { + assignContiguousBitfields(bitlist, curBitInd, offset, newAlign); // Bitfields after any other fields + } + if (!bitlist.empty() && curBitInd != bitlist.size()) + throw LowlevelError("Malformed bitfield description"); + if (!bitlist.empty()) + flags |= Datatype::has_bitfields; newSize = calcAlignSize(offset, newAlign); } @@ -2330,6 +2730,11 @@ TypePartialStruct::TypePartialStruct(const TypePartialStruct &op) TypePartialStruct::TypePartialStruct(Datatype *contain,int4 off,int4 sz,Datatype *strip) : Datatype(sz,1,TYPE_PARTIALSTRUCT) { + if 
(contain->getMetatype() == TYPE_PARTIALSTRUCT) { + TypePartialStruct *partial = (TypePartialStruct *)contain; + contain = partial->getParent(); + off += partial->getOffset(); + } #ifdef CPUI_DEBUG if (contain->getMetatype() != TYPE_STRUCT && contain->getMetatype() != TYPE_ARRAY) throw LowlevelError("Parent of partial struct is not a structure or array"); @@ -2338,6 +2743,10 @@ TypePartialStruct::TypePartialStruct(Datatype *contain,int4 off,int4 sz,Datatype stripped = strip; container = contain; offset = off; + if (container->hasBitfields()) { + if (((TypeStruct *)container)->hasBitFieldsInRange(offset, sz)) + flags |= has_bitfields; + } } /// If the parent is an array, return the element data-type. Otherwise return the \b stripped data-type. @@ -2605,6 +3014,18 @@ void TypePointerRel::printRaw(ostream &s) const s << ']'; } +Datatype *TypePointerRel::getPtrInto(int4 &off) const + +{ + type_metatype meta = ptrto->getMetatype(); + if (meta == TYPE_STRUCT || meta == TYPE_UNION) { + off = 0; + return ptrto; + } + off = offset; + return parent; +} + int4 TypePointerRel::compare(const Datatype &op,int4 level) const { @@ -3469,23 +3890,26 @@ void TypeFactory::setDisplayFormat(Datatype *ct,uint4 format) ct->setDisplayFormat(format); } +/// \brief Set fields on a TypeStruct +/// /// Set fields on a structure data-type, establishing its size, alignment, and other properties. /// This method should only be used on an incomplete structure. It will mark the structure as complete. 
/// \param fd is the list of fields to set +/// \param bit is the list of fields, not aligned/sized to byte boundaries, to set /// \param ot is the TypeStruct object to modify /// \param newSize is the new size of the structure in bytes /// \param newAlign is the new alignment of the structure /// \param flags are other flags to set on the structure -void TypeFactory::setFields(const vector &fd,TypeStruct *ot,int4 newSize,int4 newAlign,uint4 flags) - +void TypeFactory::setFields(const vector &fd,const vector &bit, + TypeStruct *ot,int4 newSize,int4 newAlign,uint4 flags) { if (!ot->isIncomplete()) throw LowlevelError("Can only set fields on an incomplete structure"); tree.erase(ot); - ot->setFields(fd,newSize,newAlign); + ot->setFields(fd,bit,newSize,newAlign); ot->flags &= ~(uint4)Datatype::type_incomplete; - ot->flags |= (flags & (Datatype::opaque_string | Datatype::variable_length | Datatype::type_incomplete)); + ot->flags |= (flags & (Datatype::opaque_string | Datatype::variable_length | Datatype::type_incomplete | Datatype::has_bitfields)); tree.insert(ot); recalcPointerSubmeta(ot, SUB_PTR); recalcPointerSubmeta(ot, SUB_PTR_STRUCT); @@ -3785,7 +4209,8 @@ void TypeFactory::resolveIncompleteTypedefs(void) if (dt->getMetatype() == TYPE_STRUCT) { TypeStruct *prevStruct = (TypeStruct *)dt; TypeStruct *defedStruct = (TypeStruct *)defedType; - setFields(defedStruct->field,prevStruct,defedStruct->size,defedStruct->alignment,defedStruct->flags); + setFields(defedStruct->field,defedStruct->bitfield,prevStruct,defedStruct->size,defedStruct->alignment, + defedStruct->flags); iter = incompleteTypedef.erase(iter); } else if (dt->getMetatype() == TYPE_UNION) { @@ -4078,6 +4503,21 @@ TypePointer *TypeFactory::resizePointer(TypePointer *ptr,int4 newSize) return (TypePointer *) findAdd(tmp); } +/// \param ct is the integer data-type +/// \param newSize is the size needed +/// \return the correctly sized variant of the data-type +Datatype *TypeFactory::resizeInteger(Datatype 
*ct,int4 newSize) + +{ + if (newSize == ct->getSize()) return ct; + type_metatype meta = ct->getMetatype(); + if (meta != TYPE_INT && meta != TYPE_UINT) + meta = TYPE_UINT; + if (ct->isCharPrint()) + return getBase(newSize, meta); + return getBaseNoChar(newSize, meta); +} + /// Drill down into nested data-types until we get to a data-type that exactly matches the /// given offset and size, and return this data-type. Any \e union data-type encountered /// terminates the process and a partial union data-type is constructed and returned. @@ -4108,7 +4548,8 @@ Datatype *TypeFactory::getExactPiece(Datatype *ct,int4 offset,int4 size) } while(ct != (Datatype *)0); if (lastType != (Datatype *)0) { // If we reach here, lastType is bigger than size - if (lastType->getMetatype() == TYPE_STRUCT || lastType->getMetatype() == TYPE_ARRAY) + type_metatype meta = lastType->getMetatype(); + if (meta == TYPE_STRUCT || meta == TYPE_ARRAY || meta == TYPE_PARTIALSTRUCT) return getTypePartialStruct(lastType, lastOff, size); else if (lastType->isEnumType() && !lastType->hasStripped()) return getTypePartialEnum((TypeEnum *)lastType, lastOff, size); @@ -4116,6 +4557,36 @@ Datatype *TypeFactory::getExactPiece(Datatype *ct,int4 offset,int4 size) return (Datatype *)0; } +/// \brief Assign fields to a struct data-type, establishing the size and alignment +/// +/// Offsets for both fields and bitfields are assigned. Size and alignment are calculated. +/// \param ct is the struct data-type +/// \param fd is the list of fields +/// \param bit is the list of bitfields +void TypeFactory::assignRawFields(TypeStruct *ct,vector &fd,vector &bit) + +{ + int4 newSize; + int4 newAlign; + uint4 flags; + TypeStruct::assignFieldOffsets(fd,bit,newSize,newAlign,flags); + glb->types->setFields(fd,bit,ct,newSize,newAlign,flags); +} + +/// \brief Assign fields to a union data-type, establishing the size and alignment +/// +/// Field offsets are assigned. Size and alignment are calculated. 
+/// \param ct is the union data-type +/// \param fd is the list of fields +void TypeFactory::assignRawFields(TypeUnion *ct,vector &fd) + +{ + int4 newSize; + int4 newAlign; + TypeUnion::assignFieldOffsets(fd,newSize,newAlign,ct); + glb->types->setFields(fd,ct,newSize,newAlign,0); +} + /// The indicated Datatype object is removed from this container. /// Indirect references (via TypeArray TypeStruct etc.) are not affected /// \param ct is the data-type to destroy @@ -4298,7 +4769,8 @@ Datatype *TypeFactory::decodeTypedef(Decoder &decoder) TypeStruct *prevStruct = (TypeStruct *)prev; TypeStruct *defedStruct = (TypeStruct *)defedType; if (prevStruct->field.size() != defedStruct->field.size()) - setFields(defedStruct->field,prevStruct,defedStruct->size,defedStruct->alignment,defedStruct->flags); + setFields(defedStruct->field,defedStruct->bitfield,prevStruct,defedStruct->size,defedStruct->alignment, + defedStruct->flags); } else { TypeUnion *prevUnion = (TypeUnion *)prev; @@ -4352,7 +4824,7 @@ Datatype* TypeFactory::decodeStruct(Decoder &decoder,bool forcecore) throw LowlevelError("Redefinition of structure: " + ts.name); } else { // If structure is a placeholder stub - setFields(ts.field,(TypeStruct*)ct,ts.size,ts.alignment,ts.flags); // Define structure now by copying fields + setFields(ts.field,ts.bitfield,(TypeStruct*)ct,ts.size,ts.alignment,ts.flags); // Define structure now by copying fields } if (!warning.empty()) insertWarning(ct, warning); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh index 92d4882b25..5a2edb37b9 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/type.hh @@ -68,6 +68,7 @@ extern ElementId ELEM_TYPEREF; ///< Marshaling element \ //extern ElementId ELEM_USE_MS_CONVENTION; ///< Marshaling element \ extern ElementId ELEM_WCHAR_SIZE; ///< Marshaling element \ //extern ElementId ELEM_ZERO_LENGTH_BOUNDARY; ///< 
Marshaling element \ +extern ElementId ELEM_BITFIELD; ///< Marshaling element \ /// Print a hex dump of a data buffer to stream extern void print_data(ostream &s,uint1 *buffer,int4 size,const Address &baseaddr); @@ -155,6 +156,7 @@ extern type_class metatype2typeclass(type_metatype meta); class Architecture; // Forward declarations class PcodeOp; class Scope; +class TypeStruct; class TypeFactory; class TypeField; struct DatatypeCompare; @@ -182,7 +184,8 @@ protected: force_format = 0x7000, ///< 3-bits encoding display format, 0=none, 1=hex, 2=dec, 3=oct, 4=bin, 5=char truncate_bigendian = 0x8000, ///< Pointer can be truncated and is big endian pointer_to_array = 0x10000, ///< Data-type is a pointer to an array - warning_issued = 0x20000 ///< Data-type has an associated \e warning string + warning_issued = 0x20000, ///< Data-type has an associated \e warning string + has_bitfields = 0x40000 ///< Data-type contains bitfields }; friend class TypeFactory; friend struct DatatypeCompare; @@ -230,6 +233,7 @@ public: bool isIncomplete(void) const { return (flags & type_incomplete)!=0; } ///< Is \b this an incompletely defined data-type bool needsResolution(void) const { return (flags & needs_resolution)!=0; } ///< Is \b this a union or a pointer to union bool hasWarning(void) const { return (flags & warning_issued)!=0; } ///< Has a \e warning been issued about \b this data-type + bool hasBitfields(void) const { return (flags & has_bitfields)!=0; } ///< Return \b true if \b this contains/overlaps bitfields uint4 getInheritable(void) const { return (flags & coretype); } ///< Get properties pointers inherit uint4 getDisplayFormat(void) const; ///< Get the display format for constants with \b this data-type type_metatype getMetatype(void) const { return metatype; } ///< Get the type \b meta-type @@ -266,6 +270,14 @@ public: /// \return the i-th component sub-type virtual Datatype *getDepend(int4 index) const { return (Datatype *)0; } + /// \brief If \b this is a pointer, 
return the large data-type \b this points into + /// + /// If \b this is not a pointer, null is returned. For ordinary pointers, the data-type being pointed + /// at is returned. For a relative pointer, the innermost structured data-type is returned and the offset passed back. + /// \param off is used to pass back any offset + /// \return the data-type being pointed into or null + virtual Datatype *getPtrInto(int4 &off) const { return (Datatype *)0; } + /// \brief Print (part of) the name of \b this data-type as short prefix for a label /// /// This is used for building variable names to give some indication of the variable's underlying data-type @@ -292,14 +304,50 @@ public: /// \brief A field within a structure or union class TypeField { public: - int4 ident; ///< Id for identifying \b this within its containing structure or union + int4 ident; ///< Identifier of \b this within its containing structure or union int4 offset; ///< Offset (into containing structure or union) of subfield string name; ///< Name of subfield Datatype *type; ///< Data-type of subfield TypeField(Decoder &decoder,TypeFactory &typegrp); ///< Restore \b this field from a stream TypeField(int4 id,int4 off,const string &nm,Datatype *ct) { ident=id; offset=off; name=nm; type=ct; } ///< Construct from components - bool operator<(const TypeField &op2) const { return (offset < op2.offset); } ///< Compare based on offset - void encode(Encoder &encoder) const; ///< Encode \b this field to a stream + int4 compare(const TypeField &op2) const; ///< Compare \b this with another TypeField for propagation ordering + int4 compareDependency(const TypeField &op2) const; ///< Compare \b this with another TypeField for functional equivalence + void encode(Encoder &encoder) const; ///< Encode \b this field to a stream + static bool compareMaxByte(int4 off,const TypeField &field) { ///< Compare field end-point to the given offset + return (off < field.offset + field.type->getSize()); + } +}; + +/// \brief A 
field within a structure that is not aligned or sized on byte boundaries +class TypeBitField { +public: + string name; ///< Name of bitfield + Datatype *type; ///< Underlying (integer) data-type + BitRange bits; ///< Description of the bitfield within its structure + int4 ident; ///< Identifier of \b this within containing structure + TypeBitField(Decoder &decoder,TypeFactory &typegrp); ///< Restore \b this bitfield from a stream + TypeBitField(int4 id,int4 numBits,bool isBigEndian,const string &nm,Datatype *ct); ///< Construct from components + int4 compare(const TypeBitField &op2) const; ///< Compare definition of \b this with another TypeBitField for propagation ordering + int4 compareDependency(const TypeBitField &op2) const; ///< Compare \b this with another TypeBitField for functional equivalence + void encode(Encoder &encoder) const; ///< Encode \b this bitfield to a stream + static bool compareMaxByte(int4 off,const TypeBitField &bitfield) { ///< Compare byte container end-point to the given offset + return (off < bitfield.bits.byteOffset + bitfield.bits.byteSize); + } +}; + +/// \brief Helper class for collecting bitfields intersecting a byte range within a (possibly nested) structure +/// +/// A bitfield description, along with its immediate container, and offset within a root container all in one record. 
+class BitFieldTriple { +public: + const TypeStruct *immedContainer; ///< Immediate container of the bitfield + const TypeBitField *bitfield; ///< Description of the bitfield + int4 offset; ///< Byte offset of the immediate container within parent + BitFieldTriple(const TypeStruct *contain,const TypeBitField *bits,int4 off) { + immedContainer = contain; bitfield = bits; offset = off; } ///< Constructor + + /// \brief Comparator putting bitfields in byte order, least to most significant + static bool compare(const BitFieldTriple &op1,const BitFieldTriple &op2); }; /// Compare two Datatype pointers for equivalence of their description @@ -421,6 +469,7 @@ public: virtual Datatype *getSubType(int8 off,int8 *newoff) const; virtual int4 numDepend(void) const { return 1; } virtual Datatype *getDepend(int4 index) const { return ptrto; } + virtual Datatype *getPtrInto(int4 &off) const { off=0; return ptrto; } virtual void printNameBase(ostream &s) const { s << 'p'; ptrto->printNameBase(s); } virtual int4 compare(const Datatype &op,int4 level) const; virtual int4 compareDependency(const Datatype &op) const; @@ -508,16 +557,32 @@ public: class TypeStruct : public Datatype { protected: friend class TypeFactory; - vector field; ///< The list of fields - void setFields(const vector &fd,int4 fixedSize,int4 fixedAlign); ///< Establish fields for \b this + vector field; ///< List of fields + vector bitfield; ///< List of fields not aligned/sized on byte boundaries + /// \brief Helper function for decoding TypeField objects + struct FieldAccum { + int4 lastOff; ///< Offset of last field + int4 calcSize; ///< Current accumulated size of structure + int4 calcAlign; ///< Maximum alignment seen so far + string warning; ///< Warning(s) produced during decode + }; + void setFields(const vector &fd,const vector &bit,int4 fixedSize,int4 fixedAlign); ///< Establish fields for \b this int4 getFieldIter(int4 off) const; ///< Get index into field list int4 getLowerBoundField(int4 off) const; 
///< Get index of last field before or equal to given offset + void decodeField(Decoder &decoder,TypeFactory &typegrp,FieldAccum &accum); + void decodeBitField(Decoder &decoder,TypeFactory &typegrp,FieldAccum &accum); string decodeFields(Decoder &decoder,TypeFactory &typegrp); ///< Restore fields from a stream + static void assignContiguousBitfields(vector &bitlist,int4 &pos,int4 &offset,int4 &newAlign); public: TypeStruct(const TypeStruct &op); ///< Construct from another TypeStruct TypeStruct(void) : Datatype(0,-1,TYPE_STRUCT) { flags |= type_incomplete; } ///< Construct incomplete/empty TypeStruct vector::const_iterator beginField(void) const { return field.begin(); } ///< Beginning of fields vector::const_iterator endField(void) const { return field.end(); } ///< End of fields + int4 numBitFields(void) const { return bitfield.size(); } ///< Return the number of bitfields contained by \b this + const TypeBitField &getBitField(int4 i) const { return bitfield[i]; } ///< Return the i-th bitfield + const TypeBitField *findMatchingBitField(const BitRange &range) const; ///< Return bitfield matching the given bit range + void collectBitFields(int4 baseOffset,vector &res,int4 offset,int4 sz) const; ///< Collect bitfield records that overlap given range + bool hasBitFieldsInRange(int4 offset,int4 sz) const; ///< Return \b true if \b this structure has 1 or more bitfields in the given byte range virtual const TypeField *findTruncation(int8 off,int4 sz,const PcodeOp *op,int4 slot,int8 &newoff) const; virtual Datatype *getSubType(int8 off,int8 *newoff) const; virtual Datatype *nearestArrayedComponentForward(int8 off,int8 *newoff,int8 *elSize) const; @@ -532,7 +597,7 @@ public: virtual Datatype *resolveInFlow(PcodeOp *op,int4 slot); virtual Datatype* findResolve(const PcodeOp *op,int4 slot); virtual int4 findCompatibleResolve(Datatype *ct) const; - static void assignFieldOffsets(vector &list,int4 &newSize,int4 &newAlign); ///< Assign field offsets + static void 
assignFieldOffsets(vector &list,vector &bitlist,int4 &newSize,int4 &newAlign,uint4 &flags); static int4 scoreSingleComponent(Datatype *parent,PcodeOp *op,int4 slot); ///< Determine best type fit for given PcodeOp use }; @@ -674,6 +739,7 @@ public: /// \return the offset value in \e byte units int4 getByteOffset(void) const { return offset; } virtual void printRaw(ostream &s) const; + virtual Datatype *getPtrInto(int4 &off) const; virtual int4 compare(const Datatype &op,int4 level) const; virtual int4 compareDependency(const Datatype &op) const; virtual Datatype *clone(void) const { return new TypePointerRel(*this); } @@ -798,6 +864,8 @@ class TypeFactory { void insertWarning(Datatype *dt,string warn); ///< Register a new data-type warning with \b this factory void removeWarning(Datatype *dt); ///< Remove the warning associated with the given data-type void resolveIncompleteTypedefs(void); ///< Redefine incomplete typedefs of data-types that are now complete + void setFields(const vector &fd,const vector &bit,TypeStruct *ot,int4 newSize,int4 newAlign,uint4 flags); + void setFields(const vector &fd,TypeUnion *ot,int4 newSize,int4 newAlign,uint4 flags); ///< Set fields on a TypeUnion protected: Architecture *glb; ///< The Architecture object that owns this TypeFactory Datatype *findByIdLocal(const string &nm,uint8 id) const; ///< Search locally by name and id @@ -820,8 +888,6 @@ public: Datatype *findByName(const string &n); ///< Return type of given name Datatype *setName(Datatype *ct,const string &n); ///< Set the given types name void setDisplayFormat(Datatype *ct,uint4 format); ///< Set the display format associated with the given data-type - void setFields(const vector &fd,TypeStruct *ot,int4 newSize,int4 newAlign,uint4 flags); ///< Set fields on a TypeStruct - void setFields(const vector &fd,TypeUnion *ot,int4 newSize,int4 newAlign,uint4 flags); ///< Set fields on a TypeUnion void setPrototype(const FuncProto *fp,TypeCode *newCode,uint4 flags); ///< Set the 
prototype on a TypeCode void setEnumValues(const map &nmap,TypeEnum *te); ///< Set named values for an enumeration Datatype *decodeType(Decoder &decoder); ///< Restore Datatype from a stream @@ -849,7 +915,10 @@ public: TypePointerRel *getTypePointerRel(int4 sz,Datatype *parent,Datatype *ptrTo,int4 ws,int4 off,const string &nm); TypePointer *getTypePointerWithSpace(Datatype *ptrTo,AddrSpace *spc,const string &nm); TypePointer *resizePointer(TypePointer *ptr,int4 newSize); ///< Build a resized pointer based on the given pointer + Datatype *resizeInteger(Datatype *ct,int4 newSize); ///< Build a resized integer based on the given integer Datatype *getExactPiece(Datatype *ct,int4 offset,int4 size); ///< Get the data-type associated with piece of a structured data-type + void assignRawFields(TypeStruct *ct,vector &fd,vector &bit); + void assignRawFields(TypeUnion *ct,vector &fd); void destroyType(Datatype *ct); ///< Remove a data-type from \b this Datatype *concretize(Datatype *ct); ///< Convert given data-type to concrete form void dependentOrder(vector &deporder) const; ///< Place all data-types in dependency order diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc index 5197e3eefd..025b843019 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.cc @@ -102,9 +102,10 @@ void TypeOp::registerInstructions(vector &inst,TypeFactory *tlst, inst[CPUI_CPOOLREF] = new TypeOpCpoolref(tlst); inst[CPUI_NEW] = new TypeOpNew(tlst); inst[CPUI_INSERT] = new TypeOpInsert(tlst); - inst[CPUI_EXTRACT] = new TypeOpExtract(tlst); + inst[CPUI_ZPULL] = new TypeOpZpull(tlst); inst[CPUI_POPCOUNT] = new TypeOpPopcount(tlst); inst[CPUI_LZCOUNT] = new TypeOpLzcount(tlst); + inst[CPUI_SPULL] = new TypeOpSpull(tlst); } /// Change basic data-type info (signed vs unsigned) and operator names ( '>>' vs '>>>' ) @@ -521,6 +522,7 @@ Datatype 
*TypeOpStore::getInputCast(const PcodeOp *op,int4 slot,const CastStrate { if (slot==0) return (Datatype *)0; + if (op->doesSpecialPrinting()) return (Datatype *)0; const Varnode *pointerVn = op->getIn(1); Datatype *pointerType = pointerVn->getHighTypeReadFacing(op); Datatype *pointedToType = pointerType; @@ -2535,19 +2537,31 @@ TypeOpInsert::TypeOpInsert(TypeFactory *t) Datatype *TypeOpInsert::getInputLocal(const PcodeOp *op,int4 slot) const { - if (slot == 0) + if (slot <= 1) return tlst->getBase(op->getIn(slot)->getSize(),TYPE_UNKNOWN); return TypeOpFunc::getInputLocal(op, slot); } -TypeOpExtract::TypeOpExtract(TypeFactory *t) - : TypeOpFunc(t,CPUI_EXTRACT,"EXTRACT",TYPE_INT,TYPE_INT) +Datatype *TypeOpInsert::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const + +{ + return (Datatype *)0; // Never need casts +} + +Datatype *TypeOpInsert::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const + +{ + return op->getOut()->getHighTypeDefFacing(); +} + +TypeOpZpull::TypeOpZpull(TypeFactory *t) + : TypeOpFunc(t,CPUI_ZPULL,"ZPULL",TYPE_UINT,TYPE_INT) { opflags = PcodeOp::ternary; - behave = new OpBehavior(CPUI_EXTRACT,false); // Dummy behavior + behave = new OpBehavior(CPUI_ZPULL,false); // Dummy behavior } -Datatype *TypeOpExtract::getInputLocal(const PcodeOp *op,int4 slot) const +Datatype *TypeOpZpull::getInputLocal(const PcodeOp *op,int4 slot) const { if (slot == 0) @@ -2555,6 +2569,45 @@ Datatype *TypeOpExtract::getInputLocal(const PcodeOp *op,int4 slot) const return TypeOpFunc::getInputLocal(op, slot); } +Datatype *TypeOpZpull::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const + +{ + return (Datatype *)0; // Never need casts +} + +Datatype *TypeOpZpull::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const + +{ + return op->getOut()->getHighTypeDefFacing(); +} + +TypeOpSpull::TypeOpSpull(TypeFactory *t) + : TypeOpFunc(t,CPUI_SPULL,"SPULL",TYPE_INT,TYPE_INT) +{ + opflags = 
PcodeOp::ternary; + behave = new OpBehavior(CPUI_SPULL,false); // Dummy behavior +} + +Datatype *TypeOpSpull::getInputLocal(const PcodeOp *op,int4 slot) const + +{ + if (slot == 0) + return tlst->getBase(op->getIn(slot)->getSize(),TYPE_UNKNOWN); + return TypeOpFunc::getInputLocal(op, slot); +} + +Datatype *TypeOpSpull::getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const + +{ + return (Datatype *)0; // Never need casts +} + +Datatype *TypeOpSpull::getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const + +{ + return op->getOut()->getHighTypeDefFacing(); +} + TypeOpPopcount::TypeOpPopcount(TypeFactory *t) : TypeOpFunc(t,CPUI_POPCOUNT,"POPCOUNT",TYPE_INT,TYPE_UNKNOWN) { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh index 90ac4ed351..5a5fd1d5a9 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/typeop.hh @@ -887,15 +887,29 @@ class TypeOpInsert : public TypeOpFunc { public: TypeOpInsert(TypeFactory *t); ///< Constructor virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; + virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opInsertOp(op); } }; -/// \brief Information about the EXTRACT op-code -class TypeOpExtract : public TypeOpFunc { +/// \brief Information about the ZPULL op-code +class TypeOpZpull : public TypeOpFunc { public: - TypeOpExtract(TypeFactory *t); ///< Constructor + TypeOpZpull(TypeFactory *t); ///< Constructor virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; - virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opExtractOp(op); } + virtual Datatype *getInputCast(const 
PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opZpullOp(op); } +}; + +/// \brief Information about the SPULL op-code +class TypeOpSpull : public TypeOpFunc { +public: + TypeOpSpull(TypeFactory *t); ///< Constructor + virtual Datatype *getInputLocal(const PcodeOp *op,int4 slot) const; + virtual Datatype *getInputCast(const PcodeOp *op,int4 slot,const CastStrategy *castStrategy) const; + virtual Datatype *getOutputToken(const PcodeOp *op,CastStrategy *castStrategy) const; + virtual void push(PrintLanguage *lng,const PcodeOp *op,const PcodeOp *readOp) const { lng->opSpullOp(op); } }; /// \brief Information about the POPCOUNT op-code diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/bitfields.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/bitfields.xml new file mode 100644 index 0000000000..16fa515465 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/bitfields.xml @@ -0,0 +1,117 @@ + + + + 89 +f083e00f83fe090f9ec2c1e2078d04c5 +0200000009d0880783c61483e61f0fb6 +470883e0e009f08847080fb747086625 +7fc080cc3266894708c383fe01742183 +fe0274230fb60701c03c2f7e2c0fb607 +c0e8073c0119c0258403000083c064c3 +0fb60783e007c30fb6570889d083e01f +c0ea0583e20301d0c30fb64708c0e805 +83e003c34883ec188d04fd0800000083 +e07883ff090f9ec1c1e10783e60709c8 +09f0884424040fb644240c83e08083c8 +2a8844240c83e27f89d0c1e0070fb754 +240c6681e27fc009c2668954240c488d +7c2404e8a8fcffff4883c418c34883ec +1089fe488d7c2404e812ffffff807c24 +040079170fb644240483e0070fb65424 +0c83e21f01d04883c410c3f644240440 +750e0fb744240c66c1e80783e07febe6 +0fb644240cc0e80583e003ebd90fb617 +89d183e107b80100000080f905744983 +e278b80200000080fa68743cb8030000 +00803f0078320fb6570889d183e11fb8 +0400000080f91b741fb805000000f6c2 +6074150fb747086625803f663d00210f +94c00fb6c083c006c34883ec1089fe48 
+8d7c2404e866feffff0fb644240483e0 +7fba010000003c5d74160fb744240c66 +25e03f663d80100f94c20fb6d283c202 +89d04883c410c3 + + + 0fb60701c0c0 +f8040fbec0034704c30fb647088d5001 +83e21f83e0e009d0884708c3 + + + + + + + + + + + +ptr->field3 = 2; +ptr->fieldb = val < 10; +ptr->sfield4 = val; +ptr->field5 = \(uint1\)val \+ 0x14; +ptr->field7 = 100; +return loadptr->field3; +if \(.*2.* < loadptr->sfield4\) +-\(uint4\)!loadptr->fieldb & 900 +return loadptr->field2; +return loadptr->field5 \+ loadptr->field2; +mStack_14\.fieldb = stki_a < 10; +mStack_14\.sfield4 = \(char\)stki_a \+ 1; +mStack_14\.field3 = stki_b; +mStack_14\.field2 = 1; +mStack_14\.field5 = 10; +mStack_14\.field7 = stki_c; +if \(mStack_c\.fieldb\) +uVar1 = mStack_c\.field3 \+ mStack_c\.field5; +if \(mStack_c\.sfield4 < 0\) +uVar1 = mStack_c\.field2; +uVar1 = mStack_c\.field7; +ptrcomp->field3 .= 5\) +ptrcomp->sfield4 .= -3\) +ptrcomp->fieldb\) +ptrcomp->field5 .= 0x1b\) +ptrcomp->field2 .= 0\) +ptrcomp->field7 .= 0x42\) +\(mStack_c\.field3 .= 5 .. mStack_c\.sfield4 .= -5\) +\(mStack_c\.field2 .= 0 .. 
mStack_c\.field7 .= 0x21\) +return ap->sfield4 \+ ap->d; +ip->field5 = ip->field5 \+ 1; + diff --git a/Ghidra/Features/Decompiler/src/decompile/datatests/bitfields2.xml b/Ghidra/Features/Decompiler/src/decompile/datatests/bitfields2.xml new file mode 100644 index 0000000000..ff1c90272a --- /dev/null +++ b/Ghidra/Features/Decompiler/src/decompile/datatests/bitfields2.xml @@ -0,0 +1,148 @@ + + + + 0005110028a3000a3442000400431025 + a082000024a5001430a5001f000529c0 + 94820008000000003042f00000451025 + 3442006403e00008a482000800000000 + + + 2402000110a2000f2402000210a20012 + 00000000808200000000000028420030 + 14400015000000008082000000000000 + 30420001144000150000000003e00008 + 240203e8808200000000000000021042 + 03e00008304200078483000800000000 + 000311c23042001f00031b0230630003 + 03e00008004310218482000800000000 + 0002130203e000083042000303e00008 + 24020064 + + + 27bdffd0afbf002c2483000100031900 + 93a200182884000a3042000e00431025 + 0044102530a50007000528402403fff1 + 0043102400451025a3a2001897a20020 + 000000003042c00030c6007f34421500 + 00461025a7a2002027a400180c100000 + 000000008fbf002c0000000003e00008 + 27bd0030 + + + 27bdffd0afbf002c27a400180c100000 + 0000000083a200180000000030420001 + 1040000e0000000083a2001800000000 + 000210423042000787a3002000000000 + 000319c23063001f004310218fbf002c + 0000000003e0000827bd003083a20018 + 00000000044000040000000087a20020 + 1000fff63042007f87a2002000000000 + 000213021000fff13042000300000000 + + + 90820000000000003045000e2403000a + 10a30015240300d0304200f010430014 + 00000000808200000000000030420001 + 1440001124030d809482000800000000 + 30440f801083000e304330001060000e + 240300423042007f1043000d00000000 + 03e000082402000603e0000824020001 + 03e000082402000203e0000824020003 + 03e000082402000403e0000824020005 + 03e0000824020007 + + + 27bdffd0afbf002c27a400180c100000 + 0000000083a2001800000000304200fe + 240300ba1043000b2403002187a20020 + 000000003042307f1043000224020003 + 240200028fbf002c0000000003e00008 + 27bd00301000fffb2402000100000000 + 
+ + 8082000000000000000211038c830004 + 03e0000800431021 + + + 8482000800000000000211c224420001 + 3042001f000211c09483000800000000 + 3063f07f0062102503e00008a4820008 + + + + + + + + + + + +ptr->field3 = 2; +ptr->sfield4 = val; +ptr->fieldb = val < 10; +ptr->field5 = val \+ 0x14; +ptr->field7 = 100; +return lp->field3; +return lp->field5 \+ lp->field2; +if \(lp->sfield4 < .*3.*\) +return lp->field2; +if \(!+lp->fieldb\) +mStack_18\.sfield4 = \(char\)stki_a \+ .*1.*; +mStack_18\.field3 = stki_b; +mStack_18\.fieldb = stki_a < 10; +mStack_18\.field2 = 1; +mStack_18\.field5 = 10; +mStack_18\.field7 = stki_c; +if \(mStack_18\.fieldb\) +uVar1 = mStack_18\.field3 \+ mStack_18\.field5; +if \(mStack_18\.sfield4 < '\\0'\) +uVar1 = mStack_18\.field2; +uVar1 = mStack_18\.field7; +ptrcomp->field3 .= 5\) +ptrcomp->sfield4 .= -3\) +ptrcomp->fieldb\) +ptrcomp->field5 .= 0x1b\) +ptrcomp->field2 .= 0\) +ptrcomp->field7 .= 0x42\) +\(mStack_18\.field3 .= 5 .. mStack_18\.sfield4 .= -5\) +\(mStack_18\.field7 .= 0x21 .. mStack_18\.field2 .= 0\) +return ap->sfield4 \+ ap->d; +ip->field5 = ip->field5 \+ 1; + diff --git a/Ghidra/Features/Decompiler/src/main/doc/decompileplugin.xml b/Ghidra/Features/Decompiler/src/main/doc/decompileplugin.xml index a26cb55afb..a3a301b715 100644 --- a/Ghidra/Features/Decompiler/src/main/doc/decompileplugin.xml +++ b/Ghidra/Features/Decompiler/src/main/doc/decompileplugin.xml @@ -39,7 +39,7 @@ plug-in enabled, but if it is disabled for some reason, it can be enabled from within a Code Browser by selecting the - File -> Configure + File -> Configure... menu option, then clicking on the Configure link under the Ghidra Core section and checking the box next to @@ -2059,6 +2059,34 @@ Splitting Structure Accesses. + + Bitfields + + Bitfields are fully supported. Bitfields are a special type of integer field, defined within a structure data-type, + that does not respect byte boundaries. 
A bitfield can be as small as a single bit, and multiple bitfields can be packed + into a single byte or word of the structure. The Decompiler does not infer bitfields, but propagates them into + the function from structures that explicitly define them. + + + The Decompiler will attempt to display reads and writes to individual bitfields + using the field's name and a standard structure access operator, like '.' or '->'. This hides the + longer sequence of byte-based operations that the underlying code is using to isolate the individual bitfield + from its neighbors. + + + iVar1 = ptr->bit1; // iVar1 = ((*ptr) >> 3) & 7 + struct1.bit1 = i; // struct1._0_1 = ((i & 7) << 3) | (struct1._0_1 & 0xc7) + + + + + If a code sequence sets multiple bitfields simultaneously, the Decompiler will display the sequence + using multiple assignment statements, one for each bitfield affected. If bitfields are read simultaneously, the + Decompiler will display each bitfield as a separate element of the expression. If a code sequence does not seem to respect + the boundaries of individual bitfields, the Decompiler will revert to using auto-generated, byte-based, + field tokens to represent the sequence. + + Enumeration @@ -2589,6 +2617,7 @@ DEFAULT - for basic or no information + AI - for information that is produced with AI assistance ANALYSIS - for information derived by an Analyzer IMPORTED - for information imported from an external source USER_DEFINED - for information set by the user @@ -3054,7 +3083,9 @@ this to off lets the user see the dead code, which is typically demarcated by the control-flow structure: - if (false) { ... } + + if (false) { ... } + @@ -3127,7 +3158,9 @@ rendered using a standard for loop header that contains an initializer statement, condition, and iterating statement. - for (iVar2 = 10; iVar2 < len; iVar2 = iVar2 + 1) { ... + + for (iVar2 = 10; iVar2 < len; iVar2 = iVar2 + 1) { ... 
+ @@ -3158,6 +3191,24 @@ + + Simplify bitfield access + + + When this option is active, the Decompiler attempts to identify expressions where bitfields, + as defined in structure data-types, are either being written to or read from. + Any sequence of logical operations that access an individual bitfield are collapsed into a + normal field access, displaying the bitfield's name. + + + uVar1 = my1._3_1 << 2 & 3; // Isolating a 2-bit field 'mode' within variable 'my1' + ... + uVar1 = my1.mode; // The same assignment after simplification + + + + + Simplify extended integer operations @@ -4962,7 +5013,7 @@ Edit Signature Override - Edit the existing overriding function prototype to the called function under the cursor. + Edit the overriding function prototype applied previously to the called function under the cursor. This action can only be triggered at call sites with an existing signature override. As with the Override diff --git a/Ghidra/Features/Decompiler/src/main/doc/pcoderef.xml b/Ghidra/Features/Decompiler/src/main/doc/pcoderef.xml index 670e372c29..e609b2d803 100644 --- a/Ghidra/Features/Decompiler/src/main/doc/pcoderef.xml +++ b/Ghidra/Features/Decompiler/src/main/doc/pcoderef.xml @@ -2,7 +2,7 @@
P-Code Reference Manual - Last updated March 2, 2023 + Last updated January 16, 2026 @@ -307,7 +307,7 @@ at the destination address. The list of possible -opcodes are similar to many RISC based instruction sets. The effect of +opcodes is similar to many RISC based instruction sets. The effect of each opcode is described in detail in the following sections, and a reference table is given in . In general, the size or @@ -560,7 +560,7 @@ In this case, the offset of input0 is considered a relative offset into the indexed list of p-code operations corresponding to the translation of the current machine instruction. This allows branching within the operations forming a single instruction. For example, if -the BRANCH occurs as the pcode +the BRANCH occurs as the p-code operation with index 5 for the instruction, it can branch to operation with index 8 by specifying a constant destination “address” of 3. Negative constants can be used for backward branches. @@ -3908,26 +3908,29 @@ interpretation as a data-type changes at this point.
-The values position and size must be constants. -The least significant size bits from input1 are -inserted into input0, overwriting a range of bits of the same size, -but leaving any other bits in input0 unchanged. The least significant bit of the overwritten -range is given by position, where bits in index0 are labeled from least significant -to most significant, starting at 0. The value obtained after this overwriting is returned -as output. -Varnodes input0 and output must be the same size and are intended to be the same varnode. -The value size must be not be bigger than the varnode input1, and -size + position must not be bigger than the varnode input0. + An INSERT operation takes the least significant + size bits from input1 and inserts them into input0, overwriting + a range of bits of the same size, but leaving any other bits in input0 unchanged. -This operation is never generated as raw p-code, even though it is equivalent -to SLEIGH bitrange syntax such as input0[10,1] = input1. + The least significant bit of the overwritten range is given by position, + where bits in input0 are labeled from least significant to most significant, starting at 0. + The value obtained after this overwriting is returned + as output. Varnodes input0 and output must be the same size and are intended to be the same + varnode. The values position and size must be + constants. The value size must not be bigger than the varnode input1, + and size + position must not be bigger than the + varnode input0. + + + This operation is never generated as raw p-code, even though it is equivalent + to SLEIGH bitrange syntax such as input0[10,1] = input1. -EXTRACT +ZPULL - +
@@ -3956,7 +3959,7 @@ to SLEIGH bitrange syntax such as input0[10,1] + @@ -3972,19 +3975,90 @@ to SLEIGH bitrange syntax such as input0[10,1]
output - Varnode result containing the extracted value.Varnode containing the extracted value as an unsigned integer.
-The values position and size must be constants. -The operation extracts size bits from input0 and returns it in output. -The position indicates the least significant bit in the range being extracted, with -the bits in input0 labeled from least to most significant, starting at 0. The varnodes input0 and output -can be different sizes, and the extracted value is zero extended into output. -The value size must not be bigger than the varnode output, and -size + position must not be bigger -than the varnode input0. + A ZPULL operation extracts size bits + from input0 and returns them as an unsigned integer value in output, zero extending the + bits to the size of output. -This operation is never generated as raw p-code, even though it is equivalent -to SLEIGH bitrange syntax such as output = input0[10,1]. + The position indicates the least significant bit in the range + being extracted, with the bits in input0 labeled from least to most significant, + starting at 0. The varnodes input0 and output can be different sizes. + The values position and size must be constants. + The value size must not be bigger than the varnode output, and + size + position must not be bigger + than the varnode input0. + + ZPULL is never generated as raw p-code, even though it + is equivalent to SLEIGH bitrange syntax such as + output = input0[10,1]. + +
+ +SPULL + + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParametersDescription
input0 + Varnode to extract a value from.
position(constant)Constant indicating the bit position to extract from.
size(constant)Constant indicating the number of bits to extract.
output + Varnode containing the extracted value as a signed integer.
Semantic statement +
+ Cannot be explicitly coded.
+
+ + An SPULL operation extracts size bits + from input0 and returns them as a signed integer value in output. The value is sign extended + to the size of output, duplicating the most significant extracted bit. + + + The position indicates the least significant bit in the range + being extracted, with the bits in input0 labeled from least to most significant, + starting at 0. The varnodes input0 and output can be different sizes. + The values position and size must be constants. + The value size must not be bigger than the varnode output, and + size + position must not be bigger + than the varnode input0. + + + SPULL is never generated as raw p-code. + +
@@ -4096,7 +4170,7 @@ to SLEIGH bitrange syntax such as output = inpu SUBPIECE v0:2 - The least signficant n bytes of v0. + The least significant n bytes of v0. SUBPIECE diff --git a/Ghidra/Features/Decompiler/src/main/doc/sleigh.xml b/Ghidra/Features/Decompiler/src/main/doc/sleigh.xml index 87f15a255f..838b544f1c 100644 --- a/Ghidra/Features/Decompiler/src/main/doc/sleigh.xml +++ b/Ghidra/Features/Decompiler/src/main/doc/sleigh.xml @@ -677,7 +677,7 @@ define alignment=integer; This specifies the byte alignment of instructions within their address space. It defaults to 1 or no alignment. When disassembling an -instruction at a particular, the disassembler checks the alignment of +instruction at a particular address, the disassembler checks the alignment of the address against this value and can opt to flag an unaligned instruction as an error. @@ -837,7 +837,7 @@ Many processors define registers that either consist of a single bit or otherwise don't use an integral number of bytes. A recurring example in many processors is the status register which is further subdivided into the overflow and result flags for the arithmetic -instructions. These flags are typically have labels like ZF for the +instructions. These flags typically have labels like ZF for the zero flag or CF for the carry flag and can be considered logical registers contained within the status register. SLEIGH allows registers to be defined like this using @@ -1097,10 +1097,10 @@ We list all of the symbols that are predefined by SLEIGH. The most important of these to be aware of are inst_start -and inst_next. These are family symbols which map -in the context of particular instruction to the integer offset of -either the address of the instruction or the address of the next -instruction respectively. These are used in any relative branching +and inst_next. 
These are family symbols that map +to the integer offset of either the instruction's address or the next +instruction's address, depending on the context of a particular instruction. +These are used in any relative branching situation. The inst_next2 is intended for conditional skip instruction situations. The remaining symbols are rarely used. The const and unique @@ -1624,7 +1624,7 @@ field. The '&' and '|' Operators More complicated patterns are built out of logical operators. The -meaning of these are fairly straightforward. We can force two or more +meanings of these are fairly straightforward. We can force two or more constraints to be true at the same time, a logical and ‘&’, or we can require that either one constraint or another must be true, a logical or ‘|’. By using these with @@ -1933,7 +1933,7 @@ if, when all the variables are evaluated, the equation is true. :xor r1,r2 is opcode=0xcd & r1 & r2 { r1 = r1 ^ r2; } -:clr r1 is opcode=0xcd & r1 & r2=r1 { r1 = 0; } +:clr r1 is opcode=0xcd & r1 & r1=r2 { r1 = 0; } @@ -1949,10 +1949,10 @@ feature of clr from xor is that the two fields, specifying the two register inputs to xor, are equal. The easiest way to specify this special case is with the general constraint, -“r2 = r1”, as in the second +“r1 = r2”, as in the second line of the example. The SLEIGH compiler will implement this by -enumerating all the cases where r2 -equals r1, creating as many states as there are +enumerating all the cases where r1 +equals r2, creating as many states as there are registers. But the specification itself, at least, remains compact. 
diff --git a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerAnnotations.html b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerAnnotations.html index 59ff502054..d5fe644c28 100644 --- a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerAnnotations.html +++ b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerAnnotations.html @@ -4,7 +4,7 @@ Program Annotations Affecting the Decompiler - + @@ -710,6 +710,38 @@
+Bitfields
+ +

+ Bitfields are fully supported. Bitfields are a special type of integer field, defined within a structure data-type, + that does not respect byte boundaries. A bitfield can be as small as a single bit, and multiple bitfields can be packed + into a single byte or word of the structure. The Decompiler does not infer bitfields, but propagates them into + the function from structures that explicitly define them. +

+

+ The Decompiler will attempt to display reads and writes to individual bitfields + using the field's name and a standard structure access operator, like '.' or '->'. This hides the + longer sequence of byte-based operations that the underlying code is using to isolate the individual bitfield + from its neighbors. +

+
+
+	      iVar1 = ptr->bit1;        //  iVar1 = ((*ptr) >> 3) & 7
+	      struct1.bit1 = i;         //  struct1._0_1 = ((i & 7) << 3) | (struct1._0_1 & 0xc7)
+	    
+
+

+

+

+ If a code sequence sets multiple bitfields simultaneously, the Decompiler will display the sequence + using multiple assignment statements, one for each bitfield affected. If bitfields are read simultaneously, the + Decompiler will display each bitfield as a separate element of the expression. If a code sequence does not seem to respect + the boundaries of individual bitfields, the Decompiler will revert to using auto-generated, byte-based, + field tokens to represent the sequence. +

+
+
+
Enumeration

diff --git a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerConcepts.html b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerConcepts.html index 2255c22195..d563ea5615 100644 --- a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerConcepts.html +++ b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerConcepts.html @@ -4,7 +4,7 @@ Decompiler Concepts - + diff --git a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerIntro.html b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerIntro.html index 1880986ce7..d6d603e624 100644 --- a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerIntro.html +++ b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerIntro.html @@ -4,7 +4,7 @@ Decompiler - + diff --git a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerOptions.html b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerOptions.html index 87349ea37c..7eacb317eb 100644 --- a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerOptions.html +++ b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerOptions.html @@ -4,7 +4,7 @@ Decompiler Options - + @@ -206,7 +206,9 @@ by the control-flow structure:

- if (false) { ... } +
+	      if (false) { ... }
+	    

@@ -286,7 +288,9 @@ that contains an initializer statement, condition, and iterating statement.

- for (iVar2 = 10; iVar2 < len; iVar2 = iVar2 + 1) { ... +
+	      for (iVar2 = 10; iVar2 < len; iVar2 = iVar2 + 1) { ...
+	    

@@ -318,6 +322,26 @@

+Simplify bitfield access +
+
+

+ When this option is active, the Decompiler attempts to identify expressions where bitfields, + as defined in structure data-types, are either being written to or read from. + Any sequence of logical operations that accesses an individual bitfield is collapsed into a + normal field access, displaying the bitfield's name. +

+
+
+	      uVar1 = my1._3_1 >> 2 & 3;  // Isolating a 2-bit field 'mode' within variable 'my1'
+	        ...
+              uVar1 = my1.mode;   // The same assignment after simplification
+	    
+
+

+

+
+
Simplify extended integer operations
@@ -534,16 +558,14 @@ (see Find...).

- -
- Color for Highlighting Middle-mouse Matches -
-
-

- Assign the background color used to highlight characters when highlighting using the middle-mouse button. -

-
- +
+Color for Highlighting Middle-mouse Matches +
+
+

+ Assign the background color used to highlight characters when highlighting using the middle-mouse button. +

+
Comment line indent level
diff --git a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerWindow.html b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerWindow.html index 0d22fa6583..29a80d5a11 100644 --- a/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerWindow.html +++ b/Ghidra/Features/Decompiler/src/main/help/help/topics/DecompilePlugin/DecompilerWindow.html @@ -4,7 +4,7 @@ Decompiler Window - + @@ -536,7 +536,6 @@ token, within the Decompiler window. There are actions available from the popup menu and from the keyboard to navigate to each highlighted token.

-
@@ -965,10 +964,10 @@ Go To Next/Previous Highlight

- These actions are available from the popup menu and keyboard. Only tokens highlighted from the - middle-mouse will be navigated. Shift-Comma will - go to the previous highlighted token. Shift-Period - will go to the next highlighted token. These key bindings can be changed via the + These actions are available from the popup menu and keyboard. Only tokens highlighted from the + middle-mouse will be navigated. Shift-Comma will go to the + previous highlighted token. Shift-Period will go to the + next highlighted token. These key bindings can be changed via the Tool Options Dialog.

@@ -1134,9 +1133,9 @@ Edit the overriding function prototype applied previously to the called function under the cursor.

- This action can only be triggered at call sites with an existing signature override. As with the Override - Signature command, users must select either the token representing the called function's name or the - tokens representing the function pointer at the call site. The action brings up a dialog where the user + This action can only be triggered at call sites with an existing signature override. As with the Override + Signature command, users must select either the token representing the called function's name or the + tokens representing the function pointer at the call site. The action brings up a dialog where the user can edit the current overriding function prototype.

diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangBitFieldToken.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangBitFieldToken.java new file mode 100644 index 0000000000..13faca686b --- /dev/null +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangBitFieldToken.java @@ -0,0 +1,96 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.decompiler; + +import static ghidra.program.model.pcode.AttributeId.*; + +import ghidra.program.model.data.*; +import ghidra.program.model.pcode.*; + +public class ClangBitFieldToken extends ClangToken { + private Composite dataType; // Structure containing the bitfield + private int ident; // Identifier for the bitfield within its container + private PcodeOp op; // The op associated with the read/write of the field + + public ClangBitFieldToken(ClangNode par) { + super(par); + dataType = null; + } + + /** + * @return the structure datatype associated with this field token + */ + public DataType getDataType() { + return dataType; + } + + /** + * @return the component corresponding to the bitfield if it exists, null otherwise + */ + public DataTypeComponent getComponent() { + if (ident < 0) { + return null; + } + return dataType.getComponent(ident); + } + + @Override + public PcodeOp getPcodeOp() { + return op; + } + + @Override + public void decode(Decoder decoder, PcodeFactory pfactory) throws 
DecoderException { + String datatypestring = null; + long id = 0; + ident = -1; + for (;;) { + int attribId = decoder.getNextAttributeId(); + if (attribId == 0) { + break; + } + if (attribId == ATTRIB_NAME.id()) { // Name of the structure + datatypestring = decoder.readString(); + } + else if (attribId == ATTRIB_ID.id()) { + id = decoder.readUnsignedInteger(); + } + else if (attribId == ATTRIB_OPREF.id()) { + int refid = (int) decoder.readUnsignedInteger(); + op = pfactory.getOpRef(refid); + } + else if (attribId == ATTRIB_OFF.id()) { + ident = (int) decoder.readSignedInteger(); + } + } + if (datatypestring != null) { + DataType dt = pfactory.getDataTypeManager().findBaseType(datatypestring, id); + if (dt == null) { + throw new DecoderException("Cannot find data-type in "); + } + if (dt instanceof TypeDef) { + dt = ((TypeDef) dt).getBaseDataType(); + } + if (!(dt instanceof Composite)) { + throw new DecoderException("Data-type in is not a composite"); + } + dataType = (Composite) dt; + } + decoder.rewindAttributes(); + super.decode(decoder, pfactory); + } + +} diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangToken.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangToken.java index dddf6d3f57..e33ae50844 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangToken.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/ClangToken.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -259,6 +259,9 @@ public class ClangToken implements ClangNode { else if (node == ELEM_FIELD.id()) { token = new ClangFieldToken(par); } + else if (node == ELEM_BITFIELD.id()) { + token = new ClangBitFieldToken(par); + } else if (node == ELEM_VALUE.id()) { token = new ClangCaseToken(par); } diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileOptions.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileOptions.java index 34ad104847..120fe0781f 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileOptions.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/decompiler/DecompileOptions.java @@ -202,6 +202,13 @@ public class DecompileOptions { private final static AliasBlockEnum ALIASBLOCK_OPTIONDEFAULT = AliasBlockEnum.Array; // Must match Architecture::resetDefaultsInternal private AliasBlockEnum aliasBlock; + private final static String BITFIELD_OPTIONSTRING = "Analysis.Simplify bitfield access"; + private final static String BITFIELD_OPTIONDESCRIPTION = + "If set, expressions that insert to or pull from individual bitfields will be displayed " + + "as a normal member field access."; + private final static boolean BITFIELD_OPTIONDEFAULT = true; + private boolean bitfieldAccess; + private final static String CONVENTION_OPTIONSTRING = "Display.Print calling convention name"; private final static String CONVENTION_OPTIONDESCRIPTION = "If set, the names of calling conventions (when they differ " + @@ -499,6 +506,7 @@ public class DecompileOptions { nullToken = NULLTOKEN_OPTIONDEFAULT; inplaceTokens = INPLACEOP_OPTIONDEFAULT; aliasBlock = ALIASBLOCK_OPTIONDEFAULT; + bitfieldAccess = BITFIELD_OPTIONDEFAULT; conventionPrint = CONVENTION_OPTIONDEFAULT; noCastPrint = NOCAST_OPTIONDEFAULT; braceFunction = BRACEFUNCTION_OPTIONDEFAULT; @@ -563,6 +571,7 @@ public class DecompileOptions { nullToken = opt.getBoolean(NULLTOKEN_OPTIONSTRING, NULLTOKEN_OPTIONDEFAULT); inplaceTokens = 
opt.getBoolean(INPLACEOP_OPTIONSTRING, INPLACEOP_OPTIONDEFAULT); aliasBlock = opt.getEnum(ALIASBLOCK_OPTIONSTRING, ALIASBLOCK_OPTIONDEFAULT); + bitfieldAccess = opt.getBoolean(BITFIELD_OPTIONSTRING, BITFIELD_OPTIONDEFAULT); conventionPrint = opt.getBoolean(CONVENTION_OPTIONSTRING, CONVENTION_OPTIONDEFAULT); noCastPrint = opt.getBoolean(NOCAST_OPTIONSTRING, NOCAST_OPTIONDEFAULT); braceFunction = opt.getEnum(BRACEFUNCTION_OPTIONSTRING, BRACEFUNCTION_OPTIONDEFAULT); @@ -693,6 +702,9 @@ public class DecompileOptions { opt.registerOption(ALIASBLOCK_OPTIONSTRING, ALIASBLOCK_OPTIONDEFAULT, new HelpLocation(HelpTopics.DECOMPILER, "AnalysisAliasBlocking"), ALIASBLOCK_OPTIONDESCRIPTION); + opt.registerOption(BITFIELD_OPTIONSTRING, BITFIELD_OPTIONDEFAULT, + new HelpLocation(HelpTopics.DECOMPILER, "AnalysisBitfields"), + BITFIELD_OPTIONDESCRIPTION); opt.registerOption(CONVENTION_OPTIONSTRING, CONVENTION_OPTIONDEFAULT, new HelpLocation(HelpTopics.DECOMPILER, "DisplayConvention"), CONVENTION_OPTIONDESCRIPTION); @@ -892,6 +904,10 @@ public class DecompileOptions { if (aliasBlock != ALIASBLOCK_OPTIONDEFAULT) { appendOption(encoder, ELEM_ALIASBLOCK, aliasBlock.getOptionString(), "", ""); } + if (bitfieldAccess != BITFIELD_OPTIONDEFAULT) { + appendOption(encoder, ELEM_CURRENTACTION, "bitfields", bitfieldAccess ? "on" : "off", + ""); + } if (conventionPrint != CONVENTION_OPTIONDEFAULT) { appendOption(encoder, ELEM_CONVENTIONPRINTING, conventionPrint ? "on" : "off", "", ""); } @@ -1664,6 +1680,22 @@ public class DecompileOptions { this.aliasBlock = aliasBlock; } + /** + * {@return true if expressions accessing bitfields are simplified.} + * @see #BITFIELD_OPTIONDESCRIPTION + */ + public boolean isBitfieldAccess() { + return bitfieldAccess; + } + + /** + * Set whether expressions accessing bitfields are simplified. 
+ * @param bitfield true to enable simplification of expressions + */ + public void setBitfieldAccess(boolean bitfield) { + this.bitfieldAccess = bitfield; + } + /** * {@return number of characters per indent level.} */ diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/DecompilerProvider.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/DecompilerProvider.java index 37cf83d3ba..9314431ccd 100644 --- a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/DecompilerProvider.java +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/DecompilerProvider.java @@ -950,6 +950,9 @@ public class DecompilerProvider extends NavigatableComponentProviderAdapter RenameFieldAction renameFieldAction = new RenameFieldAction(); setGroupInfo(renameFieldAction, variableGroup, subGroupPosition++); + RenameBitFieldAction renameBitFieldAction = new RenameBitFieldAction(); + setGroupInfo(renameBitFieldAction, variableGroup, subGroupPosition++); + ForceUnionAction forceUnionAction = new ForceUnionAction(); setGroupInfo(forceUnionAction, variableGroup, subGroupPosition++); @@ -1148,6 +1151,7 @@ public class DecompilerProvider extends NavigatableComponentProviderAdapter addLocalAction(renameLocalAction); addLocalAction(renameGlobalAction); addLocalAction(renameFieldAction); + addLocalAction(renameBitFieldAction); addLocalAction(forceUnionAction); addLocalAction(setSecondaryHighlightAction); addLocalAction(setSecondaryHighlightColorChooserAction); diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameBitFieldAction.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameBitFieldAction.java new file mode 100644 index 0000000000..9cdeac2335 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameBitFieldAction.java @@ -0,0 +1,61 @@ +/* ### + * IP: 
GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package ghidra.app.plugin.core.decompile.actions; + +import java.awt.event.KeyEvent; + +import docking.action.KeyBindingData; +import docking.action.MenuData; +import ghidra.app.decompiler.ClangBitFieldToken; +import ghidra.app.decompiler.ClangToken; +import ghidra.app.plugin.core.decompile.DecompilerActionContext; +import ghidra.app.util.HelpTopics; +import ghidra.framework.plugintool.PluginTool; +import ghidra.program.model.listing.Function; +import ghidra.util.HelpLocation; +import ghidra.util.UndefinedFunction; + +public class RenameBitFieldAction extends AbstractDecompilerAction { + + public RenameBitFieldAction() { + super("Rename BitField"); + setHelpLocation(new HelpLocation(HelpTopics.DECOMPILER, "ActionRenameField")); + setPopupMenuData(new MenuData(new String[] { "Rename BitField" }, "Decompile")); + setKeyBindingData(new KeyBindingData(KeyEvent.VK_L, 0)); + } + + @Override + protected boolean isEnabledForDecompilerContext(DecompilerActionContext context) { + Function function = context.getFunction(); + if (function == null || function instanceof UndefinedFunction) { + return false; + } + + ClangToken tokenAtCursor = context.getTokenAtCursor(); + return (tokenAtCursor instanceof ClangBitFieldToken); + } + + @Override + protected void decompilerActionPerformed(DecompilerActionContext context) { + PluginTool tool = context.getTool(); + ClangBitFieldToken tokenAtCursor = 
(ClangBitFieldToken) context.getTokenAtCursor(); + + RenameTask nameTask = new RenameStructBitFieldTask(tool, context.getProgram(), + context.getComponentProvider(), tokenAtCursor); + nameTask.runTask(true); + } + +} diff --git a/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructBitFieldTask.java b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructBitFieldTask.java new file mode 100644 index 0000000000..4605c9fe29 --- /dev/null +++ b/Ghidra/Features/Decompiler/src/main/java/ghidra/app/plugin/core/decompile/actions/RenameStructBitFieldTask.java @@ -0,0 +1,69 @@ +/* ### + * IP: GHIDRA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package ghidra.app.plugin.core.decompile.actions; + +import ghidra.app.decompiler.ClangBitFieldToken; +import ghidra.app.plugin.core.decompile.DecompilerProvider; +import ghidra.framework.plugintool.PluginTool; +import ghidra.program.model.data.Composite; +import ghidra.program.model.data.DataTypeComponent; +import ghidra.program.model.listing.Program; +import ghidra.util.exception.DuplicateNameException; +import ghidra.util.exception.InvalidInputException; + +public class RenameStructBitFieldTask extends RenameTask { + + private ClangBitFieldToken token; + private DataTypeComponent component; + + public RenameStructBitFieldTask(PluginTool tool, Program program, DecompilerProvider provider, + ClangBitFieldToken token) { + super(tool, program, provider, token, token.getText()); + this.token = token; + } + + @Override + public String getTransactionName() { + return "Rename Structure BitField"; + } + + @Override + public boolean isValid(String newNm) { + newName = newNm; + component = token.getComponent(); + if (component == null) { + return false; + } + Composite structure = (Composite) token.getDataType(); + DataTypeComponent[] comp = structure.getDefinedComponents(); + for (DataTypeComponent element : comp) { + String fieldname = element.getFieldName(); + if (fieldname == null) { + continue; + } + if (fieldname.equals(newName)) { + errorMsg = "Duplicate Field Name"; + return false; + } + } + return true; + } + + @Override + public void commit() throws DuplicateNameException, InvalidInputException { + component.setFieldName(newName); + } +} diff --git a/Ghidra/Framework/Emulation/src/main/java/ghidra/pcode/opbehavior/OpBehaviorFactory.java b/Ghidra/Framework/Emulation/src/main/java/ghidra/pcode/opbehavior/OpBehaviorFactory.java index c68028bf1b..5d638d5d7d 100644 --- a/Ghidra/Framework/Emulation/src/main/java/ghidra/pcode/opbehavior/OpBehaviorFactory.java +++ b/Ghidra/Framework/Emulation/src/main/java/ghidra/pcode/opbehavior/OpBehaviorFactory.java @@ -4,9 +4,9 
@@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -102,9 +102,10 @@ public class OpBehaviorFactory { opBehaviorMap.put(PcodeOp.CPOOLREF, new SpecialOpBehavior(PcodeOp.CPOOLREF)); opBehaviorMap.put(PcodeOp.NEW, new SpecialOpBehavior(PcodeOp.NEW)); opBehaviorMap.put(PcodeOp.INSERT, new SpecialOpBehavior(PcodeOp.INSERT)); - opBehaviorMap.put(PcodeOp.EXTRACT, new SpecialOpBehavior(PcodeOp.EXTRACT)); + opBehaviorMap.put(PcodeOp.ZPULL, new SpecialOpBehavior(PcodeOp.ZPULL)); opBehaviorMap.put(PcodeOp.POPCOUNT, new OpBehaviorPopcount()); opBehaviorMap.put(PcodeOp.LZCOUNT, new OpBehaviorLzcount()); + opBehaviorMap.put(PcodeOp.SPULL, new SpecialOpBehavior(PcodeOp.SPULL)); } private OpBehaviorFactory() { diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/pcodeCPort/opcodes/OpCode.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/pcodeCPort/opcodes/OpCode.java index 53ae1f5fbb..db24f5488b 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/pcodeCPort/opcodes/OpCode.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/pcodeCPort/opcodes/OpCode.java @@ -118,9 +118,10 @@ public enum OpCode { CPUI_CPOOLREF("CPOOLREF"), CPUI_NEW("NEW"), CPUI_INSERT("INSERT"), - CPUI_EXTRACT("EXTRACT"), + CPUI_ZPULL("ZPULL"), CPUI_POPCOUNT("POPCOUNT"), CPUI_LZCOUNT("LZCOUNT"), + CPUI_SPULL("SPULL"), CPUI_MAX(null); diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/DynamicHash.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/DynamicHash.java index 5423b73d4f..b698845253 100644 
--- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/DynamicHash.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/DynamicHash.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -74,8 +74,8 @@ public class DynamicHash { 0, // CAST is skipped PcodeOp.INT_ADD, PcodeOp.INT_ADD, // PTRADD and PTRSUB hash same as INT_ADD - PcodeOp.SEGMENTOP, PcodeOp.CPOOLREF, PcodeOp.NEW, PcodeOp.INSERT, PcodeOp.EXTRACT, - PcodeOp.POPCOUNT, PcodeOp.LZCOUNT }; + PcodeOp.SEGMENTOP, PcodeOp.CPOOLREF, PcodeOp.NEW, PcodeOp.INSERT, PcodeOp.ZPULL, + PcodeOp.POPCOUNT, PcodeOp.LZCOUNT, PcodeOp.SPULL }; /** * An edge between a Varnode and a PcodeOp diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/ElementId.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/ElementId.java index 29aa805dbb..287e533640 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/ElementId.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/ElementId.java @@ -127,6 +127,7 @@ public record ElementId(String name, int id) { public static final ElementId ELEM_WCHAR_SIZE = new ElementId("wchar_size", 65); public static final ElementId ELEM_ZERO_LENGTH_BOUNDARY = new ElementId("zero_length_boundary", 66); + public static final ElementId ELEM_BITFIELD = new ElementId("bitfield", 289); // database public static final ElementId ELEM_COLLISION = new ElementId("collision", 67); @@ -459,5 +460,5 @@ public record 
ElementId(String name, int id) { public static final ElementId ELEM_EXTRA_STACK = new ElementId("extra_stack", 287); public static final ElementId ELEM_CONSUME_REMAINING = new ElementId("consume_remaining", 288); - public static final ElementId ELEM_UNKNOWN = new ElementId("XMLunknown", 289); + public static final ElementId ELEM_UNKNOWN = new ElementId("XMLunknown", 290); } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java index 789b29f95c..e026957f81 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeDataTypeManager.java @@ -476,8 +476,6 @@ public class PcodeDataTypeManager { private void encodeStructure(Encoder encoder, Structure type, int size) throws IOException { encoder.openElement(ELEM_TYPE); encodeNameIdAttributes(encoder, type); - // if size is 0, insert an Undefined4 component - // int sz = type.getLength(); if (sz == 0) { type = new StructureDataType(type.getCategoryPath(), type.getName(), 1); @@ -488,20 +486,33 @@ public class PcodeDataTypeManager { encoder.writeSignedInteger(ATTRIB_ALIGNMENT, type.getAlignment()); DataTypeComponent[] comps = type.getDefinedComponents(); for (DataTypeComponent comp : comps) { - if (comp.isBitFieldComponent() || comp.getLength() == 0) { - // TODO: bitfields, zero-length components and zero-element arrays are not yet supported by decompiler + if (comp.getLength() == 0) { continue; } - encoder.openElement(ELEM_FIELD); String field_name = comp.getFieldName(); if (field_name == null || field_name.length() == 0) { field_name = comp.getDefaultFieldName(); } - encoder.writeString(ATTRIB_NAME, field_name); - encoder.writeSignedInteger(ATTRIB_OFFSET, comp.getOffset()); - DataType fieldtype = comp.getDataType(); - 
encodeTypeRef(encoder, fieldtype, comp.getLength()); - encoder.closeElement(ELEM_FIELD); + if (comp.isBitFieldComponent()) { + BitFieldDataType bitfield = (BitFieldDataType) comp.getDataType(); + encoder.openElement(ELEM_BITFIELD); + encoder.writeString(ATTRIB_NAME, field_name); + encoder.writeSignedInteger(ATTRIB_ID, comp.getOrdinal()); + encoder.writeSignedInteger(ATTRIB_OFFSET, comp.getOffset()); + encoder.writeSignedInteger(ATTRIB_FIRST, bitfield.getBitOffset()); + encoder.writeSignedInteger(ATTRIB_SIZE, bitfield.getBitSize()); + DataType inttype = bitfield.getBaseDataType(); + encodeTypeRef(encoder, inttype, inttype.getLength()); + encoder.closeElement(ELEM_BITFIELD); + } + else { + encoder.openElement(ELEM_FIELD); + encoder.writeString(ATTRIB_NAME, field_name); + encoder.writeSignedInteger(ATTRIB_OFFSET, comp.getOffset()); + DataType fieldtype = comp.getDataType(); + encodeTypeRef(encoder, fieldtype, comp.getLength()); + encoder.closeElement(ELEM_FIELD); + } } encoder.closeElement(ELEM_TYPE); } diff --git a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeOp.java b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeOp.java index 1811bdb7d2..e89815450b 100644 --- a/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeOp.java +++ b/Ghidra/Framework/SoftwareModeling/src/main/java/ghidra/program/model/pcode/PcodeOp.java @@ -130,11 +130,12 @@ public class PcodeOp { public static final int CPOOLREF = 68; public static final int NEW = 69; public static final int INSERT = 70; - public static final int EXTRACT = 71; + public static final int ZPULL = 71; public static final int POPCOUNT = 72; public static final int LZCOUNT = 73; + public static final int SPULL = 74; - public static final int PCODE_MAX = 74; + public static final int PCODE_MAX = 75; private static Hashtable opcodeTable; @@ -697,13 +698,14 @@ public class PcodeOp { return "NEW"; case INSERT: return "INSERT"; - case 
EXTRACT: - return "EXTRACT"; + case ZPULL: + return "ZPULL"; case POPCOUNT: return "POPCOUNT"; case LZCOUNT: return "LZCOUNT"; - + case SPULL: + return "SPULL"; default: return "INVALID_OP"; } diff --git a/GhidraDocs/languages/html/additionalpcode.html b/GhidraDocs/languages/html/additionalpcode.html index 86dec10094..98e12261a6 100644 --- a/GhidraDocs/languages/html/additionalpcode.html +++ b/GhidraDocs/languages/html/additionalpcode.html @@ -354,27 +354,30 @@ interpretation as a data-type changes at this point.

-The values position and size must be constants. -The least significant size bits from input1 are -inserted into input0, overwriting a range of bits of the same size, -but leaving any other bits in input0 unchanged. The least significant bit of the overwritten -range is given by position, where bits in index0 are labeled from least significant -to most significant, starting at 0. The value obtained after this overwriting is returned -as output. -Varnodes input0 and output must be the same size and are intended to be the same varnode. -The value size must be not be bigger than the varnode input1, and -size + position must not be bigger than the varnode input0. + An INSERT operation takes the least significant + size bits from input1 and inserts them into input0, overwriting + a range of bits of the same size, but leaving any other bits in input0 unchanged.

-This operation is never generated as raw p-code, even though it is equivalent -to SLEIGH bitrange syntax such as input0[10,1] = input1. + The least significant bit of the overwritten range is given by position, + where bits in input0 are labeled from least significant to most significant, starting at 0. + The value obtained after this overwriting is returned + as output. Varnodes input0 and output must be the same size and are intended to be the same + varnode. The values position and size must be + constants. The value size must not be bigger than the varnode input1, + and size + position must not be bigger than the + varnode input0. +

+

+ This operation is never generated as raw p-code, even though it is equivalent + to SLEIGH bitrange syntax such as input0[10,1] = input1.

-EXTRACT

+ZPULL
- +
@@ -401,7 +404,7 @@ to SLEIGH bitrange syntax such as inp - + @@ -417,18 +420,87 @@ to SLEIGH bitrange syntax such as inp
output Varnode result containing the extracted value.Varnode containing the extracted value as an unsigned integer.

-The values position and size must be constants. -The operation extracts size bits from input0 and returns it in output. -The position indicates the least significant bit in the range being extracted, with -the bits in input0 labeled from least to most significant, starting at 0. The varnodes input0 and output -can be different sizes, and the extracted value is zero extended into output. -The value size must not be bigger than the varnode output, and -size + position must not be bigger -than the varnode input0. + A ZPULL operation extracts size bits + from input0 and returns them as an unsigned integer value in output, zero extending the + bits to the size of output.

-This operation is never generated as raw p-code, even though it is equivalent -to SLEIGH bitrange syntax such as output = input0[10,1]. + The position indicates the least significant bit in the range + being extracted, with the bits in input0 labeled from least to most significant, + starting at 0. The varnodes input0 and output can be different sizes. + The values position and size must be constants. + The value size must not be bigger than the varnode output, and + size + position must not be bigger + than the varnode input0. +

+

+ ZPULL is never generated as raw p-code, even though it + is equivalent to SLEIGH bitrange syntax such as + output = input0[10,1]. +

+ +
+

+SPULL

+
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParametersDescription
input0Varnode to extract a value from.
position(constant)Constant indicating the bit position to extract from.
size(constant)Constant indicating the number of bits to extract.
outputVarnode containing the extracted value as a signed integer
Semantic statement
Cannot be explicitly coded.
+
+

+ An SPULL operation extracts size bits + from input0 and returns them as a signed integer value in output. The value is sign extended + to the size of output, duplicating the most significant extracted bit. +

+

+ The position indicates the least significant bit in the range + being extracted, with the bits in input0 labeled from least to most significant, + starting at 0. The varnodes input0 and output can be different sizes. + The values position and size must be constants. + The value size must not be bigger than the varnode output, and + size + position must not be bigger + than the varnode input0. +

+

+ SPULL is never generated as raw p-code.

diff --git a/GhidraDocs/languages/html/pcodedescription.html b/GhidraDocs/languages/html/pcodedescription.html index 067fc2b82a..3ee65928f0 100644 --- a/GhidraDocs/languages/html/pcodedescription.html +++ b/GhidraDocs/languages/html/pcodedescription.html @@ -262,7 +262,7 @@ In this case, the offset of input0 is considered a relative offset into the indexed list of p-code operations corresponding to the translation of the current machine instruction. This allows branching within the operations forming a single instruction. For example, if -the BRANCH occurs as the pcode +the BRANCH occurs as the p-code operation with index 5 for the instruction, it can branch to operation with index 8 by specifying a constant destination “address” of 3. Negative constants can be used for backward branches. diff --git a/GhidraDocs/languages/html/pcoderef.html b/GhidraDocs/languages/html/pcoderef.html index ff2043bffd..abc43d6a98 100644 --- a/GhidraDocs/languages/html/pcoderef.html +++ b/GhidraDocs/languages/html/pcoderef.html @@ -26,7 +26,7 @@

P-Code Reference Manual

-

Last updated March 2, 2023

+

Last updated January 16, 2026


diff --git a/GhidraDocs/languages/html/reference.html b/GhidraDocs/languages/html/reference.html index 0212d3ec62..4243924e62 100644 --- a/GhidraDocs/languages/html/reference.html +++ b/GhidraDocs/languages/html/reference.html @@ -127,7 +127,7 @@ SUBPIECE v0:2 - The least signficant n bytes of v0. + The least significant n bytes of v0. SUBPIECE diff --git a/GhidraDocs/languages/html/sleigh.html b/GhidraDocs/languages/html/sleigh.html index 4ab92f7e4d..be355d5d41 100644 --- a/GhidraDocs/languages/html/sleigh.html +++ b/GhidraDocs/languages/html/sleigh.html @@ -4,7 +4,7 @@ SLEIGH - + diff --git a/GhidraDocs/languages/html/sleigh_symbols.html b/GhidraDocs/languages/html/sleigh_symbols.html index 70598b7310..4e0506124f 100644 --- a/GhidraDocs/languages/html/sleigh_symbols.html +++ b/GhidraDocs/languages/html/sleigh_symbols.html @@ -194,10 +194,10 @@ We list all of the symbols that are predefined by SLEIGH.

The most important of these to be aware of are inst_start -and inst_next. These are family -symbols that map to the integer offset of either the instruction's -address or the next instruction's address, depending on the context -of a particular instruction. These are used in any relative branching +and inst_next. These are family symbols that map +to the integer offset of either the instruction's address or the next +instruction's address, depending on the context of a particular instruction. +These are used in any relative branching situation. The inst_next2 is intended for conditional skip instruction situations. The remaining symbols are rarely used. The const and unique