mirror of
https://github.com/NationalSecurityAgency/ghidra.git
synced 2026-05-27 21:45:55 +08:00
GDB: Fix parsing of \e and \a
The GDB "printchar" function encodes \x1b as \e and \x07 as \a, neither of which is handled by StringEscapeUtils.unescapeJava. Strings that contained these escapes were incorrectly decoded as "e" and "a" respectively. Switch to a simple decoding loop instead. The cases here match precisely the escape characters used in GDB's printchar function - in particular, there are no hex escapes, and all octal escapes are exactly three digits long.
This commit is contained in:
+59
-23
@@ -15,13 +15,14 @@
|
||||
*/
|
||||
package agent.gdb.manager.parsing;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.collections4.MultiMapUtils;
|
||||
import org.apache.commons.collections4.MultiValuedMap;
|
||||
import org.apache.commons.collections4.multimap.HashSetValuedHashMap;
|
||||
import org.apache.commons.lang3.StringEscapeUtils;
|
||||
|
||||
import agent.gdb.manager.parsing.GdbParsingUtils.AbstractGdbParser;
|
||||
import agent.gdb.manager.parsing.GdbParsingUtils.GdbParseError;
|
||||
@@ -266,9 +267,6 @@ public class GdbMiParser extends AbstractGdbParser {
|
||||
}
|
||||
}
|
||||
|
||||
// see #parseString() for why this is no longer used....
|
||||
//protected static final Pattern CSTRING = Pattern.compile("\\\"(\\\\.|[^\\\\\"])*\\\"");
|
||||
|
||||
protected static final Pattern COMMA = Pattern.compile(",");
|
||||
protected static final Pattern LBRACKET = Pattern.compile("\\[");
|
||||
protected static final Pattern RBRACKET = Pattern.compile("\\]");
|
||||
@@ -354,40 +352,78 @@ public class GdbMiParser extends AbstractGdbParser {
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the string at the cursor
|
||||
* Parse the string at the cursor, undoing GDB's printchar transformation.
|
||||
*
|
||||
* @see #parseString(CharSequence)
|
||||
* @return the string
|
||||
* @throws GdbParseError if no text matches the pattern
|
||||
*/
|
||||
public String parseString() throws GdbParseError {
|
||||
/*
|
||||
* Matching CSTRING for inputs of too many characters (2048, really?) causes a
|
||||
* StackOverflowException in Java's built-in Pattern object. Boo! Thus, I'll write this
|
||||
* myself. All said and done, this might actually look better than the old regex
|
||||
*/
|
||||
// String match = match(CSTRING);
|
||||
//return StringEscapeUtils.unescapeJava(match.substring(1, match.length() - 1));
|
||||
int start = buf.position();
|
||||
if ('"' != peek(false)) { // Keep whitespace that is in the string
|
||||
throw new GdbParseError("\"", buf);
|
||||
}
|
||||
buf.get(); // consume "
|
||||
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
while (true) {
|
||||
char c = buf.get();
|
||||
if (c == '"') {
|
||||
char ch = buf.get();
|
||||
if (ch > 0xff) {
|
||||
throw new GdbParseError("byte", "U+" + String.format("%04X", ch));
|
||||
} else if (ch == '"') {
|
||||
break;
|
||||
} else if (ch != '\\') {
|
||||
baos.write(ch);
|
||||
continue;
|
||||
}
|
||||
else if (c == '\\') {
|
||||
buf.get();
|
||||
|
||||
/* Handle backslash-escape */
|
||||
ch = buf.get();
|
||||
switch (ch) {
|
||||
case 'n':
|
||||
baos.write('\n');
|
||||
break;
|
||||
case 'b':
|
||||
baos.write('\b');
|
||||
break;
|
||||
case 't':
|
||||
baos.write('\t');
|
||||
break;
|
||||
case 'f':
|
||||
baos.write('\f');
|
||||
break;
|
||||
case 'r':
|
||||
baos.write('\r');
|
||||
break;
|
||||
case 'e':
|
||||
baos.write(0x1b);
|
||||
break;
|
||||
case 'a':
|
||||
baos.write(0x07);
|
||||
break;
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
char ch2 = buf.get();
|
||||
if (ch2 < '0' || ch2 > '9') {
|
||||
throw new GdbParseError("octal", "" + ch2);
|
||||
}
|
||||
char ch3 = buf.get();
|
||||
if (ch3 < '0' || ch3 > '9') {
|
||||
throw new GdbParseError("octal", "" + ch3);
|
||||
}
|
||||
int octchar = ((ch - '0') << 6) | ((ch2 - '0') << 3) | (ch3 - '0');
|
||||
baos.write(octchar);
|
||||
break;
|
||||
case '\\':
|
||||
case '"':
|
||||
baos.write(ch);
|
||||
break;
|
||||
default:
|
||||
throw new GdbParseError("escape", "" + ch);
|
||||
}
|
||||
}
|
||||
// the closing " will already have been consumed
|
||||
int end = buf.position();
|
||||
buf.position(0);
|
||||
String result = buf.subSequence(start + 1, end - 1).toString(); // remove "s
|
||||
buf.position(end);
|
||||
return StringEscapeUtils.unescapeJava(result);
|
||||
return baos.toString(StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
+12
@@ -61,4 +61,16 @@ public class GdbMiParserTest {
|
||||
exp.add("w", "World");
|
||||
}), parser.parseMap());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseStringEscapes() throws GdbParseError {
|
||||
GdbMiParser parser = new GdbMiParser("\"basic=\\n\\b\\t\\f\\r c=\\e[0m\\a delim=\\\\\\\" octal=\\000\\177\"");
|
||||
assertEquals("basic=\n\b\t\f\r c=\033[0m\007 delim=\\\" octal=\000\177", parser.parseString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseStringUTF8() throws GdbParseError {
|
||||
GdbMiParser parser = new GdbMiParser("\"\\302\\244 \\342\\204\\212 \\343\\201\\251 \\351\\276\\231 \\360\\237\\230\\200\"");
|
||||
assertEquals("\u00a4 \u210a \u3069 \u9f99 \ud83d\ude00", parser.parseString());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user