GDB: Fix parsing of \e and \a

The GDB "printchar" function encodes \x1b as \e and \x07 as \a, neither of which is handled by StringEscapeUtils.unescapeJava. Strings that contained these escape sequences were incorrectly decoded as "e" and "a" respectively. Switch to a simple decoding loop instead. The cases here precisely match the escape characters used in GDB's printchar function - in particular, there are no hex escapes, and all octal escapes are exactly three digits long.
2026-05-27 21:45:55 +08:00 · 2022-04-21 01:07:38 -07:00
parent 47f76c78d6
commit aaba1de99d
2 changed files with 71 additions and 23 deletions
@@ -15,13 +15,14 @@
 */
 package agent.gdb.manager.parsing;

+import java.io.ByteArrayOutputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.*;
 import java.util.regex.Pattern;

 import org.apache.commons.collections4.MultiMapUtils;
 import org.apache.commons.collections4.MultiValuedMap;
 import org.apache.commons.collections4.multimap.HashSetValuedHashMap;
-import org.apache.commons.lang3.StringEscapeUtils;

 import agent.gdb.manager.parsing.GdbParsingUtils.AbstractGdbParser;
 import agent.gdb.manager.parsing.GdbParsingUtils.GdbParseError;
@@ -266,9 +267,6 @@ public class GdbMiParser extends AbstractGdbParser {
 		}
 	}

-	// see #parseString() for why this is no longer used....
-	//protected static final Pattern CSTRING = Pattern.compile("\\\"(\\\\.|[^\\\\\"])*\\\"");
-
 	protected static final Pattern COMMA = Pattern.compile(",");
 	protected static final Pattern LBRACKET = Pattern.compile("\\[");
 	protected static final Pattern RBRACKET = Pattern.compile("\\]");
@@ -354,40 +352,78 @@ public class GdbMiParser extends AbstractGdbParser {
 	}

 	/**
-	 * Parse the string at the cursor
+	 * Parse the string at the cursor, undoing GDB's printchar transformation.
 	 * 
 	 * @see #parseString(CharSequence)
 	 * @return the string
 	 * @throws GdbParseError if no text matches the pattern
 	 */
 	public String parseString() throws GdbParseError {
-		/*
-		 * Matching CSTRING for inputs of too many characters (2048, really?) causes a
-		 * StackOverflowException in Java's built-in Pattern object. Boo! Thus, I'll write this
-		 * myself. All said and done, this might actually look better than the old regex
-		 */
-		// String match = match(CSTRING);
-		//return StringEscapeUtils.unescapeJava(match.substring(1, match.length() - 1));
-		int start = buf.position();
 		if ('"' != peek(false)) { // Keep whitespace that is in the string
 			throw new GdbParseError("\"", buf);
 		}
 		buf.get(); // consume "
+
+		ByteArrayOutputStream baos = new ByteArrayOutputStream();
 		while (true) {
-			char c = buf.get();
-			if (c == '"') {
+			char ch = buf.get();
+			if (ch > 0xff) {
+				throw new GdbParseError("byte", "U+" + String.format("%04X", ch));
+			} else if (ch == '"') {
 				break;
+			} else if (ch != '\\') {
+				baos.write(ch);
+				continue;
 			}
-			else if (c == '\\') {
-				buf.get();
+
+			/* Handle backslash-escape; raw bytes are collected and decoded as UTF-8 at the end */
+			ch = buf.get();
+			switch (ch) {
+				case 'n':
+					baos.write('\n');
+					break;
+				case 'b':
+					baos.write('\b');
+					break;
+				case 't':
+					baos.write('\t');
+					break;
+				case 'f':
+					baos.write('\f');
+					break;
+				case 'r':
+					baos.write('\r');
+					break;
+				case 'e':
+					baos.write(0x1b);
+					break;
+				case 'a':
+					baos.write(0x07);
+					break;
+				case '0':
+				case '1':
+				case '2':
+				case '3':
+					char ch2 = buf.get();
+					if (ch2 < '0' || ch2 > '7') { // octal digits are 0-7; 8/9 would corrupt the value
+						throw new GdbParseError("octal", "" + ch2);
+					}
+					char ch3 = buf.get();
+					if (ch3 < '0' || ch3 > '7') { // octal digits are 0-7; 8/9 would corrupt the value
+						throw new GdbParseError("octal", "" + ch3);
+					}
+					int octchar = ((ch - '0') << 6) | ((ch2 - '0') << 3) | (ch3 - '0');
+					baos.write(octchar);
+					break;
+				case '\\':
+				case '"':
+					baos.write(ch);
+					break;
+				default:
+					throw new GdbParseError("escape", "" + ch);
 			}
 		}
-		// the closing " will already have been consumed
-		int end = buf.position();
-		buf.position(0);
-		String result = buf.subSequence(start + 1, end - 1).toString(); // remove "s
-		buf.position(end);
-		return StringEscapeUtils.unescapeJava(result);
+		return baos.toString(StandardCharsets.UTF_8);
 	}

 	/**
@@ -61,4 +61,16 @@ public class GdbMiParserTest {
 			exp.add("w", "World");
 		}), parser.parseMap());
 	}
+
+	@Test
+	public void testParseStringEscapes() throws GdbParseError {
+		String expected = "basic=\n\b\t\f\r c=\033[0m\007 delim=\\\" octal=\000\177";
+		assertEquals(expected, new GdbMiParser("\"basic=\\n\\b\\t\\f\\r c=\\e[0m\\a delim=\\\\\\\" octal=\\000\\177\"").parseString());
+	}
+
+	@Test
+	public void testParseStringUTF8() throws GdbParseError {
+		String expected = "\u00a4 \u210a \u3069 \u9f99 \ud83d\ude00";
+		assertEquals(expected, new GdbMiParser("\"\\302\\244 \\342\\204\\212 \\343\\201\\251 \\351\\276\\231 \\360\\237\\230\\200\"").parseString());
+	}
 }