Replace hex and octal escapes with literal UTF-8 characters

This makes the code more readable, and there should no longer be any reason not to use UTF-8 in string literals. Also replace wxHAVE_U_ESCAPE (all still-supported compilers have it) with wxMUST_USE_U_ESCAPE. The new symbol is currently set only for MSVS 2015; it will be removed once support for that compiler is dropped and literal Unicode characters can be used in wide strings everywhere.
2026-03-23 18:54:01 +08:00 · 2025-06-20 01:01:28 +02:00
parent d6dba57730
commit b672131185
16 changed files with 81 additions and 78 deletions
--- a/src/common/datetimefmt.cpp
+++ b/src/common/datetimefmt.cpp
@@ -1555,8 +1555,8 @@ wxDateTime::ParseFormat(const wxString& date,
                        minusFound = false;
                    else if
                    (
-                        *input == wxS('-')
-                        || *input == wxString::FromUTF8("\xe2\x88\x92")
+                        *input == wxS('-') ||
+                        *input == wxString::FromUTF8("−") // U+2212 MINUS SIGN
                    )
                        minusFound = true;
                    else
--- a/src/common/utilscmn.cpp
+++ b/src/common/utilscmn.cpp
@@ -1069,15 +1069,13 @@ wxVersionInfo wxGetLibraryVersionInfo()

    msg += wxPlatformInfo::Get().GetPlatformDescription();

-    const wxString copyrightSign = wxString::FromUTF8("\xc2\xa9");
-
    return wxVersionInfo(wxS("wxWidgets"),
                         wxMAJOR_VERSION,
                         wxMINOR_VERSION,
                         wxRELEASE_NUMBER,
                         msg,
-                         wxString::Format(wxS("Copyright %s 1992-2025 wxWidgets team"),
-                                          copyrightSign));
+                         wxString::FromUTF8("Copyright © 1992-2025 wxWidgets team")
+                         );
 }


--- a/src/generic/aboutdlgg.cpp
+++ b/src/generic/aboutdlgg.cpp
@@ -98,7 +98,7 @@ wxString wxAboutDialogInfo::GetCopyrightToDisplay() const
 {
    wxString ret = m_copyright;

-    const wxString copyrightSign = wxString::FromUTF8("\xc2\xa9");
+    const wxString copyrightSign = wxString::FromUTF8("©");
    ret.Replace("(c)", copyrightSign);
    ret.Replace("(C)", copyrightSign);

--- a/src/qt/renderer.cpp
+++ b/src/qt/renderer.cpp
@@ -481,8 +481,7 @@ wxRendererQt::DrawCheckMark(wxWindow *win, wxDC& dc, const wxRect& rect, int WXU
    wxDCClipper clip(dc, rect);
    wxDCFontChanger fontChanger(dc, win->GetFont());

-    // Draw the unicode character “✓” (U+2713)
-    const auto checkMark = wxString::FromUTF8("\xE2\x9C\x93");
+    const auto checkMark = wxString::FromUTF8("✓"); // U+2713 CHECK MARK
    dc.DrawText(checkMark, rect.GetPosition());
 }

--- a/tests/exec/exec.cpp
+++ b/tests/exec/exec.cpp
@@ -519,7 +519,7 @@ TEST_CASE("wxExecute::RedirectUTF8", "[.]")
        // It seems unlikely that this part of the output will change for GNU
        // ls, so check for its presence as a sign that the program output was
        // decoded correctly.
-        if ( output[n].find(wxString::FromUTF8("vous \xc3\xaates libre")) != wxString::npos )
+        if ( output[n].find(wxString::FromUTF8("vous êtes libre")) != wxString::npos )
            return;
    }

--- a/tests/file/filefn.cpp
+++ b/tests/file/filefn.cpp
@@ -496,7 +496,7 @@ TEST_CASE_METHOD(FileFunctionsTestCase,
                 "FileFunctions::Mkdir",
                 "[filefn]")
 {
-    wxString dirname = wxString::FromUTF8("__wxMkdir_test_dir_with_\xc3\xb6");
+    wxString dirname = wxString::FromUTF8("__wxMkdir_test_dir_with_ö");
    INFO("Dir: " << dirname);

    CHECK( wxMkdir(dirname) );
@@ -508,7 +508,7 @@ TEST_CASE_METHOD(FileFunctionsTestCase,
                 "FileFunctions::Rmdir",
                 "[filefn]")
 {
-    wxString dirname = wxString::FromUTF8("__wxRmdir_test_dir_with_\xc3\xb6");
+    wxString dirname = wxString::FromUTF8("__wxRmdir_test_dir_with_ö");
    INFO("Dir: " << dirname);

    CHECK( wxMkdir(dirname) );
--- a/tests/mbconv/convautotest.cpp
+++ b/tests/mbconv/convautotest.cpp
@@ -149,7 +149,7 @@ void ConvAutoTestCase::Empty()
 void ConvAutoTestCase::Encode()
 {
    wxConvAuto conv;
-    wxString str = wxString::FromUTF8("\xd0\x9f\xe3\x81\x82");
+    wxString str = wxString::FromUTF8("Пあ");
    wxCharBuffer buf = conv.cWC2MB(str.wc_str());
    CPPUNIT_ASSERT( buf );
    CPPUNIT_ASSERT_EQUAL( str, wxString::FromUTF8(buf) );
@@ -186,36 +186,34 @@ void ConvAutoTestCase::UTF16BE()
    TestFirstChar("\xfe\xff\0Y", wxT('Y'), 4, ConvState(wxBOM_UTF16BE, wxFONTENCODING_UTF16BE));
 }

+#ifdef wxMUST_USE_U_ESCAPE
+constexpr wchar_t CYRILLIC_LETTER_P = L'\u041f';
+#else
+constexpr wchar_t CYRILLIC_LETTER_P = L'П';
+#endif
+
 void ConvAutoTestCase::UTF8()
 {
-#ifdef wxHAVE_U_ESCAPE
-    TestFirstChar("\xef\xbb\xbf\xd0\x9f", L'\u041f', wxNO_LEN, ConvState(wxBOM_UTF8, wxFONTENCODING_UTF8));
-#endif
+    TestFirstChar("\xef\xbb\xbfП", CYRILLIC_LETTER_P, wxNO_LEN, ConvState(wxBOM_UTF8, wxFONTENCODING_UTF8));
 }

 void ConvAutoTestCase::UTF8NoBom()
 {
-#ifdef wxHAVE_U_ESCAPE
-    TestFirstChar("\xd0\x9f\xe3\x81\x82", L'\u041f', wxNO_LEN, ConvState(wxBOM_None, wxFONTENCODING_UTF8));
-#endif
+    TestFirstChar("Пあ", CYRILLIC_LETTER_P, wxNO_LEN, ConvState(wxBOM_None, wxFONTENCODING_UTF8));
 }

 void ConvAutoTestCase::Fallback()
 {
-#ifdef wxHAVE_U_ESCAPE
-    TestFirstChar("\xbf", L'\u041f', wxNO_LEN,
+    TestFirstChar("\xbf", CYRILLIC_LETTER_P, wxNO_LEN,
                  ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true),
                  wxFONTENCODING_ISO8859_5);
-#endif
 }

 void ConvAutoTestCase::FallbackMultibyte()
 {
-#ifdef wxHAVE_U_ESCAPE
-    TestFirstChar("\x84\x50", L'\u041f', wxNO_LEN,
+    TestFirstChar("\x84\x50", CYRILLIC_LETTER_P, wxNO_LEN,
                  ConvState(wxBOM_None, wxFONTENCODING_CP932, true),
                  wxFONTENCODING_CP932);
-#endif
 }

 void ConvAutoTestCase::FallbackShort()
@@ -246,8 +244,8 @@ void ConvAutoTestCase::TestTextStream(const char *src,
 namespace
 {

-const wxString line1 = wxString::FromUTF8("a\xe3\x81\x82");
-const wxString line2 = wxString::FromUTF8("\xce\xb2");
+const wxString line1 = wxString::FromUTF8("aあ");
+const wxString line2 = wxString::FromUTF8("β");

 } // anonymous namespace

--- a/tests/streams/datastreamtest.cpp
+++ b/tests/streams/datastreamtest.cpp
@@ -244,7 +244,7 @@ void DataStreamTestCase::StringRW()
    s.append(wxT("Test2"));
    CPPUNIT_ASSERT_EQUAL( TestRW(s), s );

-    s = wxString::FromUTF8("\xc3\xbc"); // U+00FC LATIN SMALL LETTER U WITH DIAERESIS
+    s = wxString::FromUTF8("ü");
    CPPUNIT_ASSERT_EQUAL( TestRW(s), s );
 }

--- a/tests/strings/crt.cpp
+++ b/tests/strings/crt.cpp
@@ -42,7 +42,7 @@ TEST_CASE("CRT::SetGetEnv", "[crt][getenv][setenv]")
    CHECK( val == "value" );
    CHECK( wxString(wxGetenv(TESTVAR_NAME)) == "value" );

-    const wxString nonASCII = wxString::FromUTF8("\xe2\x98\xba");
+    const wxString nonASCII = wxString::FromUTF8("☺");
    wxSetEnv(TESTVAR_NAME, nonASCII);
    CHECK( wxGetEnv(TESTVAR_NAME, &val) );
    CHECK( val == nonASCII );
@@ -68,12 +68,12 @@ TEST_CASE("CRT::Strchr", "[crt][strchr]")
 {
    // test that searching for a wide character in a narrow string simply
    // doesn't find it but doesn't fail with an assert (#11487)
-    const wxUniChar smiley = *wxString::FromUTF8("\xe2\x98\xba").begin();
+    const wxUniChar smiley = *wxString::FromUTF8("☺").begin();

    CHECK( !wxStrchr("hello", smiley) );

    // but searching for an explicitly wide character does find it
-    CHECK( wxStrchr(wxString::FromUTF8(":-) == \xe2\x98\xba"),
+    CHECK( wxStrchr(wxString::FromUTF8(":-) == ☺"),
                    static_cast<wchar_t>(smiley)) );
 }

--- a/tests/strings/stdstrings.cpp
+++ b/tests/strings/stdstrings.cpp
@@ -547,10 +547,9 @@ TEST_CASE("StdString::Resize", "[stdstring]")
    CHECK( s3 == wxT("abcABCdefDEF  ") );
    CHECK( s4 == wxT("abcABCdefDEFWW") );

-    wxString s =
-        wxString::FromUTF8("\xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82");
+    wxString s = wxString::FromUTF8("Привет");
    s.resize(3);
-    CHECK( s == wxString::FromUTF8("\xd0\x9f\xd1\x80\xd0\xb8") );
+    CHECK( s == wxString::FromUTF8("При") );
 }

 TEST_CASE("StdString::Riter", "[stdstring]")
--- a/tests/strings/strings.cpp
+++ b/tests/strings/strings.cpp
@@ -115,10 +115,7 @@ TEST_CASE("StringFormatUnicode", "[wxString]")
    wxLocaleSetter loc("C.UTF-8");
 #endif // !__WINDOWS__

-    const char *UNICODE_STR = "Iestat\xC4\xAB %i%i";
-    //const char *UNICODE_STR = "Iestat\xCC\x84 %i%i";
-
-    wxString fmt = wxString::FromUTF8(UNICODE_STR);
+    wxString fmt = wxString::FromUTF8("Iestatī");
    wxString s = wxString::Format(fmt, 1, 1);
    wxString expected(fmt);
    expected.Replace("%i", "1");
@@ -207,8 +204,7 @@ TEST_CASE("StringExtraction", "[wxString]")
    CHECK( wxStrcmp( s.substr(3, 5).c_str() , wxT("lo, w") ) == 0 );
    CHECK( wxStrcmp( s.substr(3).c_str() , wxT("lo, world!") ) == 0 );

-    static const char *germanUTF8 = "Oberfl\303\244che";
-    wxString strUnicode(wxString::FromUTF8(germanUTF8));
+    wxString strUnicode(wxString::FromUTF8("Oberfläche"));

    CHECK( strUnicode.Mid(0, 10) == strUnicode );
    CHECK( strUnicode.Mid(7, 2) == "ch" );
--- a/tests/strings/unicode.cpp
+++ b/tests/strings/unicode.cpp
@@ -189,9 +189,8 @@ void UnicodeTestCase::ToFromAscii()

 void UnicodeTestCase::ConstructorsWithConversion()
 {
-    // the string "Déjà" in UTF-8 and wchar_t:
-    const unsigned char utf8Buf[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
-    const unsigned char utf8subBuf[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
+    const unsigned char utf8Buf[] = "Déjà";
+    const unsigned char utf8subBuf[] = "Déj";
    const char* utf8 = reinterpret_cast<const char*>(utf8Buf);
    const char* utf8sub = reinterpret_cast<const char*>(utf8subBuf);

@@ -311,10 +310,11 @@ void UnicodeTestCase::ConversionUTF8()
 {
    static const StringConversionData utf8data[] =
    {
-#ifdef wxHAVE_U_ESCAPE
+#ifdef wxMUST_USE_U_ESCAPE
        StringConversionData("\xc2\xa3", L"\u00a3"),
+#else
+        StringConversionData("£", L"£"),
 #endif
-        StringConversionData("\xc2", nullptr),
    };

    wxCSConv conv(wxT("utf-8"));
@@ -345,15 +345,15 @@ void UnicodeTestCase::ConversionUTF16()
 {
    static const StringConversionData utf16data[] =
    {
-#ifdef wxHAVE_U_ESCAPE
+#ifdef wxMUST_USE_U_ESCAPE
        StringConversionData(
            "\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0",
            L"\u041f\u0440\u0438\u0432\u0435\u0442"),
+#else
        StringConversionData(
-            "\x01\0\0b\x01\0\0a\x01\0\0r\0\0",
-            L"\u0100b\u0100a\u0100r"),
+            "\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0",
+            L"Привет"),
 #endif
-        StringConversionData("\0f\0o\0o\0\0", L"foo"),
    };

    wxCSConv conv(wxFONTENCODING_UTF16BE);
@@ -397,12 +397,15 @@ void UnicodeTestCase::ConversionUTF32()
 {
    static const StringConversionData utf32data[] =
    {
-#ifdef wxHAVE_U_ESCAPE
+#ifdef wxMUST_USE_U_ESCAPE
        StringConversionData(
            "\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0",
          L"\u041f\u0440\u0438\u0432\u0435\u0442"),
+#else
+        StringConversionData(
+            "\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0",
+          L"Привет"),
 #endif
-        StringConversionData("\0\0\0f\0\0\0o\0\0\0o\0\0\0\0", L"foo"),
    };

    wxCSConv conv(wxFONTENCODING_UTF32BE);
@@ -431,8 +434,7 @@ void UnicodeTestCase::IsConvOk()

 void UnicodeTestCase::Iteration()
 {
-    // "czech" in Czech ("cestina"):
-    static const char *textUTF8 = "\304\215e\305\241tina";
+    static const char *textUTF8 = "čeština";// "czech" in Czech
    static const wchar_t textUTF16[] = {0x10D, 0x65, 0x161, 0x74, 0x69, 0x6E, 0x61, 0};

    wxString text(wxString::FromUTF8(textUTF8));
--- a/tests/strings/vsnprintf.cpp
+++ b/tests/strings/vsnprintf.cpp
@@ -254,13 +254,13 @@ TEST_CASE_METHOD(VsnprintfTestCase, "Vsnprintf::S", "[vsnprintf]")
    // Unicode code points from U+03B1 to U+03B9 are the greek letters alpha-iota;
    // UTF8 encoding of such code points is 0xCEB1 to 0xCEB9

-#define ALPHA       "\xCE\xB1"
+#define ALPHA       "α"
        // alpha
-#define ABC         "\xCE\xB1\xCE\xB2\xCE\xB3"
+#define ABC         "αβγ"
        // alpha+beta+gamma
-#define ABCDE       "\xCE\xB1\xCE\xB2\xCE\xB3\xCE\xB4\xCE\xB5"
+#define ABCDE       "αβγδε"
        // alpha+beta+gamma+delta+epsilon
-#define ABCDEFGHI   "\xCE\xB1\xCE\xB2\xCE\xB3\xCE\xB4\xCE\xB5\xCE\xB6\xCE\xB7\xCE\xB8\xCE\xB9"
+#define ABCDEFGHI   "αβγδεζηθι"
        // alpha+beta+gamma+delta+epsilon+zeta+eta+theta+iota

    // the 'expected' and 'arg' parameters of this macro are supposed to be
--- a/tests/testprec.h
+++ b/tests/testprec.h
@@ -27,14 +27,14 @@
    #define WXUISIM_TEST(test)
 #endif

-// define wxHAVE_U_ESCAPE if the compiler supports \uxxxx character constants
-#if defined(__VISUALC__) || defined(__GNUC__)
-    #define wxHAVE_U_ESCAPE
-
-    // and disable warning that using them results in with MSVC 8+
-    #if defined(__VISUALC__)
-        // universal-character-name encountered in source
-        #pragma warning(disable:4428)
+#if defined(__VISUALC__)
+    #if _MSC_VER < 1910
+        // MSVS 2015 doesn't handle literal Unicode characters in wide strings
+        // correctly, so use \uxxxx escapes for it instead.
+        //
+        // When support for MSVS 2015 is dropped, this symbol and all code
+        // guarded by it should be removed.
+        #define wxMUST_USE_U_ESCAPE
    #endif
 #endif

--- a/tests/textfile/textfiletest.cpp
+++ b/tests/textfile/textfiletest.cpp
@@ -267,8 +267,7 @@ void TextFileTestCase::ReadCRCRLF()

 void TextFileTestCase::ReadUTF8()
 {
-    CreateTestFile("\xd0\x9f\n"
-                   "\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82");
+    CreateTestFile("П\nривет");

    wxTextFile f;
    CPPUNIT_ASSERT( f.Open(wxString::FromAscii(GetTestFileName()), wxConvUTF8) );
@@ -276,11 +275,14 @@ void TextFileTestCase::ReadUTF8()
    CPPUNIT_ASSERT_EQUAL( (size_t)2, f.GetLineCount() );
    CPPUNIT_ASSERT_EQUAL( wxTextFileType_Unix, f.GetLineType(0) );
    CPPUNIT_ASSERT_EQUAL( wxTextFileType_None, f.GetLineType(1) );
-#ifdef wxHAVE_U_ESCAPE
+#ifdef wxMUST_USE_U_ESCAPE
    CPPUNIT_ASSERT_EQUAL( wxString(L"\u041f"), f.GetFirstLine() );
    CPPUNIT_ASSERT_EQUAL( wxString(L"\u0440\u0438\u0432\u0435\u0442"),
                          f.GetLastLine() );
-#endif // wxHAVE_U_ESCAPE
+#else
+    CPPUNIT_ASSERT_EQUAL( wxString(L"П"), f.GetFirstLine() );
+    CPPUNIT_ASSERT_EQUAL( wxString(L"ривет"), f.GetLastLine() );
+#endif
 }

 void TextFileTestCase::ReadUTF16()
@@ -297,11 +299,14 @@ void TextFileTestCase::ReadUTF16()
    CPPUNIT_ASSERT_EQUAL( wxTextFileType_Dos, f.GetLineType(0) );
    CPPUNIT_ASSERT_EQUAL( wxTextFileType_None, f.GetLineType(1) );

-#ifdef wxHAVE_U_ESCAPE
+#ifdef wxMUST_USE_U_ESCAPE
    CPPUNIT_ASSERT_EQUAL( wxString(L"\u041f"), f.GetFirstLine() );
    CPPUNIT_ASSERT_EQUAL( wxString(L"\u0440\u0438\u0432\u0435\u0442"),
                          f.GetLastLine() );
-#endif // wxHAVE_U_ESCAPE
+#else
+    CPPUNIT_ASSERT_EQUAL( wxString(L"П"), f.GetFirstLine() );
+    CPPUNIT_ASSERT_EQUAL( wxString(L"ривет"), f.GetLastLine() );
+#endif
 }

 void TextFileTestCase::ReadBig()
@@ -331,7 +336,7 @@ TEST_CASE("wxTextBuffer::Translate", "[textbuffer]")
 {
    // Bytes with the value of LF that are part of an UTF-8 character shouldn't
    // be mangled.
-    const wxString smiley = wxString::FromUTF8("\xf0\x9f\x98\x8a"); // U+1F60A
+    const wxString smiley = wxString::FromUTF8("😊"); // U+1F60A

    CHECK( wxTextBuffer::Translate(smiley, wxTextFileType_Dos) == smiley );
 }
--- a/tests/uris/uris.cpp
+++ b/tests/uris/uris.cpp
@@ -313,7 +313,7 @@ TEST_CASE("URI::Paths", "[uri]")
    URI_ASSERT_BADPATH("http:////BADPATH");

    // 8-bit characters in the path should be percent-encoded.
-    URI_ASSERT_PATH_EQUAL( wxString::FromUTF8("http://host/\xc3\xa9"), "/%c3%a9" );
+    URI_ASSERT_PATH_EQUAL( wxString::FromUTF8("http://host/é"), "/%c3%a9" );
 }

 TEST_CASE("URI::UserInfo", "[uri]")
@@ -361,7 +361,7 @@ TEST_CASE("URI::UserInfo", "[uri]")
    uri.SetUserAndPassword("you:", "?me");
    URI_ASSERT_EQUAL( uri, "https://you%3a:%3fme@host/" );

-    uri.SetUserAndPassword(wxString::FromUTF8("\xc3\xa7"));
+    uri.SetUserAndPassword(wxString::FromUTF8("ç"));
    URI_ASSERT_USER_EQUAL( uri, "%c3%a7");
 }

@@ -495,23 +495,29 @@ TEST_CASE("URI::Unescape", "[uri]")
    unescaped = wxURI::Unescape(escaped);

    CHECK( unescaped == wxString::FromUTF8(
-                            "http://ru.wikipedia.org/wiki/"
-                            "\xD0\xA6\xD0\xB5\xD0\xBB\xD0\xBE\xD0\xB5_"
-                            "\xD1\x87\xD0\xB8\xD1\x81\xD0\xBB\xD0\xBE"
+                            "http://ru.wikipedia.org/wiki/Целое_число"
                          ) );

+#ifdef wxMUST_USE_U_ESCAPE
    escaped = L"file://\u043C\u043E\u0439%5C%d1%84%d0%b0%d0%b9%d0%bb";
+#else
+    escaped = L"file://мой%5C%d1%84%d0%b0%d0%b9%d0%bb";
+#endif
    unescaped = wxURI::Unescape(escaped);

+#ifdef wxMUST_USE_U_ESCAPE
    CHECK
    (
        unescaped == L"file://\u043C\u043E\u0439\\\u0444\u0430\u0439\u043B"
    );
+#else
+    CHECK( unescaped == L"file://мой\\файл" );
+#endif


    escaped = "%2FH%C3%A4ll%C3%B6%5C";
    unescaped = wxURI(escaped).BuildUnescapedURI();
-    CHECK( unescaped == wxString::FromUTF8("\x2FH\xC3\xA4ll\xC3\xB6\x5C") );
+    CHECK( unescaped == wxString::FromUTF8("/Hällö\\") );
 }

 TEST_CASE("URI::FileScheme", "[uri]")