Replace hex and octal escapes with literal UTF-8 characters

This makes the code more readable and there should be no reason not to
use UTF-8 in string literals any longer.

Also replace wxHAVE_U_ESCAPE (all still supported compilers have it)
with wxMUST_USE_U_ESCAPE, which is only set for MSVS 2015 currently and
will be removed when we drop support for it and can use literal Unicode
characters in the wide strings everywhere.
This commit is contained in:
Vadim Zeitlin
2025-06-20 01:01:28 +02:00
parent d6dba57730
commit b672131185
16 changed files with 81 additions and 78 deletions

View File

@@ -1555,8 +1555,8 @@ wxDateTime::ParseFormat(const wxString& date,
minusFound = false;
else if
(
*input == wxS('-')
|| *input == wxString::FromUTF8("\xe2\x88\x92")
*input == wxS('-') ||
*input == wxString::FromUTF8("−") // U+2212 MINUS SIGN
)
minusFound = true;
else

View File

@@ -1069,15 +1069,13 @@ wxVersionInfo wxGetLibraryVersionInfo()
msg += wxPlatformInfo::Get().GetPlatformDescription();
const wxString copyrightSign = wxString::FromUTF8("\xc2\xa9");
return wxVersionInfo(wxS("wxWidgets"),
wxMAJOR_VERSION,
wxMINOR_VERSION,
wxRELEASE_NUMBER,
msg,
wxString::Format(wxS("Copyright %s 1992-2025 wxWidgets team"),
copyrightSign));
wxString::FromUTF8("Copyright © 1992-2025 wxWidgets team")
);
}

View File

@@ -98,7 +98,7 @@ wxString wxAboutDialogInfo::GetCopyrightToDisplay() const
{
wxString ret = m_copyright;
const wxString copyrightSign = wxString::FromUTF8("\xc2\xa9");
const wxString copyrightSign = wxString::FromUTF8("©");
ret.Replace("(c)", copyrightSign);
ret.Replace("(C)", copyrightSign);

View File

@@ -481,8 +481,7 @@ wxRendererQt::DrawCheckMark(wxWindow *win, wxDC& dc, const wxRect& rect, int WXU
wxDCClipper clip(dc, rect);
wxDCFontChanger fontChanger(dc, win->GetFont());
// Draw the unicode character “✓” (U+2713)
const auto checkMark = wxString::FromUTF8("\xE2\x9C\x93");
const auto checkMark = wxString::FromUTF8("✓"); // U+2713 CHECK MARK
dc.DrawText(checkMark, rect.GetPosition());
}

View File

@@ -519,7 +519,7 @@ TEST_CASE("wxExecute::RedirectUTF8", "[.]")
// It seems unlikely that this part of the output will change for GNU
// ls, so check for its presence as a sign that the program output was
// decoded correctly.
if ( output[n].find(wxString::FromUTF8("vous \xc3\xaates libre")) != wxString::npos )
if ( output[n].find(wxString::FromUTF8("vous êtes libre")) != wxString::npos )
return;
}

View File

@@ -496,7 +496,7 @@ TEST_CASE_METHOD(FileFunctionsTestCase,
"FileFunctions::Mkdir",
"[filefn]")
{
wxString dirname = wxString::FromUTF8("__wxMkdir_test_dir_with_\xc3\xb6");
wxString dirname = wxString::FromUTF8("__wxMkdir_test_dir_with_ö");
INFO("Dir: " << dirname);
CHECK( wxMkdir(dirname) );
@@ -508,7 +508,7 @@ TEST_CASE_METHOD(FileFunctionsTestCase,
"FileFunctions::Rmdir",
"[filefn]")
{
wxString dirname = wxString::FromUTF8("__wxRmdir_test_dir_with_\xc3\xb6");
wxString dirname = wxString::FromUTF8("__wxRmdir_test_dir_with_ö");
INFO("Dir: " << dirname);
CHECK( wxMkdir(dirname) );

View File

@@ -149,7 +149,7 @@ void ConvAutoTestCase::Empty()
void ConvAutoTestCase::Encode()
{
wxConvAuto conv;
wxString str = wxString::FromUTF8("\xd0\x9f\xe3\x81\x82");
wxString str = wxString::FromUTF8("Пあ");
wxCharBuffer buf = conv.cWC2MB(str.wc_str());
CPPUNIT_ASSERT( buf );
CPPUNIT_ASSERT_EQUAL( str, wxString::FromUTF8(buf) );
@@ -186,36 +186,34 @@ void ConvAutoTestCase::UTF16BE()
TestFirstChar("\xfe\xff\0Y", wxT('Y'), 4, ConvState(wxBOM_UTF16BE, wxFONTENCODING_UTF16BE));
}
#ifdef wxMUST_USE_U_ESCAPE
constexpr wchar_t CYRILLIC_LETTER_P = L'\u041f';
#else
constexpr wchar_t CYRILLIC_LETTER_P = L'П';
#endif
void ConvAutoTestCase::UTF8()
{
#ifdef wxHAVE_U_ESCAPE
TestFirstChar("\xef\xbb\xbf\xd0\x9f", L'\u041f', wxNO_LEN, ConvState(wxBOM_UTF8, wxFONTENCODING_UTF8));
#endif
TestFirstChar("\xef\xbb\xbfП", CYRILLIC_LETTER_P, wxNO_LEN, ConvState(wxBOM_UTF8, wxFONTENCODING_UTF8));
}
void ConvAutoTestCase::UTF8NoBom()
{
#ifdef wxHAVE_U_ESCAPE
TestFirstChar("\xd0\x9f\xe3\x81\x82", L'\u041f', wxNO_LEN, ConvState(wxBOM_None, wxFONTENCODING_UTF8));
#endif
TestFirstChar("Пあ", CYRILLIC_LETTER_P, wxNO_LEN, ConvState(wxBOM_None, wxFONTENCODING_UTF8));
}
void ConvAutoTestCase::Fallback()
{
#ifdef wxHAVE_U_ESCAPE
TestFirstChar("\xbf", L'\u041f', wxNO_LEN,
TestFirstChar("\xbf", CYRILLIC_LETTER_P, wxNO_LEN,
ConvState(wxBOM_None, wxFONTENCODING_ISO8859_5, true),
wxFONTENCODING_ISO8859_5);
#endif
}
void ConvAutoTestCase::FallbackMultibyte()
{
#ifdef wxHAVE_U_ESCAPE
TestFirstChar("\x84\x50", L'\u041f', wxNO_LEN,
TestFirstChar("\x84\x50", CYRILLIC_LETTER_P, wxNO_LEN,
ConvState(wxBOM_None, wxFONTENCODING_CP932, true),
wxFONTENCODING_CP932);
#endif
}
void ConvAutoTestCase::FallbackShort()
@@ -246,8 +244,8 @@ void ConvAutoTestCase::TestTextStream(const char *src,
namespace
{
const wxString line1 = wxString::FromUTF8("a\xe3\x81\x82");
const wxString line2 = wxString::FromUTF8("\xce\xb2");
const wxString line1 = wxString::FromUTF8("aあ");
const wxString line2 = wxString::FromUTF8("β");
} // anonymous namespace

View File

@@ -244,7 +244,7 @@ void DataStreamTestCase::StringRW()
s.append(wxT("Test2"));
CPPUNIT_ASSERT_EQUAL( TestRW(s), s );
s = wxString::FromUTF8("\xc3\xbc"); // U+00FC LATIN SMALL LETTER U WITH DIAERESIS
s = wxString::FromUTF8("ü");
CPPUNIT_ASSERT_EQUAL( TestRW(s), s );
}

View File

@@ -42,7 +42,7 @@ TEST_CASE("CRT::SetGetEnv", "[crt][getenv][setenv]")
CHECK( val == "value" );
CHECK( wxString(wxGetenv(TESTVAR_NAME)) == "value" );
const wxString nonASCII = wxString::FromUTF8("\xe2\x98\xba");
const wxString nonASCII = wxString::FromUTF8("☺");
wxSetEnv(TESTVAR_NAME, nonASCII);
CHECK( wxGetEnv(TESTVAR_NAME, &val) );
CHECK( val == nonASCII );
@@ -68,12 +68,12 @@ TEST_CASE("CRT::Strchr", "[crt][strchr]")
{
// test that searching for a wide character in a narrow string simply
// doesn't find it but doesn't fail with an assert (#11487)
const wxUniChar smiley = *wxString::FromUTF8("\xe2\x98\xba").begin();
const wxUniChar smiley = *wxString::FromUTF8("☺").begin();
CHECK( !wxStrchr("hello", smiley) );
// but searching for an explicitly wide character does find it
CHECK( wxStrchr(wxString::FromUTF8(":-) == \xe2\x98\xba"),
CHECK( wxStrchr(wxString::FromUTF8(":-) == ☺"),
static_cast<wchar_t>(smiley)) );
}

View File

@@ -547,10 +547,9 @@ TEST_CASE("StdString::Resize", "[stdstring]")
CHECK( s3 == wxT("abcABCdefDEF ") );
CHECK( s4 == wxT("abcABCdefDEFWW") );
wxString s =
wxString::FromUTF8("\xd0\x9f\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82");
wxString s = wxString::FromUTF8("Привет");
s.resize(3);
CHECK( s == wxString::FromUTF8("\xd0\x9f\xd1\x80\xd0\xb8") );
CHECK( s == wxString::FromUTF8("При") );
}
TEST_CASE("StdString::Riter", "[stdstring]")

View File

@@ -115,10 +115,7 @@ TEST_CASE("StringFormatUnicode", "[wxString]")
wxLocaleSetter loc("C.UTF-8");
#endif // !__WINDOWS__
const char *UNICODE_STR = "Iestat\xC4\xAB %i%i";
//const char *UNICODE_STR = "Iestat\xCC\x84 %i%i";
wxString fmt = wxString::FromUTF8(UNICODE_STR);
wxString fmt = wxString::FromUTF8("Iestatī %i%i");
wxString s = wxString::Format(fmt, 1, 1);
wxString expected(fmt);
expected.Replace("%i", "1");
@@ -207,8 +204,7 @@ TEST_CASE("StringExtraction", "[wxString]")
CHECK( wxStrcmp( s.substr(3, 5).c_str() , wxT("lo, w") ) == 0 );
CHECK( wxStrcmp( s.substr(3).c_str() , wxT("lo, world!") ) == 0 );
static const char *germanUTF8 = "Oberfl\303\244che";
wxString strUnicode(wxString::FromUTF8(germanUTF8));
wxString strUnicode(wxString::FromUTF8("Oberfläche"));
CHECK( strUnicode.Mid(0, 10) == strUnicode );
CHECK( strUnicode.Mid(7, 2) == "ch" );

View File

@@ -189,9 +189,8 @@ void UnicodeTestCase::ToFromAscii()
void UnicodeTestCase::ConstructorsWithConversion()
{
// the string "Déjà" in UTF-8 and wchar_t:
const unsigned char utf8Buf[] = {0x44,0xC3,0xA9,0x6A,0xC3,0xA0,0};
const unsigned char utf8subBuf[] = {0x44,0xC3,0xA9,0x6A,0}; // just "Déj"
const unsigned char utf8Buf[] = "Déjà";
const unsigned char utf8subBuf[] = "Déj";
const char* utf8 = reinterpret_cast<const char*>(utf8Buf);
const char* utf8sub = reinterpret_cast<const char*>(utf8subBuf);
@@ -311,10 +310,11 @@ void UnicodeTestCase::ConversionUTF8()
{
static const StringConversionData utf8data[] =
{
#ifdef wxHAVE_U_ESCAPE
#ifdef wxMUST_USE_U_ESCAPE
StringConversionData("\xc2\xa3", L"\u00a3"),
#else
StringConversionData("£", L"£"),
#endif
StringConversionData("\xc2", nullptr),
};
wxCSConv conv(wxT("utf-8"));
@@ -345,15 +345,15 @@ void UnicodeTestCase::ConversionUTF16()
{
static const StringConversionData utf16data[] =
{
#ifdef wxHAVE_U_ESCAPE
#ifdef wxMUST_USE_U_ESCAPE
StringConversionData(
"\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0",
L"\u041f\u0440\u0438\u0432\u0435\u0442"),
#else
StringConversionData(
"\x01\0\0b\x01\0\0a\x01\0\0r\0\0",
L"\u0100b\u0100a\u0100r"),
"\x04\x1f\x04\x40\x04\x38\x04\x32\x04\x35\x04\x42\0\0",
L"Привет"),
#endif
StringConversionData("\0f\0o\0o\0\0", L"foo"),
};
wxCSConv conv(wxFONTENCODING_UTF16BE);
@@ -397,12 +397,15 @@ void UnicodeTestCase::ConversionUTF32()
{
static const StringConversionData utf32data[] =
{
#ifdef wxHAVE_U_ESCAPE
#ifdef wxMUST_USE_U_ESCAPE
StringConversionData(
"\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0",
L"\u041f\u0440\u0438\u0432\u0435\u0442"),
#else
StringConversionData(
"\0\0\x04\x1f\0\0\x04\x40\0\0\x04\x38\0\0\x04\x32\0\0\x04\x35\0\0\x04\x42\0\0\0\0",
L"Привет"),
#endif
StringConversionData("\0\0\0f\0\0\0o\0\0\0o\0\0\0\0", L"foo"),
};
wxCSConv conv(wxFONTENCODING_UTF32BE);
@@ -431,8 +434,7 @@ void UnicodeTestCase::IsConvOk()
void UnicodeTestCase::Iteration()
{
// "czech" in Czech ("cestina"):
static const char *textUTF8 = "\304\215e\305\241tina";
static const char *textUTF8 = "čeština";// "czech" in Czech
static const wchar_t textUTF16[] = {0x10D, 0x65, 0x161, 0x74, 0x69, 0x6E, 0x61, 0};
wxString text(wxString::FromUTF8(textUTF8));

View File

@@ -254,13 +254,13 @@ TEST_CASE_METHOD(VsnprintfTestCase, "Vsnprintf::S", "[vsnprintf]")
// Unicode code points from U+03B1 to U+03B9 are the greek letters alpha-iota;
// UTF8 encoding of such code points is 0xCEB1 to 0xCEB9
#define ALPHA "\xCE\xB1"
#define ALPHA "α"
// alpha
#define ABC "\xCE\xB1\xCE\xB2\xCE\xB3"
#define ABC "αβγ"
// alpha+beta+gamma
#define ABCDE "\xCE\xB1\xCE\xB2\xCE\xB3\xCE\xB4\xCE\xB5"
#define ABCDE "αβγδε"
// alpha+beta+gamma+delta+epsilon
#define ABCDEFGHI "\xCE\xB1\xCE\xB2\xCE\xB3\xCE\xB4\xCE\xB5\xCE\xB6\xCE\xB7\xCE\xB8\xCE\xB9"
#define ABCDEFGHI "αβγδεζηθι"
// alpha+beta+gamma+delta+epsilon+zeta+eta+theta+iota
// the 'expected' and 'arg' parameters of this macro are supposed to be

View File

@@ -27,14 +27,14 @@
#define WXUISIM_TEST(test)
#endif
// define wxHAVE_U_ESCAPE if the compiler supports \uxxxx character constants
#if defined(__VISUALC__) || defined(__GNUC__)
#define wxHAVE_U_ESCAPE
// and disable warning that using them results in with MSVC 8+
#if defined(__VISUALC__)
// universal-character-name encountered in source
#pragma warning(disable:4428)
#if defined(__VISUALC__)
#if _MSC_VER < 1910
// MSVS 2015 doesn't handle literal Unicode characters in wide strings
// correctly, so use \uxxxx escapes for it instead.
//
// When support for MSVS 2015 is dropped, this symbol and all code
// guarded by it should be removed.
#define wxMUST_USE_U_ESCAPE
#endif
#endif

View File

@@ -267,8 +267,7 @@ void TextFileTestCase::ReadCRCRLF()
void TextFileTestCase::ReadUTF8()
{
CreateTestFile("\xd0\x9f\n"
"\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82");
CreateTestFile("П\nривет");
wxTextFile f;
CPPUNIT_ASSERT( f.Open(wxString::FromAscii(GetTestFileName()), wxConvUTF8) );
@@ -276,11 +275,14 @@ void TextFileTestCase::ReadUTF8()
CPPUNIT_ASSERT_EQUAL( (size_t)2, f.GetLineCount() );
CPPUNIT_ASSERT_EQUAL( wxTextFileType_Unix, f.GetLineType(0) );
CPPUNIT_ASSERT_EQUAL( wxTextFileType_None, f.GetLineType(1) );
#ifdef wxHAVE_U_ESCAPE
#ifdef wxMUST_USE_U_ESCAPE
CPPUNIT_ASSERT_EQUAL( wxString(L"\u041f"), f.GetFirstLine() );
CPPUNIT_ASSERT_EQUAL( wxString(L"\u0440\u0438\u0432\u0435\u0442"),
f.GetLastLine() );
#endif // wxHAVE_U_ESCAPE
#else
CPPUNIT_ASSERT_EQUAL( wxString(L"П"), f.GetFirstLine() );
CPPUNIT_ASSERT_EQUAL( wxString(L"ривет"), f.GetLastLine() );
#endif
}
void TextFileTestCase::ReadUTF16()
@@ -297,11 +299,14 @@ void TextFileTestCase::ReadUTF16()
CPPUNIT_ASSERT_EQUAL( wxTextFileType_Dos, f.GetLineType(0) );
CPPUNIT_ASSERT_EQUAL( wxTextFileType_None, f.GetLineType(1) );
#ifdef wxHAVE_U_ESCAPE
#ifdef wxMUST_USE_U_ESCAPE
CPPUNIT_ASSERT_EQUAL( wxString(L"\u041f"), f.GetFirstLine() );
CPPUNIT_ASSERT_EQUAL( wxString(L"\u0440\u0438\u0432\u0435\u0442"),
f.GetLastLine() );
#endif // wxHAVE_U_ESCAPE
#else
CPPUNIT_ASSERT_EQUAL( wxString(L"П"), f.GetFirstLine() );
CPPUNIT_ASSERT_EQUAL( wxString(L"ривет"), f.GetLastLine() );
#endif
}
void TextFileTestCase::ReadBig()
@@ -331,7 +336,7 @@ TEST_CASE("wxTextBuffer::Translate", "[textbuffer]")
{
// Bytes with the value of LF that are part of an UTF-8 character shouldn't
// be mangled.
const wxString smiley = wxString::FromUTF8("\xf0\x9f\x98\x8a"); // U+1F60A
const wxString smiley = wxString::FromUTF8("😊"); // U+1F60A
CHECK( wxTextBuffer::Translate(smiley, wxTextFileType_Dos) == smiley );
}

View File

@@ -313,7 +313,7 @@ TEST_CASE("URI::Paths", "[uri]")
URI_ASSERT_BADPATH("http:////BADPATH");
// 8-bit characters in the path should be percent-encoded.
URI_ASSERT_PATH_EQUAL( wxString::FromUTF8("http://host/\xc3\xa9"), "/%c3%a9" );
URI_ASSERT_PATH_EQUAL( wxString::FromUTF8("http://host/é"), "/%c3%a9" );
}
TEST_CASE("URI::UserInfo", "[uri]")
@@ -361,7 +361,7 @@ TEST_CASE("URI::UserInfo", "[uri]")
uri.SetUserAndPassword("you:", "?me");
URI_ASSERT_EQUAL( uri, "https://you%3a:%3fme@host/" );
uri.SetUserAndPassword(wxString::FromUTF8("\xc3\xa7"));
uri.SetUserAndPassword(wxString::FromUTF8("ç"));
URI_ASSERT_USER_EQUAL( uri, "%c3%a7");
}
@@ -495,23 +495,29 @@ TEST_CASE("URI::Unescape", "[uri]")
unescaped = wxURI::Unescape(escaped);
CHECK( unescaped == wxString::FromUTF8(
"http://ru.wikipedia.org/wiki/"
"\xD0\xA6\xD0\xB5\xD0\xBB\xD0\xBE\xD0\xB5_"
"\xD1\x87\xD0\xB8\xD1\x81\xD0\xBB\xD0\xBE"
"http://ru.wikipedia.org/wiki/Целое_число"
) );
#ifdef wxMUST_USE_U_ESCAPE
escaped = L"file://\u043C\u043E\u0439%5C%d1%84%d0%b0%d0%b9%d0%bb";
#else
escaped = L"file://мой%5C%d1%84%d0%b0%d0%b9%d0%bb";
#endif
unescaped = wxURI::Unescape(escaped);
#ifdef wxMUST_USE_U_ESCAPE
CHECK
(
unescaped == L"file://\u043C\u043E\u0439\\\u0444\u0430\u0439\u043B"
);
#else
CHECK( unescaped == L"file://мой\\файл" );
#endif
escaped = "%2FH%C3%A4ll%C3%B6%5C";
unescaped = wxURI(escaped).BuildUnescapedURI();
CHECK( unescaped == wxString::FromUTF8("\x2FH\xC3\xA4ll\xC3\xB6\x5C") );
CHECK( unescaped == wxString::FromUTF8("/Hällö\\") );
}
TEST_CASE("URI::FileScheme", "[uri]")