mirror of
https://github.com/fltk/fltk.git
synced 2026-02-05 07:49:50 +08:00
Let text widgets gracefully handle composed Unicode characters - Cont'd.
This commit adds support for another type of composed character: flags. It also fixes Fl_Text_Buffer::prev_char() and Fl_Text_Buffer::next_char(), which must use Fl_Text_Buffer::byte_at() to access the content of the text buffer.
This commit is contained in:
@@ -742,19 +742,11 @@ public:
|
||||
*/
|
||||
const Fl_Text_Selection* highlight_selection() const { return &mHighlight; }
|
||||
|
||||
/**
|
||||
Returns the index of the previous character.
|
||||
\param ix index to the current character
|
||||
*/
|
||||
int prev_char(int ix) const;
|
||||
int prev_char_clipped(int ix) const;
|
||||
int prev_char(int pos) const;
|
||||
int prev_char_clipped(int pos) const;
|
||||
|
||||
/**
|
||||
Returns the index of the next character.
|
||||
\param ix index to the current character
|
||||
*/
|
||||
int next_char(int ix) const;
|
||||
int next_char_clipped(int ix) const;
|
||||
int next_char(int pos) const;
|
||||
int next_char_clipped(int pos) const;
|
||||
|
||||
/**
|
||||
Align an index into the buffer to the current or previous UTF-8 boundary.
|
||||
|
||||
@@ -2080,9 +2080,8 @@ int Fl_Text_Buffer::outputfile(const char *file,
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Return the previous character position.
|
||||
Unicode safe.
|
||||
/**
|
||||
Same as prev_char(), but returns 0 if the beginning of the buffer is reached.
|
||||
*/
|
||||
int Fl_Text_Buffer::prev_char_clipped(int pos) const
|
||||
{
|
||||
@@ -2090,17 +2089,27 @@ int Fl_Text_Buffer::prev_char_clipped(int pos) const
|
||||
return 0;
|
||||
|
||||
IS_UTF8_ALIGNED2(this, (pos))
|
||||
const char *previous = fl_utf8_previous_composed_char(address(0) + pos, address(0));
|
||||
pos = previous - address(0);
|
||||
|
||||
const int l_t = 40;
|
||||
char t[l_t + 1]; t[l_t] = 0;
|
||||
int l = l_t, p = pos, ll;
|
||||
for (int i = l_t; i > 0 && p > 0; i--) {
|
||||
t[--l] = byte_at(--p);
|
||||
ll = fl_utf8len(t[l]);
|
||||
if (ll == 1 || ll == 2) break;
|
||||
}
|
||||
const char *previous = fl_utf8_previous_composed_char(t + l_t, t + l);
|
||||
ll = strlen(t + l);
|
||||
pos = (pos - ll) + (previous - (t+l));
|
||||
IS_UTF8_ALIGNED2(this, (pos))
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Return the previous character position.
|
||||
/**
|
||||
Returns the index of the previous character.
|
||||
This function processes a composed character (e.g., a flag emoji) as a single character.
|
||||
Returns -1 if the beginning of the buffer is reached.
|
||||
\param pos index of the current character
|
||||
*/
|
||||
int Fl_Text_Buffer::prev_char(int pos) const
|
||||
{
|
||||
@@ -2109,15 +2118,31 @@ int Fl_Text_Buffer::prev_char(int pos) const
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Return the next character position.
|
||||
/**
|
||||
Returns the index of the next character.
|
||||
This function processes a composed character (e.g., a flag emoji) as a single character.
|
||||
Returns length() if the end of the buffer is reached.
|
||||
\param pos index of the current character
|
||||
*/
|
||||
int Fl_Text_Buffer::next_char(int pos) const
|
||||
{
|
||||
IS_UTF8_ALIGNED2(this, (pos))
|
||||
const char *next = fl_utf8_next_composed_char(address(0) + pos, address(0) + mLength);
|
||||
pos = next - address(0);
|
||||
int l = fl_utf8len1(byte_at(pos));
|
||||
if (l > 2) { // test for composed character only if pos is at long codepoint
|
||||
int p = pos, ll, b;
|
||||
char t[40]; // crazyest composed characters I know use 28 bytes in UTF8 (e.g., 🏴)
|
||||
l = 0;
|
||||
// extract bytes after pos stopping after short codepoint or 40 bytes at most
|
||||
while (p < mLength && l < sizeof(t)) {
|
||||
b = byte_at(p++);
|
||||
t[l++] = b;
|
||||
ll = fl_utf8len(b);
|
||||
for (int i = 1; i < ll && l < sizeof(t); i++) t[l++] = byte_at(p++);
|
||||
if (ll == 1 || ll == 2) break; // stop after short codepoint (includes '\n')
|
||||
}
|
||||
l = fl_utf8_next_composed_char(t, t + l) - t; // length of possibly composed character starting at pos
|
||||
}
|
||||
pos += l;
|
||||
if (pos>=mLength)
|
||||
return mLength;
|
||||
IS_UTF8_ALIGNED2(this, (pos))
|
||||
|
||||
@@ -1634,17 +1634,32 @@ unsigned fl_utf8from_mb(char* dst, unsigned dstlen, const char* src, unsigned sr
|
||||
Returns pointer to beginning of next unicode character after potentially composed character.
|
||||
Some Unicode characters (example: 👩‍✈️ "woman pilot") are composed of several Unicode codepoints. They may pair two successive
|
||||
codepoints with U+200D (zero-width joiner) and may qualify any component with variation selectors or Fitzpatrick emoji modifiers.
|
||||
Most flag emojis are composed of 2 successive "regional indicator symbols" from the range [U+1F1E6, U+1F1FF].
|
||||
\param from points to a location within a UTF8 string. If this location is inside the UTF8
|
||||
encoding of a codepoint or is an invalid byte, this function returns \p from + 1.
|
||||
\param end points past last codepoint of the string.
|
||||
\return pointer to start of first codepoint after potentially composed character beginning at \p from.
|
||||
\return pointer to beginning of first codepoint after potentially composed character that begins at \p from.
|
||||
*/
|
||||
const char *fl_utf8_next_composed_char(const char *from, const char *end) {
|
||||
int skip = fl_utf8len(*from);
|
||||
if (skip == -1) return from + 1;
|
||||
int skip = fl_utf8len1(*from);
|
||||
if (skip <= 2) return from + skip;
|
||||
unsigned u = fl_utf8decode(from, end, NULL);
|
||||
if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 1st regional indicator symbol can be a flag
|
||||
u = fl_utf8decode(from + skip, end, NULL);
|
||||
if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 2nd regional indicator symbol gives a flag
|
||||
return from + 2 * skip;
|
||||
}
|
||||
} else if (u == 0x1F3F4) { // “waving black flag” may start subdivision flags (e.g. 🏴)
|
||||
const char *next = from + skip;
|
||||
do {
|
||||
u = fl_utf8decode(next, end, NULL);
|
||||
next += fl_utf8len1(*next);
|
||||
if (u == 0xE007F) return next; // ends with "cancel tag"
|
||||
} while (u >= 0xE0020 && u <= 0xE007E); // any series of "tag components"
|
||||
}
|
||||
from += skip; // skip 1st codepoint
|
||||
while (from < end) {
|
||||
unsigned u = fl_utf8decode(from, end, NULL);
|
||||
u = fl_utf8decode(from, end, NULL);
|
||||
if (u == 0x200D) { // zero-width joiner
|
||||
from += fl_utf8len(*from); // skip joiner
|
||||
from += fl_utf8len(*from); // skip joined codepoint
|
||||
@@ -1664,14 +1679,31 @@ const char *fl_utf8_next_composed_char(const char *from, const char *end) {
|
||||
\param from points to a location within a UTF8 string. If this location is inside the UTF8
|
||||
encoding of a codepoint or is an invalid byte, this function returns \p from - 1.
|
||||
\param begin points to start of first codepoint of the string.
|
||||
\return pointer to start of first potentially composed character before the codepoint beginning at \p from.
|
||||
\return pointer to beginning of first potentially composed character before the codepoint that begins at \p from.
|
||||
*/
|
||||
const char *fl_utf8_previous_composed_char(const char *from, const char *begin) {
|
||||
if (from <= begin || fl_utf8len(*from) == -1) return from - 1;
|
||||
const char *keep = from;
|
||||
int l = fl_utf8len(*from);
|
||||
if (from <= begin || l == -1) return from - 1;
|
||||
const char *keep = from + l;
|
||||
from = fl_utf8back(from - 1, begin, NULL);
|
||||
while (from >= begin) {
|
||||
unsigned u = fl_utf8decode(from, keep, NULL);
|
||||
if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 1st regional indicator symbol can be a flag
|
||||
const char *previous = fl_utf8back(from - 1, begin, NULL);
|
||||
u = fl_utf8decode(previous, keep, NULL);
|
||||
if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 2nd Regional indicator symbol gives a flag
|
||||
return previous;
|
||||
}
|
||||
} else if (u == 0xE007F) { // ends with "cancel tag"
|
||||
const char *previous = from;
|
||||
do {
|
||||
if (previous <= begin) return begin;
|
||||
previous = fl_utf8back(previous - 1, begin, NULL);
|
||||
u = fl_utf8decode(previous, keep, NULL);
|
||||
if (u == 0x1F3F4) return previous; // “waving black flag” starts subdivision flags
|
||||
} while (u >= 0xE0020 && u <= 0xE007E); // any series of "tag components"
|
||||
}
|
||||
while (from >= begin) {
|
||||
u = fl_utf8decode(from, keep, NULL);
|
||||
if (u >= 0xFE00 && u <= 0xFE0F) { // a variation selector
|
||||
from = fl_utf8back(from - 1, begin, NULL);
|
||||
} else if (u >= 0x1F3FB && u <= 0x1F3FF) { // EMOJI MODIFIER FITZPATRICK
|
||||
|
||||
Reference in New Issue
Block a user