Add complete support of emojis to text widgets (#1360)
Some checks failed
Build and Test / build-linux (push) Has been cancelled
Build and Test / build-wayland (push) Has been cancelled
Build and Test / build-macos (push) Has been cancelled
Build and Test / build-windows (push) Has been cancelled

This commit is contained in:
ManoloFLTK
2026-01-29 19:00:23 +01:00
parent 8daaf9ebd6
commit ec86cc0f9d
6 changed files with 160 additions and 24 deletions

View File

@@ -210,6 +210,9 @@ FL_EXPORT void fl_make_path_for_file( const char *path );
/* OD: recursively create a path in the file system */
FL_EXPORT char fl_make_path( const char *path );
FL_EXPORT const char *fl_utf8_next_composed_char(const char *from, const char *end);
FL_EXPORT const char *fl_utf8_previous_composed_char(const char *from, const char *begin);
/** @} */

View File

@@ -190,14 +190,20 @@ int Fl_Input::kf_delete_eol() {
int Fl_Input::kf_delete_char_right() {
if (readonly()) { fl_beep(); return 1; }
if (mark() != insert_position()) cut();
else cut(1);
else {
const char *next = fl_utf8_next_composed_char(value() + insert_position(), value() + size());
replace(insert_position(), next - value(), 0);
}
return 1;
}
int Fl_Input::kf_delete_char_left() {
if (readonly()) { fl_beep(); return 1; }
if (mark() != insert_position()) cut();
else cut(-1);
else {
const char *before = fl_utf8_previous_composed_char(value() + insert_position(), value());
replace(insert_position(), before - value(), 0);
}
return 1;
}
@@ -225,7 +231,8 @@ int Fl_Input::kf_clear_eol() {
// If OPTION_ARROW_FOCUS is disabled, return 1 to prevent focus navigation.
//
int Fl_Input::kf_move_char_left() {
int i = shift_position(insert_position()-1) + NORMAL_INPUT_MOVE;
const char *before = fl_utf8_previous_composed_char(value() + insert_position(), value());
int i = shift_position(before - value()) + NORMAL_INPUT_MOVE;
return Fl::option(Fl::OPTION_ARROW_FOCUS) ? i : 1;
}
@@ -233,7 +240,8 @@ int Fl_Input::kf_move_char_left() {
// If OPTION_ARROW_FOCUS is disabled, return 1 to prevent focus navigation.
//
int Fl_Input::kf_move_char_right() {
int i = shift_position(insert_position()+1) + NORMAL_INPUT_MOVE;
const char *next = fl_utf8_next_composed_char(value() + insert_position(), value() + size());
int i = shift_position(next - value()) + NORMAL_INPUT_MOVE;
return Fl::option(Fl::OPTION_ARROW_FOCUS) ? i : 1;
}

View File

@@ -658,8 +658,7 @@ void Fl_Input_::handle_mouse(int X, int Y, int /*W*/, int /*H*/, int drag) {
const char *l, *r, *t; double f0 = Fl::event_x()-X+xscroll_;
for (l = p, r = e; l<r; ) {
double f;
int cw = fl_utf8len((char)l[0]);
if (cw < 1) cw = 1;
int cw = fl_utf8_next_composed_char(l, value() + size()) - l;
t = l+cw;
f = X-xscroll_+expandpos(p, t, buf, 0);
if (f <= Fl::event_x()) {l = t; f0 = Fl::event_x()-f;}
@@ -667,7 +666,7 @@ void Fl_Input_::handle_mouse(int X, int Y, int /*W*/, int /*H*/, int drag) {
}
if (l < e) { // see if closer to character on right:
double f1;
int cw = fl_utf8len((char)l[0]);
int cw = fl_utf8_next_composed_char(l, value() + size()) - l;
if (cw > 0) {
f1 = X-xscroll_+expandpos(p, l + cw, buf, 0) - Fl::event_x();
if (f1 < f0) l = l+cw;

View File

@@ -2075,15 +2075,17 @@ int Fl_Text_Buffer::prev_char_clipped(int pos) const
return 0;
IS_UTF8_ALIGNED2(this, (pos))
char c;
do {
pos--;
if (pos==0)
return 0;
c = byte_at(pos);
} while ( (c&0xc0) == 0x80);
const int l_t = 40;
char t[l_t + 1]; t[l_t] = 0;
int l = l_t, p = pos, ll;
for (int i = l_t; i > 0 && p > 0; i--) {
t[--l] = byte_at(--p);
ll = fl_utf8len(t[l]);
if (ll == 1 || ll == 2) break;
}
const char *previous = fl_utf8_previous_composed_char(t + l_t, t + l);
ll = strlen(t + l);
pos = (pos - ll) + (previous - (t+l));
IS_UTF8_ALIGNED2(this, (pos))
return pos;
}
@@ -2091,6 +2093,7 @@ int Fl_Text_Buffer::prev_char_clipped(int pos) const
/*
Return the previous character position.
This function processes a composed character (e.g., a flag emoji) as a single character.
Returns -1 if the beginning of the buffer is reached.
*/
int Fl_Text_Buffer::prev_char(int pos) const
@@ -2102,13 +2105,28 @@ int Fl_Text_Buffer::prev_char(int pos) const
/*
Return the next character position.
This function processes a composed character (e.g., a flag emoji) as a single character.
Returns length() if the end of the buffer is reached.
*/
int Fl_Text_Buffer::next_char(int pos) const
{
IS_UTF8_ALIGNED2(this, (pos))
int n = fl_utf8len1(byte_at(pos));
pos += n;
unsigned l = fl_utf8len1(byte_at(pos));
if (l > 2) { // test for composed character only if pos is at long codepoint
int p = pos, ll, b;
char t[40]; // crazyest composed characters I know use 28 bytes in UTF8 (e.g., 🏴󠁧󠁢󠁷󠁬󠁳󠁿)
l = 0;
// extract bytes after pos stopping after short codepoint or 40 bytes at most
while (p < mLength && l < sizeof(t)) {
b = byte_at(p++);
t[l++] = b;
ll = fl_utf8len(b);
for (int i = 1; i < ll && l < sizeof(t); i++) t[l++] = byte_at(p++);
if (ll == 1 || ll == 2) break; // stop after short codepoint (includes '\n')
}
l = fl_utf8_next_composed_char(t, t + l) - t; // length of possibly composed character starting at pos
}
pos += l;
if (pos>=mLength)
return mLength;
IS_UTF8_ALIGNED2(this, (pos))

View File

@@ -2263,7 +2263,8 @@ int Fl_Text_Display::find_x(const char *s, int len, int style, int x) const {
int i = 0;
int last_w = 0; // STR #2788
while (i<len) {
int cl = fl_utf8len1(s[i]);
const char *next = fl_utf8_next_composed_char(s + i, s + len);
int cl = next - (s+i);
int w = int( string_width(s, i+cl, style) );
if (w>x) {
if (cursor_pos && (w-x < x-last_w)) return i+cl; // STR #2788

View File

@@ -1089,8 +1089,13 @@ unsigned fl_utf8toa(const char* src, unsigned srclen,
dst[count] = c;
p++;
} else {
int len; unsigned ucs = fl_utf8decode(p,e,&len);
p += len;
unsigned ucs = 0x100;
int len = fl_utf8len(*p);
if (len > 2) p = fl_utf8_next_composed_char(p, e);
else {
ucs = fl_utf8decode(p,e,&len);
p += len;
}
if (ucs < 0x100) dst[count] = ucs;
else dst[count] = '?';
}
@@ -1100,9 +1105,11 @@ unsigned fl_utf8toa(const char* src, unsigned srclen,
while (p < e) {
if (!(*p & 0x80)) p++;
else {
int len;
fl_utf8decode(p,e,&len);
p += len;
int len = fl_utf8len1(*p);
if (len > 2) p = fl_utf8_next_composed_char(p, e);
else {
p += len;
}
}
++count;
}
@@ -1393,4 +1400,104 @@ unsigned fl_utf8from_mb(char* dst, unsigned dstlen, const char* src, unsigned sr
return Fl::system_driver()->utf8from_mb(dst, dstlen, src, srclen);
}
/**
 Returns pointer to beginning of character after given location in UTF8 string accounting for emoji sequences.

 Unicode encodes some emojis (examples: 👩‍✈️ "woman pilot", 🇸🇲 "San Marino flag", 9⃣ "keycap 9")
 via an <b>emoji sequence</b>, that is, they are represented by sequences of consecutive unicode points.
 An emoji sequence may pair two successive codepoints with "zero-width joiner" (U+200D) and may qualify
 any component with "variation selectors" (U+FE00..U+FE0F) or "Fitzpatrick emoji modifiers"
 (U+1F3FB..U+1F3FF). Most flag emojis are encoded with two successive "regional indicator symbols"
 (U+1F1E6..U+1F1FF). Subdivision flags are encoded with "waving black flag" (U+1F3F4) followed by
 "tag components" (U+E0020..U+E007E) and a final "cancel tag" (U+E007F).
 Keycap emojis are encoded with key + "emoji variation selector" + "combining enclosing keycap" (U+20E3).

 \param from points to a location within a UTF8 string. If this location is inside the UTF8
 encoding of a codepoint or is an invalid byte, this function returns \p from + 1.
 If \p from is at or beyond \p end, \p from is returned unchanged and is not dereferenced.
 \param end points past last codepoint of the string. Must not be NULL. No byte at or after
 \p end is read by this function itself.
 \return pointer to beginning of first codepoint after character, possibly an emoji sequence,
 that begins at \p from. For \p from < \p end the result is never greater than \p end, even when
 the string ends with a truncated codepoint or a dangling zero-width joiner.
*/
const char *fl_utf8_next_composed_char(const char *from, const char *end) {
  if (from >= end) return from;           // nothing to skip, and *from must not be read
  int skip = fl_utf8len(*from);
  if (skip == -1) return from + 1;        // continuation or invalid byte: advance one byte
  if (skip > end - from) return end;      // lead byte announces more bytes than remain
  unsigned u;
  if (skip >= 4) {
    u = fl_utf8decode(from, end, NULL);
    if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 1st regional indicator symbol can be a flag
      const char *second = from + skip;
      if (end - second >= skip) {         // only look further if a 2nd indicator fits before end
        u = fl_utf8decode(second, end, NULL);
        if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a 2nd regional indicator symbol gives a flag
          return second + skip;
        }
      }
    } else if (u == 0x1F3F4) { // "waving black flag" may start subdivision flags (e.g. flag of Wales)
      const char *next = from + skip;
      while (next < end) {
        u = fl_utf8decode(next, end, NULL);
        next += fl_utf8len1(*next);
        if (next > end) break;                    // truncated tag: not a subdivision flag
        if (u == 0xE007F) return next;            // ends with "cancel tag"
        if (u < 0xE0020 || u > 0xE007E) break;    // anything but a "tag component" ends the scan
      }
      // no cancel tag found: fall through and treat the black flag as an ordinary codepoint
    }
  }
  from += skip; // skip 1st codepoint
  while (from < end) {
    u = fl_utf8decode(from, end, NULL);
    if (u == 0x200D) { // zero-width joiner
      from += fl_utf8len(*from); // skip joiner
      if (from >= end) break;    // dangling joiner at end of string: nothing joined
      const int joined = fl_utf8len(*from);
      if (joined < 1) break;     // invalid byte after joiner: don't step backwards
      from += joined;            // skip joined codepoint
    } else if ((u >= 0xFE00 && u <= 0xFE0F) ||     // a variation selector
               (u >= 0x1F3FB && u <= 0x1F3FF) ||   // EMOJI MODIFIER FITZPATRICK
               u == 0x20E3) {                      // combining enclosing keycap
      from += fl_utf8len(*from); // skip the qualifier
    } else break;
  }
  return from > end ? end : from; // a truncated trailing codepoint must not push us past end
}
/**
 Returns pointer to beginning of character before given location in UTF8 string accounting for emoji sequences.

 See fl_utf8_next_composed_char() for a hint about what is an emoji sequence.

 Flags made of two "regional indicator symbols" are paired starting from the beginning of a run
 of such symbols, so that walking backwards stops at the same boundaries as walking forwards
 with fl_utf8_next_composed_char(), also when the run contains an odd number of symbols.

 \param from points to a location within a UTF8 string. If this location is inside the UTF8
 encoding of a codepoint or is an invalid byte, this function returns \p from - 1.
 If \p from is at or before \p begin, \p begin is returned.
 \param begin points to start of first codepoint of the string. This function does not step
 to, or return, a location before \p begin.
 \return pointer to beginning of first character, possibly an emoji sequence, before the codepoint
 that begins at \p from.
*/
const char *fl_utf8_previous_composed_char(const char *from, const char *begin) {
  if (from <= begin) return begin;        // nothing before the start of the string
  const int l = fl_utf8len(*from);
  if (l == -1) return from - 1;           // inside a codepoint or invalid byte: back one byte
  const char *limit = from + l;           // upper bound handed to fl_utf8decode()
  from = fl_utf8back(from - 1, begin, NULL);
  unsigned u = fl_utf8decode(from, limit, NULL);
  if (u >= 0x1F1E6 && u <= 0x1F1FF) { // a regional indicator symbol can be the 2nd half of a flag
    // Walk back over the whole run of regional indicators: the one at 'from' closes
    // a flag only if the run holds an even number of them.
    const char *first_half = NULL;        // regional indicator just before 'from', if any
    bool paired = false;                  // true when the run length seen so far is even
    const char *scan = from;
    while (scan > begin) {
      const char *prev = fl_utf8back(scan - 1, begin, NULL);
      const unsigned v = fl_utf8decode(prev, limit, NULL);
      if (v < 0x1F1E6 || v > 0x1F1FF) break;
      if (scan == from) first_half = prev;
      paired = !paired;
      scan = prev;
    }
    if (paired) return first_half;        // two regional indicator symbols give a flag
  } else if (u == 0xE007F) { // ends with "cancel tag"
    const char *previous = from;
    do {
      if (previous <= begin) return begin;
      previous = fl_utf8back(previous - 1, begin, NULL);
      u = fl_utf8decode(previous, limit, NULL);
      if (u == 0x1F3F4) return previous; // "waving black flag" starts subdivision flags
    } while (u >= 0xE0020 && u <= 0xE007E); // any series of "tag components"
  }
  for (;;) {
    u = fl_utf8decode(from, limit, NULL);
    if ((u >= 0xFE00 && u <= 0xFE0F) ||     // a variation selector
        (u >= 0x1F3FB && u <= 0x1F3FF) ||   // EMOJI MODIFIER FITZPATRICK
        u == 0x20E3) {                      // combining enclosing keycap
      if (from <= begin) break;             // qualifier opens the string: nothing to attach it to
      from = fl_utf8back(from - 1, begin, NULL);
    } else if (from > begin) {
      const char *joiner = fl_utf8back(from - 1, begin, NULL);
      if (fl_utf8decode(joiner, from, NULL) != 0x200D) return from; // not joined to what precedes
      if (joiner <= begin) return begin;    // zero-width joiner opens the string
      from = fl_utf8back(joiner - 1, begin, NULL); // step over joiner to the joined codepoint
    } else break;
  }
  return from;
}
/** @} */