mirror of
https://github.com/fltk/fltk.git
synced 2026-05-23 15:56:10 +08:00
Fix Fl_Help_View::find() (issue #179)
Fix search (string comparison) which had a few different issues. Document the function, arguments, and details about string matching. To do: correctly match complex HTML entities like "&euro;" with Unicode code points >= U+0080 (UTF-8 multi-byte encoding).
This commit is contained in:
+33
-14
@@ -1177,10 +1177,27 @@ Fl_Help_View::draw()
|
||||
} // draw()
|
||||
|
||||
|
||||
|
||||
/** Finds the specified string \p s at starting position \p p.
|
||||
|
||||
\return the matching position or -1 if not found
|
||||
The argument \p p and the return value are offsets in Fl_Help_View::value(),
|
||||
counting from 0. If \p p is out of range, 0 is used.
|
||||
|
||||
The string comparison is simple but honors some special cases:
|
||||
- the specified string \p s must be in UTF-8 encoding
|
||||
- HTML tags in value() are filtered (not compared as such, they never match)
|
||||
- HTML entities like '\&lt;' or '\&#x20ac;' are converted to Unicode (UTF-8)
|
||||
- ASCII characters (7-bit, \< 0x80) are compared case insensitive
|
||||
- every newline (LF, '\\n') in value() is treated like a single space
|
||||
- all other strings are compared as-is (byte by byte)
|
||||
|
||||
\todo complex HTML entities for Unicode code points \>= 0x80 are currently treated
|
||||
like one byte (not character!) and do not (yet) match correctly ("&lt;" matches "<"
|
||||
but "&euro;" doesn't match "€", and "&uuml;" doesn't match "ü")
|
||||
|
||||
\param[in] s search string in UTF-8 encoding
|
||||
\param[in] p starting position for search (0,...), Default = 0
|
||||
|
||||
\return the matching position or -1 if not found
|
||||
*/
|
||||
int // O - Matching position or -1 if not found
|
||||
Fl_Help_View::find(const char *s, // I - String to find
|
||||
@@ -1193,27 +1210,28 @@ Fl_Help_View::find(const char *s, // I - String to find
|
||||
*bs, // Start of current comparison
|
||||
*sp; // Search string pointer
|
||||
|
||||
|
||||
DEBUG_FUNCTION(__LINE__,__FUNCTION__);
|
||||
|
||||
// Range check input and value...
|
||||
if (!s || !value_) return -1;
|
||||
|
||||
if (p < 0 || p >= (int)strlen(value_)) p = 0;
|
||||
else if (p > 0) p ++;
|
||||
|
||||
// Look for the string...
|
||||
for (i = nblocks_, b = blocks_; i > 0; i --, b ++) {
|
||||
for (i = nblocks_, b = blocks_; i > 0; i--, b++) {
|
||||
if (b->end < (value_ + p))
|
||||
continue;
|
||||
|
||||
if (b->start < (value_ + p)) bp = value_ + p;
|
||||
else bp = b->start;
|
||||
|
||||
for (sp = s, bs = bp; *sp && *bp && bp < b->end; bp ++) {
|
||||
for (sp = s, bs = bp; *sp && *bp && bp < b->end; bp++) {
|
||||
if (*bp == '<') {
|
||||
// skip to end of element...
|
||||
while (*bp && bp < b->end && *bp != '>') bp ++;
|
||||
while (*bp && bp < b->end && *bp != '>') bp++;
|
||||
// no match, so reset to start of search...
|
||||
sp = s;
|
||||
bs = bp + 1;
|
||||
continue;
|
||||
} else if (*bp == '&') {
|
||||
// decode HTML entity...
|
||||
@@ -1221,6 +1239,8 @@ Fl_Help_View::find(const char *s, // I - String to find
|
||||
else bp = strchr(bp + 1, ';') + 1;
|
||||
} else c = *bp;
|
||||
|
||||
if (c == '\n') c = ' '; // treat newline as a single space
|
||||
|
||||
// *FIXME* *UTF-8* (A.S. 02/14/2016)
|
||||
// At this point c may be an arbitrary Unicode Code Point corresponding
|
||||
// to a quoted character (see above), i.e. it _can_ be a multi byte
|
||||
@@ -1229,19 +1249,18 @@ Fl_Help_View::find(const char *s, // I - String to find
|
||||
// For instance: "€" == 0x20ac -> 0xe2 0x82 0xac (UTF-8: 3 bytes).
|
||||
// Hint: use fl_utf8encode() [see below]
|
||||
|
||||
if (tolower(*sp) == tolower(c)) sp ++;
|
||||
else {
|
||||
// No match, so reset to start of search...
|
||||
if (c > 0x20 && c < 0x80 && tolower(*sp) == tolower(c)) sp++;
|
||||
else if (*sp == c) sp++;
|
||||
else { // No match, so reset to start of search...
|
||||
sp = s;
|
||||
bs ++;
|
||||
bp = bs;
|
||||
bs++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!*sp) {
|
||||
// Found a match!
|
||||
if (!*sp) { // Found a match!
|
||||
topline(b->y - b->h);
|
||||
return (int) (b->end - value_);
|
||||
return int(bs - value_);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user