new implementation of UNICODE BIDIRECTIONAL ALGORITHM

This commit is contained in:
Vincent Wei
2019-03-08 20:37:44 +08:00
parent 0093ac3edc
commit cc01ce0bbc
5 changed files with 156 additions and 43 deletions

View File

@@ -9724,13 +9724,29 @@ MG_EXPORT Uint32 GUIAPI GetACharType (LOGFONT* logfont, Achar32 chv);
#define BIDI_MASK_SS 0x5000
/**
* \defgroup glyph_bidi_types Glyph BIDI types
* \defgroup bidi_types BIDI types
*
* Values for BIDI glyph type.
* Values for BIDI types.
*
* @{
*/
typedef Uint16 BidiType;
typedef Uint8 BidiLevel;
#define BIDI_PGDIR_LTR 0
#define BIDI_PGDIR_RTL 1
#define BIDI_PGDIR_ON 2
#define BIDI_FLAG_SHAPE_MIRRORING 0x00000001
#define BIDI_FLAG_REORDER_NSM 0x00000002
#define BIDI_FLAG_SHAPE_ARAB_PRES 0x00000100
#define BIDI_FLAG_SHAPE_ARAB_LIGA 0x00000200
#define BIDI_FLAG_SHAPE_ARAB_CONSOLE 0x00000400
#define BIDI_FLAG_REMOVE_BIDI 0x00010000
#define BIDI_FLAG_REMOVE_JOINING 0x00020000
#define BIDI_FLAG_REMOVE_SPECIALS 0x00040000
/**
* \def BIDI_TYPE_LTR
* \brief Strong left to right
@@ -9868,31 +9884,36 @@ MG_EXPORT Uint32 GUIAPI GetACharType (LOGFONT* logfont, Achar32 chv);
*/
#define BIDI_TYPE_PDI (BIDI_MASK_NEUTRAL | BIDI_MASK_WEAK | BIDI_MASK_ISOLATE)
/** @} end of glyph_bidi_types */
/** @} end of bidi_types */
/* The following are only used internally */
#define BIDI_TYPE_SENTINEL (BIDI_MASK_SENTINEL)
/* Start of text */
#define BIDI_TYPE_SOT (BIDI_MASK_SENTINEL)
/* End of text */
#define BIDI_TYPE_EOT (BIDI_MASK_SENTINEL + BIDI_MASK_RTL)
/* Weak Left-To-Right */
#define BIDI_TYPE_WLTR (BIDI_MASK_WEAK)
/* Weak Right-To-Left */
#define BIDI_TYPE_WRTL (BIDI_MASK_WEAK | BIDI_MASK_RTL)
/* Is private-use value? */
#define BIDI_TYPE_PRIVATE(p) ((p) < 0)
/* Return the direction of the level number, BIDI_TYPE_LTR for even and
BIDI_TYPE_RTL for odds. */
#define BIDI_LEVEL_TO_DIR(lev) (BIDI_TYPE_LTR | (lev & 1))
/* Return the minimum level of the direction, 0 for BIDI_TYPE_LTR and
1 for BIDI_TYPE_RTL and BIDI_TYPE_AL. */
#define BIDI_DIR_TO_LEVEL(dir) ((BYTE)(dir & 1))
//#define BIDI_TYPE_PRIVATE(p) ((p) < 0)
/* Is right to left? */
#define BIDI_IS_RTL(p) ((p) & BIDI_MASK_RTL)
/* Is arabic? */
#define BIDI_IS_ARABIC(p) ((p) & BIDI_MASK_ARABIC)
/* Is right-to-left level? */
#define BIDI_LEVEL_IS_RTL(lev) ((lev) & 1)
/* Return the bidi type corresponding to the direction of the level number,
BIDI_TYPE_LTR for evens and BIDI_TYPE_RTL for odds. */
#define BIDI_LEVEL_TO_DIR(lev) \
(BIDI_LEVEL_IS_RTL(lev) ? BIDI_TYPE_RTL : BIDI_TYPE_LTR)
/* Return the minimum level of the direction, 0 for BIDI_TYPE_LTR and
1 for BIDI_TYPE_RTL and BIDI_TYPE_AL. */
#define BIDI_DIR_TO_LEVEL(dir) \
((BidiLevel) (BIDI_IS_RTL(dir) ? 1 : 0))
/* Is strong? */
#define BIDI_IS_STRONG(p) (!((p) & BIDI_SWN_MASK))
/* Is weak? */
@@ -9921,7 +9942,8 @@ MG_EXPORT Uint32 GUIAPI GetACharType (LOGFONT* logfont, Achar32 chv);
/* Is explicit override: LRO, RLO? */
#define BIDI_IS_OVERRIDE(p) ((p) & BIDI_MASK_OVERRIDE)
/* Some more: */
/* Is isolate: presumably LRI, RLI, FSI, PDI (TODO confirm; not LRO, RLO)? */
#define BIDI_IS_ISOLATE(p) (((p) & BIDI_TYPE_MASK) == BIDI_MASK_ISOLATE)
/* Is left to right letter: LTR? */
#define BIDI_IS_LTR_LETTER(p) \
@@ -9939,7 +9961,43 @@ MG_EXPORT Uint32 GUIAPI GetACharType (LOGFONT* logfont, Achar32 chv);
/* Change numbers: EN, AN to RTL. */
#define BIDI_NUMBER_TO_RTL(p) \
(BIDI_IS_NUMBER(p) ? BIDI_TYPE_RTL : (p))
(BIDI_IS_NUMBER(p) ? BIDI_TYPE_RTL : (p))
/* Is explicit or BN: LRE, RLE, LRO, RLO, PDF, BN? */
#define BIDI_IS_EXPLICIT_OR_BN(p) \
((((p) & BIDI_TYPE_MASK) == BIDI_MASK_EXPLICIT) || \
((p) & (BIDI_MASK_BN)))
/* Is explicit or BN or WS: LRE, RLE, LRO, RLO, PDF, BN, WS? */
#define BIDI_IS_EXPLICIT_OR_BN_OR_WS(p) \
((((p) & BIDI_TYPE_MASK) == BIDI_MASK_EXPLICIT) || \
((p) & (BIDI_MASK_BN | BIDI_MASK_WS)))
/* Is explicit or separator or BN or WS: LRE, RLE, LRO, RLO, PDF, BS, SS, BN, WS? */
#define BIDI_IS_EXPLICIT_OR_SEPARATOR_OR_BN_OR_WS(p) \
((((p) & BIDI_TYPE_MASK) == BIDI_MASK_EXPLICIT) || \
((p) & (BIDI_MASK_SEPARATOR | BIDI_MASK_BN | BIDI_MASK_WS)))
/* Is explicit or BN or NSM: LRE, RLE, LRO, RLO, PDF, BN, NSM? */
#define BIDI_IS_EXPLICIT_OR_BN_OR_NSM(p) \
((((p) & BIDI_TYPE_MASK) == BIDI_MASK_EXPLICIT) || \
(((p) & BIDI_TYPE_MASK) == BIDI_MASK_NSM) || \
((p) & BIDI_MASK_BN))
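/* Override status of an explicit mark: a type for which BIDI_IS_OVERRIDE()
 * holds maps to BIDI_TYPE_LTR or BIDI_TYPE_RTL according to its direction;
 * every other type maps to BIDI_TYPE_ON.
 * NOTE(review): the original text read "LRO,LRE->LTR, RLO,RLE->RTL"; confirm
 * whether LRE/RLE carry BIDI_MASK_OVERRIDE -- if not, they map to ON. */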
#define BIDI_EXPLICIT_TO_OVERRIDE_DIR(p) \
(BIDI_IS_OVERRIDE(p) ? BIDI_LEVEL_TO_DIR(BIDI_DIR_TO_LEVEL(p)) \
: BIDI_TYPE_ON)
/* Change numbers to RTL: EN,AN -> RTL. */
#define BIDI_CHANGE_NUMBER_TO_RTL(p) \
(BIDI_IS_NUMBER(p) ? BIDI_TYPE_RTL : (p))
/* A bracket-type value packs a flag in its top bit (BIDI_BRACKET_OPEN_MASK)
 * and an identifier in its low 31 bits (BIDI_BRACKET_ID_MASK). */
#define BIDI_BRACKET_OPEN_MASK 0x80000000
#define BIDI_BRACKET_ID_MASK 0x7fffffff
/* Non-zero when the BIDI_BRACKET_OPEN_MASK bit of bt is set.  The argument is
 * parenthesized so that expressions such as (a | b) are masked as a whole. */
#define BIDI_IS_BRACKET_OPEN(bt) ((((bt)) & BIDI_BRACKET_OPEN_MASK) != 0)
/* The identifier part of bt, i.e. bt with the open flag stripped. */
#define BIDI_BRACKET_ID(bt) ((bt) & BIDI_BRACKET_ID_MASK)
/**
* \fn Uint16 GUIAPI GetACharBIDIType (LOGFONT* logfont, Achar32 chv)

View File

@@ -2,7 +2,7 @@ SUBDIRS = in-core
noinst_LTLIBRARIES = libfont.la
SRC_FILES = charset.c charset-arabic.c charset-bidi.c \
SRC_FILES = charset.c charset-arabic.c general-bidi.c \
unicode-comp.c unicode-script.c language-code.c \
sysfont.c logfont.c devfont.c fontname.c \
rawbitmap.c varbitmap.c qpf.c upf.c \
@@ -16,7 +16,7 @@ SRC_FILES = charset.c charset-arabic.c charset-bidi.c \
HDR_FILES = charset.h rawbitmap.h varbitmap.h freetype2.h qpf.h se_minigui.h \
upf.h bitmapfont.h unicode-bidi-tables.h \
unicode-tables.h unicode-break-tables.h unicode-script-table.h \
unicode-decomp.h unicode-comp.h \
unicode-decomp.h unicode-comp.h general-bidi.h \
unicode-emoji-tables.h unicode-arabic-shaping-tables.h
EXTRA_DIST = convgbmap.c jisunimap.c $(SRC_FILES) $(HDR_FILES) \

View File

@@ -47,6 +47,8 @@
#include "gdi.h"
#include "devfont.h"
#define BIDI_DEBUG
#include "bidi.h"
#define BIDI_MAX(a,b) ((a) > (b) ? (a) : (b))
@@ -65,30 +67,70 @@ struct _TYPERUN
int pos, len; /* run start position, run len.*/
Uint16 type; /* char type. */
BYTE level; /* embedding level. */
Sint16 level; /* embedding level. */
};
#ifdef BIDI_DEBUG
/*
 * Debug glyphs for embedding levels.  The first entry stands for level -1,
 * so the glyph for a level is found at index (level + 1).
 */
static const char bidi_level[] = {
    /* Level -1 (BIDI_TYPE_SENTINEL): marks the start or end of the string. */
    '$',

    /*
     * Levels 0-61, shown as 0-9, a-z, A-Z.  These are the only valid levels
     * before implicit levels are resolved; afterwards '@' may appear as well.
     */
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',

    /* TBD - insert another 125-64 levels */

    /* Level 62: must only appear after resolving implicit levels. */
    '@',

    /* Level 63 (BIDI_LEVEL_INVALID): internal error, should never be seen. */
    '!',

    /* Levels 64 and above: overflow; seeing one of these indicates a real bug. */
    '*', '*', '*', '*', '*'
};
/*
 * Map a BIDI type to the one-character tag used for it in debug traces.
 *
 * c: the BIDI type value to describe.
 *
 * Returns the tag for a known type.  For any other value a diagnostic is
 * written to stderr and '*' is returned.
 *
 * Each type is listed exactly once: ES, ET and CS have their own tags
 * ('s', 't', 'c'), and no return statement precedes the diagnostic, so the
 * unknown-type report is reachable.
 */
static char bidi_type_name(Uint16 c)
{
    switch (c) {
    /* Strong types. */
    case BIDI_TYPE_LTR: return 'L';
    case BIDI_TYPE_RTL: return 'R';
    case BIDI_TYPE_AL: return 'A';

    /* Explicit embedding / override marks and their terminator. */
    case BIDI_TYPE_LRE: return '<';
    case BIDI_TYPE_RLE: return '>';
    case BIDI_TYPE_LRO: return '(';
    case BIDI_TYPE_RLO: return ')';
    case BIDI_TYPE_PDF: return 'P';

    /* Numbers, separators/terminators and marks. */
    case BIDI_TYPE_EN: return '1';
    case BIDI_TYPE_AN: return '9';
    case BIDI_TYPE_ES: return 's';
    case BIDI_TYPE_ET: return 't';
    case BIDI_TYPE_CS: return 'c';
    case BIDI_TYPE_NSM: return '`';
    case BIDI_TYPE_BN: return 'b';

    /* Separators, whitespace and other neutrals. */
    case BIDI_TYPE_BS: return 'B';
    case BIDI_TYPE_SS: return 'S';
    case BIDI_TYPE_WS: return '_';
    case BIDI_TYPE_ON: return 'n';

    /* Isolate marks. */
    case BIDI_TYPE_LRI: return '[';
    case BIDI_TYPE_RLI: return ']';
    case BIDI_TYPE_FSI: return '@';
    case BIDI_TYPE_PDI: return '#';

    /* Internal start-of-text / end-of-text sentinels. */
    case BIDI_TYPE_SOT: return '^';
    case BIDI_TYPE_EOT: return '$';

    default:
        break;
    }

    /* Not a known type: report it and hand back a marker glyph.
     * The cast makes the argument match the %x conversion. */
    fprintf(stderr, "Unknown bidi type: %02x\n", (unsigned int)c);
    return '*';
}
static void print_resolved_levels(TYPERUN *pp)
@@ -97,7 +139,7 @@ static void print_resolved_levels(TYPERUN *pp)
while(pp){
int i;
for(i = 0; i < LEN (pp); i++)
fprintf(stderr, "%c", bidi_level[(int)LEVEL(pp)]);
fprintf(stderr, "%c", bidi_level[(int)LEVEL(pp) + 1]);
pp = pp->next;
}
fprintf(stderr, "\n");
@@ -124,7 +166,7 @@ static void print_run_types(TYPERUN *pp)
if(pp->level == -1){
sprintf(level_str, "%s", "-1");
}
else sprintf(level_str, "%c", bidi_level[(int)pp->level]);
else sprintf(level_str, "%c(%d)", bidi_level[(int)pp->level + 1], (int)pp->level);
fprintf(stderr, "pos:%d:len:%d(%c)[level:%s] || ", pp->pos, pp->len,
bidi_type_name(pp->type), level_str);
@@ -146,10 +188,9 @@ static void print_hexstr(Achar32* str, int len, BOOL reorder_state)
DBGLOG(" ");
for(m = 0; m < len; m++){
if(m && !(m%16)) DBGLOG("\n ");
DBGLOG2("0x%02x ", (unsigned char)str[m]);
DBGLOG2("0x%04X ", REAL_ACHAR(str[m]));
}
DBGLOG("\n====================================================\n");
}
#else /* BIDI_DEBUG */
@@ -255,7 +296,7 @@ static TYPERUN* get_runtype_link (const CHARSETOPS* charset_ops, Achar32* achars
static void bidi_resolveParagraphs(TYPERUN **ptype_rl_list, Uint32* pbase_dir, BYTE* pbase_level)
{
TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL;
DBGLOG("\n1:Finding the base level\n");
DBGLOG("\n1.Finding the base level\n");
*pbase_dir = BIDI_TYPE_ON;
for (pp = type_rl_list; pp; pp = pp->next){
@@ -267,9 +308,9 @@ static void bidi_resolveParagraphs(TYPERUN **ptype_rl_list, Uint32* pbase_dir, B
}
*pbase_dir = BIDI_LEVEL_TO_DIR (*pbase_level);
DBGLOG2(" Base level: %c\n", bidi_level[(int)*pbase_level]);
DBGLOG2(" Base level: %c\n", bidi_level[(int)*pbase_level + 1]);
DBGLOG2(" Base dir: %c\n", bidi_type_name(*pbase_dir));
DBGLOG("Finding the base level, Done\n");
DBGLOG(" Finding the base level, Done\n");
}
/* 2.Resolving Explicit levels.
@@ -277,12 +318,18 @@ static void bidi_resolveParagraphs(TYPERUN **ptype_rl_list, Uint32* pbase_dir, B
static void bidi_resolveExplicit (TYPERUN **ptype_rl_list, Uint32 base_dir)
{
TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL;
DBGLOG("\n2:Resolving weak types\n");
DBGLOG("\n2.Resolving explicit types\n");
for (pp = type_rl_list->next; pp->next; pp = pp->next)
{
LEVEL(pp) = BIDI_DIR_TO_LEVEL(base_dir);
}
#ifdef BIDI_DEBUG
print_run_types(type_rl_list);
print_resolved_levels(type_rl_list);
print_resolved_types(type_rl_list);
#endif
return;
}
@@ -293,7 +340,7 @@ static void bidi_resolveWeak(TYPERUN **ptype_rl_list, Uint32 base_dir)
Uint32 last_strong, prev_type_org;
BOOL w4;
DBGLOG("\n3:Resolving weak types\n");
DBGLOG("\n3.Resolving weak types\n");
last_strong = base_dir;
for (pp = type_rl_list->next; pp->next; pp = pp->next)
@@ -403,7 +450,6 @@ static void bidi_resolveWeak(TYPERUN **ptype_rl_list, Uint32 base_dir)
print_resolved_levels (type_rl_list);
print_resolved_types (type_rl_list);
#endif
}
/* Return the embedding direction of a link. */
@@ -413,7 +459,9 @@ static void bidi_resolveWeak(TYPERUN **ptype_rl_list, Uint32 base_dir)
static void bidi_resolveNeutrals(TYPERUN **ptype_rl_list, Uint32 base_bir)
{
TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL;
DBGLOG ("\n4:Resolving neutral types\n");
DBGLOG ("\n4.Resolving neutral types\n");
for (pp = type_rl_list->next; pp->next; pp = pp->next)
{
Uint32 prev_type, this_type, next_type;
@@ -444,7 +492,8 @@ static int bidi_resolveImplicit(TYPERUN **ptype_rl_list, int base_level)
{
TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL;
int max_level = base_level;
DBGLOG ("\n5:Resolving implicit levels\n");
DBGLOG ("\n5.Resolving implicit levels\n");
for (pp = type_rl_list->next; pp->next; pp = pp->next){
Uint32 this_type;
@@ -480,7 +529,7 @@ bidi_resolveMirrorChar (const CHARSETOPS* charset_ops, Achar32* achars, int len,
TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL;
/* L4. Mirror all characters that are in odd levels and have mirrors. */
DBGLOG ("6.Mirroring\n");
DBGLOG ("\n6.Mirroring\n");
for (pp = type_rl_list->next; pp->next; pp = pp->next) {
if (pp->level & 1)
{
@@ -514,8 +563,14 @@ static void bidi_resolve_string (const CHARSETOPS* charset_ops,
bidi_resolveParagraphs(&type_rl_list, &base_dir, &base_level);
}
else {
DBGLOG("\n1.Initializing the base level\n");
base_level = pel;
base_dir = BIDI_LEVEL_TO_DIR (base_level);
DBGLOG2(" Base level: %c\n", bidi_level[base_level + 1]);
DBGLOG2(" Base dir: %c\n", bidi_type_name(base_dir));
DBGLOG(" Initializing the base level, Done\n");
}
/* 2.Resolving Explicit levels.*/
@@ -561,7 +616,7 @@ static void bidi_reorder (REORDER_CONTEXT* context, int len,
TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL;
/* L2. Reorder. */
DBGLOG("\n6:Reordering\n");
DBGLOG("\n7.Reordering\n");
for(i = max_level; i > 0; i--){
for (pp = type_rl_list->next; pp->next; pp = pp->next){
@@ -585,7 +640,7 @@ static void bidi_reorder (REORDER_CONTEXT* context, int len,
}
}
}
DBGLOG("\nReordering, Done\n");
DBGLOG(" Reordering, Done\n");
}
static void bidi_reverse_chars (void* context, int len, int pos)

View File

@@ -3574,6 +3574,7 @@ static Uint16 unicode_bidi_char_type (Achar32 chv)
Achar32 chv_last = (Achar32)TABLESIZE (__mg_unicode_bidi_char_type_map);
Achar32 chv_mid;
chv = REAL_ACHAR(chv);
while (chv_last >= chv_first) {
chv_mid = (chv_first + chv_last)/2;

View File

@@ -46,8 +46,7 @@
extern "C" {
#endif /* __cplusplus */
//#define ARABIC_DEBUG
#ifdef ARABIC_DEBUG
#ifdef BIDI_DEBUG
#define DBGLOG(s) do { if (1) { fprintf(stderr, s); } } while (0)
#define DBGLOG2(s, t1) do { if (1) { fprintf(stderr, s, t1); } } while (0)
#define DBGLOG3(s, t1,t2) do { if (1) { fprintf(stderr, s, t1,t2); } } while (0)