diff --git a/include/gdi.h b/include/gdi.h index 4179296a..869f1262 100644 --- a/include/gdi.h +++ b/include/gdi.h @@ -9724,13 +9724,29 @@ MG_EXPORT Uint32 GUIAPI GetACharType (LOGFONT* logfont, Achar32 chv); #define BIDI_MASK_SS 0x5000 /** - * \defgroup glyph_bidi_types Glyph BIDI types + * \defgroup bidi_types BIDI types * - * Values for BIDI glyph type. + * Values for BIDI types. * * @{ */ +typedef Uint16 BidiType; +typedef Uint8 BidiLevel; + +#define BIDI_PGDIR_LTR 0 +#define BIDI_PGDIR_RTL 1 +#define BIDI_PGDIR_ON 2 + +#define BIDI_FLAG_SHAPE_MIRRORING 0x00000001 +#define BIDI_FLAG_REORDER_NSM 0x00000002 +#define BIDI_FLAG_SHAPE_ARAB_PRES 0x00000100 +#define BIDI_FLAG_SHAPE_ARAB_LIGA 0x00000200 +#define BIDI_FLAG_SHAPE_ARAB_CONSOLE 0x00000400 + +#define BIDI_FLAG_REMOVE_BIDI 0x00010000 +#define BIDI_FLAG_REMOVE_JOINING 0x00020000 +#define BIDI_FLAG_REMOVE_SPECIALS 0x00040000 /** * \def BIDI_TYPE_LTR * \brief Strong left to right @@ -9868,31 +9884,36 @@ MG_EXPORT Uint32 GUIAPI GetACharType (LOGFONT* logfont, Achar32 chv); */ #define BIDI_TYPE_PDI (BIDI_MASK_NEUTRAL | BIDI_MASK_WEAK | BIDI_MASK_ISOLATE) - /** @} end of glyph_bidi_types */ + /** @} end of bidi_types */ -/* The following are only used internally */ +#define BIDI_TYPE_SENTINEL (BIDI_MASK_SENTINEL) -/* Start of text */ -#define BIDI_TYPE_SOT (BIDI_MASK_SENTINEL) -/* End of text */ -#define BIDI_TYPE_EOT (BIDI_MASK_SENTINEL + BIDI_MASK_RTL) +/* Weak Left-To-Right */ +#define BIDI_TYPE_WLTR (BIDI_MASK_WEAK) +/* Weak Right-To-Left */ +#define BIDI_TYPE_WRTL (BIDI_MASK_WEAK | BIDI_MASK_RTL) /* Is private-use value? */ -#define BIDI_TYPE_PRIVATE(p) ((p) < 0) - -/* Return the direction of the level number, BIDI_TYPE_LTR for even and - BIDI_TYPE_RTL for odds. */ -#define BIDI_LEVEL_TO_DIR(lev) (BIDI_TYPE_LTR | (lev & 1)) - -/* Return the minimum level of the direction, 0 for BIDI_TYPE_LTR and - 1 for BIDI_TYPE_RTL and BIDI_TYPE_AL. 
*/ -#define BIDI_DIR_TO_LEVEL(dir) ((BYTE)(dir & 1)) +//#define BIDI_TYPE_PRIVATE(p) ((p) < 0) /* Is right to left? */ #define BIDI_IS_RTL(p) ((p) & BIDI_MASK_RTL) /* Is arabic? */ #define BIDI_IS_ARABIC(p) ((p) & BIDI_MASK_ARABIC) +/* Is right-to-left level? */ +#define BIDI_LEVEL_IS_RTL(lev) ((lev) & 1) + +/* Return the bidi type corresponding to the direction of the level number, + BIDI_TYPE_LTR for evens and BIDI_TYPE_RTL for odds. */ +#define BIDI_LEVEL_TO_DIR(lev) \ + (BIDI_LEVEL_IS_RTL(lev) ? BIDI_TYPE_RTL : BIDI_TYPE_LTR) + +/* Return the minimum level of the direction, 0 for BIDI_TYPE_LTR and + 1 for BIDI_TYPE_RTL and BIDI_TYPE_AL. */ +#define BIDI_DIR_TO_LEVEL(dir) \ + ((BidiLevel) (BIDI_IS_RTL(dir) ? 1 : 0)) + /* Is strong? */ #define BIDI_IS_STRONG(p) (!((p) & BIDI_SWN_MASK)) /* Is weak? */ @@ -9921,7 +9942,8 @@ MG_EXPORT Uint32 GUIAPI GetACharType (LOGFONT* logfont, Achar32 chv); /* Is explicit override: LRO, RLO? */ #define BIDI_IS_OVERRIDE(p) ((p) & BIDI_MASK_OVERRIDE) -/* Some more: */ +/* Is isolate (presumably LRI, RLI, FSI, PDI)? */ +#define BIDI_IS_ISOLATE(p) (((p) & BIDI_TYPE_MASK) == BIDI_MASK_ISOLATE) /* Is left to right letter: LTR? */ #define BIDI_IS_LTR_LETTER(p) \ @@ -9939,7 +9961,43 @@ MG_EXPORT Uint32 GUIAPI GetACharType (LOGFONT* logfont, Achar32 chv); /* Change numbers: EN, AN to RTL. */ #define BIDI_NUMBER_TO_RTL(p) \ - (BIDI_IS_NUMBER(p) ? BIDI_TYPE_RTL : (p)) + (BIDI_IS_NUMBER(p) ? BIDI_TYPE_RTL : (p)) + +/* Is explicit or BN: LRE, RLE, LRO, RLO, PDF, BN? */ +#define BIDI_IS_EXPLICIT_OR_BN(p) \ + ((((p) & BIDI_TYPE_MASK) == BIDI_MASK_EXPLICIT) || \ + ((p) & (BIDI_MASK_BN))) + +/* Is explicit or BN or WS: LRE, RLE, LRO, RLO, PDF, BN, WS? */ +#define BIDI_IS_EXPLICIT_OR_BN_OR_WS(p) \ + ((((p) & BIDI_TYPE_MASK) == BIDI_MASK_EXPLICIT) || \ + ((p) & (BIDI_MASK_BN | BIDI_MASK_WS))) + +/* Is explicit or separator or BN or WS: LRE, RLE, LRO, RLO, PDF, BS, SS, BN, WS?
*/ +#define BIDI_IS_EXPLICIT_OR_SEPARATOR_OR_BN_OR_WS(p) \ + ((((p) & BIDI_TYPE_MASK) == BIDI_MASK_EXPLICIT) || \ + ((p) & (BIDI_MASK_SEPARATOR | BIDI_MASK_BN | BIDI_MASK_WS))) + +/* Is explicit or BN or NSM: LRE, RLE, LRO, RLO, PDF, BN, NSM? */ +#define BIDI_IS_EXPLICIT_OR_BN_OR_NSM(p) \ + ((((p) & BIDI_TYPE_MASK) == BIDI_MASK_EXPLICIT) || \ + (((p) & BIDI_TYPE_MASK) == BIDI_MASK_NSM) || \ + ((p) & BIDI_MASK_BN)) + +/* Override status of an explicit mark: + * LRO,LRE->LTR, RLO,RLE->RTL, otherwise->ON. */ +#define BIDI_EXPLICIT_TO_OVERRIDE_DIR(p) \ + (BIDI_IS_OVERRIDE(p) ? BIDI_LEVEL_TO_DIR(BIDI_DIR_TO_LEVEL(p)) \ + : BIDI_TYPE_ON) + +/* Change numbers to RTL: EN,AN -> RTL. */ +#define BIDI_CHANGE_NUMBER_TO_RTL(p) \ + (BIDI_IS_NUMBER(p) ? BIDI_TYPE_RTL : (p)) + +#define BIDI_BRACKET_OPEN_MASK 0x80000000 +#define BIDI_BRACKET_ID_MASK 0x7fffffff +#define BIDI_IS_BRACKET_OPEN(bt) ((bt & BIDI_BRACKET_OPEN_MASK)>0) +#define BIDI_BRACKET_ID(bt) ((bt & BIDI_BRACKET_ID_MASK)) /** * \fn Uint16 GUIAPI GetACharBIDIType (LOGFONT* logfont, Achar32 chv) diff --git a/src/font/Makefile.am b/src/font/Makefile.am index 7a74343c..78797897 100644 --- a/src/font/Makefile.am +++ b/src/font/Makefile.am @@ -2,7 +2,7 @@ SUBDIRS = in-core noinst_LTLIBRARIES = libfont.la -SRC_FILES = charset.c charset-arabic.c charset-bidi.c \ +SRC_FILES = charset.c charset-arabic.c general-bidi.c \ unicode-comp.c unicode-script.c language-code.c \ sysfont.c logfont.c devfont.c fontname.c \ rawbitmap.c varbitmap.c qpf.c upf.c \ @@ -16,7 +16,7 @@ SRC_FILES = charset.c charset-arabic.c charset-bidi.c \ HDR_FILES = charset.h rawbitmap.h varbitmap.h freetype2.h qpf.h se_minigui.h \ upf.h bitmapfont.h unicode-bidi-tables.h \ unicode-tables.h unicode-break-tables.h unicode-script-table.h \ - unicode-decomp.h unicode-comp.h \ + unicode-decomp.h unicode-comp.h general-bidi.h \ unicode-emoji-tables.h unicode-arabic-shaping-tables.h EXTRA_DIST = convgbmap.c jisunimap.c $(SRC_FILES) $(HDR_FILES) \ diff --git 
a/src/font/charset-bidi.c b/src/font/charset-bidi.c index 1f8bed43..df709283 100644 --- a/src/font/charset-bidi.c +++ b/src/font/charset-bidi.c @@ -47,6 +47,8 @@ #include "gdi.h" #include "devfont.h" + +#define BIDI_DEBUG #include "bidi.h" #define BIDI_MAX(a,b) ((a) > (b) ? (a) : (b)) @@ -65,30 +67,70 @@ struct _TYPERUN int pos, len; /* run start position, run len.*/ Uint16 type; /* char type. */ - BYTE level; /* embedding level. */ + Sint16 level; /* embedding level. */ }; #ifdef BIDI_DEBUG +static const char bidi_level[] = { + /* -1 == BIDI_TYPE_SENTINEL, indicating start or end of string. */ + '$', + /* 0-61 == 0-9,a-z,A-Z are the only valid levels before resolving + * implicits. After that the level @ may appear too. + */ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', + 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', + 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', + 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', + 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', + 'Y', 'Z', + + /* TBD - insert another 125-64 levels */ + + /* 62 == must only appear after resolving implicits. */ + '@', + + /* 63 == BIDI_LEVEL_INVALID, internal error, this level shouldn't be seen.*/ + '!', + + /* >= 64 == overflows; this and higher levels indicate a real bug!
*/ + '*', '*', '*', '*', '*' +}; + static char bidi_type_name(Uint16 c) { switch (c){ case BIDI_TYPE_LTR: return 'L'; case BIDI_TYPE_RTL: return 'R'; case BIDI_TYPE_AL: return 'A'; + case BIDI_TYPE_LRE: return '<'; + case BIDI_TYPE_RLE: return '>'; + case BIDI_TYPE_LRO: return '('; + case BIDI_TYPE_RLO: return ')'; + case BIDI_TYPE_PDF: return 'P'; case BIDI_TYPE_EN: return '1'; case BIDI_TYPE_AN: return '9'; - case BIDI_TYPE_ES: return 'w'; - case BIDI_TYPE_ET: return 'w'; - case BIDI_TYPE_CS: return 'w'; + case BIDI_TYPE_ES: return 's'; + case BIDI_TYPE_ET: return 't'; + case BIDI_TYPE_CS: return 'c'; case BIDI_TYPE_NSM: return '`'; case BIDI_TYPE_BN: return 'b'; case BIDI_TYPE_BS: return 'B'; case BIDI_TYPE_SS: return 'S'; case BIDI_TYPE_WS: return '_'; case BIDI_TYPE_ON: return 'n'; + case BIDI_TYPE_LRI: return '['; + case BIDI_TYPE_RLI: return ']'; + case BIDI_TYPE_FSI: return '@'; + case BIDI_TYPE_PDI: return '#'; + case BIDI_TYPE_SOT: return '^'; + case BIDI_TYPE_EOT: return '$'; } - return 'n'; + + fprintf(stderr, "Unknown bidi type: %02x\n", c); + + return '*'; } static void print_resolved_levels(TYPERUN *pp) @@ -97,7 +139,7 @@ static void print_resolved_levels(TYPERUN *pp) while(pp){ int i; for(i = 0; i < LEN (pp); i++) - fprintf(stderr, "%c", bidi_level[(int)LEVEL(pp)]); + fprintf(stderr, "%c", bidi_level[(int)LEVEL(pp) + 1]); pp = pp->next; } fprintf(stderr, "\n"); @@ -124,7 +166,7 @@ static void print_run_types(TYPERUN *pp) if(pp->level == -1){ sprintf(level_str, "%s", "-1"); } - else sprintf(level_str, "%c", bidi_level[(int)pp->level]); + else sprintf(level_str, "%c(%d)", bidi_level[(int)pp->level + 1], (int)pp->level); fprintf(stderr, "pos:%d:len:%d(%c)[level:%s] || ", pp->pos, pp->len, bidi_type_name(pp->type), level_str); @@ -146,10 +188,9 @@ static void print_hexstr(Achar32* str, int len, BOOL reorder_state) DBGLOG(" "); for(m = 0; m < len; m++){ if(m && !(m%16)) DBGLOG("\n "); - DBGLOG2("0x%02x ", (unsigned char)str[m]); + DBGLOG2("0x%04X ", 
REAL_ACHAR(str[m])); } DBGLOG("\n====================================================\n"); - } #else /* BIDI_DEBUG */ @@ -255,7 +296,7 @@ static TYPERUN* get_runtype_link (const CHARSETOPS* charset_ops, Achar32* achars static void bidi_resolveParagraphs(TYPERUN **ptype_rl_list, Uint32* pbase_dir, BYTE* pbase_level) { TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL; - DBGLOG("\n1:Finding the base level\n"); + DBGLOG("\n1.Finding the base level\n"); *pbase_dir = BIDI_TYPE_ON; for (pp = type_rl_list; pp; pp = pp->next){ @@ -267,9 +308,9 @@ static void bidi_resolveParagraphs(TYPERUN **ptype_rl_list, Uint32* pbase_dir, B } *pbase_dir = BIDI_LEVEL_TO_DIR (*pbase_level); - DBGLOG2(" Base level: %c\n", bidi_level[(int)*pbase_level]); + DBGLOG2(" Base level: %c\n", bidi_level[(int)*pbase_level + 1]); DBGLOG2(" Base dir: %c\n", bidi_type_name(*pbase_dir)); - DBGLOG("Finding the base level, Done\n"); + DBGLOG(" Finding the base level, Done\n"); } /* 2.Resolving Explicit levels. @@ -277,12 +318,18 @@ static void bidi_resolveParagraphs(TYPERUN **ptype_rl_list, Uint32* pbase_dir, B static void bidi_resolveExplicit (TYPERUN **ptype_rl_list, Uint32 base_dir) { TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL; - DBGLOG("\n2:Resolving weak types\n"); + DBGLOG("\n2.Resolving explicit types\n"); for (pp = type_rl_list->next; pp->next; pp = pp->next) { LEVEL(pp) = BIDI_DIR_TO_LEVEL(base_dir); } + +#ifdef BIDI_DEBUG + print_run_types(type_rl_list); + print_resolved_levels(type_rl_list); + print_resolved_types(type_rl_list); +#endif return; } @@ -293,7 +340,7 @@ static void bidi_resolveWeak(TYPERUN **ptype_rl_list, Uint32 base_dir) Uint32 last_strong, prev_type_org; BOOL w4; - DBGLOG("\n3:Resolving weak types\n"); + DBGLOG("\n3.Resolving weak types\n"); last_strong = base_dir; for (pp = type_rl_list->next; pp->next; pp = pp->next) @@ -403,7 +450,6 @@ static void bidi_resolveWeak(TYPERUN **ptype_rl_list, Uint32 base_dir) print_resolved_levels (type_rl_list); print_resolved_types 
(type_rl_list); #endif - } /* Return the embedding direction of a link. */ @@ -413,7 +459,9 @@ static void bidi_resolveWeak(TYPERUN **ptype_rl_list, Uint32 base_dir) static void bidi_resolveNeutrals(TYPERUN **ptype_rl_list, Uint32 base_bir) { TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL; - DBGLOG ("\n4:Resolving neutral types\n"); + + DBGLOG ("\n4.Resolving neutral types\n"); + for (pp = type_rl_list->next; pp->next; pp = pp->next) { Uint32 prev_type, this_type, next_type; @@ -444,7 +492,8 @@ static int bidi_resolveImplicit(TYPERUN **ptype_rl_list, int base_level) { TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL; int max_level = base_level; - DBGLOG ("\n5:Resolving implicit levels\n"); + + DBGLOG ("\n5.Resolving implicit levels\n"); for (pp = type_rl_list->next; pp->next; pp = pp->next){ Uint32 this_type; @@ -480,7 +529,7 @@ bidi_resolveMirrorChar (const CHARSETOPS* charset_ops, Achar32* achars, int len, TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL; /* L4. Mirror all characters that are in odd levels and have mirrors. */ - DBGLOG ("6.Mirroring\n"); + DBGLOG ("\n6.Mirroring\n"); for (pp = type_rl_list->next; pp->next; pp = pp->next) { if (pp->level & 1) { @@ -514,8 +563,14 @@ static void bidi_resolve_string (const CHARSETOPS* charset_ops, bidi_resolveParagraphs(&type_rl_list, &base_dir, &base_level); } else { + DBGLOG("\n1.Initializing the base level\n"); + base_level = pel; base_dir = BIDI_LEVEL_TO_DIR (base_level); + + DBGLOG2(" Base level: %c\n", bidi_level[base_level + 1]); + DBGLOG2(" Base dir: %c\n", bidi_type_name(base_dir)); + DBGLOG(" Initializing the base level, Done\n"); } /* 2.Resolving Explicit levels.*/ @@ -561,7 +616,7 @@ static void bidi_reorder (REORDER_CONTEXT* context, int len, TYPERUN *type_rl_list = *ptype_rl_list, *pp = NULL; /* L2. Reorder. 
*/ - DBGLOG("\n6:Reordering\n"); + DBGLOG("\n7.Reordering\n"); for(i = max_level; i > 0; i--){ for (pp = type_rl_list->next; pp->next; pp = pp->next){ @@ -585,7 +640,7 @@ static void bidi_reorder (REORDER_CONTEXT* context, int len, } } } - DBGLOG("\nReordering, Done\n"); + DBGLOG(" Reordering, Done\n"); } static void bidi_reverse_chars (void* context, int len, int pos) diff --git a/src/font/charset.c b/src/font/charset.c index ba8edf26..c85b3ce9 100644 --- a/src/font/charset.c +++ b/src/font/charset.c @@ -3574,6 +3574,7 @@ static Uint16 unicode_bidi_char_type (Achar32 chv) Achar32 chv_last = (Achar32)TABLESIZE (__mg_unicode_bidi_char_type_map); Achar32 chv_mid; + chv = REAL_ACHAR(chv); while (chv_last >= chv_first) { chv_mid = (chv_first + chv_last)/2; diff --git a/src/include/bidi.h b/src/include/bidi.h index 6f441f76..c8eb5d34 100644 --- a/src/include/bidi.h +++ b/src/include/bidi.h @@ -46,8 +46,7 @@ extern "C" { #endif /* __cplusplus */ -//#define ARABIC_DEBUG -#ifdef ARABIC_DEBUG +#ifdef BIDI_DEBUG #define DBGLOG(s) do { if (1) { fprintf(stderr, s); } } while (0) #define DBGLOG2(s, t1) do { if (1) { fprintf(stderr, s, t1); } } while (0) #define DBGLOG3(s, t1,t2) do { if (1) { fprintf(stderr, s, t1,t2); } } while (0)