diff --git a/src/font/Makefile.am b/src/font/Makefile.am index cd8fef05..3a27dd90 100644 --- a/src/font/Makefile.am +++ b/src/font/Makefile.am @@ -15,7 +15,8 @@ SRC_FILES = charset.c charset-arabic.c charset-bidi.c \ HDR_FILES = charset.h rawbitmap.h varbitmap.h freetype2.h qpf.h se_minigui.h \ upf.h bitmapfont.h unicode-bidi-tables.h \ - unicode-tables.h unicode-break.h unicode-decomp.h unicode-comp.h unicode-emoji-table.h + unicode-tables.h unicode-break.h unicode-decomp.h unicode-comp.h \ + unicode-emoji-tables.h unicode-arabic-shaping-tables.h EXTRA_DIST = convgbmap.c jisunimap.c $(SRC_FILES) $(HDR_FILES) \ makefile.ng makefile.msvc diff --git a/src/font/unicode-arabic-shaping-tables.h b/src/font/unicode-arabic-shaping-tables.h new file mode 100644 index 00000000..de4012b2 --- /dev/null +++ b/src/font/unicode-arabic-shaping-tables.h @@ -0,0 +1,433 @@ +/* == Start of generated table == */ +/* + * The following table is generated by running: + * + * ./gen-arabic-table.py + * + * on ucd/ArabicShaping.txt ucd/UnicodeData.txt ucd/Blocks.txt files with these headers: + * + * # ArabicShaping-11.0.0.txt + * # Date: 2018-02-21, 14:50:00 GMT [KW, RP] + * # Blocks-11.0.0.txt + * # Date: 2017-10-16, 24:39:00 GMT [KW] + * UnicodeData.txt does not have a header. 
+ */ + +#ifndef _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H +#define _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H + + +#define X JOINING_TYPE_X +#define R JOINING_TYPE_R +#define T JOINING_TYPE_T +#define U JOINING_TYPE_U +#define A JOINING_GROUP_ALAPH +#define DR JOINING_GROUP_DALATH_RISH +#define L JOINING_TYPE_L +#define C JOINING_TYPE_C +#define D JOINING_TYPE_D + +static const Uint8 joining_table[] = +{ + +#define joining_offset_0x0600u 0 + + /* Arabic */ + + /* 0600 */ U,U,U,U,U,U,X,X,U,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0620 */ D,U,R,R,R,R,D,R,D,R,D,D,D,D,D,R,R,R,R,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 0640 */ C,D,D,D,D,D,D,D,R,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0660 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,D,X,R,R,R,U,R,R,R,D,D,D,D,D,D,D,D, + /* 0680 */ D,D,D,D,D,D,D,D,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,D,D,D,D,D,D, + /* 06A0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 06C0 */ R,D,D,R,R,R,R,R,R,R,R,R,D,R,D,R,D,D,R,R,X,R,X,X,X,X,X,X,X,U,X,X, + /* 06E0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,R,R,X,X,X,X,X,X,X,X,X,X,D,D,D,X,X,D, + + /* Syriac */ + + /* 0700 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,A,X,D,D,D,DR,DR,R,R,R,D,D,D,D,R,D, + /* 0720 */ D,D,D,D,D,D,D,D,R,D,DR,D,R,D,D,DR,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0740 */ X,X,X,X,X,X,X,X,X,X,X,X,X,R,D,D, + + /* Arabic Supplement */ + + /* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D, + /* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D, + + /* FILLER */ + + /* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* NKo */ + + /* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,X,X,X,X,X, + + /* FILLER */ + + /* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* Mandaic 
*/ + + /* 0840 */ R,D,D,D,D,D,R,R,D,R,D,D,D,D,D,D,D,D,D,D,R,D,U,U,U,X,X,X,X,X,X,X, + + /* Syriac Supplement */ + + /* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* Arabic Extended-A */ + + /* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,X,D,D,D,R,D,D,D,D,X,X, + /* 08C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 08E0 */ X,X,U, + +#define joining_offset_0x1806u 739 + + /* Mongolian */ + + /* 1800 */ U,D,X,X,C,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 1820 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X, + /* 1880 */ U,U,U,U,U,T,T,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 18A0 */ D,D,D,D,D,D,D,D,D,X,D, + +#define joining_offset_0x200cu 904 + + /* General Punctuation */ + + /* 2000 */ U,C,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2020 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2040 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2060 */ X,X,X,X,X,X,U,U,U,U, + +#define joining_offset_0xa840u 998 + + /* Phags-pa */ + + /* A840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* A860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,L,U, + +#define joining_offset_0x10ac0u 1050 + + /* Manichaean */ + + /* 10AC0 */ D,D,D,D,D,R,U,R,U,R,R,U,U,L,R,R,R,R,R,D,D,D,D,L,D,D,D,D,D,R,D,D, + /* 10AE0 */ D,R,U,U,R,X,X,X,X,X,X,D,D,D,D,R, + +#define joining_offset_0x10b80u 1098 + + /* Psalter Pahlavi */ + + /* 10B80 */ D,R,D,R,R,R,D,D,D,R,D,D,R,D,R,R,D,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 10BA0 */ X,X,X,X,X,X,X,X,X,R,R,R,R,D,D,U, + +#define joining_offset_0x10d00u 1146 + + /* Hanifi Rohingya */ + + /* 10D00 */ L,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 10D20 */ 
D,D,R,D, + +#define joining_offset_0x10f30u 1182 + + /* Sogdian */ + + /* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D, + /* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R, + +#define joining_offset_0x110bdu 1219 + + /* Kaithi */ + + /* 110A0 */ U,X,X, + /* 110C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,U, + +#define joining_offset_0x1e900u 1236 + + /* Adlam */ + + /* 1E900 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1E920 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1E940 */ D,D,D,D, + +}; /* Table items: 1304; occupancy: 56% */ + + +static inline BOOL uc_in_range(Uchar32 uc, Uchar32 lo, Uchar32 hi) +{ + /* The casts below are important as if T is smaller than int, + * the subtract results will become a signed int! */ + return (Uchar32)(uc - lo) <= (Uchar32)(hi - lo); +} + + +static unsigned int +joining_type (Uchar32 u) +{ + switch (u >> 12) + { + case 0x0u: + if (uc_in_range(u, 0x0600u, 0x08E2u)) return joining_table[u - 0x0600u + joining_offset_0x0600u]; + break; + + case 0x1u: + if (uc_in_range(u, 0x1806u, 0x18AAu)) return joining_table[u - 0x1806u + joining_offset_0x1806u]; + break; + + case 0x2u: + if (uc_in_range(u, 0x200Cu, 0x2069u)) return joining_table[u - 0x200Cu + joining_offset_0x200cu]; + break; + + case 0xAu: + if (uc_in_range(u, 0xA840u, 0xA873u)) return joining_table[u - 0xA840u + joining_offset_0xa840u]; + break; + + case 0x10u: + if (uc_in_range(u, 0x10AC0u, 0x10AEFu)) return joining_table[u - 0x10AC0u + joining_offset_0x10ac0u]; + if (uc_in_range(u, 0x10B80u, 0x10BAFu)) return joining_table[u - 0x10B80u + joining_offset_0x10b80u]; + if (uc_in_range(u, 0x10D00u, 0x10D23u)) return joining_table[u - 0x10D00u + joining_offset_0x10d00u]; + if (uc_in_range(u, 0x10F30u, 0x10F54u)) return joining_table[u - 0x10F30u + joining_offset_0x10f30u]; + break; + + case 0x11u: + if (uc_in_range(u, 0x110BDu, 0x110CDu)) return joining_table[u - 0x110BDu + joining_offset_0x110bdu]; + break; + + case 0x1Eu: + if 
(uc_in_range(u, 0x1E900u, 0x1E943u)) return joining_table[u - 0x1E900u + joining_offset_0x1e900u]; + break; + + default: + break; + } + return X; +} + +#undef X +#undef R +#undef T +#undef U +#undef A +#undef DR +#undef L +#undef C +#undef D + + +static const Uint16 shaping_table[][4] = +{ + {0x0000u, 0x0000u, 0x0000u, 0xFE80u}, /* U+0621 ARABIC LETTER HAMZA ISOLATED FORM */ + {0x0000u, 0x0000u, 0xFE82u, 0xFE81u}, /* U+0622 ARABIC LETTER ALEF WITH MADDA ABOVE */ + {0x0000u, 0x0000u, 0xFE84u, 0xFE83u}, /* U+0623 ARABIC LETTER ALEF WITH HAMZA ABOVE */ + {0x0000u, 0x0000u, 0xFE86u, 0xFE85u}, /* U+0624 ARABIC LETTER WAW WITH HAMZA ABOVE */ + {0x0000u, 0x0000u, 0xFE88u, 0xFE87u}, /* U+0625 ARABIC LETTER ALEF WITH HAMZA BELOW */ + {0xFE8Bu, 0xFE8Cu, 0xFE8Au, 0xFE89u}, /* U+0626 ARABIC LETTER YEH WITH HAMZA ABOVE */ + {0x0000u, 0x0000u, 0xFE8Eu, 0xFE8Du}, /* U+0627 ARABIC LETTER ALEF */ + {0xFE91u, 0xFE92u, 0xFE90u, 0xFE8Fu}, /* U+0628 ARABIC LETTER BEH */ + {0x0000u, 0x0000u, 0xFE94u, 0xFE93u}, /* U+0629 ARABIC LETTER TEH MARBUTA */ + {0xFE97u, 0xFE98u, 0xFE96u, 0xFE95u}, /* U+062A ARABIC LETTER TEH */ + {0xFE9Bu, 0xFE9Cu, 0xFE9Au, 0xFE99u}, /* U+062B ARABIC LETTER THEH */ + {0xFE9Fu, 0xFEA0u, 0xFE9Eu, 0xFE9Du}, /* U+062C ARABIC LETTER JEEM */ + {0xFEA3u, 0xFEA4u, 0xFEA2u, 0xFEA1u}, /* U+062D ARABIC LETTER HAH */ + {0xFEA7u, 0xFEA8u, 0xFEA6u, 0xFEA5u}, /* U+062E ARABIC LETTER KHAH */ + {0x0000u, 0x0000u, 0xFEAAu, 0xFEA9u}, /* U+062F ARABIC LETTER DAL */ + {0x0000u, 0x0000u, 0xFEACu, 0xFEABu}, /* U+0630 ARABIC LETTER THAL */ + {0x0000u, 0x0000u, 0xFEAEu, 0xFEADu}, /* U+0631 ARABIC LETTER REH */ + {0x0000u, 0x0000u, 0xFEB0u, 0xFEAFu}, /* U+0632 ARABIC LETTER ZAIN */ + {0xFEB3u, 0xFEB4u, 0xFEB2u, 0xFEB1u}, /* U+0633 ARABIC LETTER SEEN */ + {0xFEB7u, 0xFEB8u, 0xFEB6u, 0xFEB5u}, /* U+0634 ARABIC LETTER SHEEN */ + {0xFEBBu, 0xFEBCu, 0xFEBAu, 0xFEB9u}, /* U+0635 ARABIC LETTER SAD */ + {0xFEBFu, 0xFEC0u, 0xFEBEu, 0xFEBDu}, /* U+0636 ARABIC LETTER DAD */ + {0xFEC3u, 0xFEC4u, 
0xFEC2u, 0xFEC1u}, /* U+0637 ARABIC LETTER TAH */ + {0xFEC7u, 0xFEC8u, 0xFEC6u, 0xFEC5u}, /* U+0638 ARABIC LETTER ZAH */ + {0xFECBu, 0xFECCu, 0xFECAu, 0xFEC9u}, /* U+0639 ARABIC LETTER AIN */ + {0xFECFu, 0xFED0u, 0xFECEu, 0xFECDu}, /* U+063A ARABIC LETTER GHAIN */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0640 */ + {0xFED3u, 0xFED4u, 0xFED2u, 0xFED1u}, /* U+0641 ARABIC LETTER FEH */ + {0xFED7u, 0xFED8u, 0xFED6u, 0xFED5u}, /* U+0642 ARABIC LETTER QAF */ + {0xFEDBu, 0xFEDCu, 0xFEDAu, 0xFED9u}, /* U+0643 ARABIC LETTER KAF */ + {0xFEDFu, 0xFEE0u, 0xFEDEu, 0xFEDDu}, /* U+0644 ARABIC LETTER LAM */ + {0xFEE3u, 0xFEE4u, 0xFEE2u, 0xFEE1u}, /* U+0645 ARABIC LETTER MEEM */ + {0xFEE7u, 0xFEE8u, 0xFEE6u, 0xFEE5u}, /* U+0646 ARABIC LETTER NOON */ + {0xFEEBu, 0xFEECu, 0xFEEAu, 0xFEE9u}, /* U+0647 ARABIC LETTER HEH */ + {0x0000u, 0x0000u, 0xFEEEu, 0xFEEDu}, /* U+0648 ARABIC LETTER WAW */ + {0xFBE8u, 0xFBE9u, 0xFEF0u, 0xFEEFu}, /* U+0649 ARABIC LETTER */ + {0xFEF3u, 0xFEF4u, 0xFEF2u, 0xFEF1u}, /* U+064A ARABIC LETTER YEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0650 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0651 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0652 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0653 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0654 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0655 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0656 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0657 */ + {0x0000u, 
0x0000u, 0x0000u, 0x0000u}, /* U+0658 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0659 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065A */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0660 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0661 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0662 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0663 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0664 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0665 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0666 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0667 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0668 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0669 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066A */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0670 */ + {0x0000u, 0x0000u, 0xFB51u, 0xFB50u}, /* U+0671 ARABIC LETTER ALEF WASLA */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0672 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0673 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0674 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0675 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0676 */ + {0x0000u, 0x0000u, 0x0000u, 0xFBDDu}, /* U+0677 ARABIC LETTER U WITH HAMZA ABOVE ISOLATED FORM */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0678 */ + {0xFB68u, 0xFB69u, 0xFB67u, 0xFB66u}, /* U+0679 ARABIC LETTER TTEH */ + {0xFB60u, 0xFB61u, 0xFB5Fu, 0xFB5Eu}, /* U+067A ARABIC LETTER TTEHEH */ + {0xFB54u, 0xFB55u, 0xFB53u, 0xFB52u}, /* 
U+067B ARABIC LETTER BEEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+067C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+067D */ + {0xFB58u, 0xFB59u, 0xFB57u, 0xFB56u}, /* U+067E ARABIC LETTER PEH */ + {0xFB64u, 0xFB65u, 0xFB63u, 0xFB62u}, /* U+067F ARABIC LETTER TEHEH */ + {0xFB5Cu, 0xFB5Du, 0xFB5Bu, 0xFB5Au}, /* U+0680 ARABIC LETTER BEHEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0681 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0682 */ + {0xFB78u, 0xFB79u, 0xFB77u, 0xFB76u}, /* U+0683 ARABIC LETTER NYEH */ + {0xFB74u, 0xFB75u, 0xFB73u, 0xFB72u}, /* U+0684 ARABIC LETTER DYEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0685 */ + {0xFB7Cu, 0xFB7Du, 0xFB7Bu, 0xFB7Au}, /* U+0686 ARABIC LETTER TCHEH */ + {0xFB80u, 0xFB81u, 0xFB7Fu, 0xFB7Eu}, /* U+0687 ARABIC LETTER TCHEHEH */ + {0x0000u, 0x0000u, 0xFB89u, 0xFB88u}, /* U+0688 ARABIC LETTER DDAL */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0689 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068A */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068B */ + {0x0000u, 0x0000u, 0xFB85u, 0xFB84u}, /* U+068C ARABIC LETTER DAHAL */ + {0x0000u, 0x0000u, 0xFB83u, 0xFB82u}, /* U+068D ARABIC LETTER DDAHAL */ + {0x0000u, 0x0000u, 0xFB87u, 0xFB86u}, /* U+068E ARABIC LETTER DUL */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0690 */ + {0x0000u, 0x0000u, 0xFB8Du, 0xFB8Cu}, /* U+0691 ARABIC LETTER RREH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0692 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0693 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0694 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0695 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0696 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0697 */ + {0x0000u, 0x0000u, 0xFB8Bu, 0xFB8Au}, /* U+0698 ARABIC LETTER JEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0699 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069A */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069B */ + {0x0000u, 0x0000u, 
0x0000u, 0x0000u}, /* U+069C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A0 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A1 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A2 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A3 */ + {0xFB6Cu, 0xFB6Du, 0xFB6Bu, 0xFB6Au}, /* U+06A4 ARABIC LETTER VEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A5 */ + {0xFB70u, 0xFB71u, 0xFB6Fu, 0xFB6Eu}, /* U+06A6 ARABIC LETTER PEHEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A7 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A8 */ + {0xFB90u, 0xFB91u, 0xFB8Fu, 0xFB8Eu}, /* U+06A9 ARABIC LETTER KEHEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AA */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AB */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AC */ + {0xFBD5u, 0xFBD6u, 0xFBD4u, 0xFBD3u}, /* U+06AD ARABIC LETTER NG */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AE */ + {0xFB94u, 0xFB95u, 0xFB93u, 0xFB92u}, /* U+06AF ARABIC LETTER GAF */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B0 */ + {0xFB9Cu, 0xFB9Du, 0xFB9Bu, 0xFB9Au}, /* U+06B1 ARABIC LETTER NGOEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B2 */ + {0xFB98u, 0xFB99u, 0xFB97u, 0xFB96u}, /* U+06B3 ARABIC LETTER GUEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B4 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B5 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B6 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B7 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B8 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B9 */ + {0x0000u, 0x0000u, 0xFB9Fu, 0xFB9Eu}, /* U+06BA ARABIC LETTER NOON GHUNNA */ + {0xFBA2u, 0xFBA3u, 0xFBA1u, 0xFBA0u}, /* U+06BB ARABIC LETTER RNOON */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BC */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BD */ + {0xFBACu, 0xFBADu, 0xFBABu, 0xFBAAu}, 
/* U+06BE ARABIC LETTER HEH DOACHASHMEE */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BF */ + {0x0000u, 0x0000u, 0xFBA5u, 0xFBA4u}, /* U+06C0 ARABIC LETTER HEH WITH YEH ABOVE */ + {0xFBA8u, 0xFBA9u, 0xFBA7u, 0xFBA6u}, /* U+06C1 ARABIC LETTER HEH GOAL */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C2 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C3 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C4 */ + {0x0000u, 0x0000u, 0xFBE1u, 0xFBE0u}, /* U+06C5 ARABIC LETTER KIRGHIZ OE */ + {0x0000u, 0x0000u, 0xFBDAu, 0xFBD9u}, /* U+06C6 ARABIC LETTER OE */ + {0x0000u, 0x0000u, 0xFBD8u, 0xFBD7u}, /* U+06C7 ARABIC LETTER U */ + {0x0000u, 0x0000u, 0xFBDCu, 0xFBDBu}, /* U+06C8 ARABIC LETTER YU */ + {0x0000u, 0x0000u, 0xFBE3u, 0xFBE2u}, /* U+06C9 ARABIC LETTER KIRGHIZ YU */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CA */ + {0x0000u, 0x0000u, 0xFBDFu, 0xFBDEu}, /* U+06CB ARABIC LETTER VE */ + {0xFBFEu, 0xFBFFu, 0xFBFDu, 0xFBFCu}, /* U+06CC ARABIC LETTER FARSI YEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CD */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CE */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CF */ + {0xFBE6u, 0xFBE7u, 0xFBE5u, 0xFBE4u}, /* U+06D0 ARABIC LETTER E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06D1 */ + {0x0000u, 0x0000u, 0xFBAFu, 0xFBAEu}, /* U+06D2 ARABIC LETTER YEH BARREE */ + {0x0000u, 0x0000u, 0xFBB1u, 0xFBB0u}, /* U+06D3 ARABIC LETTER YEH BARREE WITH HAMZA ABOVE */ +}; + +#define SHAPING_TABLE_FIRST 0x0621u +#define SHAPING_TABLE_LAST 0x06D3u + + +static const struct ligature_set_t { + Uint16 first; + struct ligature_pairs_t { + Uint16 second; + Uint16 ligature; + } ligatures[4]; +} ligature_table[] = +{ + { 0xFEDFu, { + { 0xFE88u, 0xFEF9u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW ISOLATED FORM */ + { 0xFE82u, 0xFEF5u }, /* ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM */ + { 0xFE8Eu, 0xFEFBu }, /* ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM */ + { 0xFE84u, 0xFEF7u }, /* ARABIC LIGATURE LAM 
WITH ALEF WITH HAMZA ABOVE ISOLATED FORM */ + }}, + { 0xFEE0u, { + { 0xFE88u, 0xFEFAu }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW FINAL FORM */ + { 0xFE82u, 0xFEF6u }, /* ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM */ + { 0xFE8Eu, 0xFEFCu }, /* ARABIC LIGATURE LAM WITH ALEF FINAL FORM */ + { 0xFE84u, 0xFEF8u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM */ + }}, +}; + + +#endif /* _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H */ + +/* == End of generated table == */ diff --git a/src/font/unicode-emoji-table.h b/src/font/unicode-emoji-tables.h similarity index 97% rename from src/font/unicode-emoji-table.h rename to src/font/unicode-emoji-tables.h index 38a515de..9f2c71e3 100644 --- a/src/font/unicode-emoji-table.h +++ b/src/font/unicode-emoji-tables.h @@ -2,9 +2,9 @@ /* * The following tables are generated by running: * - * ./gen-emoji-table.py emoji/emoji-data.txt + * ./gen-emoji-table.py * - * on file with this header: + * on emoji/emoji-data.txt file with this header: * * # emoji-data.txt * # Date: 2019-01-15, 12:10:05 GMT @@ -18,8 +18,8 @@ * # For documentation and usage, see http://www.unicode.org/reports/tr51 */ -#ifndef UNICODE_EMOJI_TABLE_H -#define UNICODE_EMOJI_TABLE_H +#ifndef _MGFONT_UNICODE_EMOJI_TABLES_H +#define _MGFONT_UNICODE_EMOJI_TABLES_H struct Interval { Uchar32 start, end; @@ -391,6 +391,6 @@ static const struct Interval _unicode_extended_pictographic_table[] = {0x1F947, 0x1FFFD}, }; -#endif /* UNICODE_EMOJI_TABLE_H */ +#endif /* _MGFONT_UNICODE_EMOJI_TABLES_H */ /* == End of generated table == */ diff --git a/src/font/unicode-emoji.c b/src/font/unicode-emoji.c index 08682d42..b747fd30 100644 --- a/src/font/unicode-emoji.c +++ b/src/font/unicode-emoji.c @@ -53,7 +53,7 @@ #ifdef _MGCHARSET_UNICODE #include "unicode-ops.h" -#include "unicode-emoji-table.h" +#include "unicode-emoji-tables.h" static int interval_compare(const void *key, const void *elt) { diff --git a/src/font/utils/gen-arabic-table.py 
b/src/font/utils/gen-arabic-table.py new file mode 100755 index 00000000..62de9876 --- /dev/null +++ b/src/font/utils/gen-arabic-table.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python + +# Copied from HarfBuzz +# https://github.com/harfbuzz/harfbuzz +# On 2019-03-07 +# +# Revised by Vincent Wei for MiniGUI 3.4 + +from __future__ import print_function, division, absolute_import + +import io, os.path, sys + +#if len (sys.argv) != 4: +# print ("usage: ./gen-arabic-table.py", file=sys.stderr) +# sys.exit (1) + +filenames = ['ucd/ArabicShaping.txt', 'ucd/UnicodeData.txt', 'ucd/Blocks.txt'] + +files = [io.open (x, encoding='utf-8') for x in filenames] + +headers = [[files[0].readline (), files[0].readline ()], [files[2].readline (), files[2].readline ()]] +headers.append (["UnicodeData.txt does not have a header."]) +while files[0].readline ().find ('##################') < 0: + pass + +blocks = {} +def read_blocks(f): + global blocks + for line in f: + + j = line.find ('#') + if j >= 0: + line = line[:j] + + fields = [x.strip () for x in line.split (';')] + if len (fields) == 1: + continue + + uu = fields[0].split ('..') + start = int (uu[0], 16) + if len (uu) == 1: + end = start + else: + end = int (uu[1], 16) + + t = fields[1] + + for u in range (start, end + 1): + blocks[u] = t + +def print_joining_table(f): + + values = {} + for line in f: + + if line[0] == '#': + continue + + fields = [x.strip () for x in line.split (';')] + if len (fields) == 1: + continue + + u = int (fields[0], 16) + + if fields[3] in ["ALAPH", "DALATH RISH"]: + value = "JOINING_GROUP_" + fields[3].replace(' ', '_') + else: + value = "JOINING_TYPE_" + fields[2] + values[u] = value + + short_value = {} + for value in set([v for v in values.values()] + ['JOINING_TYPE_X']): + short = ''.join(x[0] for x in value.split('_')[2:]) + assert short not in short_value.values() + short_value[value] = short + + print () + for value,short in short_value.items(): + print ("#define %s %s" % (short, value)) + + uu = 
sorted(values.keys()) + num = len(values) + all_blocks = set([blocks[u] for u in uu]) + + last = -100000 + ranges = [] + for u in uu: + if u - last <= 1+16*5: + ranges[-1][-1] = u + else: + ranges.append([u,u]) + last = u + + print () + print ("static const Uint8 joining_table[] =") + print ("{") + last_block = None + offset = 0 + for start,end in ranges: + + print () + print ("#define joining_offset_0x%04xu %d" % (start, offset)) + + for u in range(start, end+1): + + block = blocks.get(u, last_block) + value = values.get(u, "JOINING_TYPE_X") + + if block != last_block or u == start: + if u != start: + print () + if block in all_blocks: + print ("\n /* %s */" % block) + else: + print ("\n /* FILLER */") + last_block = block + if u % 32 != 0: + print () + print (" /* %04X */" % (u//32*32), " " * (u % 32), end="") + + if u % 32 == 0: + print () + print (" /* %04X */ " % u, end="") + print ("%s," % short_value[value], end="") + print () + + offset += end - start + 1 + print () + occupancy = num * 100. / offset + print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)) + print () + + print ("") + print ("static inline BOOL uc_in_range(Uchar32 uc, Uchar32 lo, Uchar32 hi)") + print ("{") + print (" /* The casts below are important as if T is smaller than int,") + print (" * the subtract results will become a signed int! 
*/") + print (" return (Uchar32)(uc - lo) <= (Uchar32)(hi - lo);") + print ("}") + print ("") + + page_bits = 12; + print () + print ("static unsigned int") + print ("joining_type (Uchar32 u)") + print ("{") + print (" switch (u >> %d)" % page_bits) + print (" {") + pages = set([u>>page_bits for u in [s for s,e in ranges]+[e for s,e in ranges]]) + for p in sorted(pages): + print (" case 0x%0Xu:" % p) + for (start,end) in ranges: + if p not in [start>>page_bits, end>>page_bits]: continue + offset = "joining_offset_0x%04xu" % start + print (" if (uc_in_range(u, 0x%04Xu, 0x%04Xu)) return joining_table[u - 0x%04Xu + %s];" % (start, end, start, offset)) + print (" break;") + print ("") + print (" default:") + print (" break;") + print (" }") + print (" return X;") + print ("}") + print () + for value,short in short_value.items(): + print ("#undef %s" % (short)) + print () + +def print_shaping_table(f): + + shapes = {} + ligatures = {} + names = {} + for line in f: + + fields = [x.strip () for x in line.split (';')] + if fields[5][0:1] != '<': + continue + + items = fields[5].split (' ') + shape, items = items[0][1:-1], tuple (int (x, 16) for x in items[1:]) + + if not shape in ['initial', 'medial', 'isolated', 'final']: + continue + + c = int (fields[0], 16) + if len (items) != 1: + # We only care about lam-alef ligatures + if len (items) != 2 or items[0] != 0x0644 or items[1] not in [0x0622, 0x0623, 0x0625, 0x0627]: + continue + + # Save ligature + names[c] = fields[1] + if items not in ligatures: + ligatures[items] = {} + ligatures[items][shape] = c + pass + else: + # Save shape + if items[0] not in names: + names[items[0]] = fields[1] + else: + names[items[0]] = os.path.commonprefix ([names[items[0]], fields[1]]).strip () + if items[0] not in shapes: + shapes[items[0]] = {} + shapes[items[0]][shape] = c + + print () + print ("static const Uint16 shaping_table[][4] =") + print ("{") + + keys = shapes.keys () + min_u, max_u = min (keys), max (keys) + for u in range 
(min_u, max_u + 1): + s = [shapes[u][shape] if u in shapes and shape in shapes[u] else 0 + for shape in ['initial', 'medial', 'final', 'isolated']] + value = ', '.join ("0x%04Xu" % c for c in s) + print (" {%s}, /* U+%04X %s */" % (value, u, names[u] if u in names else "")) + + print ("};") + print () + print ("#define SHAPING_TABLE_FIRST 0x%04Xu" % min_u) + print ("#define SHAPING_TABLE_LAST 0x%04Xu" % max_u) + print () + + ligas = {} + for pair in ligatures.keys (): + for shape in ligatures[pair]: + c = ligatures[pair][shape] + if shape == 'isolated': + liga = (shapes[pair[0]]['initial'], shapes[pair[1]]['final']) + elif shape == 'final': + liga = (shapes[pair[0]]['medial'], shapes[pair[1]]['final']) + else: + raise Exception ("Unexpected shape", shape) + if liga[0] not in ligas: + ligas[liga[0]] = [] + ligas[liga[0]].append ((liga[1], c)) + max_i = max (len (ligas[l]) for l in ligas) + print () + print ("static const struct ligature_set_t {") + print (" Uint16 first;") + print (" struct ligature_pairs_t {") + print (" Uint16 second;") + print (" Uint16 ligature;") + print (" } ligatures[%d];" % max_i) + print ("} ligature_table[] =") + print ("{") + for first in sorted (ligas.keys ()): + + print (" { 0x%04Xu, {" % (first)) + for liga in ligas[first]: + print (" { 0x%04Xu, 0x%04Xu }, /* %s */" % (liga[0], liga[1], names[liga[1]])) + print (" }},") + + print ("};") + print () + + + +print ("/* == Start of generated table == */") +print ("/*") +print (" * The following table is generated by running:") +print (" *") +print (" * ./gen-arabic-table.py") +print (" *") +print (" * on ucd/ArabicShaping.txt ucd/UnicodeData.txt ucd/Blocks.txt files with these headers:") +print (" *") +for h in headers: + for l in h: + print (" * %s" % (l.strip())) +print (" */") +print () +print ("#ifndef _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H") +print ("#define _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H") +print () + +read_blocks (files[2]) +print_joining_table (files[0]) 
+print_shaping_table (files[1]) + +print () +print ("#endif /* _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H */") +print () +print ("/* == End of generated table == */") diff --git a/src/font/utils/gen-emoji-table.py b/src/font/utils/gen-emoji-table.py index 00c6ed6f..637d0cb6 100755 --- a/src/font/utils/gen-emoji-table.py +++ b/src/font/utils/gen-emoji-table.py @@ -11,11 +11,11 @@ import sys import os.path from collections import OrderedDict -if len (sys.argv) != 2: - print("usage: ./gen-emoji-table.py emoji/emoji-data.txt", file=sys.stderr) - sys.exit (1) +#if len (sys.argv) != 2: +# print("usage: ./gen-emoji-table.py emoji/emoji-data.txt", file=sys.stderr) +# sys.exit (1) -f = open(sys.argv[1]) +f = open('emoji/emoji-data.txt') header = [f.readline () for _ in range(10)] ranges = OrderedDict() @@ -42,16 +42,16 @@ print("/* == Start of generated table == */") print("/*") print(" * The following tables are generated by running:") print(" *") -print(" * ./gen-emoji-table.py emoji/emoji-data.txt") +print(" * ./gen-emoji-table.py") print(" *") -print(" * on file with this header:") +print(" * on emoji/emoji-data.txt file with this header:") print(" *") for l in header: print(" * %s" % (l.strip())) print(" */") print() -print("#ifndef UNICODE_EMOJI_TABLE_H") -print("#define UNICODE_EMOJI_TABLE_H") +print("#ifndef _MGFONT_UNICODE_EMOJI_TABLES_H") +print("#define _MGFONT_UNICODE_EMOJI_TABLES_H") print() print("struct Interval {\n Uchar32 start, end;\n};") @@ -69,6 +69,6 @@ for typ,s in ranges.items(): print("};") print() -print("#endif /* UNICODE_EMOJI_TABLE_H */") +print("#endif /* _MGFONT_UNICODE_EMOJI_TABLES_H */") print() print("/* == End of generated table == */")