Arabic shaping tables

This commit is contained in:
Vincent Wei
2019-03-07 10:20:41 +08:00
parent 10d8b43c61
commit 76cf8a02af
6 changed files with 734 additions and 16 deletions

View File

@@ -15,7 +15,8 @@ SRC_FILES = charset.c charset-arabic.c charset-bidi.c \
HDR_FILES = charset.h rawbitmap.h varbitmap.h freetype2.h qpf.h se_minigui.h \
upf.h bitmapfont.h unicode-bidi-tables.h \
unicode-tables.h unicode-break.h unicode-decomp.h unicode-comp.h unicode-emoji-table.h
unicode-tables.h unicode-break.h unicode-decomp.h unicode-comp.h \
unicode-emoji-tables.h unicode-arabic-shaping-tables.h
EXTRA_DIST = convgbmap.c jisunimap.c $(SRC_FILES) $(HDR_FILES) \
makefile.ng makefile.msvc

View File

@@ -0,0 +1,433 @@
/* == Start of generated table == */
/*
* The following table is generated by running:
*
* ./gen-arabic-table.py
*
* on ucd/ArabicShaping.txt ucd/UnicodeData.txt ucd/Blocks.txt files with these headers:
*
* # ArabicShaping-11.0.0.txt
* # Date: 2018-02-21, 14:50:00 GMT [KW, RP]
* # Blocks-11.0.0.txt
* # Date: 2017-10-16, 24:39:00 GMT [KW]
* UnicodeData.txt does not have a header.
*/
#ifndef _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H
#define _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H
/*
 * Single-letter aliases that keep the table rows below compact.  The
 * JOINING_TYPE_* and JOINING_GROUP_* constants they expand to are not
 * defined in this header and must be visible wherever it is included.
 * The aliases are #undef'ed again after joining_type().
 */
#define X JOINING_TYPE_X
#define R JOINING_TYPE_R
#define T JOINING_TYPE_T
#define U JOINING_TYPE_U
#define A JOINING_GROUP_ALAPH
#define DR JOINING_GROUP_DALATH_RISH
#define L JOINING_TYPE_L
#define C JOINING_TYPE_C
#define D JOINING_TYPE_D
/*
 * Joining value of every code point in the covered ranges, stored back to
 * back.  Each range is introduced by a joining_offset_0x<first>u macro whose
 * value is the array index of the entry for code point <first>.
 *
 * The hex comment that starts a row is the 32-aligned address of that row,
 * not necessarily the code point of its first entry: a row that opens a
 * range or a block may begin part-way through (the Mongolian range, for
 * instance, starts at U+1806 on the row labelled 1800).
 *
 * Code points inside a range that have no entry of their own are filled
 * with X.
 */
static const Uint8 joining_table[] =
{
#define joining_offset_0x0600u 0
/* Arabic */
/* 0600 */ U,U,U,U,U,U,X,X,U,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0620 */ D,U,R,R,R,R,D,R,D,R,D,D,D,D,D,R,R,R,R,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 0640 */ C,D,D,D,D,D,D,D,R,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0660 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,D,X,R,R,R,U,R,R,R,D,D,D,D,D,D,D,D,
/* 0680 */ D,D,D,D,D,D,D,D,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,D,D,D,D,D,D,
/* 06A0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 06C0 */ R,D,D,R,R,R,R,R,R,R,R,R,D,R,D,R,D,D,R,R,X,R,X,X,X,X,X,X,X,U,X,X,
/* 06E0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,R,R,X,X,X,X,X,X,X,X,X,X,D,D,D,X,X,D,
/* Syriac */
/* 0700 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,A,X,D,D,D,DR,DR,R,R,R,D,D,D,D,R,D,
/* 0720 */ D,D,D,D,D,D,D,D,R,D,DR,D,R,D,D,DR,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0740 */ X,X,X,X,X,X,X,X,X,X,X,X,X,R,D,D,
/* Arabic Supplement */
/* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D,
/* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D,
/* FILLER */
/* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* NKo */
/* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,X,X,X,X,X,
/* FILLER */
/* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* Mandaic */
/* 0840 */ R,D,D,D,D,D,R,R,D,R,D,D,D,D,D,D,D,D,D,D,R,D,U,U,U,X,X,X,X,X,X,X,
/* Syriac Supplement */
/* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* Arabic Extended-A */
/* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,X,D,D,D,R,D,D,D,D,X,X,
/* 08C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 08E0 */ X,X,U,
#define joining_offset_0x1806u 739
/* Mongolian */
/* 1800 */ U,D,X,X,C,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 1820 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 1840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 1860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,
/* 1880 */ U,U,U,U,U,T,T,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 18A0 */ D,D,D,D,D,D,D,D,D,X,D,
#define joining_offset_0x200cu 904
/* General Punctuation */
/* 2000 */ U,C,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 2020 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 2040 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 2060 */ X,X,X,X,X,X,U,U,U,U,
#define joining_offset_0xa840u 998
/* Phags-pa */
/* A840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* A860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,L,U,
#define joining_offset_0x10ac0u 1050
/* Manichaean */
/* 10AC0 */ D,D,D,D,D,R,U,R,U,R,R,U,U,L,R,R,R,R,R,D,D,D,D,L,D,D,D,D,D,R,D,D,
/* 10AE0 */ D,R,U,U,R,X,X,X,X,X,X,D,D,D,D,R,
#define joining_offset_0x10b80u 1098
/* Psalter Pahlavi */
/* 10B80 */ D,R,D,R,R,R,D,D,D,R,D,D,R,D,R,R,D,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,
/* 10BA0 */ X,X,X,X,X,X,X,X,X,R,R,R,R,D,D,U,
#define joining_offset_0x10d00u 1146
/* Hanifi Rohingya */
/* 10D00 */ L,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 10D20 */ D,D,R,D,
#define joining_offset_0x10f30u 1182
/* Sogdian */
/* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D,
/* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R,
#define joining_offset_0x110bdu 1219
/* Kaithi */
/* 110A0 */ U,X,X,
/* 110C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,U,
#define joining_offset_0x1e900u 1236
/* Adlam */
/* 1E900 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 1E920 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,
/* 1E940 */ D,D,D,D,
}; /* Table items: 1304; occupancy: 56% */
/*
 * Return non-zero when lo <= uc <= hi.  Callers must pass lo <= hi.
 *
 * The two bounds checks are folded into one comparison: both differences
 * are cast back to Uchar32 (presumed to be an unsigned type; it is defined
 * outside this header), so a code point below `lo` wraps around to a large
 * value and fails the test.  Without the casts, integer promotion could turn
 * the subtraction into a signed int and break that wrap-around.
 */
static inline BOOL uc_in_range(Uchar32 uc, Uchar32 lo, Uchar32 hi)
{
    return (Uchar32)(uc - lo) <= (Uchar32)(hi - lo);
}
/*
 * Look up the joining value stored in joining_table[] for code point `u`.
 * Code points that fall outside every tabulated range yield X.
 */
static unsigned int
joining_type (Uchar32 u)
{
    /* One entry per contiguous run stored in joining_table[]: the first and
     * last code point of the run and the index of its first table entry.
     * The runs do not overlap, so at most one of them can match. */
    static const struct {
        Uchar32 first;
        Uchar32 last;
        unsigned int base;
    } runs[] = {
        { 0x0600u,  0x08E2u,  joining_offset_0x0600u  },
        { 0x1806u,  0x18AAu,  joining_offset_0x1806u  },
        { 0x200Cu,  0x2069u,  joining_offset_0x200cu  },
        { 0xA840u,  0xA873u,  joining_offset_0xa840u  },
        { 0x10AC0u, 0x10AEFu, joining_offset_0x10ac0u },
        { 0x10B80u, 0x10BAFu, joining_offset_0x10b80u },
        { 0x10D00u, 0x10D23u, joining_offset_0x10d00u },
        { 0x10F30u, 0x10F54u, joining_offset_0x10f30u },
        { 0x110BDu, 0x110CDu, joining_offset_0x110bdu },
        { 0x1E900u, 0x1E943u, joining_offset_0x1e900u },
    };
    unsigned int i;

    for (i = 0; i < sizeof(runs) / sizeof(runs[0]); i++) {
        if (uc_in_range(u, runs[i].first, runs[i].last))
            return joining_table[u - runs[i].first + runs[i].base];
    }

    return X;
}
#undef X
#undef R
#undef T
#undef U
#undef A
#undef DR
#undef L
#undef C
#undef D
/*
 * Presentation forms of Arabic letters.  Row i describes code point
 * SHAPING_TABLE_FIRST + i; the last row is SHAPING_TABLE_LAST.  The four
 * columns hold, in this order, the initial, medial, final and isolated
 * presentation form.  0x0000 marks a form for which no presentation-form
 * code point was found.
 *
 * The trailing comment on each row is the common prefix of the names of the
 * presentation forms found for that letter; it is empty for rows without
 * any form.
 */
static const Uint16 shaping_table[][4] =
{
{0x0000u, 0x0000u, 0x0000u, 0xFE80u}, /* U+0621 ARABIC LETTER HAMZA ISOLATED FORM */
{0x0000u, 0x0000u, 0xFE82u, 0xFE81u}, /* U+0622 ARABIC LETTER ALEF WITH MADDA ABOVE */
{0x0000u, 0x0000u, 0xFE84u, 0xFE83u}, /* U+0623 ARABIC LETTER ALEF WITH HAMZA ABOVE */
{0x0000u, 0x0000u, 0xFE86u, 0xFE85u}, /* U+0624 ARABIC LETTER WAW WITH HAMZA ABOVE */
{0x0000u, 0x0000u, 0xFE88u, 0xFE87u}, /* U+0625 ARABIC LETTER ALEF WITH HAMZA BELOW */
{0xFE8Bu, 0xFE8Cu, 0xFE8Au, 0xFE89u}, /* U+0626 ARABIC LETTER YEH WITH HAMZA ABOVE */
{0x0000u, 0x0000u, 0xFE8Eu, 0xFE8Du}, /* U+0627 ARABIC LETTER ALEF */
{0xFE91u, 0xFE92u, 0xFE90u, 0xFE8Fu}, /* U+0628 ARABIC LETTER BEH */
{0x0000u, 0x0000u, 0xFE94u, 0xFE93u}, /* U+0629 ARABIC LETTER TEH MARBUTA */
{0xFE97u, 0xFE98u, 0xFE96u, 0xFE95u}, /* U+062A ARABIC LETTER TEH */
{0xFE9Bu, 0xFE9Cu, 0xFE9Au, 0xFE99u}, /* U+062B ARABIC LETTER THEH */
{0xFE9Fu, 0xFEA0u, 0xFE9Eu, 0xFE9Du}, /* U+062C ARABIC LETTER JEEM */
{0xFEA3u, 0xFEA4u, 0xFEA2u, 0xFEA1u}, /* U+062D ARABIC LETTER HAH */
{0xFEA7u, 0xFEA8u, 0xFEA6u, 0xFEA5u}, /* U+062E ARABIC LETTER KHAH */
{0x0000u, 0x0000u, 0xFEAAu, 0xFEA9u}, /* U+062F ARABIC LETTER DAL */
{0x0000u, 0x0000u, 0xFEACu, 0xFEABu}, /* U+0630 ARABIC LETTER THAL */
{0x0000u, 0x0000u, 0xFEAEu, 0xFEADu}, /* U+0631 ARABIC LETTER REH */
{0x0000u, 0x0000u, 0xFEB0u, 0xFEAFu}, /* U+0632 ARABIC LETTER ZAIN */
{0xFEB3u, 0xFEB4u, 0xFEB2u, 0xFEB1u}, /* U+0633 ARABIC LETTER SEEN */
{0xFEB7u, 0xFEB8u, 0xFEB6u, 0xFEB5u}, /* U+0634 ARABIC LETTER SHEEN */
{0xFEBBu, 0xFEBCu, 0xFEBAu, 0xFEB9u}, /* U+0635 ARABIC LETTER SAD */
{0xFEBFu, 0xFEC0u, 0xFEBEu, 0xFEBDu}, /* U+0636 ARABIC LETTER DAD */
{0xFEC3u, 0xFEC4u, 0xFEC2u, 0xFEC1u}, /* U+0637 ARABIC LETTER TAH */
{0xFEC7u, 0xFEC8u, 0xFEC6u, 0xFEC5u}, /* U+0638 ARABIC LETTER ZAH */
{0xFECBu, 0xFECCu, 0xFECAu, 0xFEC9u}, /* U+0639 ARABIC LETTER AIN */
{0xFECFu, 0xFED0u, 0xFECEu, 0xFECDu}, /* U+063A ARABIC LETTER GHAIN */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063B */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063C */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063D */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063E */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063F */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0640 */
{0xFED3u, 0xFED4u, 0xFED2u, 0xFED1u}, /* U+0641 ARABIC LETTER FEH */
{0xFED7u, 0xFED8u, 0xFED6u, 0xFED5u}, /* U+0642 ARABIC LETTER QAF */
{0xFEDBu, 0xFEDCu, 0xFEDAu, 0xFED9u}, /* U+0643 ARABIC LETTER KAF */
{0xFEDFu, 0xFEE0u, 0xFEDEu, 0xFEDDu}, /* U+0644 ARABIC LETTER LAM */
{0xFEE3u, 0xFEE4u, 0xFEE2u, 0xFEE1u}, /* U+0645 ARABIC LETTER MEEM */
{0xFEE7u, 0xFEE8u, 0xFEE6u, 0xFEE5u}, /* U+0646 ARABIC LETTER NOON */
{0xFEEBu, 0xFEECu, 0xFEEAu, 0xFEE9u}, /* U+0647 ARABIC LETTER HEH */
{0x0000u, 0x0000u, 0xFEEEu, 0xFEEDu}, /* U+0648 ARABIC LETTER WAW */
{0xFBE8u, 0xFBE9u, 0xFEF0u, 0xFEEFu}, /* U+0649 ARABIC LETTER ALEF MAKSURA */
{0xFEF3u, 0xFEF4u, 0xFEF2u, 0xFEF1u}, /* U+064A ARABIC LETTER YEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064B */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064C */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064D */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064E */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064F */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0650 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0651 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0652 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0653 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0654 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0655 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0656 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0657 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0658 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0659 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065A */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065B */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065C */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065D */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065E */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065F */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0660 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0661 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0662 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0663 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0664 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0665 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0666 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0667 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0668 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0669 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066A */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066B */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066C */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066D */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066E */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066F */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0670 */
{0x0000u, 0x0000u, 0xFB51u, 0xFB50u}, /* U+0671 ARABIC LETTER ALEF WASLA */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0672 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0673 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0674 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0675 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0676 */
{0x0000u, 0x0000u, 0x0000u, 0xFBDDu}, /* U+0677 ARABIC LETTER U WITH HAMZA ABOVE ISOLATED FORM */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0678 */
{0xFB68u, 0xFB69u, 0xFB67u, 0xFB66u}, /* U+0679 ARABIC LETTER TTEH */
{0xFB60u, 0xFB61u, 0xFB5Fu, 0xFB5Eu}, /* U+067A ARABIC LETTER TTEHEH */
{0xFB54u, 0xFB55u, 0xFB53u, 0xFB52u}, /* U+067B ARABIC LETTER BEEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+067C */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+067D */
{0xFB58u, 0xFB59u, 0xFB57u, 0xFB56u}, /* U+067E ARABIC LETTER PEH */
{0xFB64u, 0xFB65u, 0xFB63u, 0xFB62u}, /* U+067F ARABIC LETTER TEHEH */
{0xFB5Cu, 0xFB5Du, 0xFB5Bu, 0xFB5Au}, /* U+0680 ARABIC LETTER BEHEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0681 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0682 */
{0xFB78u, 0xFB79u, 0xFB77u, 0xFB76u}, /* U+0683 ARABIC LETTER NYEH */
{0xFB74u, 0xFB75u, 0xFB73u, 0xFB72u}, /* U+0684 ARABIC LETTER DYEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0685 */
{0xFB7Cu, 0xFB7Du, 0xFB7Bu, 0xFB7Au}, /* U+0686 ARABIC LETTER TCHEH */
{0xFB80u, 0xFB81u, 0xFB7Fu, 0xFB7Eu}, /* U+0687 ARABIC LETTER TCHEHEH */
{0x0000u, 0x0000u, 0xFB89u, 0xFB88u}, /* U+0688 ARABIC LETTER DDAL */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0689 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068A */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068B */
{0x0000u, 0x0000u, 0xFB85u, 0xFB84u}, /* U+068C ARABIC LETTER DAHAL */
{0x0000u, 0x0000u, 0xFB83u, 0xFB82u}, /* U+068D ARABIC LETTER DDAHAL */
{0x0000u, 0x0000u, 0xFB87u, 0xFB86u}, /* U+068E ARABIC LETTER DUL */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068F */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0690 */
{0x0000u, 0x0000u, 0xFB8Du, 0xFB8Cu}, /* U+0691 ARABIC LETTER RREH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0692 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0693 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0694 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0695 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0696 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0697 */
{0x0000u, 0x0000u, 0xFB8Bu, 0xFB8Au}, /* U+0698 ARABIC LETTER JEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0699 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069A */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069B */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069C */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069D */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069E */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069F */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A0 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A1 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A2 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A3 */
{0xFB6Cu, 0xFB6Du, 0xFB6Bu, 0xFB6Au}, /* U+06A4 ARABIC LETTER VEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A5 */
{0xFB70u, 0xFB71u, 0xFB6Fu, 0xFB6Eu}, /* U+06A6 ARABIC LETTER PEHEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A7 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A8 */
{0xFB90u, 0xFB91u, 0xFB8Fu, 0xFB8Eu}, /* U+06A9 ARABIC LETTER KEHEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AA */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AB */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AC */
{0xFBD5u, 0xFBD6u, 0xFBD4u, 0xFBD3u}, /* U+06AD ARABIC LETTER NG */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AE */
{0xFB94u, 0xFB95u, 0xFB93u, 0xFB92u}, /* U+06AF ARABIC LETTER GAF */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B0 */
{0xFB9Cu, 0xFB9Du, 0xFB9Bu, 0xFB9Au}, /* U+06B1 ARABIC LETTER NGOEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B2 */
{0xFB98u, 0xFB99u, 0xFB97u, 0xFB96u}, /* U+06B3 ARABIC LETTER GUEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B4 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B5 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B6 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B7 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B8 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B9 */
{0x0000u, 0x0000u, 0xFB9Fu, 0xFB9Eu}, /* U+06BA ARABIC LETTER NOON GHUNNA */
{0xFBA2u, 0xFBA3u, 0xFBA1u, 0xFBA0u}, /* U+06BB ARABIC LETTER RNOON */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BC */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BD */
{0xFBACu, 0xFBADu, 0xFBABu, 0xFBAAu}, /* U+06BE ARABIC LETTER HEH DOACHASHMEE */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BF */
{0x0000u, 0x0000u, 0xFBA5u, 0xFBA4u}, /* U+06C0 ARABIC LETTER HEH WITH YEH ABOVE */
{0xFBA8u, 0xFBA9u, 0xFBA7u, 0xFBA6u}, /* U+06C1 ARABIC LETTER HEH GOAL */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C2 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C3 */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C4 */
{0x0000u, 0x0000u, 0xFBE1u, 0xFBE0u}, /* U+06C5 ARABIC LETTER KIRGHIZ OE */
{0x0000u, 0x0000u, 0xFBDAu, 0xFBD9u}, /* U+06C6 ARABIC LETTER OE */
{0x0000u, 0x0000u, 0xFBD8u, 0xFBD7u}, /* U+06C7 ARABIC LETTER U */
{0x0000u, 0x0000u, 0xFBDCu, 0xFBDBu}, /* U+06C8 ARABIC LETTER YU */
{0x0000u, 0x0000u, 0xFBE3u, 0xFBE2u}, /* U+06C9 ARABIC LETTER KIRGHIZ YU */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CA */
{0x0000u, 0x0000u, 0xFBDFu, 0xFBDEu}, /* U+06CB ARABIC LETTER VE */
{0xFBFEu, 0xFBFFu, 0xFBFDu, 0xFBFCu}, /* U+06CC ARABIC LETTER FARSI YEH */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CD */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CE */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CF */
{0xFBE6u, 0xFBE7u, 0xFBE5u, 0xFBE4u}, /* U+06D0 ARABIC LETTER E */
{0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06D1 */
{0x0000u, 0x0000u, 0xFBAFu, 0xFBAEu}, /* U+06D2 ARABIC LETTER YEH BARREE */
{0x0000u, 0x0000u, 0xFBB1u, 0xFBB0u}, /* U+06D3 ARABIC LETTER YEH BARREE WITH HAMZA ABOVE */
};
/* Code points described by the first and the last row of shaping_table[]. */
#define SHAPING_TABLE_FIRST 0x0621u
#define SHAPING_TABLE_LAST 0x06D3u
/*
 * Lam-alef ligatures, keyed on already-shaped presentation forms.
 *
 * `first` is a presentation form of LAM (U+0644): its initial form in the
 * first set and its medial form in the second.  Each pair maps `second`, the
 * final form of one of the alef variants U+0622, U+0623, U+0625 or U+0627,
 * to `ligature`, the code point of the combined glyph: the isolated
 * ligature when LAM is initial, the final ligature when LAM is medial.
 */
static const struct ligature_set_t {
Uint16 first;
struct ligature_pairs_t {
Uint16 second;
Uint16 ligature;
} ligatures[4];
} ligature_table[] =
{
{ 0xFEDFu, {
{ 0xFE88u, 0xFEF9u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW ISOLATED FORM */
{ 0xFE82u, 0xFEF5u }, /* ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM */
{ 0xFE8Eu, 0xFEFBu }, /* ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM */
{ 0xFE84u, 0xFEF7u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM */
}},
{ 0xFEE0u, {
{ 0xFE88u, 0xFEFAu }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW FINAL FORM */
{ 0xFE82u, 0xFEF6u }, /* ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM */
{ 0xFE8Eu, 0xFEFCu }, /* ARABIC LIGATURE LAM WITH ALEF FINAL FORM */
{ 0xFE84u, 0xFEF8u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM */
}},
};
#endif /* _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H */
/* == End of generated table == */

View File

@@ -2,9 +2,9 @@
/*
* The following tables are generated by running:
*
* ./gen-emoji-table.py emoji/emoji-data.txt
* ./gen-emoji-table.py
*
* on file with this header:
* on emoji/emoji-data.txt file with this header:
*
* # emoji-data.txt
* # Date: 2019-01-15, 12:10:05 GMT
@@ -18,8 +18,8 @@
* # For documentation and usage, see http://www.unicode.org/reports/tr51
*/
#ifndef UNICODE_EMOJI_TABLE_H
#define UNICODE_EMOJI_TABLE_H
#ifndef _MGFONT_UNICODE_EMOJI_TABLES_H
#define _MGFONT_UNICODE_EMOJI_TABLES_H
struct Interval {
Uchar32 start, end;
@@ -391,6 +391,6 @@ static const struct Interval _unicode_extended_pictographic_table[] =
{0x1F947, 0x1FFFD},
};
#endif /* UNICODE_EMOJI_TABLE_H */
#endif /* _MGFONT_UNICODE_EMOJI_TABLES_H */
/* == End of generated table == */

View File

@@ -53,7 +53,7 @@
#ifdef _MGCHARSET_UNICODE
#include "unicode-ops.h"
#include "unicode-emoji-table.h"
#include "unicode-emoji-tables.h"
static int interval_compare(const void *key, const void *elt)
{

View File

@@ -0,0 +1,284 @@
#!/usr/bin/env python
# Copied from HarfBuzz
# https://github.com/harfbuzz/harfbuzz
# On 2019-03-07
#
# Revised by Vincent Wei for MiniGUI 3.4
from __future__ import print_function, division, absolute_import
import io, os.path, sys
#if len (sys.argv) != 4:
# print ("usage: ./gen-arabic-table.py", file=sys.stderr)
# sys.exit (1)
# Input files, relative to the current working directory:
#   files[0] ArabicShaping.txt, files[1] UnicodeData.txt, files[2] Blocks.txt
filenames = ['ucd/ArabicShaping.txt', 'ucd/UnicodeData.txt', 'ucd/Blocks.txt']
files = [io.open (x, encoding='utf-8') for x in filenames]

# The first two lines of ArabicShaping.txt and of Blocks.txt are kept so that
# they can be echoed into the banner of the generated header.
headers = [[files[0].readline (), files[0].readline ()], [files[2].readline (), files[2].readline ()]]
headers.append (["UnicodeData.txt does not have a header."])

# Skip ArabicShaping.txt up to and including the first line that contains the
# '##################' rule.  readline() returns '' at end of file, so a file
# without the rule would otherwise keep this loop spinning forever; report
# the problem and stop instead.
while True:
    line = files[0].readline ()
    if not line:
        print ("%s: no '##################' separator line found" % filenames[0], file=sys.stderr)
        sys.exit (1)
    if line.find ('##################') >= 0:
        break
# Maps every code point covered by Blocks.txt to the name of its block.
blocks = {}


def read_blocks(f):
    """Fill the module-level `blocks` map from the lines of Blocks.txt.

    Each data line has the form ``START..END; Block Name`` (or a single code
    point instead of a range); anything after a '#' is ignored, and lines
    without a ';' are skipped.  Every code point from START to END inclusive
    is mapped to the block name.
    """
    global blocks
    for raw in f:
        # Drop a trailing comment, if any.
        content = raw.split ('#', 1)[0]

        parts = [part.strip () for part in content.split (';')]
        if len (parts) == 1:
            continue

        bounds = parts[0].split ('..')
        first = int (bounds[0], 16)
        last = first if len (bounds) == 1 else int (bounds[1], 16)

        name = parts[1]
        blocks.update ((cp, name) for cp in range (first, last + 1))
def print_joining_table(f):
    """Print the C joining table and its lookup code to stdout.

    `f` yields the data lines of ArabicShaping.txt, whose ';'-separated
    fields are: code point, name, joining type, joining group.  Lines that
    start with '#' and lines without a ';' are skipped.

    The module-level `blocks` map (code point -> block name) must already
    contain every code point listed in `f`; block names label the table rows.

    Output, in order: short #define aliases for the joining values,
    `joining_table[]` with one `joining_offset_0x....u` macro per contiguous
    range, the `uc_in_range()` helper, `joining_type()`, and the matching
    #undef lines.
    """
    values = {}
    for line in f:
        if line[0] == '#':
            continue
        fields = [x.strip () for x in line.split (';')]
        if len (fields) == 1:
            continue
        u = int (fields[0], 16)
        # ALAPH and DALATH RISH are recorded by joining group, every other
        # character by its joining type.
        if fields[3] in ["ALAPH", "DALATH RISH"]:
            value = "JOINING_GROUP_" + fields[3].replace(' ', '_')
        else:
            value = "JOINING_TYPE_" + fields[2]
        values[u] = value

    # Abbreviate each value to the initials of the words after the
    # JOINING_TYPE_ / JOINING_GROUP_ prefix (e.g. DALATH_RISH -> DR).
    short_value = {}
    for value in set(list(values.values()) + ['JOINING_TYPE_X']):
        short = ''.join(x[0] for x in value.split('_')[2:])
        assert short not in short_value.values()
        short_value[value] = short
    # Sorted so that regenerating the header gives the same line order on
    # every run instead of following set/dict iteration order.
    aliases = sorted(short_value.items())

    print ()
    for value,short in aliases:
        print ("#define %s %s" % (short, value))

    uu = sorted(values.keys())
    num = len(values)
    all_blocks = set([blocks[u] for u in uu])

    # Merge code points into ranges; a gap of up to 80 unlisted code points
    # is bridged (and later filled with X) rather than starting a new range.
    last = -100000
    ranges = []
    for u in uu:
        if u - last <= 1+16*5:
            ranges[-1][-1] = u
        else:
            ranges.append([u,u])
        last = u

    print ()
    print ("static const Uint8 joining_table[] =")
    print ("{")
    last_block = None
    offset = 0
    for start,end in ranges:
        print ()
        print ("#define joining_offset_0x%04xu %d" % (start, offset))
        for u in range(start, end+1):
            block = blocks.get(u, last_block)
            value = values.get(u, "JOINING_TYPE_X")
            if block != last_block or u == start:
                if u != start:
                    print ()
                if block in all_blocks:
                    print ("\n /* %s */" % block)
                else:
                    print ("\n /* FILLER */")
                last_block = block
                if u % 32 != 0:
                    # Row starts part-way through: label it with the
                    # 32-aligned address and pad up to the column of u.
                    print ()
                    print (" /* %04X */" % (u//32*32), " " * (u % 32), end="")
            if u % 32 == 0:
                print ()
                print (" /* %04X */ " % u, end="")
            print ("%s," % short_value[value], end="")
        print ()
        offset += end - start + 1
    print ()
    occupancy = num * 100. / offset
    print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
    print ()

    print ("")
    print ("static inline BOOL uc_in_range(Uchar32 uc, Uchar32 lo, Uchar32 hi)")
    print ("{")
    print (" /* The casts below are important as if T is smaller than int,")
    print (" * the subtract results will become a signed int! */")
    # The emitted body must use the parameter name `uc` declared above;
    # referring to `u` here produces a header that does not compile.
    print (" return (Uchar32)(uc - lo) <= (Uchar32)(hi - lo);")
    print ("}")
    print ("")

    page_bits = 12
    print ()
    print ("static unsigned int")
    print ("joining_type (Uchar32 u)")
    print ("{")
    print (" switch (u >> %d)" % page_bits)
    print (" {")
    pages = set([u>>page_bits for u in [s for s,e in ranges]+[e for s,e in ranges]])
    for p in sorted(pages):
        print (" case 0x%0Xu:" % p)
        for (start,end) in ranges:
            if p not in [start>>page_bits, end>>page_bits]: continue
            offset_macro = "joining_offset_0x%04xu" % start
            print (" if (uc_in_range(u, 0x%04Xu, 0x%04Xu)) return joining_table[u - 0x%04Xu + %s];" % (start, end, start, offset_macro))
        print (" break;")
        print ("")
    print (" default:")
    print (" break;")
    print (" }")
    print (" return X;")
    print ("}")
    print ()
    for value,short in aliases:
        print ("#undef %s" % (short))
    print ()
def print_shaping_table(f):
    """Print the C shaping and lam-alef ligature tables to stdout.

    `f` yields the lines of UnicodeData.txt.  Only characters whose
    decomposition field (the sixth ';'-separated field) is tagged <initial>,
    <medial>, <final> or <isolated> are used:

    * a decomposition to a single code point records a presentation form of
      that code point;
    * a decomposition to LAM (U+0644) followed by U+0622, U+0623, U+0625 or
      U+0627 records a lam-alef ligature; other sequences are ignored.
    """
    shapes = {}
    ligatures = {}
    names = {}
    for line in f:
        fields = [x.strip () for x in line.split (';')]
        decomposition = fields[5]
        if not decomposition.startswith ('<'):
            continue

        tokens = decomposition.split (' ')
        shape = tokens[0][1:-1]
        items = tuple (int (x, 16) for x in tokens[1:])
        if shape not in ['initial', 'medial', 'isolated', 'final']:
            continue

        c = int (fields[0], 16)
        if len (items) == 1:
            # Presentation form of a single letter.  The letter is named by
            # the common prefix of the names of all its forms.
            base = items[0]
            if base in names:
                names[base] = os.path.commonprefix ([names[base], fields[1]]).strip ()
            else:
                names[base] = fields[1]
            shapes.setdefault (base, {})[shape] = c
        else:
            # Of the multi-character forms only lam-alef ligatures are kept.
            if not (len (items) == 2 and items[0] == 0x0644 and items[1] in [0x0622, 0x0623, 0x0625, 0x0627]):
                continue
            names[c] = fields[1]
            ligatures.setdefault (items, {})[shape] = c

    print ()
    print ("static const Uint16 shaping_table[][4] =")
    print ("{")
    min_u, max_u = min (shapes.keys ()), max (shapes.keys ())
    for cp in range (min_u, max_u + 1):
        forms = shapes.get (cp, {})
        row = ', '.join ("0x%04Xu" % forms.get (kind, 0)
                         for kind in ['initial', 'medial', 'final', 'isolated'])
        print (" {%s}, /* U+%04X %s */" % (row, cp, names.get (cp, "")))
    print ("};")
    print ()
    print ("#define SHAPING_TABLE_FIRST 0x%04Xu" % min_u)
    print ("#define SHAPING_TABLE_LAST 0x%04Xu" % max_u)
    print ()

    # Key the ligatures on shaped forms: an isolated ligature pairs the
    # initial LAM with the final alef, a final ligature pairs the medial LAM
    # with the final alef.
    ligas = {}
    for pair, by_shape in ligatures.items ():
        for shape, c in by_shape.items ():
            if shape == 'isolated':
                lam_form = shapes[pair[0]]['initial']
            elif shape == 'final':
                lam_form = shapes[pair[0]]['medial']
            else:
                raise Exception ("Unexpected shape", shape)
            alef_form = shapes[pair[1]]['final']
            ligas.setdefault (lam_form, []).append ((alef_form, c))
    max_i = max (len (entries) for entries in ligas.values ())

    print ()
    print ("static const struct ligature_set_t {")
    print (" Uint16 first;")
    print (" struct ligature_pairs_t {")
    print (" Uint16 second;")
    print (" Uint16 ligature;")
    print (" } ligatures[%d];" % max_i)
    print ("} ligature_table[] =")
    print ("{")
    for first in sorted (ligas.keys ()):
        print (" { 0x%04Xu, {" % (first))
        for second, ligature in ligas[first]:
            print (" { 0x%04Xu, 0x%04Xu }, /* %s */" % (second, ligature, names[ligature]))
        print (" }},")
    print ("};")
    print ()
# Banner and provenance comment that open the generated header.
for text in (
        "/* == Start of generated table == */",
        "/*",
        " * The following table is generated by running:",
        " *",
        " * ./gen-arabic-table.py",
        " *",
        " * on ucd/ArabicShaping.txt ucd/UnicodeData.txt ucd/Blocks.txt files with these headers:",
        " *"):
    print (text)

# Echo the header lines captured from the input files.
for group in headers:
    for entry in group:
        print (" * %s" % (entry.strip()))

for text in (
        " */",
        "",
        "#ifndef _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H",
        "#define _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H",
        ""):
    print (text)

# Blocks.txt is read first because print_joining_table() looks up the block
# name of every code point it emits.
read_blocks (files[2])
print_joining_table (files[0])
print_shaping_table (files[1])

for text in (
        "",
        "#endif /* _MGFONT_UNICODE_ARABIC_SHAPING_TABLES_H */",
        "",
        "/* == End of generated table == */"):
    print (text)

View File

@@ -11,11 +11,11 @@ import sys
import os.path
from collections import OrderedDict
if len (sys.argv) != 2:
print("usage: ./gen-emoji-table.py emoji/emoji-data.txt", file=sys.stderr)
sys.exit (1)
#if len (sys.argv) != 2:
# print("usage: ./gen-emoji-table.py emoji/emoji-data.txt", file=sys.stderr)
# sys.exit (1)
f = open(sys.argv[1])
f = open('emoji/emoji-data.txt')
header = [f.readline () for _ in range(10)]
ranges = OrderedDict()
@@ -42,16 +42,16 @@ print("/* == Start of generated table == */")
print("/*")
print(" * The following tables are generated by running:")
print(" *")
print(" * ./gen-emoji-table.py emoji/emoji-data.txt")
print(" * ./gen-emoji-table.py")
print(" *")
print(" * on file with this header:")
print(" * on emoji/emoji-data.txt file with this header:")
print(" *")
for l in header:
print(" * %s" % (l.strip()))
print(" */")
print()
print("#ifndef UNICODE_EMOJI_TABLE_H")
print("#define UNICODE_EMOJI_TABLE_H")
print("#ifndef _MGFONT_UNICODE_EMOJI_TABLES_H")
print("#define _MGFONT_UNICODE_EMOJI_TABLES_H")
print()
print("struct Interval {\n Uchar32 start, end;\n};")
@@ -69,6 +69,6 @@ for typ,s in ranges.items():
print("};")
print()
print("#endif /* UNICODE_EMOJI_TABLE_H */")
print("#endif /* _MGFONT_UNICODE_EMOJI_TABLES_H */")
print()
print("/* == End of generated table == */")