diff --git a/src/font/unicode-shape-complex-indic-table.c b/src/font/unicode-shape-complex-indic-table.c new file mode 100644 index 00000000..387fada6 --- /dev/null +++ b/src/font/unicode-shape-complex-indic-table.c @@ -0,0 +1,490 @@ +/* == Start of generated table == */ +/* + * The following table is generated by running: + * + * ./gen-indic-table.py + * + * on ucd/IndicSyllabicCategory.txt ucd/IndicPositionalCategory.txt ucd/Blocks.txt files with these headers: + * + * # IndicSyllabicCategory-11.0.0.txt + * # Date: 2018-05-21, 18:33:00 GMT [KW, RP] + * # IndicPositionalCategory-11.0.0.txt + * # Date: 2018-02-05, 16:21:00 GMT [KW, RP] + * # Blocks-11.0.0.txt + * # Date: 2017-10-16, 24:39:00 GMT [KW] + */ + +#include "unicode-shape-complex-indic.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-macros" + +#define ISC_A INDIC_SYLLABIC_CATEGORY_AVAGRAHA /* 16 chars; Avagraha */ +#define ISC_Bi INDIC_SYLLABIC_CATEGORY_BINDU /* 83 chars; Bindu */ +#define ISC_BJN INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER /* 20 chars; Brahmi_Joining_Number */ +#define ISC_Ca INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK /* 58 chars; Cantillation_Mark */ +#define ISC_C INDIC_SYLLABIC_CATEGORY_CONSONANT /* 2110 chars; Consonant */ +#define ISC_CD INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD /* 10 chars; Consonant_Dead */ +#define ISC_CF INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL /* 67 chars; Consonant_Final */ +#define ISC_CHL INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER /* 5 chars; Consonant_Head_Letter */ +#define ISC_CIP INDIC_SYLLABIC_CATEGORY_CONSONANT_INITIAL_POSTFIXED /* 1 chars; Consonant_Initial_Postfixed */ +#define ISC_CK INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER /* 2 chars; Consonant_Killer */ +#define ISC_CM INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL /* 28 chars; Consonant_Medial */ +#define ISC_CP INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER /* 21 chars; Consonant_Placeholder */ +#define ISC_CPR INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA /* 2 chars; Consonant_Preceding_Repha */ +#define ISC_CPrf INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED /* 7 chars; Consonant_Prefixed */ +#define ISC_CS INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED /* 95 chars; Consonant_Subjoined */ +#define ISC_CSR INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA /* 4 chars; Consonant_Succeeding_Repha */ +#define ISC_CWS INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER /* 6 chars; Consonant_With_Stacker */ +#define ISC_GM INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK /* 3 chars; Gemination_Mark */ +#define ISC_IS INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER /* 11 chars; Invisible_Stacker */ +#define ISC_ZWJ INDIC_SYLLABIC_CATEGORY_JOINER /* 1 chars; Joiner */ +#define ISC_ML INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER /* 1 chars; Modifying_Letter */ +#define ISC_ZWNJ INDIC_SYLLABIC_CATEGORY_NON_JOINER /* 1 chars; Non_Joiner */ +#define ISC_N INDIC_SYLLABIC_CATEGORY_NUKTA /* 30 chars; Nukta */ +#define ISC_Nd INDIC_SYLLABIC_CATEGORY_NUMBER /* 480 chars; Number */ +#define ISC_NJ INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER /* 1 chars; Number_Joiner */ +#define ISC_x INDIC_SYLLABIC_CATEGORY_OTHER /* 1 chars; Other */ +#define ISC_PK INDIC_SYLLABIC_CATEGORY_PURE_KILLER /* 21 chars; Pure_Killer */ +#define ISC_RS INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER /* 2 chars; Register_Shifter */ +#define ISC_SM INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER /* 25 chars; Syllable_Modifier */ +#define ISC_TL INDIC_SYLLABIC_CATEGORY_TONE_LETTER /* 7 chars; Tone_Letter */ +#define ISC_TM INDIC_SYLLABIC_CATEGORY_TONE_MARK /* 42 chars; Tone_Mark */ +#define ISC_V INDIC_SYLLABIC_CATEGORY_VIRAMA /* 25 chars; Virama */ +#define ISC_Vs INDIC_SYLLABIC_CATEGORY_VISARGA /* 36 chars; Visarga */ +#define ISC_Vo INDIC_SYLLABIC_CATEGORY_VOWEL /* 30 chars; Vowel */ +#define ISC_M INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT /* 660 chars; Vowel_Dependent */ +#define ISC_VI INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT /* 464 chars; Vowel_Independent */ + +#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 340 chars; Bottom */ +#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */ +#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 2 chars; Bottom_And_Right */ +#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 59 chars; Left */ +#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 21 chars; Left_And_Right */ +#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */ +#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */ +#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 276 chars; Right */ +#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 393 chars; Top */ +#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */ +#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */ +#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 6 chars; Top_And_Left */ +#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 4 chars; Top_And_Left_And_Right */ +#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 13 chars; Top_And_Right */ +#define IMC_VOL INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT /* 19 chars; Visual_Order_Left */ +#pragma GCC diagnostic pop + +#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M) + + +static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = { + + +#define indic_offset_0x0028u 0 + + + /* Basic Latin */ + + /* 0028 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), _(x,x), _(x,x), + /* 0030 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0038 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x00b0u 24 + + + /* Latin-1 Supplement */ + + /* 00B0 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 00B8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 00C0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 00C8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 00D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), + +#define indic_offset_0x0900u 64 + + + /* Devanagari */ + + /* 0900 */ _(Bi,T), _(Bi,T), _(Bi,T), _(Vs,R), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 0908 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 0910 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0918 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0920 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0928 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0930 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(A,x), _(M,R), _(M,L), + /* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T), + /* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(V,B), _(M,L), _(M,R), + /* 0950 */ _(x,x), _(Ca,T), _(Ca,B), _(x,T), _(x,T), _(M,T), _(M,B), _(M,B), + /* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0960 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0968 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0970 */ _(x,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + + /* Bengali */ + + /* 0980 */ _(CP,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0988 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x), + /* 0990 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 09A0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 09A8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 09B0 */ _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), + /* 09B8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L), + /* 09C0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L), + /* 09C8 */ _(M,L), _(x,x), _(x,x), _(M,LR), _(M,LR), _(V,B), _(CD,x), _(x,x), + /* 09D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), + /* 09D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), + /* 09E0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 09E8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 09F0 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 09F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(Bi,x), _(x,x), _(SM,T), _(x,x), + + /* Gurmukhi */ + + /* 0A00 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0A08 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), + /* 0A10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0A18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0A20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0A28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0A30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), + /* 0A38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(x,x), _(M,R), _(M,L), + /* 0A40 */ _(M,R), _(M,B), _(M,B), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), + /* 0A48 */ _(M,T), _(x,x), _(x,x), _(M,T), _(M,T), _(V,B), _(x,x), _(x,x), + /* 0A50 */ _(x,x), _(Ca,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0A58 */ _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), + /* 0A60 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0A68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0A70 */ _(Bi,T), _(GM,T), _(CP,x), _(CP,x), _(x,x), _(CM,B), _(x,x), _(x,x), + /* 0A78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Gujarati */ + + /* 0A80 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0A88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), + /* 0A90 */ _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0A98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0AA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0AA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0AB0 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), + /* 0AB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L), + /* 0AC0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(x,x), _(M,T), + /* 0AC8 */ _(M,T), _(M,TR), _(x,x), _(M,R), _(M,R), _(V,B), _(x,x), _(x,x), + /* 0AD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0AD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0AE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0AE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0AF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0AF8 */ _(x,x), _(C,x), _(Ca,T), _(Ca,T), _(Ca,T), _(N,T), _(N,T), _(N,T), + + /* Oriya */ + + /* 0B00 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0B08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x), + /* 0B10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0B18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0B20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0B28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0B30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), + /* 0B38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T), + /* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L), + /* 0B48 */ _(M,TL), _(x,x), _(x,x), _(M,LR),_(M,TLR), _(V,B), _(x,x), _(x,x), + /* 0B50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,TR), + /* 0B58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), + /* 0B60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0B68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0B70 */ _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0B78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Tamil */ + + /* 0B80 */ _(x,x), _(x,x), _(Bi,T), _(ML,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0B88 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(VI,x), + /* 0B90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(x,x), _(x,x), + /* 0B98 */ _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(C,x), _(C,x), + /* 0BA0 */ _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), + /* 0BA8 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), + /* 0BB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0BB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), + /* 0BC0 */ _(M,T), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(M,L), _(M,L), + /* 0BC8 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T), _(x,x), _(x,x), + /* 0BD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), + /* 0BD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0BE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0BE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0BF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0BF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Telugu */ + + /* 0C00 */ _(Bi,T), _(Bi,R), _(Bi,R), _(Vs,R), _(Bi,T), _(VI,x), _(VI,x), _(VI,x), + /* 0C08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), + /* 0C10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0C18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0C20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0C28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0C30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0C38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(A,x), _(M,T), _(M,T), + /* 0C40 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,T), + /* 0C48 */ _(M,TB), _(x,x), _(M,T), _(M,T), _(M,T), _(V,T), _(x,x), _(x,x), + /* 0C50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,B), _(x,x), + /* 0C58 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0C60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0C68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0C70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0C78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Kannada */ + + /* 0C80 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0C88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), + /* 0C90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0CA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0CA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0CB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), + /* 0CB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T), + /* 0CC0 */ _(M,TR), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,TR), + /* 0CC8 */ _(M,TR), _(x,x), _(M,TR), _(M,TR), _(M,T), _(V,T), _(x,x), _(x,x), + /* 0CD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), + /* 0CD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(x,x), + /* 0CE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0CE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0CF0 */ _(x,x),_(CWS,x),_(CWS,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0CF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Malayalam */ + + /* 0D00 */ _(Bi,T), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0D08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), + /* 0D10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0D20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0D28 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0D30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0D38 */ _(C,x), _(C,x), _(C,x), _(PK,T), _(PK,T), _(A,x), _(M,R), _(M,R), + /* 0D40 */ _(M,R), _(M,R), _(M,R), _(M,B), _(M,B), _(x,x), _(M,L), _(M,L), + /* 0D48 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T),_(CPR,x), _(x,x), + /* 0D50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(M,R), + /* 0D58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), + /* 0D60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0D68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0D70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0D78 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), + + /* Sinhala */ + + /* 0D80 */ _(x,x), _(x,x), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0D88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 0D90 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), + /* 0D98 */ _(x,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0DA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0DA8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0DB0 */ _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0DB8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), + /* 0DC0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), + /* 0DC8 */ _(x,x), _(x,x), _(V,T), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), + /* 0DD0 */ _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), _(x,x), _(M,B), _(x,x), + /* 0DD8 */ _(M,R), _(M,L), _(M,TL), _(M,L), _(M,LR),_(M,TLR), _(M,LR), _(M,R), + /* 0DE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0DE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0DF0 */ _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x1000u 1336 + + + /* Myanmar */ + + /* 1000 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1008 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1010 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1018 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1020 */ _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 1028 */ _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), + /* 1030 */ _(M,B), _(M,L), _(M,T), _(M,T), _(M,T), _(M,T), _(Bi,T), _(TM,B), + /* 1038 */ _(Vs,R), _(IS,x), _(PK,T), _(CM,R), _(CM,x), _(CM,B), _(CM,B), _(C,x), + /* 1040 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 1048 */ _(Nd,x), _(Nd,x), _(x,x), _(CP,x), _(x,x), _(x,x), _(CP,x), _(x,x), + /* 1050 */ _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), + /* 1058 */ _(M,B), _(M,B), _(C,x), _(C,x), _(C,x), _(C,x), _(CM,B), _(CM,B), + /* 1060 */ _(CM,B), _(C,x), _(M,R), _(TM,R), _(TM,R), _(C,x), _(C,x), _(M,R), + /* 1068 */ _(M,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(C,x), _(C,x), + /* 1070 */ _(C,x), _(M,T), _(M,T), _(M,T), _(M,T), _(C,x), _(C,x), _(C,x), + /* 1078 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1080 */ _(C,x), _(C,x), _(CM,B), _(M,R), _(M,L), _(M,T), _(M,T), _(TM,R), + /* 1088 */ _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,B), _(C,x), _(TM,R), + /* 1090 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 1098 */ _(Nd,x), _(Nd,x), _(TM,R), _(TM,R), _(M,R), _(M,T), _(x,x), _(x,x), + +#define indic_offset_0x1780u 1496 + + + /* Khmer */ + + /* 1780 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1788 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1790 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1798 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 17A0 */ _(C,x), _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 17A8 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 17B0 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(M,R), _(M,T), + /* 17B8 */ _(M,T), _(M,T), _(M,T), _(M,B), _(M,B), _(M,B), _(M,TL),_(M,TLR), + /* 17C0 */ _(M,LR), _(M,L), _(M,L), _(M,L), _(M,LR), _(M,LR), _(Bi,T), _(Vs,R), + /* 17C8 */ _(M,R), _(RS,T), _(RS,T), _(SM,T),_(CSR,T), _(CK,T), _(SM,T), _(SM,T), + /* 17D0 */ _(SM,T), _(PK,T), _(IS,x), _(SM,T), _(x,x), _(x,x), _(x,x), _(x,x), + /* 17D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(A,x), _(SM,T), _(x,x), _(x,x), + /* 17E0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 17E8 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x1cd0u 1608 + + + /* Vedic Extensions */ + + /* 1CD0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(x,x), _(Ca,O), _(Ca,B), _(Ca,B), _(Ca,B), + /* 1CD8 */ _(Ca,B), _(Ca,B), _(Ca,T), _(Ca,T), _(Ca,B), _(Ca,B), _(Ca,B), _(Ca,B), + /* 1CE0 */ _(Ca,T), _(Ca,R), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), + /* 1CE8 */ _(x,O), _(x,x), _(x,x), _(x,x), _(x,x), _(x,B), _(x,x), _(x,x), + /* 1CF0 */ _(x,x), _(x,x), _(Vs,x), _(Vs,x), _(Ca,T),_(CWS,x),_(CWS,x), _(Ca,R), + /* 1CF8 */ _(Ca,x), _(Ca,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x2008u 1656 + + + /* General Punctuation */ + + /* 2008 */ _(x,x), _(x,x), _(x,x), _(x,x),_(ZWNJ,x),_(ZWJ,x), _(x,x), _(x,x), + /* 2010 */ _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x2070u 1672 + + + /* Superscripts and Subscripts */ + + /* 2070 */ _(x,x), _(x,x), _(x,x), _(x,x), _(SM,x), _(x,x), _(x,x), _(x,x), + /* 2078 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 2080 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0xa8e0u 1696 + + + /* Devanagari Extended */ + + /* A8E0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), + /* A8E8 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), + /* A8F0 */ _(Ca,T), _(Ca,T), _(Bi,x), _(Bi,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* A8F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(M,T), + +#define indic_offset_0xa9e0u 1728 + + + /* Myanmar Extended-B */ + + /* A9E0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(M,T), _(x,x), _(C,x), + /* A9E8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* A9F0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* A9F8 */ _(Nd,x), _(Nd,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), + +#define indic_offset_0xaa60u 1760 + + + /* Myanmar Extended-A */ + + /* AA60 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* AA68 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* AA70 */ _(x,x), _(C,x), _(C,x), _(C,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), + /* AA78 */ _(x,x), _(x,x), _(C,x), _(TM,R), _(TM,T), _(TM,R), _(C,x), _(C,x), + +}; /* Table items: 1792; occupancy: 70% */ + +INDIC_TABLE_ELEMENT_TYPE +_unicode_indic_get_categories (Uchar32 u) +{ + switch (u >> 12) + { + case 0x0u: + if (unlikely (u == 0x00A0u)) return _(CP,x); + if (uc_in_range(u, 0x0028u, 0x003Fu)) return indic_table[u - 0x0028u + indic_offset_0x0028u]; + if (uc_in_range(u, 0x00B0u, 0x00D7u)) return indic_table[u - 0x00B0u + indic_offset_0x00b0u]; + if (uc_in_range(u, 0x0900u, 0x0DF7u)) return indic_table[u - 0x0900u + indic_offset_0x0900u]; + break; + + case 0x1u: + if (uc_in_range(u, 0x1000u, 0x109Fu)) return indic_table[u - 0x1000u + indic_offset_0x1000u]; + if (uc_in_range(u, 0x1780u, 0x17EFu)) return indic_table[u - 0x1780u + indic_offset_0x1780u]; + if (uc_in_range(u, 0x1CD0u, 0x1CFFu)) return indic_table[u - 0x1CD0u + indic_offset_0x1cd0u]; + break; + + case 0x2u: + if (unlikely (u == 0x25CCu)) return _(CP,x); + if (uc_in_range(u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u]; + if (uc_in_range(u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u]; + break; + + case 0xAu: + if (uc_in_range(u, 0xA8E0u, 0xA8FFu)) return indic_table[u - 0xA8E0u + indic_offset_0xa8e0u]; + if (uc_in_range(u, 0xA9E0u, 0xA9FFu)) return indic_table[u - 0xA9E0u + indic_offset_0xa9e0u]; + if (uc_in_range(u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u]; + break; + + default: + break; + } + return _(x,x); +} + +#undef _ +#undef ISC_A +#undef ISC_Bi +#undef ISC_BJN +#undef ISC_Ca +#undef ISC_C +#undef ISC_CD +#undef ISC_CF +#undef ISC_CHL +#undef ISC_CIP +#undef ISC_CK +#undef ISC_CM +#undef ISC_CP +#undef ISC_CPR +#undef ISC_CPrf +#undef ISC_CS +#undef ISC_CSR +#undef ISC_CWS +#undef ISC_GM +#undef ISC_IS +#undef ISC_ZWJ +#undef ISC_ML +#undef ISC_ZWNJ +#undef ISC_N +#undef ISC_Nd +#undef ISC_NJ +#undef ISC_x +#undef ISC_PK +#undef ISC_RS +#undef ISC_SM +#undef ISC_TL +#undef ISC_TM +#undef ISC_V +#undef ISC_Vs +#undef ISC_Vo +#undef ISC_M +#undef ISC_VI +#undef IMC_B +#undef IMC_BL +#undef IMC_BR +#undef IMC_L +#undef IMC_LR +#undef IMC_x +#undef IMC_O +#undef IMC_R +#undef IMC_T +#undef IMC_TB +#undef IMC_TBR +#undef IMC_TL +#undef IMC_TLR +#undef IMC_TR +#undef IMC_VOL + +/* == End of generated table == */ diff --git a/src/font/utils/IndicVowelConstraints.txt b/src/font/utils/IndicVowelConstraints.txt new file mode 100644 index 00000000..146ae1cb --- /dev/null +++ b/src/font/utils/IndicVowelConstraints.txt @@ -0,0 +1,97 @@ +# Copied from https://docs.microsoft.com/en-us/typography/script-development/use +# On October 23, 2018; with documentd dated 02/07/2018. + + 0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E + 0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA + 0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I + 0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U + 090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E + 090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E + 090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E + 0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O + 0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E + 0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O + 0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E + 0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O + 0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E + 0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU + 0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI + 0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E + 0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE + 0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE + 0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE + 0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW + 0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE + 0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE + 0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA + 098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R + 098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L + 0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA + 0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I + 0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II + 0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U + 0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU + 0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE + 0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI + 0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO + 0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU + 0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA + 0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E + 0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E + 0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI + 0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O + 0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O + 0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E + 0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU + 0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI + 0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA + 0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA + 0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK + 0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK + 0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK + 0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU + 0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK + 0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK + 0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK + 0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA + 0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU + 0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA + 0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK + 0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK + 0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E + 0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA + 0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK + 0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA + 0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA + 0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA + 0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA + 0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA + 0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA + 0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA + 0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA + 0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA + 0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA + 0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA + 11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA + 1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R + 1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E + 11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA + 11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E + 11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O + 11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU + 112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA + 112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E + 112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI + 112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O + 112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU + 11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA + 114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R + 114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR + 1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E + 1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E + 11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E + 11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI + 11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E + 11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI diff --git a/src/font/utils/fetch-ucd.sh b/src/font/utils/fetch-ucd.sh index aaaa7279..c79fedef 100755 --- a/src/font/utils/fetch-ucd.sh +++ b/src/font/utils/fetch-ucd.sh @@ -1,7 +1,7 @@ #!/bin/bash url='https://www.unicode.org/Public/11.0.0/ucd/' -files='UnicodeData.txt IndicPositionalCategory.txt IndicSyllabicCategory.txt ArabicShaping.txt BidiBrackets.txt BidiMirroring.txt Blocks.txt' +files='Scripts.txt UnicodeData.txt IndicPositionalCategory.txt IndicSyllabicCategory.txt ArabicShaping.txt BidiBrackets.txt BidiMirroring.txt Blocks.txt' if test ! -d ucd; then mkdir ucd diff --git a/src/font/utils/gen-indic-table.py b/src/font/utils/gen-indic-table.py new file mode 100755 index 00000000..6bf64e80 --- /dev/null +++ b/src/font/utils/gen-indic-table.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python + +# Copied from HarfBuzz +# https://github.com/harfbuzz/harfbuzz +# On 2019-03-07 +# +# Revised by Vincent Wei for MiniGUI 3.4 + +from __future__ import print_function, division, absolute_import + +import io, sys + +#if len (sys.argv) != 4: +# print ("usage: ./gen-indic-table.py", file=sys.stderr) +# sys.exit (1) + +ALLOWED_SINGLES = [0x00A0, 0x25CC] +ALLOWED_BLOCKS = [ + 'Basic Latin', + 'Latin-1 Supplement', + 'Devanagari', + 'Bengali', + 'Gurmukhi', + 'Gujarati', + 'Oriya', + 'Tamil', + 'Telugu', + 'Kannada', + 'Malayalam', + 'Sinhala', + 'Myanmar', + 'Khmer', + 'Vedic Extensions', + 'General Punctuation', + 'Superscripts and Subscripts', + 'Devanagari Extended', + 'Myanmar Extended-B', + 'Myanmar Extended-A', +] + +filenames = ['ucd/IndicSyllabicCategory.txt', 'ucd/IndicPositionalCategory.txt', 'ucd/Blocks.txt'] + +files = [io.open (x, encoding='utf-8') for x in filenames] + +headers = [[f.readline () for i in range (2)] for f in files] + +data = [{} for f in files] +values = [{} for f in files] +for i, f in enumerate (files): + for line in f: + + j = line.find ('#') + if j >= 0: + line = line[:j] + + fields = [x.strip () for x in line.split (';')] + if len (fields) == 1: + continue + + uu = fields[0].split ('..') + start = int (uu[0], 16) + if len (uu) == 1: + end = start + else: + end = int (uu[1], 16) + + t = fields[1] + + for u in range (start, end + 1): + data[i][u] = t + values[i][t] = values[i].get (t, 0) + end - start + 1 + +# Merge data into one dict: +defaults = ('Other', 'Not_Applicable', 'No_Block') +for i,v in enumerate (defaults): + values[i][v] = values[i].get (v, 0) + 1 +combined = {} +for i,d in enumerate (data): + for u,v in d.items (): + if i == 2 and not u in combined: + continue + if not u in combined: + combined[u] = list (defaults) + combined[u][i] = v +combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS} +data = combined +del combined +num = len (data) + +for u in [0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D3]: + if data[u][0] == 'Other': + data[u][0] = "Vowel_Dependent" + +# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out +singles = {} +for u in ALLOWED_SINGLES: + singles[u] = data[u] + del data[u] + +print ("/* == Start of generated table == */") +print ("/*") +print (" * The following table is generated by running:") +print (" *") +print (" * ./gen-indic-table.py") +print (" *") +print (" * on ucd/IndicSyllabicCategory.txt ucd/IndicPositionalCategory.txt ucd/Blocks.txt files with these headers:") +print (" *") +for h in headers: + for l in h: + print (" * %s" % (l.strip())) +print (" */") +print () +print ('#include "unicode-shape-complex-indic.h"') +print () + +# Shorten values +short = [{ + "Bindu": 'Bi', + "Cantillation_Mark": 'Ca', + "Joiner": 'ZWJ', + "Non_Joiner": 'ZWNJ', + "Number": 'Nd', + "Visarga": 'Vs', + "Vowel": 'Vo', + "Vowel_Dependent": 'M', + "Consonant_Prefixed": 'CPrf', + "Other": 'x', +},{ + "Not_Applicable": 'x', +}] +all_shorts = [{},{}] + +# Add some of the values, to make them more readable, and to avoid duplicates + + +for i in range (2): + for v,s in short[i].items (): + all_shorts[i][s] = v + +what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"] +what_short = ["ISC", "IMC"] +print ('#pragma GCC diagnostic push') +print ('#pragma GCC diagnostic ignored "-Wunused-macros"') +for i in range (2): + print () + vv = sorted (values[i].keys ()) + for v in vv: + v_no_and = v.replace ('_And_', '_') + if v in short[i]: + s = short[i][v] + else: + s = ''.join ([c for c in v_no_and if ord ('A') <= ord (c) <= ord ('Z')]) + if s in all_shorts[i]: + raise Exception ("Duplicate short value alias", v, all_shorts[i][s]) + all_shorts[i][s] = v + short[i][v] = s + print ("#define %s_%s %s_%s %s/* %3d chars; %s */" % + (what_short[i], s, what[i], v.upper (), + ' '* ((48-1 - len (what[i]) - 1 - len (v)) // 8), + values[i][v], v)) +print ('#pragma GCC diagnostic pop') +print () +print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)") +print () +print () + +total = 0 +used = 0 +last_block = None +def print_block (block, start, end, data): + global total, used, last_block + if block and block != last_block: + print () + print () + print (" /* %s */" % block) + num = 0 + assert start % 8 == 0 + assert (end+1) % 8 == 0 + for u in range (start, end+1): + if u % 8 == 0: + print () + print (" /* %04X */" % u, end="") + if u in data: + num += 1 + d = data.get (u, defaults) + print ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])), end="") + + total += end - start + 1 + used += num + if block: + last_block = block + +uu = sorted (data.keys ()) + +last = -100000 +num = 0 +offset = 0 +starts = [] +ends = [] +print ("static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {") +for u in uu: + if u <= last: + continue + block = data[u][2] + + start = u//8*8 + end = start+1 + while end in uu and block == data[end][2]: + end += 1 + end = (end-1)//8*8 + 7 + + if start != last + 1: + if start - last <= 1+16*3: + print_block (None, last+1, start-1, data) + last = start-1 + else: + if last >= 0: + ends.append (last + 1) + offset += ends[-1] - starts[-1] + print () + print () + print ("#define indic_offset_0x%04xu %d" % (start, offset)) + starts.append (start) + + print_block (block, start, end, data) + last = end +ends.append (last + 1) +offset += ends[-1] - starts[-1] +print () +print () +occupancy = used * 100. / total +page_bits = 12 +print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy)) +print () +print ("INDIC_TABLE_ELEMENT_TYPE") +print ("_unicode_indic_get_categories (Uchar32 u)") +print ("{") +print (" switch (u >> %d)" % page_bits) +print (" {") +pages = set ([u>>page_bits for u in starts+ends+list (singles.keys ())]) +for p in sorted(pages): + print (" case 0x%0Xu:" % p) + for u,d in singles.items (): + if p != u>>page_bits: continue + print (" if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]])) + for (start,end) in zip (starts, ends): + if p not in [start>>page_bits, end>>page_bits]: continue + offset = "indic_offset_0x%04xu" % start + print (" if (uc_in_range(u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset)) + print (" break;") + print ("") +print (" default:") +print (" break;") +print (" }") +print (" return _(x,x);") +print ("}") +print () +print ("#undef _") +for i in range (2): + print + vv = sorted (values[i].keys ()) + for v in vv: + print ("#undef %s_%s" % + (what_short[i], short[i][v])) +print () +print ("/* == End of generated table == */") + +# Maintain at least 30% occupancy in the table */ +if occupancy < 30: + raise Exception ("Table too sparse, please investigate: ", occupancy) diff --git a/src/font/utils/gen-vowel-constraints.py b/src/font/utils/gen-vowel-constraints.py new file mode 100755 index 00000000..a8eeb16b --- /dev/null +++ b/src/font/utils/gen-vowel-constraints.py @@ -0,0 +1,219 @@ +#!/usr/bin/python + +"""Generator of the function to prohibit certain vowel sequences. + +It creates ``_hb_preprocess_text_vowel_constraints``, which inserts dotted +circles into sequences prohibited by the USE script development spec. +This function should be used as the ``preprocess_text`` of an +``hb_ot_complex_shaper_t``. +""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +import collections +try: + from HTMLParser import HTMLParser + def write (s): + print (s.encode ('utf-8'), end='') +except ImportError: + from html.parser import HTMLParser + def write (s): + sys.stdout.flush () + sys.stdout.buffer.write (s.encode ('utf-8')) +import itertools +import io +import sys + +#if len (sys.argv) != 3: +# print ('usage: ./gen-vowel-constraints.py', file=sys.stderr) +# sys.exit (1) + +with io.open ('ucd/Scripts.txt', encoding='utf-8') as f: + scripts_header = [f.readline () for i in range (2)] + scripts = {} + script_order = {} + for line in f: + j = line.find ('#') + if j >= 0: + line = line[:j] + fields = [x.strip () for x in line.split (';')] + if len (fields) == 1: + continue + uu = fields[0].split ('..') + start = int (uu[0], 16) + if len (uu) == 1: + end = start + else: + end = int (uu[1], 16) + script = fields[1] + for u in range (start, end + 1): + scripts[u] = script + if script not in script_order: + script_order[script] = start + +class ConstraintSet (object): + """A set of prohibited code point sequences. + + Args: + constraint (List[int]): A prohibited code point sequence. + + """ + def __init__ (self, constraint): + # Either a list or a dictionary. As a list of code points, it + # represents a prohibited code point sequence. As a dictionary, + # it represents a set of prohibited sequences, where each item + # represents the set of prohibited sequences starting with the + # key (a code point) concatenated with any of the values + # (ConstraintSets). + self._c = constraint + + def add (self, constraint): + """Add a constraint to this set.""" + if not constraint: + return + first = constraint[0] + rest = constraint[1:] + if isinstance (self._c, list): + if constraint == self._c[:len (constraint)]: + self._c = constraint + elif self._c != constraint[:len (self._c)]: + self._c = {self._c[0]: ConstraintSet (self._c[1:])} + if isinstance (self._c, dict): + if first in self._c: + self._c[first].add (rest) + else: + self._c[first] = ConstraintSet (rest) + + def _indent (self, depth): + return (' ' * depth).replace (' ', '\t') + + def __str__ (self, index=0, depth=4): + s = [] + indent = self._indent (depth) + if isinstance (self._c, list): + if len (self._c) == 0: + s.append ('{}matched = true;\n'.format (indent)) + elif len (self._c) == 1: + s.append ('{}matched = 0x{:04X}u == buffer->cur ({}).codepoint;\n'.format (indent, next (iter (self._c)), index or '')) + else: + s.append ('{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, self._c[0], index)) + s.append ('{}buffer->idx + {} < count &&\n'.format (self._indent (depth + 2), len (self._c))) + for i, cp in enumerate (self._c[1:], start=1): + s.append ('{}0x{:04X}u == buffer->cur ({}).codepoint{}\n'.format ( + self._indent (depth + 2), cp, index + i, ')' if i == len (self._c) - 1 else ' &&')) + s.append ('{}{{\n'.format (indent)) + for i in range (len (self._c)): + s.append ('{}buffer->next_glyph ();\n'.format (self._indent (depth + 1))) + s.append ('{}_output_dotted_circle (buffer);\n'.format (self._indent (depth + 1))) + s.append ('{}}}\n'.format (indent)) + else: + s.append ('{}switch (buffer->cur ({}).codepoint)\n'.format(indent, index or '')) + s.append ('{}{{\n'.format (indent)) + cases = collections.defaultdict (set) + for first, rest in sorted (self._c.items ()): + cases[rest.__str__ (index + 1, depth + 2)].add (first) + for body, labels in sorted (cases.items (), key=lambda b_ls: sorted (b_ls[1])[0]): + for i, cp in enumerate (sorted (labels)): + if i % 4 == 0: + s.append (self._indent (depth + 1)) + else: + s.append (' ') + s.append ('case 0x{:04X}u:{}'.format (cp, '\n' if i % 4 == 3 else '')) + if len (labels) % 4 != 0: + s.append ('\n') + s.append (body) + s.append ('{}break;\n'.format (self._indent (depth + 2))) + s.append ('{}}}\n'.format (indent)) + return ''.join (s) + +constraints = {} +with io.open ('IndicVowelConstraints.txt', encoding='utf-8') as f: + constraints_header = [f.readline ().strip () for i in range (2)] + for line in f: + j = line.find ('#') + if j >= 0: + line = line[:j] + constraint = [int (cp, 16) for cp in line.split (';')[0].split ()] + if not constraint: continue + assert 2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint) + script = scripts[constraint[0]] + if script in constraints: + constraints[script].add (constraint) + else: + constraints[script] = ConstraintSet (constraint) + assert constraints, 'No constraints found' + +print ('/* == Start of generated functions == */') +print ('/*') +print (' * The following functions are generated by running:') +print (' *') +print (' * %s' % sys.argv[0]) +print (' *') +print (' * on IndicVowelConstraints.txt and ucd/Scripts.txt files with these headers:') +print (' *') +for line in constraints_header: + print (' * %s' % line.strip ()) +print (' *') +for line in scripts_header: + print (' * %s' % line.strip ()) +print (' */') +print () +print ('#include "hb-ot-shape-complex-vowel-constraints.hh"') +print () +print ('static void') +print ('_output_dotted_circle (hb_buffer_t *buffer)') +print ('{') +print (' hb_glyph_info_t &dottedcircle = buffer->output_glyph (0x25CCu);') +print (' _hb_glyph_info_reset_continuation (&dottedcircle);') +print ('}') +print () +print ('static void') +print ('_output_with_dotted_circle (hb_buffer_t *buffer)') +print ('{') +print (' _output_dotted_circle (buffer);') +print (' buffer->next_glyph ();') +print ('}') +print () + +print ('void') +print ('_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED,') +print ('\t\t\t\t hb_buffer_t *buffer,') +print ('\t\t\t\t hb_font_t *font HB_UNUSED)') +print ('{') +print (' /* UGLY UGLY UGLY business of adding dotted-circle in the middle of') +print (' * vowel-sequences that look like another vowel. Data for each script') +print (' * collected from the USE script development spec.') +print (' *') +print (' * https://github.com/harfbuzz/harfbuzz/issues/1019') +print (' */') +print (' bool processed = false;') +print (' buffer->clear_output ();') +print (' unsigned int count = buffer->len;') +print (' switch ((unsigned) buffer->props.script)') +print (' {') + +for script, constraints in sorted (constraints.items (), key=lambda s_c: script_order[s_c[0]]): + print (' case HB_SCRIPT_{}:'.format (script.upper ())) + print (' for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)') + print (' {') + print ('\tbool matched = false;') + write (str (constraints)) + print ('\tbuffer->next_glyph ();') + print ('\tif (matched) _output_with_dotted_circle (buffer);') + print (' }') + print (' processed = true;') + print (' break;') + print () + +print (' default:') +print (' break;') +print (' }') +print (' if (processed)') +print (' {') +print (' if (buffer->idx < count)') +print (' buffer->next_glyph ();') +print (' }') +print ('}') + +print () +print ('/* == End of generated functions == */')