Indic shaping table

This commit is contained in:
Vincent Wei
2019-03-07 11:02:51 +08:00
parent 76cf8a02af
commit dcd31bed7c
5 changed files with 1077 additions and 1 deletions

View File

@@ -0,0 +1,490 @@
/* == Start of generated table == */
/*
* The following table is generated by running:
*
* ./gen-indic-table.py
*
* on ucd/IndicSyllabicCategory.txt ucd/IndicPositionalCategory.txt ucd/Blocks.txt files with these headers:
*
* # IndicSyllabicCategory-11.0.0.txt
* # Date: 2018-05-21, 18:33:00 GMT [KW, RP]
* # IndicPositionalCategory-11.0.0.txt
* # Date: 2018-02-05, 16:21:00 GMT [KW, RP]
* # Blocks-11.0.0.txt
* # Date: 2017-10-16, 24:39:00 GMT [KW]
*/
#include "unicode-shape-complex-indic.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-macros"
#define ISC_A INDIC_SYLLABIC_CATEGORY_AVAGRAHA /* 16 chars; Avagraha */
#define ISC_Bi INDIC_SYLLABIC_CATEGORY_BINDU /* 83 chars; Bindu */
#define ISC_BJN INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER /* 20 chars; Brahmi_Joining_Number */
#define ISC_Ca INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK /* 58 chars; Cantillation_Mark */
#define ISC_C INDIC_SYLLABIC_CATEGORY_CONSONANT /* 2110 chars; Consonant */
#define ISC_CD INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD /* 10 chars; Consonant_Dead */
#define ISC_CF INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL /* 67 chars; Consonant_Final */
#define ISC_CHL INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER /* 5 chars; Consonant_Head_Letter */
#define ISC_CIP INDIC_SYLLABIC_CATEGORY_CONSONANT_INITIAL_POSTFIXED /* 1 chars; Consonant_Initial_Postfixed */
#define ISC_CK INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER /* 2 chars; Consonant_Killer */
#define ISC_CM INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL /* 28 chars; Consonant_Medial */
#define ISC_CP INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER /* 21 chars; Consonant_Placeholder */
#define ISC_CPR INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA /* 2 chars; Consonant_Preceding_Repha */
#define ISC_CPrf INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED /* 7 chars; Consonant_Prefixed */
#define ISC_CS INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED /* 95 chars; Consonant_Subjoined */
#define ISC_CSR INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA /* 4 chars; Consonant_Succeeding_Repha */
#define ISC_CWS INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER /* 6 chars; Consonant_With_Stacker */
#define ISC_GM INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK /* 3 chars; Gemination_Mark */
#define ISC_IS INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER /* 11 chars; Invisible_Stacker */
#define ISC_ZWJ INDIC_SYLLABIC_CATEGORY_JOINER /* 1 chars; Joiner */
#define ISC_ML INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER /* 1 chars; Modifying_Letter */
#define ISC_ZWNJ INDIC_SYLLABIC_CATEGORY_NON_JOINER /* 1 chars; Non_Joiner */
#define ISC_N INDIC_SYLLABIC_CATEGORY_NUKTA /* 30 chars; Nukta */
#define ISC_Nd INDIC_SYLLABIC_CATEGORY_NUMBER /* 480 chars; Number */
#define ISC_NJ INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER /* 1 chars; Number_Joiner */
#define ISC_x INDIC_SYLLABIC_CATEGORY_OTHER /* 1 chars; Other */
#define ISC_PK INDIC_SYLLABIC_CATEGORY_PURE_KILLER /* 21 chars; Pure_Killer */
#define ISC_RS INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER /* 2 chars; Register_Shifter */
#define ISC_SM INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER /* 25 chars; Syllable_Modifier */
#define ISC_TL INDIC_SYLLABIC_CATEGORY_TONE_LETTER /* 7 chars; Tone_Letter */
#define ISC_TM INDIC_SYLLABIC_CATEGORY_TONE_MARK /* 42 chars; Tone_Mark */
#define ISC_V INDIC_SYLLABIC_CATEGORY_VIRAMA /* 25 chars; Virama */
#define ISC_Vs INDIC_SYLLABIC_CATEGORY_VISARGA /* 36 chars; Visarga */
#define ISC_Vo INDIC_SYLLABIC_CATEGORY_VOWEL /* 30 chars; Vowel */
#define ISC_M INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT /* 660 chars; Vowel_Dependent */
#define ISC_VI INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT /* 464 chars; Vowel_Independent */
#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 340 chars; Bottom */
#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */
#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 2 chars; Bottom_And_Right */
#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 59 chars; Left */
#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 21 chars; Left_And_Right */
#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */
#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */
#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 276 chars; Right */
#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 393 chars; Top */
#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */
#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */
#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 6 chars; Top_And_Left */
#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 4 chars; Top_And_Left_And_Right */
#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 13 chars; Top_And_Right */
#define IMC_VOL INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT /* 19 chars; Visual_Order_Left */
#pragma GCC diagnostic pop
#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)
static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {
#define indic_offset_0x0028u 0
/* Basic Latin */
/* 0028 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), _(x,x), _(x,x),
/* 0030 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0038 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
#define indic_offset_0x00b0u 24
/* Latin-1 Supplement */
/* 00B0 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 00B8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 00C0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 00C8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 00D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x),
#define indic_offset_0x0900u 64
/* Devanagari */
/* 0900 */ _(Bi,T), _(Bi,T), _(Bi,T), _(Vs,R), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
/* 0908 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
/* 0910 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
/* 0918 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0920 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0928 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0930 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(A,x), _(M,R), _(M,L),
/* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T),
/* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(V,B), _(M,L), _(M,R),
/* 0950 */ _(x,x), _(Ca,T), _(Ca,B), _(x,T), _(x,T), _(M,T), _(M,B), _(M,B),
/* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0960 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0968 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0970 */ _(x,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
/* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* Bengali */
/* 0980 */ _(CP,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
/* 0988 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x),
/* 0990 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
/* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 09A0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 09A8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 09B0 */ _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x),
/* 09B8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L),
/* 09C0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L),
/* 09C8 */ _(M,L), _(x,x), _(x,x), _(M,LR), _(M,LR), _(V,B), _(CD,x), _(x,x),
/* 09D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
/* 09D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x),
/* 09E0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 09E8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 09F0 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 09F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(Bi,x), _(x,x), _(SM,T), _(x,x),
/* Gurmukhi */
/* 0A00 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
/* 0A08 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x),
/* 0A10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
/* 0A18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0A20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0A28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0A30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(x,x),
/* 0A38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(x,x), _(M,R), _(M,L),
/* 0A40 */ _(M,R), _(M,B), _(M,B), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T),
/* 0A48 */ _(M,T), _(x,x), _(x,x), _(M,T), _(M,T), _(V,B), _(x,x), _(x,x),
/* 0A50 */ _(x,x), _(Ca,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0A58 */ _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x),
/* 0A60 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0A68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0A70 */ _(Bi,T), _(GM,T), _(CP,x), _(CP,x), _(x,x), _(CM,B), _(x,x), _(x,x),
/* 0A78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* Gujarati */
/* 0A80 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
/* 0A88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x),
/* 0A90 */ _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
/* 0A98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0AA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0AA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0AB0 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
/* 0AB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L),
/* 0AC0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(x,x), _(M,T),
/* 0AC8 */ _(M,T), _(M,TR), _(x,x), _(M,R), _(M,R), _(V,B), _(x,x), _(x,x),
/* 0AD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0AD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0AE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0AE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0AF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0AF8 */ _(x,x), _(C,x), _(Ca,T), _(Ca,T), _(Ca,T), _(N,T), _(N,T), _(N,T),
/* Oriya */
/* 0B00 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
/* 0B08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x),
/* 0B10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
/* 0B18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0B20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0B28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0B30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
/* 0B38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T),
/* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L),
/* 0B48 */ _(M,TL), _(x,x), _(x,x), _(M,LR),_(M,TLR), _(V,B), _(x,x), _(x,x),
/* 0B50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,TR),
/* 0B58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x),
/* 0B60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0B68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0B70 */ _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0B78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* Tamil */
/* 0B80 */ _(x,x), _(x,x), _(Bi,T), _(ML,x), _(x,x), _(VI,x), _(VI,x), _(VI,x),
/* 0B88 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(VI,x),
/* 0B90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(x,x), _(x,x),
/* 0B98 */ _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(C,x), _(C,x),
/* 0BA0 */ _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x),
/* 0BA8 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x),
/* 0BB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0BB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R),
/* 0BC0 */ _(M,T), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(M,L), _(M,L),
/* 0BC8 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T), _(x,x), _(x,x),
/* 0BD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
/* 0BD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0BE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0BE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0BF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0BF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* Telugu */
/* 0C00 */ _(Bi,T), _(Bi,R), _(Bi,R), _(Vs,R), _(Bi,T), _(VI,x), _(VI,x), _(VI,x),
/* 0C08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
/* 0C10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
/* 0C18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0C20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0C28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0C30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0C38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(A,x), _(M,T), _(M,T),
/* 0C40 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,T),
/* 0C48 */ _(M,TB), _(x,x), _(M,T), _(M,T), _(M,T), _(V,T), _(x,x), _(x,x),
/* 0C50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,B), _(x,x),
/* 0C58 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0C60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0C68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0C70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0C78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* Kannada */
/* 0C80 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
/* 0C88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
/* 0C90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
/* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0CA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0CA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0CB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
/* 0CB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T),
/* 0CC0 */ _(M,TR), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,TR),
/* 0CC8 */ _(M,TR), _(x,x), _(M,TR), _(M,TR), _(M,T), _(V,T), _(x,x), _(x,x),
/* 0CD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), _(x,x),
/* 0CD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(x,x),
/* 0CE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0CE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0CF0 */ _(x,x),_(CWS,x),_(CWS,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0CF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* Malayalam */
/* 0D00 */ _(Bi,T), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
/* 0D08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
/* 0D10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
/* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0D20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0D28 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0D30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0D38 */ _(C,x), _(C,x), _(C,x), _(PK,T), _(PK,T), _(A,x), _(M,R), _(M,R),
/* 0D40 */ _(M,R), _(M,R), _(M,R), _(M,B), _(M,B), _(x,x), _(M,L), _(M,L),
/* 0D48 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T),_(CPR,x), _(x,x),
/* 0D50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(M,R),
/* 0D58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x),
/* 0D60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0D68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0D70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 0D78 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x),
/* Sinhala */
/* 0D80 */ _(x,x), _(x,x), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
/* 0D88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
/* 0D90 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x),
/* 0D98 */ _(x,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0DA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0DA8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0DB0 */ _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 0DB8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(x,x),
/* 0DC0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x),
/* 0DC8 */ _(x,x), _(x,x), _(V,T), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
/* 0DD0 */ _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), _(x,x), _(M,B), _(x,x),
/* 0DD8 */ _(M,R), _(M,L), _(M,TL), _(M,L), _(M,LR),_(M,TLR), _(M,LR), _(M,R),
/* 0DE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
/* 0DE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 0DF0 */ _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(x,x),
#define indic_offset_0x1000u 1336
/* Myanmar */
/* 1000 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 1008 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 1010 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 1018 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 1020 */ _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
/* 1028 */ _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), _(M,T), _(M,T), _(M,B),
/* 1030 */ _(M,B), _(M,L), _(M,T), _(M,T), _(M,T), _(M,T), _(Bi,T), _(TM,B),
/* 1038 */ _(Vs,R), _(IS,x), _(PK,T), _(CM,R), _(CM,x), _(CM,B), _(CM,B), _(C,x),
/* 1040 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 1048 */ _(Nd,x), _(Nd,x), _(x,x), _(CP,x), _(x,x), _(x,x), _(CP,x), _(x,x),
/* 1050 */ _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R),
/* 1058 */ _(M,B), _(M,B), _(C,x), _(C,x), _(C,x), _(C,x), _(CM,B), _(CM,B),
/* 1060 */ _(CM,B), _(C,x), _(M,R), _(TM,R), _(TM,R), _(C,x), _(C,x), _(M,R),
/* 1068 */ _(M,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(C,x), _(C,x),
/* 1070 */ _(C,x), _(M,T), _(M,T), _(M,T), _(M,T), _(C,x), _(C,x), _(C,x),
/* 1078 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 1080 */ _(C,x), _(C,x), _(CM,B), _(M,R), _(M,L), _(M,T), _(M,T), _(TM,R),
/* 1088 */ _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,B), _(C,x), _(TM,R),
/* 1090 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 1098 */ _(Nd,x), _(Nd,x), _(TM,R), _(TM,R), _(M,R), _(M,T), _(x,x), _(x,x),
#define indic_offset_0x1780u 1496
/* Khmer */
/* 1780 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 1788 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 1790 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 1798 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* 17A0 */ _(C,x), _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
/* 17A8 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
/* 17B0 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(M,R), _(M,T),
/* 17B8 */ _(M,T), _(M,T), _(M,T), _(M,B), _(M,B), _(M,B), _(M,TL),_(M,TLR),
/* 17C0 */ _(M,LR), _(M,L), _(M,L), _(M,L), _(M,LR), _(M,LR), _(Bi,T), _(Vs,R),
/* 17C8 */ _(M,R), _(RS,T), _(RS,T), _(SM,T),_(CSR,T), _(CK,T), _(SM,T), _(SM,T),
/* 17D0 */ _(SM,T), _(PK,T), _(IS,x), _(SM,T), _(x,x), _(x,x), _(x,x), _(x,x),
/* 17D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(A,x), _(SM,T), _(x,x), _(x,x),
/* 17E0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* 17E8 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
#define indic_offset_0x1cd0u 1608
/* Vedic Extensions */
/* 1CD0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(x,x), _(Ca,O), _(Ca,B), _(Ca,B), _(Ca,B),
/* 1CD8 */ _(Ca,B), _(Ca,B), _(Ca,T), _(Ca,T), _(Ca,B), _(Ca,B), _(Ca,B), _(Ca,B),
/* 1CE0 */ _(Ca,T), _(Ca,R), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O),
/* 1CE8 */ _(x,O), _(x,x), _(x,x), _(x,x), _(x,x), _(x,B), _(x,x), _(x,x),
/* 1CF0 */ _(x,x), _(x,x), _(Vs,x), _(Vs,x), _(Ca,T),_(CWS,x),_(CWS,x), _(Ca,R),
/* 1CF8 */ _(Ca,x), _(Ca,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
#define indic_offset_0x2008u 1656
/* General Punctuation */
/* 2008 */ _(x,x), _(x,x), _(x,x), _(x,x),_(ZWNJ,x),_(ZWJ,x), _(x,x), _(x,x),
/* 2010 */ _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), _(x,x), _(x,x),
#define indic_offset_0x2070u 1672
/* Superscripts and Subscripts */
/* 2070 */ _(x,x), _(x,x), _(x,x), _(x,x), _(SM,x), _(x,x), _(x,x), _(x,x),
/* 2078 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* 2080 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x),
#define indic_offset_0xa8e0u 1696
/* Devanagari Extended */
/* A8E0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T),
/* A8E8 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T),
/* A8F0 */ _(Ca,T), _(Ca,T), _(Bi,x), _(Bi,x), _(x,x), _(x,x), _(x,x), _(x,x),
/* A8F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(M,T),
#define indic_offset_0xa9e0u 1728
/* Myanmar Extended-B */
/* A9E0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(M,T), _(x,x), _(C,x),
/* A9E8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* A9F0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
/* A9F8 */ _(Nd,x), _(Nd,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x),
#define indic_offset_0xaa60u 1760
/* Myanmar Extended-A */
/* AA60 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* AA68 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
/* AA70 */ _(x,x), _(C,x), _(C,x), _(C,x), _(CP,x), _(CP,x), _(CP,x), _(x,x),
/* AA78 */ _(x,x), _(x,x), _(C,x), _(TM,R), _(TM,T), _(TM,R), _(C,x), _(C,x),
}; /* Table items: 1792; occupancy: 70% */
INDIC_TABLE_ELEMENT_TYPE
_unicode_indic_get_categories (Uchar32 u)
{
switch (u >> 12)
{
case 0x0u:
if (unlikely (u == 0x00A0u)) return _(CP,x);
if (uc_in_range(u, 0x0028u, 0x003Fu)) return indic_table[u - 0x0028u + indic_offset_0x0028u];
if (uc_in_range(u, 0x00B0u, 0x00D7u)) return indic_table[u - 0x00B0u + indic_offset_0x00b0u];
if (uc_in_range(u, 0x0900u, 0x0DF7u)) return indic_table[u - 0x0900u + indic_offset_0x0900u];
break;
case 0x1u:
if (uc_in_range(u, 0x1000u, 0x109Fu)) return indic_table[u - 0x1000u + indic_offset_0x1000u];
if (uc_in_range(u, 0x1780u, 0x17EFu)) return indic_table[u - 0x1780u + indic_offset_0x1780u];
if (uc_in_range(u, 0x1CD0u, 0x1CFFu)) return indic_table[u - 0x1CD0u + indic_offset_0x1cd0u];
break;
case 0x2u:
if (unlikely (u == 0x25CCu)) return _(CP,x);
if (uc_in_range(u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u];
if (uc_in_range(u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u];
break;
case 0xAu:
if (uc_in_range(u, 0xA8E0u, 0xA8FFu)) return indic_table[u - 0xA8E0u + indic_offset_0xa8e0u];
if (uc_in_range(u, 0xA9E0u, 0xA9FFu)) return indic_table[u - 0xA9E0u + indic_offset_0xa9e0u];
if (uc_in_range(u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u];
break;
default:
break;
}
return _(x,x);
}
#undef _
#undef ISC_A
#undef ISC_Bi
#undef ISC_BJN
#undef ISC_Ca
#undef ISC_C
#undef ISC_CD
#undef ISC_CF
#undef ISC_CHL
#undef ISC_CIP
#undef ISC_CK
#undef ISC_CM
#undef ISC_CP
#undef ISC_CPR
#undef ISC_CPrf
#undef ISC_CS
#undef ISC_CSR
#undef ISC_CWS
#undef ISC_GM
#undef ISC_IS
#undef ISC_ZWJ
#undef ISC_ML
#undef ISC_ZWNJ
#undef ISC_N
#undef ISC_Nd
#undef ISC_NJ
#undef ISC_x
#undef ISC_PK
#undef ISC_RS
#undef ISC_SM
#undef ISC_TL
#undef ISC_TM
#undef ISC_V
#undef ISC_Vs
#undef ISC_Vo
#undef ISC_M
#undef ISC_VI
#undef IMC_B
#undef IMC_BL
#undef IMC_BR
#undef IMC_L
#undef IMC_LR
#undef IMC_x
#undef IMC_O
#undef IMC_R
#undef IMC_T
#undef IMC_TB
#undef IMC_TBR
#undef IMC_TL
#undef IMC_TLR
#undef IMC_TR
#undef IMC_VOL
/* == End of generated table == */

View File

@@ -0,0 +1,97 @@
# Copied from https://docs.microsoft.com/en-us/typography/script-development/use
# On October 23, 2018; with documentd dated 02/07/2018.
0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E
0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA
0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I
0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U
090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E
090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E
090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E
0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O
0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E
0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O
0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E
0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O
0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E
0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU
0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI
0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E
0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE
0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE
0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE
0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW
0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE
0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE
0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA
098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R
098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L
0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA
0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I
0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II
0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U
0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU
0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE
0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI
0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO
0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU
0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA
0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E
0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E
0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI
0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O
0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O
0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E
0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU
0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI
0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA
0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA
0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK
0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK
0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK
0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU
0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK
0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK
0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK
0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA
0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU
0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA
0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK
0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK
0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E
0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA
0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK
0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA
0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA
0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA
0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA
0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA
0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA
0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA
0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA
0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA
11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA
1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R
1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E
11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA
11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E
11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O
11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU
112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA
112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E
112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI
112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O
112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU
11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA
114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R
114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR
1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E
1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E
11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E
11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI
11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E
11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI

View File

@@ -1,7 +1,7 @@
#!/bin/bash
url='https://www.unicode.org/Public/11.0.0/ucd/'
files='UnicodeData.txt IndicPositionalCategory.txt IndicSyllabicCategory.txt ArabicShaping.txt BidiBrackets.txt BidiMirroring.txt Blocks.txt'
files='Scripts.txt UnicodeData.txt IndicPositionalCategory.txt IndicSyllabicCategory.txt ArabicShaping.txt BidiBrackets.txt BidiMirroring.txt Blocks.txt'
if test ! -d ucd; then
mkdir ucd

270
src/font/utils/gen-indic-table.py Executable file
View File

@@ -0,0 +1,270 @@
#!/usr/bin/env python
# Copied from HarfBuzz
# https://github.com/harfbuzz/harfbuzz
# On 2019-03-07
#
# Revised by Vincent Wei for MiniGUI 3.4
from __future__ import print_function, division, absolute_import
import io, sys
#if len (sys.argv) != 4:
# print ("usage: ./gen-indic-table.py", file=sys.stderr)
# sys.exit (1)
ALLOWED_SINGLES = [0x00A0, 0x25CC]
ALLOWED_BLOCKS = [
'Basic Latin',
'Latin-1 Supplement',
'Devanagari',
'Bengali',
'Gurmukhi',
'Gujarati',
'Oriya',
'Tamil',
'Telugu',
'Kannada',
'Malayalam',
'Sinhala',
'Myanmar',
'Khmer',
'Vedic Extensions',
'General Punctuation',
'Superscripts and Subscripts',
'Devanagari Extended',
'Myanmar Extended-B',
'Myanmar Extended-A',
]
filenames = ['ucd/IndicSyllabicCategory.txt', 'ucd/IndicPositionalCategory.txt', 'ucd/Blocks.txt']
files = [io.open (x, encoding='utf-8') for x in filenames]
headers = [[f.readline () for i in range (2)] for f in files]
data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate (files):
for line in f:
j = line.find ('#')
if j >= 0:
line = line[:j]
fields = [x.strip () for x in line.split (';')]
if len (fields) == 1:
continue
uu = fields[0].split ('..')
start = int (uu[0], 16)
if len (uu) == 1:
end = start
else:
end = int (uu[1], 16)
t = fields[1]
for u in range (start, end + 1):
data[i][u] = t
values[i][t] = values[i].get (t, 0) + end - start + 1
# Merge data into one dict:
defaults = ('Other', 'Not_Applicable', 'No_Block')
for i,v in enumerate (defaults):
values[i][v] = values[i].get (v, 0) + 1
combined = {}
for i,d in enumerate (data):
for u,v in d.items ():
if i == 2 and not u in combined:
continue
if not u in combined:
combined[u] = list (defaults)
combined[u][i] = v
combined = {k:v for k,v in combined.items() if k in ALLOWED_SINGLES or v[2] in ALLOWED_BLOCKS}
data = combined
del combined
num = len (data)
for u in [0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D3]:
if data[u][0] == 'Other':
data[u][0] = "Vowel_Dependent"
# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {}
for u in ALLOWED_SINGLES:
singles[u] = data[u]
del data[u]
print ("/* == Start of generated table == */")
print ("/*")
print (" * The following table is generated by running:")
print (" *")
print (" * ./gen-indic-table.py")
print (" *")
print (" * on ucd/IndicSyllabicCategory.txt ucd/IndicPositionalCategory.txt ucd/Blocks.txt files with these headers:")
print (" *")
for h in headers:
for l in h:
print (" * %s" % (l.strip()))
print (" */")
print ()
print ('#include "unicode-shape-complex-indic.h"')
print ()
# Shorten values
short = [{
"Bindu": 'Bi',
"Cantillation_Mark": 'Ca',
"Joiner": 'ZWJ',
"Non_Joiner": 'ZWNJ',
"Number": 'Nd',
"Visarga": 'Vs',
"Vowel": 'Vo',
"Vowel_Dependent": 'M',
"Consonant_Prefixed": 'CPrf',
"Other": 'x',
},{
"Not_Applicable": 'x',
}]
all_shorts = [{},{}]
# Add some of the values, to make them more readable, and to avoid duplicates
for i in range (2):
for v,s in short[i].items ():
all_shorts[i][s] = v
what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
what_short = ["ISC", "IMC"]
print ('#pragma GCC diagnostic push')
print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
for i in range (2):
print ()
vv = sorted (values[i].keys ())
for v in vv:
v_no_and = v.replace ('_And_', '_')
if v in short[i]:
s = short[i][v]
else:
s = ''.join ([c for c in v_no_and if ord ('A') <= ord (c) <= ord ('Z')])
if s in all_shorts[i]:
raise Exception ("Duplicate short value alias", v, all_shorts[i][s])
all_shorts[i][s] = v
short[i][v] = s
print ("#define %s_%s %s_%s %s/* %3d chars; %s */" %
(what_short[i], s, what[i], v.upper (),
' '* ((48-1 - len (what[i]) - 1 - len (v)) // 8),
values[i][v], v))
print ('#pragma GCC diagnostic pop')
print ()
print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)")
print ()
print ()
total = 0
used = 0
last_block = None
def print_block (block, start, end, data):
global total, used, last_block
if block and block != last_block:
print ()
print ()
print (" /* %s */" % block)
num = 0
assert start % 8 == 0
assert (end+1) % 8 == 0
for u in range (start, end+1):
if u % 8 == 0:
print ()
print (" /* %04X */" % u, end="")
if u in data:
num += 1
d = data.get (u, defaults)
print ("%9s" % ("_(%s,%s)," % (short[0][d[0]], short[1][d[1]])), end="")
total += end - start + 1
used += num
if block:
last_block = block
uu = sorted (data.keys ())
last = -100000
num = 0
offset = 0
starts = []
ends = []
print ("static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {")
for u in uu:
if u <= last:
continue
block = data[u][2]
start = u//8*8
end = start+1
while end in uu and block == data[end][2]:
end += 1
end = (end-1)//8*8 + 7
if start != last + 1:
if start - last <= 1+16*3:
print_block (None, last+1, start-1, data)
last = start-1
else:
if last >= 0:
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
print ("#define indic_offset_0x%04xu %d" % (start, offset))
starts.append (start)
print_block (block, start, end, data)
last = end
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
occupancy = used * 100. / total
page_bits = 12
print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print ()
print ("INDIC_TABLE_ELEMENT_TYPE")
print ("_unicode_indic_get_categories (Uchar32 u)")
print ("{")
print (" switch (u >> %d)" % page_bits)
print (" {")
pages = set ([u>>page_bits for u in starts+ends+list (singles.keys ())])
for p in sorted(pages):
print (" case 0x%0Xu:" % p)
for u,d in singles.items ():
if p != u>>page_bits: continue
print (" if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
for (start,end) in zip (starts, ends):
if p not in [start>>page_bits, end>>page_bits]: continue
offset = "indic_offset_0x%04xu" % start
print (" if (uc_in_range(u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset))
print (" break;")
print ("")
print (" default:")
print (" break;")
print (" }")
print (" return _(x,x);")
print ("}")
print ()
print ("#undef _")
for i in range (2):
print
vv = sorted (values[i].keys ())
for v in vv:
print ("#undef %s_%s" %
(what_short[i], short[i][v]))
print ()
print ("/* == End of generated table == */")
# Maintain at least 30% occupancy in the table */
if occupancy < 30:
raise Exception ("Table too sparse, please investigate: ", occupancy)

View File

@@ -0,0 +1,219 @@
#!/usr/bin/python
"""Generator of the function to prohibit certain vowel sequences.
It creates ``_hb_preprocess_text_vowel_constraints``, which inserts dotted
circles into sequences prohibited by the USE script development spec.
This function should be used as the ``preprocess_text`` of an
``hb_ot_complex_shaper_t``.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import collections
try:
from HTMLParser import HTMLParser
def write (s):
print (s.encode ('utf-8'), end='')
except ImportError:
from html.parser import HTMLParser
def write (s):
sys.stdout.flush ()
sys.stdout.buffer.write (s.encode ('utf-8'))
import itertools
import io
import sys
#if len (sys.argv) != 3:
# print ('usage: ./gen-vowel-constraints.py', file=sys.stderr)
# sys.exit (1)
with io.open ('ucd/Scripts.txt', encoding='utf-8') as f:
scripts_header = [f.readline () for i in range (2)]
scripts = {}
script_order = {}
for line in f:
j = line.find ('#')
if j >= 0:
line = line[:j]
fields = [x.strip () for x in line.split (';')]
if len (fields) == 1:
continue
uu = fields[0].split ('..')
start = int (uu[0], 16)
if len (uu) == 1:
end = start
else:
end = int (uu[1], 16)
script = fields[1]
for u in range (start, end + 1):
scripts[u] = script
if script not in script_order:
script_order[script] = start
class ConstraintSet (object):
"""A set of prohibited code point sequences.
Args:
constraint (List[int]): A prohibited code point sequence.
"""
def __init__ (self, constraint):
# Either a list or a dictionary. As a list of code points, it
# represents a prohibited code point sequence. As a dictionary,
# it represents a set of prohibited sequences, where each item
# represents the set of prohibited sequences starting with the
# key (a code point) concatenated with any of the values
# (ConstraintSets).
self._c = constraint
def add (self, constraint):
"""Add a constraint to this set."""
if not constraint:
return
first = constraint[0]
rest = constraint[1:]
if isinstance (self._c, list):
if constraint == self._c[:len (constraint)]:
self._c = constraint
elif self._c != constraint[:len (self._c)]:
self._c = {self._c[0]: ConstraintSet (self._c[1:])}
if isinstance (self._c, dict):
if first in self._c:
self._c[first].add (rest)
else:
self._c[first] = ConstraintSet (rest)
def _indent (self, depth):
return (' ' * depth).replace (' ', '\t')
def __str__ (self, index=0, depth=4):
s = []
indent = self._indent (depth)
if isinstance (self._c, list):
if len (self._c) == 0:
s.append ('{}matched = true;\n'.format (indent))
elif len (self._c) == 1:
s.append ('{}matched = 0x{:04X}u == buffer->cur ({}).codepoint;\n'.format (indent, next (iter (self._c)), index or ''))
else:
s.append ('{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (indent, self._c[0], index))
s.append ('{}buffer->idx + {} < count &&\n'.format (self._indent (depth + 2), len (self._c)))
for i, cp in enumerate (self._c[1:], start=1):
s.append ('{}0x{:04X}u == buffer->cur ({}).codepoint{}\n'.format (
self._indent (depth + 2), cp, index + i, ')' if i == len (self._c) - 1 else ' &&'))
s.append ('{}{{\n'.format (indent))
for i in range (len (self._c)):
s.append ('{}buffer->next_glyph ();\n'.format (self._indent (depth + 1)))
s.append ('{}_output_dotted_circle (buffer);\n'.format (self._indent (depth + 1)))
s.append ('{}}}\n'.format (indent))
else:
s.append ('{}switch (buffer->cur ({}).codepoint)\n'.format(indent, index or ''))
s.append ('{}{{\n'.format (indent))
cases = collections.defaultdict (set)
for first, rest in sorted (self._c.items ()):
cases[rest.__str__ (index + 1, depth + 2)].add (first)
for body, labels in sorted (cases.items (), key=lambda b_ls: sorted (b_ls[1])[0]):
for i, cp in enumerate (sorted (labels)):
if i % 4 == 0:
s.append (self._indent (depth + 1))
else:
s.append (' ')
s.append ('case 0x{:04X}u:{}'.format (cp, '\n' if i % 4 == 3 else ''))
if len (labels) % 4 != 0:
s.append ('\n')
s.append (body)
s.append ('{}break;\n'.format (self._indent (depth + 2)))
s.append ('{}}}\n'.format (indent))
return ''.join (s)
constraints = {}
with io.open ('IndicVowelConstraints.txt', encoding='utf-8') as f:
constraints_header = [f.readline ().strip () for i in range (2)]
for line in f:
j = line.find ('#')
if j >= 0:
line = line[:j]
constraint = [int (cp, 16) for cp in line.split (';')[0].split ()]
if not constraint: continue
assert 2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint)
script = scripts[constraint[0]]
if script in constraints:
constraints[script].add (constraint)
else:
constraints[script] = ConstraintSet (constraint)
assert constraints, 'No constraints found'
print ('/* == Start of generated functions == */')
print ('/*')
print (' * The following functions are generated by running:')
print (' *')
print (' * %s' % sys.argv[0])
print (' *')
print (' * on IndicVowelConstraints.txt and ucd/Scripts.txt files with these headers:')
print (' *')
for line in constraints_header:
print (' * %s' % line.strip ())
print (' *')
for line in scripts_header:
print (' * %s' % line.strip ())
print (' */')
print ()
print ('#include "hb-ot-shape-complex-vowel-constraints.hh"')
print ()
print ('static void')
print ('_output_dotted_circle (hb_buffer_t *buffer)')
print ('{')
print (' hb_glyph_info_t &dottedcircle = buffer->output_glyph (0x25CCu);')
print (' _hb_glyph_info_reset_continuation (&dottedcircle);')
print ('}')
print ()
print ('static void')
print ('_output_with_dotted_circle (hb_buffer_t *buffer)')
print ('{')
print (' _output_dotted_circle (buffer);')
print (' buffer->next_glyph ();')
print ('}')
print ()
print ('void')
print ('_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED,')
print ('\t\t\t\t hb_buffer_t *buffer,')
print ('\t\t\t\t hb_font_t *font HB_UNUSED)')
print ('{')
print (' /* UGLY UGLY UGLY business of adding dotted-circle in the middle of')
print (' * vowel-sequences that look like another vowel. Data for each script')
print (' * collected from the USE script development spec.')
print (' *')
print (' * https://github.com/harfbuzz/harfbuzz/issues/1019')
print (' */')
print (' bool processed = false;')
print (' buffer->clear_output ();')
print (' unsigned int count = buffer->len;')
print (' switch ((unsigned) buffer->props.script)')
print (' {')
for script, constraints in sorted (constraints.items (), key=lambda s_c: script_order[s_c[0]]):
print (' case HB_SCRIPT_{}:'.format (script.upper ()))
print (' for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)')
print (' {')
print ('\tbool matched = false;')
write (str (constraints))
print ('\tbuffer->next_glyph ();')
print ('\tif (matched) _output_with_dotted_circle (buffer);')
print (' }')
print (' processed = true;')
print (' break;')
print ()
print (' default:')
print (' break;')
print (' }')
print (' if (processed)')
print (' {')
print (' if (buffer->idx < count)')
print (' buffer->next_glyph ();')
print (' }')
print ('}')
print ()
print ('/* == End of generated functions == */')