mirror of
https://github.com/VincentWei/MiniGUI.git
synced 2026-02-08 03:41:52 +08:00
Indic shaping table
This commit is contained in:
490
src/font/unicode-shape-complex-indic-table.c
Normal file
490
src/font/unicode-shape-complex-indic-table.c
Normal file
@@ -0,0 +1,490 @@
|
||||
/* == Start of generated table == */
|
||||
/*
|
||||
* The following table is generated by running:
|
||||
*
|
||||
* ./gen-indic-table.py
|
||||
*
|
||||
* on ucd/IndicSyllabicCategory.txt ucd/IndicPositionalCategory.txt ucd/Blocks.txt files with these headers:
|
||||
*
|
||||
* # IndicSyllabicCategory-11.0.0.txt
|
||||
* # Date: 2018-05-21, 18:33:00 GMT [KW, RP]
|
||||
* # IndicPositionalCategory-11.0.0.txt
|
||||
* # Date: 2018-02-05, 16:21:00 GMT [KW, RP]
|
||||
* # Blocks-11.0.0.txt
|
||||
* # Date: 2017-10-16, 24:39:00 GMT [KW]
|
||||
*/
|
||||
|
||||
#include "unicode-shape-complex-indic.h"
|
||||
|
||||
/*
 * Short aliases used only while building the table below.
 *
 *   ISC_<tag>  expands to the matching INDIC_SYLLABIC_CATEGORY_* constant.
 *   IMC_<tag>  expands to the matching INDIC_MATRA_CATEGORY_* constant.
 *   _(S,M)     pastes the ISC_ and IMC_ prefixes onto its two tags and
 *              feeds the pair to INDIC_COMBINE_CATEGORIES.
 *
 * The "N chars; Name" notes are emitted by the generator alongside each
 * alias.  The -Wunused-macros diagnostic is disabled around the alias list
 * so that aliases the table never references do not produce warnings.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-macros"

/* Syllabic category aliases. */
#define ISC_A     INDIC_SYLLABIC_CATEGORY_AVAGRAHA                     /* 16 chars; Avagraha */
#define ISC_Bi    INDIC_SYLLABIC_CATEGORY_BINDU                        /* 83 chars; Bindu */
#define ISC_BJN   INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER        /* 20 chars; Brahmi_Joining_Number */
#define ISC_Ca    INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK            /* 58 chars; Cantillation_Mark */
#define ISC_C     INDIC_SYLLABIC_CATEGORY_CONSONANT                    /* 2110 chars; Consonant */
#define ISC_CD    INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD               /* 10 chars; Consonant_Dead */
#define ISC_CF    INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL              /* 67 chars; Consonant_Final */
#define ISC_CHL   INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER        /* 5 chars; Consonant_Head_Letter */
#define ISC_CIP   INDIC_SYLLABIC_CATEGORY_CONSONANT_INITIAL_POSTFIXED  /* 1 chars; Consonant_Initial_Postfixed */
#define ISC_CK    INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER             /* 2 chars; Consonant_Killer */
#define ISC_CM    INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL             /* 28 chars; Consonant_Medial */
#define ISC_CP    INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER        /* 21 chars; Consonant_Placeholder */
#define ISC_CPR   INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA    /* 2 chars; Consonant_Preceding_Repha */
#define ISC_CPrf  INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED           /* 7 chars; Consonant_Prefixed */
#define ISC_CS    INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED          /* 95 chars; Consonant_Subjoined */
#define ISC_CSR   INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA   /* 4 chars; Consonant_Succeeding_Repha */
#define ISC_CWS   INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER       /* 6 chars; Consonant_With_Stacker */
#define ISC_GM    INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK              /* 3 chars; Gemination_Mark */
#define ISC_IS    INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER            /* 11 chars; Invisible_Stacker */
#define ISC_ZWJ   INDIC_SYLLABIC_CATEGORY_JOINER                       /* 1 chars; Joiner */
#define ISC_ML    INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER             /* 1 chars; Modifying_Letter */
#define ISC_ZWNJ  INDIC_SYLLABIC_CATEGORY_NON_JOINER                   /* 1 chars; Non_Joiner */
#define ISC_N     INDIC_SYLLABIC_CATEGORY_NUKTA                        /* 30 chars; Nukta */
#define ISC_Nd    INDIC_SYLLABIC_CATEGORY_NUMBER                       /* 480 chars; Number */
#define ISC_NJ    INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER                /* 1 chars; Number_Joiner */
#define ISC_x     INDIC_SYLLABIC_CATEGORY_OTHER                        /* 1 chars; Other */
#define ISC_PK    INDIC_SYLLABIC_CATEGORY_PURE_KILLER                  /* 21 chars; Pure_Killer */
#define ISC_RS    INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER             /* 2 chars; Register_Shifter */
#define ISC_SM    INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER            /* 25 chars; Syllable_Modifier */
#define ISC_TL    INDIC_SYLLABIC_CATEGORY_TONE_LETTER                  /* 7 chars; Tone_Letter */
#define ISC_TM    INDIC_SYLLABIC_CATEGORY_TONE_MARK                    /* 42 chars; Tone_Mark */
#define ISC_V     INDIC_SYLLABIC_CATEGORY_VIRAMA                       /* 25 chars; Virama */
#define ISC_Vs    INDIC_SYLLABIC_CATEGORY_VISARGA                      /* 36 chars; Visarga */
#define ISC_Vo    INDIC_SYLLABIC_CATEGORY_VOWEL                        /* 30 chars; Vowel */
#define ISC_M     INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT              /* 660 chars; Vowel_Dependent */
#define ISC_VI    INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT            /* 464 chars; Vowel_Independent */

/* Matra (positional) category aliases. */
#define IMC_B     INDIC_MATRA_CATEGORY_BOTTOM                          /* 340 chars; Bottom */
#define IMC_BL    INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT                 /* 1 chars; Bottom_And_Left */
#define IMC_BR    INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT                /* 2 chars; Bottom_And_Right */
#define IMC_L     INDIC_MATRA_CATEGORY_LEFT                            /* 59 chars; Left */
#define IMC_LR    INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT                  /* 21 chars; Left_And_Right */
#define IMC_x     INDIC_MATRA_CATEGORY_NOT_APPLICABLE                  /* 1 chars; Not_Applicable */
#define IMC_O     INDIC_MATRA_CATEGORY_OVERSTRUCK                      /* 10 chars; Overstruck */
#define IMC_R     INDIC_MATRA_CATEGORY_RIGHT                           /* 276 chars; Right */
#define IMC_T     INDIC_MATRA_CATEGORY_TOP                             /* 393 chars; Top */
#define IMC_TB    INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM                  /* 10 chars; Top_And_Bottom */
#define IMC_TBR   INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT        /* 1 chars; Top_And_Bottom_And_Right */
#define IMC_TL    INDIC_MATRA_CATEGORY_TOP_AND_LEFT                    /* 6 chars; Top_And_Left */
#define IMC_TLR   INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT          /* 4 chars; Top_And_Left_And_Right */
#define IMC_TR    INDIC_MATRA_CATEGORY_TOP_AND_RIGHT                   /* 13 chars; Top_And_Right */
#define IMC_VOL   INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT               /* 19 chars; Visual_Order_Left */

#pragma GCC diagnostic pop

/* Table-cell shorthand: _(C,x) -> INDIC_COMBINE_CATEGORIES (ISC_C, IMC_x). */
#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)
|
||||
|
||||
|
||||
static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {
|
||||
|
||||
|
||||
#define indic_offset_0x0028u 0
|
||||
|
||||
|
||||
/* Basic Latin */
|
||||
|
||||
/* 0028 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), _(x,x), _(x,x),
|
||||
/* 0030 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0038 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
#define indic_offset_0x00b0u 24
|
||||
|
||||
|
||||
/* Latin-1 Supplement */
|
||||
|
||||
/* 00B0 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 00B8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 00C0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 00C8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 00D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x),
|
||||
|
||||
#define indic_offset_0x0900u 64
|
||||
|
||||
|
||||
/* Devanagari */
|
||||
|
||||
/* 0900 */ _(Bi,T), _(Bi,T), _(Bi,T), _(Vs,R), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0908 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0910 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0918 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0920 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0928 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0930 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(A,x), _(M,R), _(M,L),
|
||||
/* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T),
|
||||
/* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(V,B), _(M,L), _(M,R),
|
||||
/* 0950 */ _(x,x), _(Ca,T), _(Ca,B), _(x,T), _(x,T), _(M,T), _(M,B), _(M,B),
|
||||
/* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0960 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0968 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0970 */ _(x,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
|
||||
/* Bengali */
|
||||
|
||||
/* 0980 */ _(CP,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0988 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x),
|
||||
/* 0990 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 09A0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 09A8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 09B0 */ _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x),
|
||||
/* 09B8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L),
|
||||
/* 09C0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L),
|
||||
/* 09C8 */ _(M,L), _(x,x), _(x,x), _(M,LR), _(M,LR), _(V,B), _(CD,x), _(x,x),
|
||||
/* 09D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
|
||||
/* 09D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x),
|
||||
/* 09E0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 09E8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 09F0 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 09F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(Bi,x), _(x,x), _(SM,T), _(x,x),
|
||||
|
||||
/* Gurmukhi */
|
||||
|
||||
/* 0A00 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0A08 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x),
|
||||
/* 0A10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0A18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0A20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0A28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0A30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(x,x),
|
||||
/* 0A38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(x,x), _(M,R), _(M,L),
|
||||
/* 0A40 */ _(M,R), _(M,B), _(M,B), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T),
|
||||
/* 0A48 */ _(M,T), _(x,x), _(x,x), _(M,T), _(M,T), _(V,B), _(x,x), _(x,x),
|
||||
/* 0A50 */ _(x,x), _(Ca,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0A58 */ _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x),
|
||||
/* 0A60 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0A68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0A70 */ _(Bi,T), _(GM,T), _(CP,x), _(CP,x), _(x,x), _(CM,B), _(x,x), _(x,x),
|
||||
/* 0A78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
/* Gujarati */
|
||||
|
||||
/* 0A80 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0A88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x),
|
||||
/* 0A90 */ _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0A98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0AA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0AA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0AB0 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0AB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L),
|
||||
/* 0AC0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(x,x), _(M,T),
|
||||
/* 0AC8 */ _(M,T), _(M,TR), _(x,x), _(M,R), _(M,R), _(V,B), _(x,x), _(x,x),
|
||||
/* 0AD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0AD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0AE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0AE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0AF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0AF8 */ _(x,x), _(C,x), _(Ca,T), _(Ca,T), _(Ca,T), _(N,T), _(N,T), _(N,T),
|
||||
|
||||
/* Oriya */
|
||||
|
||||
/* 0B00 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0B08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x),
|
||||
/* 0B10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0B18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0B20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0B28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0B30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0B38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T),
|
||||
/* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L),
|
||||
/* 0B48 */ _(M,TL), _(x,x), _(x,x), _(M,LR),_(M,TLR), _(V,B), _(x,x), _(x,x),
|
||||
/* 0B50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,TR),
|
||||
/* 0B58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x),
|
||||
/* 0B60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0B68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0B70 */ _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0B78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
/* Tamil */
|
||||
|
||||
/* 0B80 */ _(x,x), _(x,x), _(Bi,T), _(ML,x), _(x,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0B88 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(VI,x),
|
||||
/* 0B90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(x,x), _(x,x),
|
||||
/* 0B98 */ _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(C,x), _(C,x),
|
||||
/* 0BA0 */ _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0BA8 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x),
|
||||
/* 0BB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0BB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R),
|
||||
/* 0BC0 */ _(M,T), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(M,L), _(M,L),
|
||||
/* 0BC8 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T), _(x,x), _(x,x),
|
||||
/* 0BD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
|
||||
/* 0BD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0BE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0BE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0BF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0BF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
/* Telugu */
|
||||
|
||||
/* 0C00 */ _(Bi,T), _(Bi,R), _(Bi,R), _(Vs,R), _(Bi,T), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0C08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
|
||||
/* 0C10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0C18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0C20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0C28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0C30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0C38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(A,x), _(M,T), _(M,T),
|
||||
/* 0C40 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,T),
|
||||
/* 0C48 */ _(M,TB), _(x,x), _(M,T), _(M,T), _(M,T), _(V,T), _(x,x), _(x,x),
|
||||
/* 0C50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,B), _(x,x),
|
||||
/* 0C58 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0C60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0C68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0C70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0C78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
/* Kannada */
|
||||
|
||||
/* 0C80 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0C88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
|
||||
/* 0C90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0CA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0CA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0CB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0CB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T),
|
||||
/* 0CC0 */ _(M,TR), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,TR),
|
||||
/* 0CC8 */ _(M,TR), _(x,x), _(M,TR), _(M,TR), _(M,T), _(V,T), _(x,x), _(x,x),
|
||||
/* 0CD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), _(x,x),
|
||||
/* 0CD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(x,x),
|
||||
/* 0CE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0CE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0CF0 */ _(x,x),_(CWS,x),_(CWS,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0CF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
/* Malayalam */
|
||||
|
||||
/* 0D00 */ _(Bi,T), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0D08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x),
|
||||
/* 0D10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0D20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0D28 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0D30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0D38 */ _(C,x), _(C,x), _(C,x), _(PK,T), _(PK,T), _(A,x), _(M,R), _(M,R),
|
||||
/* 0D40 */ _(M,R), _(M,R), _(M,R), _(M,B), _(M,B), _(x,x), _(M,L), _(M,L),
|
||||
/* 0D48 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T),_(CPR,x), _(x,x),
|
||||
/* 0D50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(M,R),
|
||||
/* 0D58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x),
|
||||
/* 0D60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0D68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0D70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 0D78 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x),
|
||||
|
||||
/* Sinhala */
|
||||
|
||||
/* 0D80 */ _(x,x), _(x,x), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0D88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 0D90 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x),
|
||||
/* 0D98 */ _(x,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0DA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0DA8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0DB0 */ _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 0DB8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(x,x),
|
||||
/* 0DC0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x),
|
||||
/* 0DC8 */ _(x,x), _(x,x), _(V,T), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R),
|
||||
/* 0DD0 */ _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), _(x,x), _(M,B), _(x,x),
|
||||
/* 0DD8 */ _(M,R), _(M,L), _(M,TL), _(M,L), _(M,LR),_(M,TLR), _(M,LR), _(M,R),
|
||||
/* 0DE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x),
|
||||
/* 0DE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 0DF0 */ _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
#define indic_offset_0x1000u 1336
|
||||
|
||||
|
||||
/* Myanmar */
|
||||
|
||||
/* 1000 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 1008 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 1010 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 1018 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 1020 */ _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 1028 */ _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), _(M,T), _(M,T), _(M,B),
|
||||
/* 1030 */ _(M,B), _(M,L), _(M,T), _(M,T), _(M,T), _(M,T), _(Bi,T), _(TM,B),
|
||||
/* 1038 */ _(Vs,R), _(IS,x), _(PK,T), _(CM,R), _(CM,x), _(CM,B), _(CM,B), _(C,x),
|
||||
/* 1040 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 1048 */ _(Nd,x), _(Nd,x), _(x,x), _(CP,x), _(x,x), _(x,x), _(CP,x), _(x,x),
|
||||
/* 1050 */ _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R),
|
||||
/* 1058 */ _(M,B), _(M,B), _(C,x), _(C,x), _(C,x), _(C,x), _(CM,B), _(CM,B),
|
||||
/* 1060 */ _(CM,B), _(C,x), _(M,R), _(TM,R), _(TM,R), _(C,x), _(C,x), _(M,R),
|
||||
/* 1068 */ _(M,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(C,x), _(C,x),
|
||||
/* 1070 */ _(C,x), _(M,T), _(M,T), _(M,T), _(M,T), _(C,x), _(C,x), _(C,x),
|
||||
/* 1078 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 1080 */ _(C,x), _(C,x), _(CM,B), _(M,R), _(M,L), _(M,T), _(M,T), _(TM,R),
|
||||
/* 1088 */ _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,B), _(C,x), _(TM,R),
|
||||
/* 1090 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 1098 */ _(Nd,x), _(Nd,x), _(TM,R), _(TM,R), _(M,R), _(M,T), _(x,x), _(x,x),
|
||||
|
||||
#define indic_offset_0x1780u 1496
|
||||
|
||||
|
||||
/* Khmer */
|
||||
|
||||
/* 1780 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 1788 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 1790 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 1798 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* 17A0 */ _(C,x), _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 17A8 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x),
|
||||
/* 17B0 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(M,R), _(M,T),
|
||||
/* 17B8 */ _(M,T), _(M,T), _(M,T), _(M,B), _(M,B), _(M,B), _(M,TL),_(M,TLR),
|
||||
/* 17C0 */ _(M,LR), _(M,L), _(M,L), _(M,L), _(M,LR), _(M,LR), _(Bi,T), _(Vs,R),
|
||||
/* 17C8 */ _(M,R), _(RS,T), _(RS,T), _(SM,T),_(CSR,T), _(CK,T), _(SM,T), _(SM,T),
|
||||
/* 17D0 */ _(SM,T), _(PK,T), _(IS,x), _(SM,T), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 17D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(A,x), _(SM,T), _(x,x), _(x,x),
|
||||
/* 17E0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* 17E8 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
#define indic_offset_0x1cd0u 1608
|
||||
|
||||
|
||||
/* Vedic Extensions */
|
||||
|
||||
/* 1CD0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(x,x), _(Ca,O), _(Ca,B), _(Ca,B), _(Ca,B),
|
||||
/* 1CD8 */ _(Ca,B), _(Ca,B), _(Ca,T), _(Ca,T), _(Ca,B), _(Ca,B), _(Ca,B), _(Ca,B),
|
||||
/* 1CE0 */ _(Ca,T), _(Ca,R), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O),
|
||||
/* 1CE8 */ _(x,O), _(x,x), _(x,x), _(x,x), _(x,x), _(x,B), _(x,x), _(x,x),
|
||||
/* 1CF0 */ _(x,x), _(x,x), _(Vs,x), _(Vs,x), _(Ca,T),_(CWS,x),_(CWS,x), _(Ca,R),
|
||||
/* 1CF8 */ _(Ca,x), _(Ca,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
#define indic_offset_0x2008u 1656
|
||||
|
||||
|
||||
/* General Punctuation */
|
||||
|
||||
/* 2008 */ _(x,x), _(x,x), _(x,x), _(x,x),_(ZWNJ,x),_(ZWJ,x), _(x,x), _(x,x),
|
||||
/* 2010 */ _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
#define indic_offset_0x2070u 1672
|
||||
|
||||
|
||||
/* Superscripts and Subscripts */
|
||||
|
||||
/* 2070 */ _(x,x), _(x,x), _(x,x), _(x,x), _(SM,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 2078 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* 2080 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x),
|
||||
|
||||
#define indic_offset_0xa8e0u 1696
|
||||
|
||||
|
||||
/* Devanagari Extended */
|
||||
|
||||
/* A8E0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T),
|
||||
/* A8E8 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T),
|
||||
/* A8F0 */ _(Ca,T), _(Ca,T), _(Bi,x), _(Bi,x), _(x,x), _(x,x), _(x,x), _(x,x),
|
||||
/* A8F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(M,T),
|
||||
|
||||
#define indic_offset_0xa9e0u 1728
|
||||
|
||||
|
||||
/* Myanmar Extended-B */
|
||||
|
||||
/* A9E0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(M,T), _(x,x), _(C,x),
|
||||
/* A9E8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* A9F0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x),
|
||||
/* A9F8 */ _(Nd,x), _(Nd,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x),
|
||||
|
||||
#define indic_offset_0xaa60u 1760
|
||||
|
||||
|
||||
/* Myanmar Extended-A */
|
||||
|
||||
/* AA60 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* AA68 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x),
|
||||
/* AA70 */ _(x,x), _(C,x), _(C,x), _(C,x), _(CP,x), _(CP,x), _(CP,x), _(x,x),
|
||||
/* AA78 */ _(x,x), _(x,x), _(C,x), _(TM,R), _(TM,T), _(TM,R), _(C,x), _(C,x),
|
||||
|
||||
}; /* Table items: 1792; occupancy: 70% */
|
||||
|
||||
INDIC_TABLE_ELEMENT_TYPE
|
||||
_unicode_indic_get_categories (Uchar32 u)
|
||||
{
|
||||
switch (u >> 12)
|
||||
{
|
||||
case 0x0u:
|
||||
if (unlikely (u == 0x00A0u)) return _(CP,x);
|
||||
if (uc_in_range(u, 0x0028u, 0x003Fu)) return indic_table[u - 0x0028u + indic_offset_0x0028u];
|
||||
if (uc_in_range(u, 0x00B0u, 0x00D7u)) return indic_table[u - 0x00B0u + indic_offset_0x00b0u];
|
||||
if (uc_in_range(u, 0x0900u, 0x0DF7u)) return indic_table[u - 0x0900u + indic_offset_0x0900u];
|
||||
break;
|
||||
|
||||
case 0x1u:
|
||||
if (uc_in_range(u, 0x1000u, 0x109Fu)) return indic_table[u - 0x1000u + indic_offset_0x1000u];
|
||||
if (uc_in_range(u, 0x1780u, 0x17EFu)) return indic_table[u - 0x1780u + indic_offset_0x1780u];
|
||||
if (uc_in_range(u, 0x1CD0u, 0x1CFFu)) return indic_table[u - 0x1CD0u + indic_offset_0x1cd0u];
|
||||
break;
|
||||
|
||||
case 0x2u:
|
||||
if (unlikely (u == 0x25CCu)) return _(CP,x);
|
||||
if (uc_in_range(u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u];
|
||||
if (uc_in_range(u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u];
|
||||
break;
|
||||
|
||||
case 0xAu:
|
||||
if (uc_in_range(u, 0xA8E0u, 0xA8FFu)) return indic_table[u - 0xA8E0u + indic_offset_0xa8e0u];
|
||||
if (uc_in_range(u, 0xA9E0u, 0xA9FFu)) return indic_table[u - 0xA9E0u + indic_offset_0xa9e0u];
|
||||
if (uc_in_range(u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u];
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return _(x,x);
|
||||
}
|
||||
|
||||
/*
 * Drop the table-building shorthands so they do not leak past this point:
 * first the _() combiner, then every ISC_* alias, then every IMC_* alias.
 */
#undef _

/* Syllabic category aliases. */
#undef ISC_A
#undef ISC_Bi
#undef ISC_BJN
#undef ISC_Ca
#undef ISC_C
#undef ISC_CD
#undef ISC_CF
#undef ISC_CHL
#undef ISC_CIP
#undef ISC_CK
#undef ISC_CM
#undef ISC_CP
#undef ISC_CPR
#undef ISC_CPrf
#undef ISC_CS
#undef ISC_CSR
#undef ISC_CWS
#undef ISC_GM
#undef ISC_IS
#undef ISC_ZWJ
#undef ISC_ML
#undef ISC_ZWNJ
#undef ISC_N
#undef ISC_Nd
#undef ISC_NJ
#undef ISC_x
#undef ISC_PK
#undef ISC_RS
#undef ISC_SM
#undef ISC_TL
#undef ISC_TM
#undef ISC_V
#undef ISC_Vs
#undef ISC_Vo
#undef ISC_M
#undef ISC_VI

/* Matra (positional) category aliases. */
#undef IMC_B
#undef IMC_BL
#undef IMC_BR
#undef IMC_L
#undef IMC_LR
#undef IMC_x
#undef IMC_O
#undef IMC_R
#undef IMC_T
#undef IMC_TB
#undef IMC_TBR
#undef IMC_TL
#undef IMC_TLR
#undef IMC_TR
#undef IMC_VOL
|
||||
|
||||
/* == End of generated table == */
|
||||
97
src/font/utils/IndicVowelConstraints.txt
Normal file
97
src/font/utils/IndicVowelConstraints.txt
Normal file
@@ -0,0 +1,97 @@
|
||||
# Copied from https://docs.microsoft.com/en-us/typography/script-development/use
|
||||
# On October 23, 2018; with the document dated 02/07/2018.
|
||||
|
||||
0905 0946 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT E
|
||||
0905 093E ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AA
|
||||
0930 094D 0907 ; # DEVANAGARI LETTER RA, DEVANAGARI SIGN VIRAMA, DEVANAGARI LETTER I
|
||||
0909 0941 ; # DEVANAGARI LETTER U, DEVANAGARI VOWEL SIGN U
|
||||
090F 0945 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN CANDRA E
|
||||
090F 0946 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN SHORT E
|
||||
090F 0947 ; # DEVANAGARI LETTER E, DEVANAGARI VOWEL SIGN E
|
||||
0905 0949 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA O
|
||||
0906 0945 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN CANDRA E
|
||||
0905 094A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN SHORT O
|
||||
0906 0946 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN SHORT E
|
||||
0905 094B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN O
|
||||
0906 0947 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN E
|
||||
0905 094C ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AU
|
||||
0906 0948 ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN AI
|
||||
0905 0945 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN CANDRA E
|
||||
0905 093A ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OE
|
||||
0905 093B ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN OOE
|
||||
0906 093A ; # DEVANAGARI LETTER AA, DEVANAGARI VOWEL SIGN OE
|
||||
0905 094F ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN AW
|
||||
0905 0956 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UE
|
||||
0905 0957 ; # DEVANAGARI LETTER A, DEVANAGARI VOWEL SIGN UUE
|
||||
0985 09BE ; # BENGALI LETTER A, BENGALI VOWEL SIGN AA
|
||||
098B 09C3 ; # BENGALI LETTER VOCALIC R, BENGALI VOWEL SIGN VOCALIC R
|
||||
098C 09E2 ; # BENGALI LETTER VOCALIC L, BENGALI VOWEL SIGN VOCALIC L
|
||||
0A05 0A3E ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AA
|
||||
0A72 0A3F ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN I
|
||||
0A72 0A40 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN II
|
||||
0A73 0A41 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN U
|
||||
0A73 0A42 ; # GURMUKHI URA, GURMUKHI VOWEL SIGN UU
|
||||
0A72 0A47 ; # GURMUKHI IRI, GURMUKHI VOWEL SIGN EE
|
||||
0A05 0A48 ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AI
|
||||
0A73 0A4B ; # GURMUKHI URA, GURMUKHI VOWEL SIGN OO
|
||||
0A05 0A4C ; # GURMUKHI LETTER A, GURMUKHI VOWEL SIGN AU
|
||||
0A85 0ABE ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA
|
||||
0A85 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA E
|
||||
0A85 0AC7 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN E
|
||||
0A85 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AI
|
||||
0A85 0AC9 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN CANDRA O
|
||||
0A85 0ACB ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN O
|
||||
0A85 0ABE 0AC5 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN CANDRA E
|
||||
0A85 0ACC ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AU
|
||||
0A85 0ABE 0AC8 ; # GUJARATI LETTER A, GUJARATI VOWEL SIGN AA, GUJARATI VOWEL SIGN AI
|
||||
0AC5 0ABE ; # GUJARATI VOWEL SIGN CANDRA E, GUJARATI VOWEL SIGN AA
|
||||
0B05 0B3E ; # ORIYA LETTER A, ORIYA VOWEL SIGN AA
|
||||
0B0F 0B57 ; # ORIYA LETTER E, ORIYA AU LENGTH MARK
|
||||
0B13 0B57 ; # ORIYA LETTER O, ORIYA AU LENGTH MARK
|
||||
0C12 0C55 ; # TELUGU LETTER O, TELUGU LENGTH MARK
|
||||
0C12 0C4C ; # TELUGU LETTER O, TELUGU VOWEL SIGN AU
|
||||
0C3F 0C55 ; # TELUGU VOWEL SIGN I, TELUGU LENGTH MARK
|
||||
0C46 0C55 ; # TELUGU VOWEL SIGN E, TELUGU LENGTH MARK
|
||||
0C4A 0C55 ; # TELUGU VOWEL SIGN O, TELUGU LENGTH MARK
|
||||
0C89 0CBE ; # KANNADA LETTER U, KANNADA VOWEL SIGN AA
|
||||
0C92 0CCC ; # KANNADA LETTER O, KANNADA VOWEL SIGN AU
|
||||
0C8B 0CBE ; # KANNADA LETTER VOCALIC R, KANNADA VOWEL SIGN AA
|
||||
0D07 0D57 ; # MALAYALAM LETTER I, MALAYALAM AU LENGTH MARK
|
||||
0D09 0D57 ; # MALAYALAM LETTER U, MALAYALAM AU LENGTH MARK
|
||||
0D0E 0D46 ; # MALAYALAM LETTER E, MALAYALAM VOWEL SIGN E
|
||||
0D12 0D3E ; # MALAYALAM LETTER O, MALAYALAM VOWEL SIGN AA
|
||||
0D12 0D57 ; # MALAYALAM LETTER O, MALAYALAM AU LENGTH MARK
|
||||
0D85 0DCF ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN AELA-PILLA
|
||||
0D85 0DD0 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN KETTI AEDA-PILLA
|
||||
0D85 0DD1 ; # SINHALA LETTER AYANNA, SINHALA VOWEL SIGN DIGA AEDA-PILLA
|
||||
0D8B 0DDF ; # SINHALA LETTER UYANNA, SINHALA VOWEL SIGN GAYANUKITTA
|
||||
0D8D 0DD8 ; # SINHALA LETTER IRUYANNA, SINHALA VOWEL SIGN GAETTA-PILLA
|
||||
0D8F 0DDF ; # SINHALA LETTER ILUYANNA, SINHALA VOWEL SIGN GAYANUKITTA
|
||||
0D91 0DCA ; # SINHALA LETTER EYANNA, SINHALA SIGN AL-LAKUNA
|
||||
0D91 0DD9 ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA
|
||||
0D91 0DDA ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN DIGA KOMBUVA
|
||||
0D91 0DDC ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA AELA-PILLA
|
||||
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
|
||||
0D91 0DDD ; # SINHALA LETTER EYANNA, SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA
|
||||
0D94 0DDF ; # SINHALA LETTER OYANNA, SINHALA VOWEL SIGN GAYANUKITTA
|
||||
11005 11038 ; # BRAHMI LETTER A, BRAHMI VOWEL SIGN AA
|
||||
1100B 1103E ; # BRAHMI LETTER VOCALIC R, BRAHMI VOWEL SIGN VOCALIC R
|
||||
1100F 11042 ; # BRAHMI LETTER E, BRAHMI VOWEL SIGN E
|
||||
11680 116AD ; # TAKRI LETTER A, TAKRI VOWEL SIGN AA
|
||||
11686 116B2 ; # TAKRI LETTER E, TAKRI VOWEL SIGN E
|
||||
11680 116B4 ; # TAKRI LETTER A, TAKRI VOWEL SIGN O
|
||||
11680 116B5 ; # TAKRI LETTER A, TAKRI VOWEL SIGN AU
|
||||
112B0 112E0 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AA
|
||||
112B0 112E5 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN E
|
||||
112B0 112E6 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AI
|
||||
112B0 112E7 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN O
|
||||
112B0 112E8 ; # KHUDAWADI LETTER A, KHUDAWADI VOWEL SIGN AU
|
||||
11481 114B0 ; # TIRHUTA LETTER A, TIRHUTA VOWEL SIGN AA
|
||||
114AA 114B5 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC R
|
||||
114AA 114B6 ; # TIRHUTA LETTER LA, TIRHUTA VOWEL SIGN VOCALIC RR
|
||||
1148B 114BA ; # TIRHUTA LETTER E, TIRHUTA VOWEL SIGN SHORT E
|
||||
1148D 114BA ; # TIRHUTA LETTER O, TIRHUTA VOWEL SIGN SHORT E
|
||||
11600 11639 ; # MODI LETTER A, MODI VOWEL SIGN E
|
||||
11600 1163A ; # MODI LETTER A, MODI VOWEL SIGN AI
|
||||
11601 11639 ; # MODI LETTER AA, MODI VOWEL SIGN E
|
||||
11601 1163A ; # MODI LETTER AA, MODI VOWEL SIGN AI
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
url='https://www.unicode.org/Public/11.0.0/ucd/'
|
||||
files='UnicodeData.txt IndicPositionalCategory.txt IndicSyllabicCategory.txt ArabicShaping.txt BidiBrackets.txt BidiMirroring.txt Blocks.txt'
|
||||
files='Scripts.txt UnicodeData.txt IndicPositionalCategory.txt IndicSyllabicCategory.txt ArabicShaping.txt BidiBrackets.txt BidiMirroring.txt Blocks.txt'
|
||||
|
||||
if test ! -d ucd; then
|
||||
mkdir ucd
|
||||
|
||||
270
src/font/utils/gen-indic-table.py
Executable file
270
src/font/utils/gen-indic-table.py
Executable file
@@ -0,0 +1,270 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copied from HarfBuzz
|
||||
# https://github.com/harfbuzz/harfbuzz
|
||||
# On 2019-03-07
|
||||
#
|
||||
# Revised by Vincent Wei for MiniGUI 3.4
|
||||
|
||||
from __future__ import print_function, division, absolute_import
|
||||
|
||||
import io, sys
|
||||
|
||||
#if len (sys.argv) != 4:
|
||||
# print ("usage: ./gen-indic-table.py", file=sys.stderr)
|
||||
# sys.exit (1)
|
||||
|
||||
# Code points that survive the filtering step below regardless of which
# block they belong to.
ALLOWED_SINGLES = [0x00A0, 0x25CC]

# Names of the Unicode blocks (as spelled in ucd/Blocks.txt) whose code points
# are kept in the generated table.
ALLOWED_BLOCKS = [
    'Basic Latin',
    'Latin-1 Supplement',
    'Devanagari',
    'Bengali',
    'Gurmukhi',
    'Gujarati',
    'Oriya',
    'Tamil',
    'Telugu',
    'Kannada',
    'Malayalam',
    'Sinhala',
    'Myanmar',
    'Khmer',
    'Vedic Extensions',
    'General Punctuation',
    'Superscripts and Subscripts',
    'Devanagari Extended',
    'Myanmar Extended-B',
    'Myanmar Extended-A',
]

# Input files, in the order the rest of the script indexes them:
# 0 = syllabic category, 1 = positional category, 2 = block name.
filenames = ['ucd/IndicSyllabicCategory.txt', 'ucd/IndicPositionalCategory.txt', 'ucd/Blocks.txt']

files = [io.open (x, encoding='utf-8') for x in filenames]

# The first two lines of each file are kept verbatim; they are echoed into
# the banner of the generated table.
headers = [[f.readline () for i in range (2)] for f in files]

# data[i] maps code point -> property value read from file i.
# values[i] maps property value -> number of code points carrying it.
data = [{} for f in files]
values = [{} for f in files]
for i, f in enumerate (files):
    for line in f:

        # Drop everything from the first '#' on (trailing comment).
        record = line.partition ('#')[0]

        fields = [x.strip () for x in record.split (';')]
        if len (fields) == 1:
            # No ';' separator: blank or comment-only line.
            continue

        # First field is either a single code point or a "start..end" range.
        bounds = fields[0].split ('..')
        start = int (bounds[0], 16)
        end = start if len (bounds) == 1 else int (bounds[1], 16)

        t = fields[1]

        for u in range (start, end + 1):
            data[i][u] = t
        values[i][t] = values[i].get (t, 0) + end - start + 1

# Everything needed has been read; release the file handles instead of
# leaving them open for the remainder of the run.
for f in files:
    f.close ()
|
||||
|
||||
# Fold the three per-file dictionaries into one: code point ->
# [syllabic category, positional category, block name].
defaults = ('Other', 'Not_Applicable', 'No_Block')
for i,v in enumerate (defaults):
    # Count one extra occurrence of each default so it is listed in `values`.
    values[i][v] = values[i].get (v, 0) + 1

merged = {}
for column, table in enumerate (data):
    for cp, value in table.items ():
        if cp not in merged:
            if column == 2:
                # A block name alone does not create an entry; only code
                # points already seen in one of the category files count.
                continue
            merged[cp] = list (defaults)
        merged[cp][column] = value

# Keep only the explicitly allowed code points and blocks.
data = {cp: props for cp, props in merged.items ()
        if cp in ALLOWED_SINGLES or props[2] in ALLOWED_BLOCKS}
del merged
num = len (data)

# Reclassify these code points when the input files leave them as 'Other'.
for cp in (0x17CD, 0x17CE, 0x17CF, 0x17D0, 0x17D3):
    entry = data[cp]
    if entry[0] == 'Other':
        entry[0] = "Vowel_Dependent"

# Move the outliers NO-BREAK SPACE and DOTTED CIRCLE out
singles = {cp: data.pop (cp) for cp in ALLOWED_SINGLES}
|
||||
|
||||
# Opening banner of the generated C file, followed by the header lines that
# were read from the three input files.
for banner_line in (
    "/* == Start of generated table == */",
    "/*",
    " * The following table is generated by running:",
    " *",
    " * ./gen-indic-table.py",
    " *",
    " * on ucd/IndicSyllabicCategory.txt ucd/IndicPositionalCategory.txt ucd/Blocks.txt files with these headers:",
    " *",
):
    print (banner_line)
for file_header in headers:
    for header_line in file_header:
        print (" * %s" % (header_line.strip()))
print (" */")
print ()
print ('#include "unicode-shape-complex-indic.h"')
print ()
|
||||
|
||||
# Hand-picked abbreviations, one dictionary per category kind
# (index 0: syllabic, index 1: positional/matra). Values missing here get an
# abbreviation derived from their capital letters in the loop below.
short = [{
    "Bindu": 'Bi',
    "Cantillation_Mark": 'Ca',
    "Joiner": 'ZWJ',
    "Non_Joiner": 'ZWNJ',
    "Number": 'Nd',
    "Visarga": 'Vs',
    "Vowel": 'Vo',
    "Vowel_Dependent": 'M',
    "Consonant_Prefixed": 'CPrf',
    "Other": 'x',
},{
    "Not_Applicable": 'x',
}]

# Reverse mapping (abbreviation -> full value), used to detect clashes
# between automatically derived abbreviations.
all_shorts = [{abbr: full for full, abbr in table.items ()} for table in short]

what = ["INDIC_SYLLABIC_CATEGORY", "INDIC_MATRA_CATEGORY"]
what_short = ["ISC", "IMC"]
print ('#pragma GCC diagnostic push')
print ('#pragma GCC diagnostic ignored "-Wunused-macros"')
for i in range (2):
    print ()
    vv = sorted (values[i].keys ())
    for v in vv:
        if v in short[i]:
            s = short[i][v]
        else:
            # Derive the alias from the upper-case letters of the value,
            # ignoring the connective "_And_".
            v_no_and = v.replace ('_And_', '_')
            s = ''.join (c for c in v_no_and if 'A' <= c <= 'Z')
            if s in all_shorts[i]:
                raise Exception ("Duplicate short value alias", v, all_shorts[i][s])
            all_shorts[i][s] = v
            short[i][v] = s
        padding = ' '* ((48-1 - len (what[i]) - 1 - len (v)) // 8)
        print ("#define %s_%s %s_%s %s/* %3d chars; %s */" %
               (what_short[i], s, what[i], v.upper (), padding, values[i][v], v))
print ('#pragma GCC diagnostic pop')
print ()
print ("#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M)")
print ()
print ()
|
||||
|
||||
# Running statistics updated by print_block (): number of table slots
# emitted, number of slots backed by real data, and the last block name for
# which a heading comment was printed.
total = 0
used = 0
last_block = None

def print_block (block, start, end, data):
    """Print the table entries for code points start..end (inclusive).

    Both ends must be aligned to rows of eight entries. When `block` is
    truthy and differs from the previously printed block, a heading comment
    is printed first. Code points absent from `data` are printed with the
    default categories. Updates the module-level `total`, `used` and
    `last_block`.
    """
    global total, used, last_block
    if block and block != last_block:
        print ()
        print ()
        print (" /* %s */" % block)
    assert start % 8 == 0
    assert (end+1) % 8 == 0
    present = 0
    for cp in range (start, end+1):
        if cp % 8 == 0:
            # Start a new row, labelled with its first code point.
            print ()
            print (" /* %04X */" % cp, end="")
        if cp in data:
            present += 1
            entry = data[cp]
        else:
            entry = defaults
        cell = "_(%s,%s)," % (short[0][entry[0]], short[1][entry[1]])
        print ("%9s" % cell, end="")

    total += end - start + 1
    used += present
    if block:
        last_block = block
|
||||
|
||||
# Emit the body of `indic_table`: contiguous runs of code points (rounded to
# rows of eight), with an `indic_offset_0x....u` macro announcing the table
# index at which each new run starts.
uu = sorted (data.keys ())

last = -100000
num = 0
offset = 0
starts = []
ends = []
print ("static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = {")
for u in uu:
    if u <= last:
        # Already covered by the previously printed block.
        continue
    block = data[u][2]

    start = u//8*8
    end = start+1
    # `uu` is exactly the key set of `data`, so testing membership in the
    # dict is equivalent to the list search and avoids a linear scan per step.
    while end in data and block == data[end][2]:
        end += 1
    end = (end-1)//8*8 + 7

    if start != last + 1:
        if start - last <= 1+16*3:
            # Small gap: pad it with default entries so the run stays contiguous.
            print_block (None, last+1, start-1, data)
            last = start-1
        else:
            # Large gap: close the current run (if any) and open a new one.
            if last >= 0:
                ends.append (last + 1)
                offset += ends[-1] - starts[-1]
            print ()
            print ()
            print ("#define indic_offset_0x%04xu %d" % (start, offset))
            starts.append (start)

    print_block (block, start, end, data)
    last = end
# Close the final run.
ends.append (last + 1)
offset += ends[-1] - starts[-1]
print ()
print ()
occupancy = used * 100. / total
page_bits = 12
print ("}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy))
print ()
|
||||
# Emit the C lookup function: a switch on the code point's page
# (u >> page_bits) whose cases test the singles and the table runs that
# start or end on that page.
print ("INDIC_TABLE_ELEMENT_TYPE")
print ("_unicode_indic_get_categories (Uchar32 u)")
print ("{")
print (" switch (u >> %d)" % page_bits)
print (" {")
pages = {cp >> page_bits for cp in starts + ends + list (singles.keys ())}
for p in sorted (pages):
    print (" case 0x%0Xu:" % p)
    for u, d in singles.items ():
        if u >> page_bits == p:
            print (" if (unlikely (u == 0x%04Xu)) return _(%s,%s);" % (u, short[0][d[0]], short[1][d[1]]))
    for (start, end) in zip (starts, ends):
        if p == start >> page_bits or p == end >> page_bits:
            offset = "indic_offset_0x%04xu" % start
            print (" if (uc_in_range(u, 0x%04Xu, 0x%04Xu)) return indic_table[u - 0x%04Xu + %s];" % (start, end-1, start, offset))
    print (" break;")
    print ("")
print (" default:")
print (" break;")
print (" }")
print (" return _(x,x);")
print ("}")
print ()
|
||||
# Undefine every helper macro introduced at the top of the generated file.
print ("#undef _")
for i in range (2):
    # With print_function in effect a bare `print` is just a name lookup and
    # emits nothing; call it so each group is preceded by a blank line, the
    # same way the matching #define groups are printed.
    print ()
    vv = sorted (values[i].keys ())
    for v in vv:
        print ("#undef %s_%s" %
               (what_short[i], short[i][v]))
print ()
print ("/* == End of generated table == */")

# Maintain at least 30% occupancy in the table
if occupancy < 30:
    raise Exception ("Table too sparse, please investigate: ", occupancy)
|
||||
219
src/font/utils/gen-vowel-constraints.py
Executable file
219
src/font/utils/gen-vowel-constraints.py
Executable file
@@ -0,0 +1,219 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
"""Generator of the function to prohibit certain vowel sequences.
|
||||
|
||||
It creates ``_hb_preprocess_text_vowel_constraints``, which inserts dotted
|
||||
circles into sequences prohibited by the USE script development spec.
|
||||
This function should be used as the ``preprocess_text`` of an
|
||||
``hb_ot_complex_shaper_t``.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import collections
|
||||
# Choose a `write (s)` helper that sends UTF-8 encoded text to standard
# output. Which implementation is used depends on whether the `HTMLParser`
# module can be imported or only `html.parser`.
try:
    from HTMLParser import HTMLParser
except ImportError:
    from html.parser import HTMLParser
    def write (s):
        # Flush pending text output before writing to the underlying
        # byte stream so the two do not get reordered.
        sys.stdout.flush ()
        sys.stdout.buffer.write (s.encode ('utf-8'))
else:
    def write (s):
        print (s.encode ('utf-8'), end='')
|
||||
import itertools
|
||||
import io
|
||||
import sys
|
||||
|
||||
#if len (sys.argv) != 3:
|
||||
# print ('usage: ./gen-vowel-constraints.py', file=sys.stderr)
|
||||
# sys.exit (1)
|
||||
|
||||
# Read ucd/Scripts.txt, producing:
#   scripts_header - the first two lines of the file, verbatim
#   scripts        - code point -> script name
#   script_order   - script name -> start of the first range listed for it
with io.open ('ucd/Scripts.txt', encoding='utf-8') as f:
    scripts_header = [f.readline () for i in range (2)]
    scripts = {}
    script_order = {}
    for line in f:
        # Drop the trailing comment, if any.
        record = line.partition ('#')[0]
        fields = [x.strip () for x in record.split (';')]
        if len (fields) == 1:
            # No ';' separator: blank or comment-only line.
            continue
        # First field is a single code point or a "start..end" range.
        bounds = fields[0].split ('..')
        start = int (bounds[0], 16)
        end = start if len (bounds) == 1 else int (bounds[1], 16)
        script = fields[1]
        for u in range (start, end + 1):
            scripts[u] = script
        script_order.setdefault (script, start)
|
||||
|
||||
class ConstraintSet (object):
    """A collection of prohibited code point sequences.

    Args:
        constraint (List[int]): The first prohibited code point sequence.
    """

    def __init__ (self, constraint):
        # `_c` is either a list or a dict. A list is one prohibited sequence
        # of code points. A dict maps a leading code point to the
        # ConstraintSet of everything that may follow it, so the dict as a
        # whole stands for a set of prohibited sequences.
        self._c = constraint

    def add (self, constraint):
        """Add a constraint to this set."""
        if not constraint:
            return
        head, tail = constraint[0], constraint[1:]
        if isinstance (self._c, list):
            if self._c[:len (constraint)] == constraint:
                # The new sequence is a prefix of (or equal to) the stored
                # one: keep only the shorter sequence.
                self._c = constraint
            elif constraint[:len (self._c)] != self._c:
                # The two sequences diverge: switch to the dict form, keyed
                # by the stored sequence's first code point.
                self._c = {self._c[0]: ConstraintSet (self._c[1:])}
            # Otherwise the stored sequence is a prefix of the new one and
            # nothing changes.
        if isinstance (self._c, dict):
            if head in self._c:
                self._c[head].add (tail)
            else:
                self._c[head] = ConstraintSet (tail)

    def _indent (self, depth):
        # As written, every space is replaced, so this yields `depth` tabs.
        # NOTE(review): confirm the whitespace inside these two literals
        # matches the upstream script.
        return (' ' * depth).replace (' ', '\t')

    def _sequence_lines (self, index, depth):
        """Return the output fragments for the list form of `_c`."""
        pad = self._indent (depth)
        seq = self._c
        if len (seq) == 0:
            return ['{}matched = true;\n'.format (pad)]
        if len (seq) == 1:
            return ['{}matched = 0x{:04X}u == buffer->cur ({}).codepoint;\n'.format (pad, seq[0], index or '')]

        inner = self._indent (depth + 1)
        cont = self._indent (depth + 2)
        parts = [
            '{}if (0x{:04X}u == buffer->cur ({}).codepoint &&\n'.format (pad, seq[0], index),
            '{}buffer->idx + {} < count &&\n'.format (cont, len (seq)),
        ]
        final = len (seq) - 1
        for pos in range (1, len (seq)):
            closer = ')' if pos == final else ' &&'
            parts.append ('{}0x{:04X}u == buffer->cur ({}).codepoint{}\n'.format (
                cont, seq[pos], index + pos, closer))
        parts.append ('{}{{\n'.format (pad))
        # One next_glyph call per code point of the sequence.
        parts.extend (['{}buffer->next_glyph ();\n'.format (inner)] * len (seq))
        parts.append ('{}_output_dotted_circle (buffer);\n'.format (inner))
        parts.append ('{}}}\n'.format (pad))
        return parts

    def _switch_lines (self, index, depth):
        """Return the output fragments for the dict form of `_c`."""
        pad = self._indent (depth)
        parts = [
            '{}switch (buffer->cur ({}).codepoint)\n'.format(pad, index or ''),
            '{}{{\n'.format (pad),
        ]
        # Group leading code points whose continuations render identically so
        # that they can share a single case body.
        groups = collections.defaultdict (set)
        for lead, follow in sorted (self._c.items ()):
            groups[follow.__str__ (index + 1, depth + 2)].add (lead)
        # Emit the groups ordered by their smallest case label.
        for body, labels in sorted (groups.items (), key=lambda item: min (item[1])):
            # Case labels are laid out four to a line.
            for pos, cp in enumerate (sorted (labels)):
                lead_in = self._indent (depth + 1) if pos % 4 == 0 else ' '
                line_end = '\n' if pos % 4 == 3 else ''
                parts.append (lead_in)
                parts.append ('case 0x{:04X}u:{}'.format (cp, line_end))
            if len (labels) % 4 != 0:
                parts.append ('\n')
            parts.append (body)
            parts.append ('{}break;\n'.format (self._indent (depth + 2)))
        parts.append ('{}}}\n'.format (pad))
        return parts

    def __str__ (self, index=0, depth=4):
        # `index` is the offset passed to buffer->cur () in the emitted code;
        # `depth` is the indentation level of the emitted code.
        if isinstance (self._c, list):
            parts = self._sequence_lines (index, depth)
        else:
            parts = self._switch_lines (index, depth)
        return ''.join (parts)
|
||||
|
||||
# Read the prohibited sequences and group them by the script of their first
# code point: script name -> ConstraintSet.
constraints = {}
with io.open ('IndicVowelConstraints.txt', encoding='utf-8') as f:
    # The first two lines are kept for the banner of the generated file.
    constraints_header = [f.readline ().strip () for i in range (2)]
    for line in f:
        # Drop the trailing comment, then take the hex code points listed
        # before the first ';'.
        record = line.partition ('#')[0]
        constraint = [int (cp, 16) for cp in record.split (';')[0].split ()]
        if not constraint:
            continue
        assert 2 <= len (constraint), 'Prohibited sequence is too short: {}'.format (constraint)
        script = scripts[constraint[0]]
        if script not in constraints:
            constraints[script] = ConstraintSet (constraint)
        else:
            constraints[script].add (constraint)
    assert constraints, 'No constraints found'
|
||||
|
||||
# Banner of the generated file, including the header lines of both inputs.
print ('/* == Start of generated functions == */')
print ('/*')
print (' * The following functions are generated by running:')
print (' *')
print (' * %s' % sys.argv[0])
print (' *')
print (' * on IndicVowelConstraints.txt and ucd/Scripts.txt files with these headers:')
print (' *')
for header_line in constraints_header:
    print (' * %s' % header_line.strip ())
print (' *')
for header_line in scripts_header:
    print (' * %s' % header_line.strip ())
print (' */')
print ()
print ('#include "hb-ot-shape-complex-vowel-constraints.hh"')
print ()

# Two small helpers used by the generated matching code.
for code_line in (
    'static void',
    '_output_dotted_circle (hb_buffer_t *buffer)',
    '{',
    ' hb_glyph_info_t &dottedcircle = buffer->output_glyph (0x25CCu);',
    ' _hb_glyph_info_reset_continuation (&dottedcircle);',
    '}',
):
    print (code_line)
print ()
for code_line in (
    'static void',
    '_output_with_dotted_circle (hb_buffer_t *buffer)',
    '{',
    ' _output_dotted_circle (buffer);',
    ' buffer->next_glyph ();',
    '}',
):
    print (code_line)
print ()

# Prologue of the entry point, up to the opening of the per-script switch.
for code_line in (
    'void',
    '_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED,',
    '\t\t\t\t hb_buffer_t *buffer,',
    '\t\t\t\t hb_font_t *font HB_UNUSED)',
    '{',
    ' /* UGLY UGLY UGLY business of adding dotted-circle in the middle of',
    ' * vowel-sequences that look like another vowel. Data for each script',
    ' * collected from the USE script development spec.',
    ' *',
    ' * https://github.com/harfbuzz/harfbuzz/issues/1019',
    ' */',
    ' bool processed = false;',
    ' buffer->clear_output ();',
    ' unsigned int count = buffer->len;',
    ' switch ((unsigned) buffer->props.script)',
    ' {',
):
    print (code_line)

# One case per script, ordered by where each script first appears in
# ucd/Scripts.txt.
for script, script_constraints in sorted (constraints.items (), key=lambda item: script_order[item[0]]):
    print (' case HB_SCRIPT_{}:'.format (script.upper ()))
    print (' for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;)')
    print (' {')
    print ('\tbool matched = false;')
    write (str (script_constraints))
    print ('\tbuffer->next_glyph ();')
    print ('\tif (matched) _output_with_dotted_circle (buffer);')
    print (' }')
    print (' processed = true;')
    print (' break;')
    print ()

# Default case and epilogue of the entry point.
for code_line in (
    ' default:',
    ' break;',
    ' }',
    ' if (processed)',
    ' {',
    ' if (buffer->idx < count)',
    ' buffer->next_glyph ();',
    ' }',
    '}',
):
    print (code_line)

print ()
print ('/* == End of generated functions == */')
|
||||
Reference in New Issue
Block a user