mirror of
https://github.com/ccxvii/mujs.git
synced 2026-02-05 17:29:43 +08:00
Generate new tables for isalpha/toupper/tolower from UnicodeData.txt
This commit is contained in:
77
genucd.py
Normal file
77
genucd.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# Create utfdata.h from UnicodeData.txt
|
||||
|
||||
tolower = []
toupper = []
isalpha = []

# UnicodeData.txt is a semicolon separated table. Fields used here:
#   0  code point (hex)
#   1  character name
#   2  general category (anything starting with 'L' is a letter)
#   12 simple uppercase mapping (hex, may be empty)
#   13 simple lowercase mapping (hex, may be empty)
#
# Large blocks (CJK ideographs, Hangul syllables, ...) are not listed one code
# point per line. They appear as a "<..., First>" / "<..., Last>" pair that
# stands for every code point in between, so such a pair is expanded into the
# full range instead of being recorded as two isolated code points.
#
# Non-BMP code points are kept; filter on code > 65535 inside the loop to
# drop them.
range_first = None
with open("UnicodeData.txt") as ucd:
	for line in ucd:
		line = line.split(";")
		code = int(line[0],16)
		if line[1].endswith(", First>"):
			# Remember where the range starts; it is handled on its "Last" line.
			range_first = code
			continue
		if line[1].endswith(", Last>") and range_first is not None:
			first = range_first
			range_first = None
		else:
			first = code
		if line[2][0] == 'L':
			isalpha.extend(range(first, code + 1))
		if line[12]:
			toupper.append((code,int(line[12],16)))
		if line[13]:
			tolower.append((code,int(line[13],16)))
|
||||
|
||||
def dumpalpha():
	"""Print the alphabetic code points as two C tables of Rune.

	The global list `isalpha` is collapsed into runs of consecutive code
	points. Runs longer than one code point are printed as "first,last,"
	pairs in ucd_alpha2; runs of exactly one code point are printed as a
	single value in ucd_alpha1.
	"""
	runs = []
	first = 0
	last = 0
	for cp in isalpha:
		if cp != last + 1:
			# cp does not continue the current run: flush it (a run is only
			# flushed once a non-zero start has been recorded) and start anew.
			if first:
				runs.append((first, last))
			first = cp
		last = cp
	# Flush whatever run was still open when the input ended.
	runs.append((first, last))

	print("")
	print("static const Rune ucd_alpha2[] = {")
	for lo, hi in runs:
		if hi > lo:
			print("%s,%s," % (hex(lo), hex(hi)))
	print("};")

	print("")
	print("static const Rune ucd_alpha1[] = {")
	for lo, hi in runs:
		if hi == lo:
			print("%s," % hex(lo))
	print("};")
|
||||
|
||||
def dumpmap(name, input):
	"""Print a code point mapping as two C tables of Rune.

	`input` is an iterable of (source, target) code point pairs. Consecutive
	pairs whose source and target both advance by exactly one are merged
	into a run. Runs covering more than one source code point are printed
	into <name>2 as "first,last,delta," and single code point runs into
	<name>1 as "code,delta,", where delta is target minus source (decimal).
	"""
	runs = []
	run_src = run_dst = 0
	last_src = last_dst = 0
	for src, dst in input:
		extends_run = (src == last_src + 1 and dst == last_dst + 1)
		if not extends_run:
			# Flush the open run (only once a non-zero start has been
			# recorded) and begin a new one at this pair.
			if run_src:
				runs.append((run_src, last_src, run_dst))
			run_src, run_dst = src, dst
		last_src, last_dst = src, dst
	# Flush whatever run was still open when the input ended.
	runs.append((run_src, last_src, run_dst))

	print("")
	print("static const Rune " + name + "2[] = {")
	for lo, hi, target in runs:
		if hi > lo:
			print("%s,%s,%s," % (hex(lo), hex(hi), str(target - lo)))
	print("};")

	print("")
	print("static const Rune " + name + "1[] = {")
	for lo, hi, target in runs:
		if hi == lo:
			print("%s,%s," % (hex(lo), str(target - lo)))
	print("};")
|
||||
|
||||
# Emit the generated header on stdout: banner first, then the alphabetic
# tables, then the lowercase and uppercase mapping tables in that order.
print("/* This file was automatically created from UnicodeData.txt */")
dumpalpha()
for table_name, mapping in (("ucd_tolower", tolower), ("ucd_toupper", toupper)):
	dumpmap(table_name, mapping)
|
||||
1
one.c
1
one.c
@@ -24,4 +24,3 @@
|
||||
#include "jsvalue.c"
|
||||
#include "regexp.c"
|
||||
#include "utf.c"
|
||||
#include "utftype.c"
|
||||
|
||||
93
utf.c
93
utf.c
@@ -15,6 +15,9 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "utf.h"
|
||||
#include "utfdata.h"
|
||||
|
||||
/*
 * Number of elements in the array a, as an int. Only meaningful for real
 * arrays, not pointers. The whole expansion is parenthesized so that it
 * behaves as a single operand in any surrounding expression.
 */
#define nelem(a) ((int)(sizeof (a) / sizeof (a)[0]))
|
||||
|
||||
typedef unsigned char uchar;
|
||||
|
||||
@@ -210,3 +213,93 @@ utflen(const char *s)
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Binary search in a table of n entries, each ne Runes wide, keyed on the
 * first Rune of every entry. The table is assumed to be sorted ascending by
 * that key. Returns a pointer to the last entry whose key is <= c, or 0 when
 * the table is empty or c is smaller than every key. The caller still has to
 * check whether c really belongs to the returned entry.
 */
static const Rune *
ucd_bsearch(Rune c, const Rune *t, int n, int ne)
{
	const Rune *mid;
	int half;

	for(; n > 1; ) {
		half = n/2;
		mid = t + half*ne;
		if(c < mid[0]) {
			/* Answer lies strictly before mid: keep the lower half. */
			n = half;
			continue;
		}
		/* mid is a candidate: keep it and everything after it. */
		t = mid;
		n -= half;
	}
	return (n && c >= t[0]) ? t : 0;
}
|
||||
|
||||
Rune
|
||||
tolowerrune(Rune c)
|
||||
{
|
||||
const Rune *p;
|
||||
|
||||
p = ucd_bsearch(c, ucd_tolower2, nelem(ucd_tolower2)/3, 3);
|
||||
if(p && c >= p[0] && c <= p[1])
|
||||
return c + p[2];
|
||||
p = ucd_bsearch(c, ucd_tolower1, nelem(ucd_tolower1)/2, 2);
|
||||
if(p && c == p[0])
|
||||
return c + p[1];
|
||||
return c;
|
||||
}
|
||||
|
||||
Rune
|
||||
toupperrune(Rune c)
|
||||
{
|
||||
const Rune *p;
|
||||
|
||||
p = ucd_bsearch(c, ucd_toupper2, nelem(ucd_toupper2)/3, 3);
|
||||
if(p && c >= p[0] && c <= p[1])
|
||||
return c + p[2];
|
||||
p = ucd_bsearch(c, ucd_toupper1, nelem(ucd_toupper1)/2, 2);
|
||||
if(p && c == p[0])
|
||||
return c + p[1];
|
||||
return c;
|
||||
}
|
||||
|
||||
int
|
||||
islowerrune(Rune c)
|
||||
{
|
||||
const Rune *p;
|
||||
|
||||
p = ucd_bsearch(c, ucd_toupper2, nelem(ucd_toupper2)/3, 3);
|
||||
if(p && c >= p[0] && c <= p[1])
|
||||
return 1;
|
||||
p = ucd_bsearch(c, ucd_toupper1, nelem(ucd_toupper1)/2, 2);
|
||||
if(p && c == p[0])
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
isupperrune(Rune c)
|
||||
{
|
||||
const Rune *p;
|
||||
|
||||
p = ucd_bsearch(c, ucd_tolower2, nelem(ucd_tolower2)/3, 3);
|
||||
if(p && c >= p[0] && c <= p[1])
|
||||
return 1;
|
||||
p = ucd_bsearch(c, ucd_tolower1, nelem(ucd_tolower1)/2, 2);
|
||||
if(p && c == p[0])
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
isalpharune(Rune c)
|
||||
{
|
||||
const Rune *p;
|
||||
|
||||
p = ucd_bsearch(c, ucd_alpha2, nelem(ucd_alpha2)/2, 2);
|
||||
if(p && c >= p[0] && c <= p[1])
|
||||
return 1;
|
||||
p = ucd_bsearch(c, ucd_alpha1, nelem(ucd_alpha1), 1);
|
||||
if(p && c == p[0])
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
6
utf.h
6
utf.h
@@ -23,11 +23,8 @@ typedef int Rune; /* 32 bits */
|
||||
|
||||
#define isalpharune jsU_isalpharune
|
||||
#define islowerrune jsU_islowerrune
|
||||
#define isspacerune jsU_isspacerune
|
||||
#define istitlerune jsU_istitlerune
|
||||
#define isupperrune jsU_isupperrune
|
||||
#define tolowerrune jsU_tolowerrune
|
||||
#define totitlerune jsU_totitlerune
|
||||
#define toupperrune jsU_toupperrune
|
||||
|
||||
enum
|
||||
@@ -46,11 +43,8 @@ int utflen(const char *s);
|
||||
|
||||
int isalpharune(Rune c);
|
||||
int islowerrune(Rune c);
|
||||
int isspacerune(Rune c);
|
||||
int istitlerune(Rune c);
|
||||
int isupperrune(Rune c);
|
||||
Rune tolowerrune(Rune c);
|
||||
Rune totitlerune(Rune c);
|
||||
Rune toupperrune(Rune c);
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user