mirror of
https://github.com/ccxvii/mujs.git
synced 2026-02-06 01:41:37 +08:00
694 lines
14 KiB
C
694 lines
14 KiB
C
#include "jsi.h"
|
|
#include "jsvalue.h"
|
|
#include "jsbuiltin.h"
|
|
#include "utf.h"
|
|
|
|
#include <regex.h>
|
|
|
|
/* Number of elements in a true array (not a pointer or array parameter). */
#define nelem(a) (sizeof(a) / sizeof((a)[0]))
|
|
|
|
/*
 * Growable byte buffer used to assemble replacement strings.
 * The struct and its payload live in one heap block; `s` is a flexible
 * array member so that indexing beyond the initial capacity is well defined.
 */
struct sbuffer {
	int n;		/* number of bytes currently stored in s */
	int m;		/* number of bytes s has room for */
	char s[];	/* payload; NUL-terminated only if the caller appends a 0 */
};

/*
 * Append the byte `c` to `sb`, creating the buffer when `sb` is NULL and
 * doubling its capacity when it is full.
 *
 * Returns the (possibly relocated) buffer; the caller must use the return
 * value in place of the pointer it passed in and eventually free() it.
 *
 * This helper has no interpreter handle through which to report an error,
 * so running out of memory (or growing past the `int` size fields) calls
 * abort() instead of dereferencing a NULL pointer.
 */
static struct sbuffer *sb_putc(struct sbuffer *sb, int c)
{
	enum { SB_INITIAL = 64, SB_LIMIT = 1 << 30 };

	if (!sb) {
		sb = malloc(offsetof(struct sbuffer, s) + SB_INITIAL);
		if (!sb)
			abort();
		sb->n = 0;
		sb->m = SB_INITIAL;
	} else if (sb->n == sb->m) {
		struct sbuffer *grown;

		/* doubling beyond this would overflow the int capacity field */
		if (sb->m >= SB_LIMIT)
			abort();
		grown = realloc(sb, offsetof(struct sbuffer, s) + (size_t)sb->m * 2);
		if (!grown)
			abort();
		sb = grown;
		sb->m *= 2;
	}
	sb->s[sb->n++] = c;
	return sb;
}
|
|
|
|
/*
 * Append every byte of the NUL-terminated string `s` (the terminator itself
 * is not appended) to `sb` and return the resulting buffer.
 */
static struct sbuffer *sb_puts(struct sbuffer *sb, const char *s)
{
	const char *p;

	for (p = s; *p; ++p)
		sb = sb_putc(sb, *p);
	return sb;
}
|
|
|
|
/*
 * Append the bytes in the half-open range [s, e) to `sb` and return the
 * resulting buffer. Nothing is appended when s >= e.
 */
static struct sbuffer *sb_putm(struct sbuffer *sb, const char *s, const char *e)
{
	const char *p;

	for (p = s; p < e; ++p)
		sb = sb_putc(sb, *p);
	return sb;
}
|
|
|
|
/*
 * Constructor form (`new String(value)`): creates a String object from
 * argument 1 converted to a string, or from "" when no argument was passed.
 */
static int jsB_new_String(js_State *J, int argc)
{
	const char *value = "";

	if (argc > 0)
		value = js_tostring(J, 1);
	js_newstring(J, value);
	return 1;
}
|
|
|
|
/*
 * Function form (`String(value)`): pushes argument 1 converted to a string,
 * or "" when no argument was passed.
 */
static int jsB_String(js_State *J, int argc)
{
	const char *value = "";

	if (argc > 0)
		value = js_tostring(J, 1);
	/* NOTE(review): the converted string is pushed with js_pushliteral --
	 * confirm the pointer returned by js_tostring outlives the pushed value. */
	js_pushliteral(J, value);
	return 1;
}
|
|
|
|
/*
 * String.prototype.toString: pushes the string stored in the `this` object.
 * Raises a type error when `this` is not a String object.
 */
static int Sp_toString(js_State *J, int argc)
{
	js_Object *obj = js_toobject(J, 0);
	const int is_string = (obj->type == JS_CSTRING);

	if (!is_string)
		js_typeerror(J, "not a string");
	js_pushliteral(J, obj->u.string);
	return 1;
}
|
|
|
|
/*
 * String.prototype.valueOf: pushes the string stored in the `this` object.
 * Raises a type error when `this` is not a String object.
 */
static int Sp_valueOf(js_State *J, int argc)
{
	js_Object *obj = js_toobject(J, 0);
	const int is_string = (obj->type == JS_CSTRING);

	if (!is_string)
		js_typeerror(J, "not a string");
	js_pushliteral(J, obj->u.string);
	return 1;
}
|
|
|
|
static inline Rune runeAt(const char *s, int i)
|
|
{
|
|
Rune rune = 0;
|
|
while (i-- >= 0) {
|
|
rune = *(unsigned char*)s;
|
|
if (rune < Runeself) {
|
|
if (rune == 0)
|
|
return 0;
|
|
++s;
|
|
} else
|
|
s += chartorune(&rune, s);
|
|
}
|
|
return rune;
|
|
}
|
|
|
|
static inline const char *utfidx(const char *s, int i)
|
|
{
|
|
Rune rune = 0;
|
|
while (i-- > 0) {
|
|
rune = *(unsigned char*)s;
|
|
if (rune < Runeself) {
|
|
if (rune == 0)
|
|
return NULL;
|
|
++s;
|
|
} else
|
|
s += chartorune(&rune, s);
|
|
}
|
|
return s;
|
|
}
|
|
|
|
/*
 * String.prototype.charAt: pushes the one-character string at the index
 * given by argument 1, or "" when runeAt reports nothing at that index.
 */
static int Sp_charAt(js_State *J, int argc)
{
	char buf[UTFmax + 1];
	const char *s = js_tostring(J, 0);
	int pos = js_tointeger(J, 1);
	Rune rune = runeAt(s, pos);
	int width;

	if (!(rune > 0)) {
		js_pushliteral(J, "");
		return 1;
	}

	/* re-encode the rune as a NUL-terminated UTF-8 string */
	width = runetochar(buf, &rune);
	buf[width] = 0;
	js_pushstring(J, buf);
	return 1;
}
|
|
|
|
/*
 * String.prototype.charCodeAt: pushes the code of the rune at the index
 * given by argument 1, or NaN when runeAt reports nothing at that index.
 */
static int Sp_charCodeAt(js_State *J, int argc)
{
	const char *s = js_tostring(J, 0);
	int pos = js_tointeger(J, 1);
	Rune rune = runeAt(s, pos);

	if (!(rune > 0)) {
		js_pushnumber(J, NAN);
		return 1;
	}
	js_pushnumber(J, rune);
	return 1;
}
|
|
|
|
/*
 * String.prototype.concat: pushes `this` converted to a string followed by
 * the string conversion of each of arguments 1..argc.
 *
 * The result is assembled in a heap buffer that is released on every path,
 * including when a later argument's conversion raises (the js_try handler
 * frees it and re-throws).
 *
 * Allocation failures are raised through js_typeerror, the only
 * error-raising helper this file uses, instead of dereferencing NULL.
 */
static int Sp_concat(js_State *J, int argc)
{
	char * volatile out;	/* volatile: read in the js_try handler */
	char *grown;
	const char *s;
	size_t n, k;
	int i;

	/* NOTE(review): with no arguments nothing is pushed here; presumably the
	 * value already on top of the stack serves as the result -- confirm. */
	if (argc == 0)
		return 1;

	s = js_tostring(J, 0);
	n = strlen(s);
	out = malloc(n + 1);
	if (!out)
		js_typeerror(J, "out of memory");
	memcpy(out, s, n + 1);

	if (js_try(J)) {
		free(out);
		js_throw(J);
	}

	for (i = 1; i <= argc; ++i) {
		s = js_tostring(J, i);
		k = strlen(s);
		/* keep `out` valid until the grow succeeds so the handler can free it */
		grown = realloc(out, n + k + 1);
		if (!grown)
			js_typeerror(J, "out of memory");
		/* append at the known end instead of rescanning with strcat */
		memcpy(grown + n, s, k + 1);
		out = grown;
		n += k;
	}

	js_pushstring(J, out);
	js_endtry(J);
	free(out);
	return 1;
}
|
|
|
|
/*
 * String.prototype.indexOf: pushes the character index of the first
 * occurrence of argument 1 in `this` at or after the position given by
 * argument 2, or -1 when there is none.
 *
 * Indices are counted in runes, not bytes. An empty search string also
 * matches at the very end of the string, so "".indexOf("") is 0 and
 * "abc".indexOf("", 3) is 3.
 */
static int Sp_indexOf(js_State *J, int argc)
{
	const char *haystack = js_tostring(J, 0);
	const char *needle = js_tostring(J, 1);
	int pos = js_tointeger(J, 2);
	size_t len = strlen(needle);
	int k = 0;
	Rune rune;

	while (*haystack) {
		if (k >= pos && !strncmp(haystack, needle, len)) {
			js_pushnumber(J, k);
			return 1;
		}
		haystack += chartorune(&rune, haystack);
		++k;
	}

	/* The loop never tests the end-of-string position; only an empty
	 * needle can match there. k now equals the length in runes, which is
	 * also the result when the start position lies beyond the end. */
	if (len == 0) {
		js_pushnumber(J, k);
		return 1;
	}

	js_pushnumber(J, -1);
	return 1;
}
|
|
|
|
/*
 * String.prototype.lastIndexOf: pushes the character index of the last
 * occurrence of argument 1 in `this` that starts at or before the position
 * given by argument 2, or -1 when there is none.
 *
 * Indices are counted in runes. When no position is passed the whole string
 * is searched (the byte length is used as the bound; it is never smaller
 * than the rune count). A negative position is clamped to 0, and an empty
 * search string also matches at the very end of the string.
 */
static int Sp_lastIndexOf(js_State *J, int argc)
{
	const char *haystack = js_tostring(J, 0);
	const char *needle = js_tostring(J, 1);
	int pos = argc > 1 ? js_tointeger(J, 2) : strlen(haystack);
	size_t len = strlen(needle);
	int k = 0, last = -1;
	Rune rune;

	/* a negative start still allows a match at index 0 */
	if (pos < 0)
		pos = 0;

	while (*haystack && k <= pos) {
		if (!strncmp(haystack, needle, len))
			last = k;
		haystack += chartorune(&rune, haystack);
		++k;
	}

	/* The loop never tests the end-of-string position; only an empty
	 * needle can match there. k <= pos holds only if the loop stopped
	 * because the string ended, in which case k is the length in runes. */
	if (len == 0 && k <= pos)
		last = k;

	js_pushnumber(J, last);
	return 1;
}
|
|
|
|
/*
 * String.prototype.localeCompare: pushes a number that is negative, zero or
 * positive when `this` sorts before, equal to or after argument 1.
 * The comparison is a plain byte-wise strcmp; no locale rules are applied.
 *
 * Like every other builtin in this file it pushes its result and returns 1;
 * the strcmp result must not be used as the function's return value.
 */
static int Sp_localeCompare(js_State *J, int argc)
{
	const char *a = js_tostring(J, 0);
	const char *b = js_tostring(J, 1);

	js_pushnumber(J, strcmp(a, b));
	return 1;
}
|
|
|
|
/*
 * String.prototype.slice: pushes the characters of `this` from index
 * argument 1 up to (not including) index argument 2; the end defaults to
 * the string length when it is not passed.
 *
 * Negative indices count from the end; both indices are then clamped to
 * [0, length]. Indices are in runes. Unlike substring, slice never swaps
 * its bounds: when start >= end the result is the empty string.
 */
static int Sp_slice(js_State *J, int argc)
{
	const char *str = js_tostring(J, 0);
	const char *ss, *ee;
	int len = utflen(str);
	int s = js_tointeger(J, 1);
	int e = argc > 1 ? js_tointeger(J, 2) : len;

	/* negative indices are relative to the end of the string */
	s = s < 0 ? s + len : s;
	e = e < 0 ? e + len : e;

	s = s < 0 ? 0 : s > len ? len : s;
	e = e < 0 ? 0 : e > len ? len : e;

	if (s >= e) {
		js_pushliteral(J, "");
		return 1;
	}

	/* both indices are within the string, so utfidx cannot return NULL */
	ss = utfidx(str, s);
	ee = utfidx(ss, e - s);
	js_pushlstring(J, ss, ee - ss);
	return 1;
}
|
|
|
|
/*
 * String.prototype.substring: pushes the characters of `this` between the
 * indices given by arguments 1 and 2 (the second defaults to the string
 * length when it is not passed). Both indices are clamped to [0, length]
 * and used in ascending order, whichever way round they were given.
 * Indices are in runes.
 */
static int Sp_substring(js_State *J, int argc)
{
	const char *str = js_tostring(J, 0);
	const char *first, *last;
	int len = utflen(str);
	int s = js_tointeger(J, 1);
	int e = argc > 1 ? js_tointeger(J, 2) : len;
	int lo, hi;

	if (s < 0)
		s = 0;
	else if (s > len)
		s = len;

	if (e < 0)
		e = 0;
	else if (e > len)
		e = len;

	/* order the two indices */
	if (s < e) {
		lo = s;
		hi = e;
	} else {
		lo = e;
		hi = s;
	}

	first = utfidx(str, lo);
	last = utfidx(first, hi - lo);

	js_pushlstring(J, first, last - first);
	return 1;
}
|
|
|
|
/*
 * String.prototype.toLowerCase (also registered as toLocaleLowerCase):
 * pushes a copy of `this` with every rune mapped through tolowerrune.
 *
 * The scratch buffer is sized for the worst case of UTFmax output bytes per
 * input byte, is NUL-terminated before being pushed, and is freed on both
 * the normal path and when js_pushstring raises.
 */
static int Sp_toLowerCase(js_State *J, int argc)
{
	const char *src = js_tostring(J, 0);
	char *dst = malloc(UTFmax * strlen(src) + 1);
	const char *s = src;
	char *d = dst;
	Rune rune;

	/* raised through js_typeerror, the only error helper this file uses */
	if (!dst)
		js_typeerror(J, "out of memory");

	while (*s) {
		s += chartorune(&rune, s);
		rune = tolowerrune(rune);
		d += runetochar(d, &rune);
	}
	/* malloc does not zero the buffer: terminate it explicitly */
	*d = 0;

	if (js_try(J)) {
		free(dst);
		js_throw(J);
	}
	js_pushstring(J, dst);
	js_endtry(J);
	free(dst);
	return 1;
}
|
|
|
|
/*
 * String.prototype.toUpperCase (also registered as toLocaleUpperCase):
 * pushes a copy of `this` with every rune mapped through toupperrune.
 *
 * The scratch buffer is sized for the worst case of UTFmax output bytes per
 * input byte, is NUL-terminated before being pushed, and is freed on both
 * the normal path and when js_pushstring raises.
 */
static int Sp_toUpperCase(js_State *J, int argc)
{
	const char *src = js_tostring(J, 0);
	char *dst = malloc(UTFmax * strlen(src) + 1);
	const char *s = src;
	char *d = dst;
	Rune rune;

	/* raised through js_typeerror, the only error helper this file uses */
	if (!dst)
		js_typeerror(J, "out of memory");

	while (*s) {
		s += chartorune(&rune, s);
		rune = toupperrune(rune);
		d += runetochar(d, &rune);
	}
	/* malloc does not zero the buffer: terminate it explicitly */
	*d = 0;

	if (js_try(J)) {
		free(dst);
		js_throw(J);
	}
	js_pushstring(J, dst);
	js_endtry(J);
	free(dst);
	return 1;
}
|
|
|
|
/*
 * String.fromCharCode: pushes the string made of the code units given by
 * arguments 1..argc, each encoded as UTF-8.
 *
 * Every argument is reduced to an unsigned 16-bit value (ToUint16), done
 * here as js_touint32 followed by a 16-bit mask. A code unit of 0 encodes
 * as a NUL byte and therefore ends the C string that is pushed.
 *
 * The scratch buffer is freed on both the normal path and when an argument
 * conversion or js_pushstring raises.
 */
static int S_fromCharCode(js_State *J, int argc)
{
	int i;
	Rune c;
	char *s, *p;

	s = p = malloc(argc * UTFmax + 1);
	/* raised through js_typeerror, the only error helper this file uses */
	if (!s)
		js_typeerror(J, "out of memory");

	if (js_try(J)) {
		free(s);
		js_throw(J);
	}

	for (i = 1; i <= argc; ++i) {
		c = js_touint32(J, i) & 0xFFFF;
		p += runetochar(p, &c);
	}
	*p = 0;
	js_pushstring(J, s);

	js_endtry(J);
	free(s);
	return 1;
}
|
|
|
|
/*
 * String.prototype.match.
 *
 * Argument 1 is used as the pattern: a RegExp is used as is, undefined
 * becomes a RegExp built from "", anything else becomes a RegExp built from
 * its string conversion. Without the global flag the call is handed to
 * js_RegExp_prototype_exec. With it, an array holding every successive
 * match of the whole pattern is pushed (empty when nothing matches).
 */
static int Sp_match(js_State *J, int argc)
{
	const char *text;
	regmatch_t m;
	regex_t *prog;
	int flags;
	int eflags;
	unsigned int count, pos, start, stop, end;

	text = js_tostring(J, 0);

	/* leave the regexp to match against on top of the stack */
	if (js_isregexp(J, 1)) {
		js_copy(J, 1);
	} else {
		const char *pattern = js_isundefined(J, 1) ? "" : js_tostring(J, 1);
		js_newregexp(J, pattern, 0);
	}

	prog = js_toregexp(J, -1, &flags);
	if ((flags & JS_REGEXP_G) == 0)
		return js_RegExp_prototype_exec(J, -1, text);

	js_newarray(J);

	end = strlen(text);
	count = 0;
	pos = 0;
	for (;;) {
		if (pos > end)
			break;

		/* past the first byte, '^' must no longer match at the search start */
		eflags = pos > 0 ? REG_NOTBOL : 0;
		if (regexec(prog, text + pos, 1, &m, eflags) != 0)
			break;

		/* regexec offsets are relative to text + pos */
		start = pos + m.rm_so;
		stop = pos + m.rm_eo;

		js_pushlstring(J, text + start, stop - start);
		js_setindex(J, -2, count++);

		/* step past an empty match so the scan always makes progress */
		pos = (stop - start == 0) ? stop + 1 : stop;
	}

	return 1;
}
|
|
|
|
/*
 * String.prototype.search: pushes the character index at which the pattern
 * in argument 1 first matches `this`, or -1 when it does not match.
 *
 * Argument 1 is used as the pattern: a RegExp is used as is, undefined
 * becomes a RegExp built from "", anything else becomes a RegExp built from
 * its string conversion.
 *
 * regexec reports a byte offset; it is converted to a rune index so the
 * result agrees with the other index-based methods in this file.
 */
static int Sp_search(js_State *J, int argc)
{
	const char *text, *p, *hit;
	regmatch_t m;
	regex_t *prog;
	int flags;
	int index;
	Rune rune;

	text = js_tostring(J, 0);

	if (js_isregexp(J, 1))
		js_copy(J, 1);
	else if (js_isundefined(J, 1))
		js_newregexp(J, "", 0);
	else
		js_newregexp(J, js_tostring(J, 1), 0);

	prog = js_toregexp(J, -1, &flags);

	if (regexec(prog, text, 1, &m, 0)) {
		js_pushnumber(J, -1);
		return 1;
	}

	/* count the runes that precede the match */
	hit = text + m.rm_so;
	index = 0;
	for (p = text; p < hit; p += chartorune(&rune, p))
		++index;

	js_pushnumber(J, index);
	return 1;
}
|
|
|
|
/*
 * String.prototype.replace with a RegExp pattern (argument 1).
 *
 * Replaces the first match, or every match when the RegExp has the global
 * flag, and pushes the resulting string. When nothing matches, `this` is
 * pushed unchanged.
 *
 * Argument 2 is either a function, called with the matched text, each
 * matched sub-expression, the match offset and the whole string, whose
 * result is inserted; or a string in which
 *   $$  inserts "$"
 *   $&  inserts the matched text
 *   $`  inserts the part of the string before the match
 *   $'  inserts the part of the string after the match
 *   $0..$9 insert the corresponding sub-match (left literal if it did not
 *          participate in the match)
 * Any other "$x", and a "$" at the very end, are copied literally.
 *
 * NOTE: the offset passed to the callback is a byte offset into the
 * original string, not a character index.
 * NOTE(review): if js_call or js_tostring raise while `sb` is allocated the
 * buffer does not appear to be released -- confirm and wrap in js_try if so.
 */
static int Sp_replace_regexp(js_State *J, int argc)
{
	const char *text;	/* start of the original string */
	const char *source;	/* start of the part not yet copied to sb */
	const char *s, *r, *tail;
	regmatch_t m[10];
	regex_t *prog;
	int flags;
	struct sbuffer *sb = NULL;
	int n, x;
	Rune rune;

	text = source = js_tostring(J, 0);
	prog = js_toregexp(J, 1, &flags);

	if (regexec(prog, source, nelem(m), m, 0)) {
		js_copy(J, 0);
		return 1;
	}

loop:
	/* regexec offsets are relative to `source` */
	s = source + m[0].rm_so;
	n = m[0].rm_eo - m[0].rm_so;

	if (js_iscallable(J, 2)) {
		js_copy(J, 2);
		js_pushglobal(J);
		/* arg 0..x: substring and subexps that matched; stop at the end of
		 * m[] even when every sub-expression took part in the match */
		for (x = 0; x < (int)nelem(m) && m[x].rm_so >= 0; ++x)
			js_pushlstring(J, source + m[x].rm_so, m[x].rm_eo - m[x].rm_so);
		js_pushnumber(J, s - text); /* arg x+2: offset within search string */
		js_copy(J, 0); /* arg x+3: search string */
		js_call(J, 2 + x);
		r = js_tostring(J, -1);
		sb = sb_putm(sb, source, s);
		sb = sb_puts(sb, r);
		js_pop(J, 1);
	} else {
		r = js_tostring(J, 2);
		sb = sb_putm(sb, source, s);
		while (*r) {
			if (*r == '$') {
				switch (*(++r)) {
				case '\0':
					/* lone '$' at the end: copy it and stop on the
					 * terminator instead of stepping past it */
					sb = sb_putc(sb, '$');
					continue;
				case '$': sb = sb_putc(sb, '$'); break;
				case '`': sb = sb_putm(sb, text, s); break;
				case '\'': sb = sb_puts(sb, s + n); break;
				case '&':
					sb = sb_putm(sb, s, s + n);
					break;
				case '0': case '1': case '2': case '3': case '4':
				case '5': case '6': case '7': case '8': case '9':
					x = *r - '0';
					if (m[x].rm_so >= 0) {
						sb = sb_putm(sb, source + m[x].rm_so, source + m[x].rm_eo);
					} else {
						sb = sb_putc(sb, '$');
						sb = sb_putc(sb, '0'+x);
					}
					break;
				default:
					sb = sb_putc(sb, '$');
					sb = sb_putc(sb, *r);
					break;
				}
				++r;
			} else {
				sb = sb_putc(sb, *r++);
			}
		}
	}

	/* everything from here to the end is still to be copied */
	tail = s + n;

	if (flags & JS_REGEXP_G) {
		source = tail;
		if (n == 0) {
			/* empty match: copy one whole character so the scan advances */
			if (!*source)
				goto end;
			x = chartorune(&rune, source);
			sb = sb_putm(sb, source, source + x);
			source += x;
			/* that character is already in sb; do not copy it again */
			tail = source;
		}
		if (!regexec(prog, source, nelem(m), m, REG_NOTBOL))
			goto loop;
	}

end:
	sb = sb_puts(sb, tail);
	sb = sb_putc(sb, 0);

	if (js_try(J)) {
		free(sb);
		js_throw(J);
	}
	js_pushstring(J, sb ? sb->s : "");
	js_endtry(J);
	free(sb);
	return 1;
}
|
|
|
|
/*
 * String.prototype.replace with a plain string pattern (argument 1).
 *
 * Replaces the first occurrence of the pattern and pushes the resulting
 * string. When the pattern does not occur, `this` is pushed unchanged.
 *
 * Argument 2 is either a function, called with the matched text, its
 * offset and the whole string, whose result is inserted; or a string in
 * which
 *   $$  inserts "$"
 *   $&  inserts the matched text
 *   $`  inserts the part of the string before the match
 *   $'  inserts the part of the string after the match
 * Any other "$x", and a "$" at the very end, are copied literally.
 *
 * NOTE: the offset passed to the callback is a byte offset, not a
 * character index.
 */
static int Sp_replace_string(js_State *J, int argc)
{
	const char *source, *needle, *s, *r;
	struct sbuffer *sb = NULL;
	int n;

	source = js_tostring(J, 0);
	needle = js_tostring(J, 1);

	s = strstr(source, needle);
	if (!s) {
		js_copy(J, 0);
		return 1;
	}
	n = strlen(needle);

	if (js_iscallable(J, 2)) {
		js_copy(J, 2);
		js_pushglobal(J);
		js_pushlstring(J, s, n); /* arg 1: substring that matched */
		js_pushnumber(J, s - source); /* arg 2: offset within search string */
		js_copy(J, 0); /* arg 3: search string */
		js_call(J, 3);
		r = js_tostring(J, -1);
		sb = sb_putm(sb, source, s);
		sb = sb_puts(sb, r);
		sb = sb_puts(sb, s + n);
		sb = sb_putc(sb, 0);
		js_pop(J, 1);
	} else {
		r = js_tostring(J, 2);
		sb = sb_putm(sb, source, s);
		while (*r) {
			if (*r == '$') {
				switch (*(++r)) {
				case '\0':
					/* lone '$' at the end: copy it and stop on the
					 * terminator instead of stepping past it */
					sb = sb_putc(sb, '$');
					continue;
				case '$': sb = sb_putc(sb, '$'); break;
				case '&': sb = sb_putm(sb, s, s + n); break;
				case '`': sb = sb_putm(sb, source, s); break;
				case '\'': sb = sb_puts(sb, s + n); break;
				default: sb = sb_putc(sb, '$'); sb = sb_putc(sb, *r); break;
				}
				++r;
			} else {
				sb = sb_putc(sb, *r++);
			}
		}
		sb = sb_puts(sb, s + n);
		sb = sb_putc(sb, 0);
	}

	if (js_try(J)) {
		free(sb);
		js_throw(J);
	}
	js_pushstring(J, sb ? sb->s : "");
	js_endtry(J);
	free(sb);
	return 1;
}
|
|
|
|
/*
 * String.prototype.replace: dispatches on the pattern in argument 1 -- a
 * RegExp goes to Sp_replace_regexp, anything else to Sp_replace_string.
 */
static int Sp_replace(js_State *J, int argc)
{
	return js_isregexp(J, 1)
		? Sp_replace_regexp(J, argc)
		: Sp_replace_string(J, argc);
}
|
|
|
|
/*
 * String.prototype.split with a RegExp separator (argument 1).
 *
 * Pushes an array of the pieces of `this` found between matches of the
 * separator. The text of each matched sub-expression is inserted after the
 * piece that precedes the separator. At most `limit` (argument 2, when not
 * undefined) elements are stored.
 *
 * An empty match located exactly at the previous split point is skipped so
 * that it neither produces an empty piece nor stalls the scan; a non-empty
 * match there is a real separator and does produce a (possibly empty) piece.
 */
static int Sp_split_regexp(js_State *J, int argc)
{
	const char *str = js_tostring(J, 0);
	unsigned int limit = !js_isundefined(J, 2) ? js_touint32(J, 2) : 1 << 30;
	regmatch_t m[10];
	regex_t *prog;
	int flags;
	unsigned int len, k, e;
	unsigned int p, a, b, c;
	Rune rune;

	prog = js_toregexp(J, 1, &flags);

	js_newarray(J);
	len = 0;

	e = strlen(str);

	/* splitting the empty string */
	if (e == 0) {
		if (regexec(prog, str, nelem(m), m, 0)) {
			if (len == limit) return 1;
			js_pushliteral(J, "");
			js_setindex(J, -2, 0);
		}
		return 1;
	}

	/* p: start of the current piece; a: where the next search begins */
	p = a = 0;
	while (a < e) {
		if (regexec(prog, str + a, nelem(m), m, a > 0 ? REG_NOTBOL : 0))
			break; /* no match */

		/* regexec offsets are relative to str + a */
		b = a + m[0].rm_so;
		c = a + m[0].rm_eo;

		/* empty string at end of last match: retry one character later */
		if (b == c && b == p) {
			a += chartorune(&rune, str + a);
			continue;
		}

		if (len == limit) return 1;
		js_pushlstring(J, str + p, b - p);
		js_setindex(J, -2, len++);

		for (k = 1; k < nelem(m) && m[k].rm_so >= 0; ++k) {
			if (len == limit) return 1;
			js_pushlstring(J, str + a + m[k].rm_so, m[k].rm_eo - m[k].rm_so);
			js_setindex(J, -2, len++);
		}

		a = p = c;
	}

	/* whatever follows the last separator is the final piece */
	if (len == limit) return 1;
	js_pushstring(J, str + p);
	js_setindex(J, -2, len);

	return 1;
}
|
|
|
|
/*
 * String.prototype.split with a plain string separator (argument 1).
 *
 * Pushes an array of the pieces of `this` found between occurrences of the
 * separator; an empty separator splits the string into single characters.
 * At most `limit` (argument 2, when not undefined) elements are stored.
 */
static int Sp_split_string(js_State *J, int argc)
{
	const char *str = js_tostring(J, 0);
	const char *sep = js_tostring(J, 1);
	unsigned int limit = !js_isundefined(J, 2) ? js_touint32(J, 2) : 1 << 30;
	unsigned int count, seplen;

	js_newarray(J);

	seplen = strlen(sep);

	/* empty separator: one element per rune */
	if (seplen == 0) {
		Rune rune;
		unsigned int width;

		count = 0;
		while (*str && count < limit) {
			width = chartorune(&rune, str);
			js_pushlstring(J, str, width);
			js_setindex(J, -2, count);
			str += width;
			++count;
		}
		return 1;
	}

	/* str becomes NULL once the final piece has been stored */
	count = 0;
	while (str != NULL && count < limit) {
		const char *hit = strstr(str, sep);

		if (hit == NULL) {
			js_pushstring(J, str);
			str = NULL;
		} else {
			js_pushlstring(J, str, hit - str);
			str = hit + seplen;
		}
		js_setindex(J, -2, count);
		++count;
	}

	return 1;
}
|
|
|
|
/*
 * String.prototype.split: dispatches on the separator in argument 1.
 * An undefined separator yields a one-element array holding `this`;
 * a RegExp goes to Sp_split_regexp, anything else to Sp_split_string.
 */
static int Sp_split(js_State *J, int argc)
{
	if (!js_isundefined(J, 1)) {
		return js_isregexp(J, 1)
			? Sp_split_regexp(J, argc)
			: Sp_split_string(J, argc);
	}

	/* no separator: the whole value is the only element */
	js_newarray(J);
	js_copy(J, 0);
	js_setindex(J, -2, 0);
	return 1;
}
|
|
|
|
/* Register the String.prototype methods on the object on top of the stack. */
static void initstring_prototype_methods(js_State *J)
{
	jsB_propf(J, "toString", Sp_toString, 0);
	jsB_propf(J, "valueOf", Sp_valueOf, 0);
	jsB_propf(J, "charAt", Sp_charAt, 1);
	jsB_propf(J, "charCodeAt", Sp_charCodeAt, 1);
	jsB_propf(J, "concat", Sp_concat, 1);
	jsB_propf(J, "indexOf", Sp_indexOf, 1);
	jsB_propf(J, "lastIndexOf", Sp_lastIndexOf, 1);
	jsB_propf(J, "localeCompare", Sp_localeCompare, 1);
	jsB_propf(J, "match", Sp_match, 1);
	jsB_propf(J, "replace", Sp_replace, 2);
	jsB_propf(J, "search", Sp_search, 1);
	jsB_propf(J, "slice", Sp_slice, 2);
	jsB_propf(J, "split", Sp_split, 2);
	jsB_propf(J, "substring", Sp_substring, 2);
	jsB_propf(J, "toLowerCase", Sp_toLowerCase, 0);
	/* the locale variants share the plain implementations */
	jsB_propf(J, "toLocaleLowerCase", Sp_toLowerCase, 0);
	jsB_propf(J, "toUpperCase", Sp_toUpperCase, 0);
	jsB_propf(J, "toLocaleUpperCase", Sp_toUpperCase, 0);
	// trim (ES5)
}

/* Register the functions that live on the String constructor itself. */
static void initstring_constructor_methods(js_State *J)
{
	jsB_propf(J, "fromCharCode", S_fromCharCode, 1);
}

/*
 * Install the String builtin: give the String prototype object an empty
 * string value, attach the prototype methods, create the constructor from
 * jsB_String / jsB_new_String, attach the constructor's own functions and
 * define the result as the global "String".
 */
void jsB_initstring(js_State *J)
{
	J->String_prototype->u.string = "";

	js_pushobject(J, J->String_prototype);
	initstring_prototype_methods(J);

	js_newcconstructor(J, jsB_String, jsB_new_String, 1);
	initstring_constructor_methods(J);

	js_defglobal(J, "String", JS_DONTENUM);
}
|