Files
mujs/jsstring.c
Tor Andersson 822061539a Fix leaks if js_try runs out of exception stack space.
Since js_try can throw to its surrounding exception scope if it runs out
of space in the exception stack, we need to make sure to allocate
resources guarded by the try inside the try not outside it.
2021-07-23 12:43:15 +02:00

720 lines
14 KiB
C

#include "jsi.h"
#include "jsvalue.h"
#include "jsbuiltin.h"
#include "utf.h"
#include "regexp.h"
static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags)
{
int result = js_regexec(prog, string, sub, eflags);
if (result < 0)
js_error(J, "regexec failed");
return result;
}
static const char *checkstring(js_State *J, int idx)
{
if (!js_iscoercible(J, idx))
js_typeerror(J, "string function called on null or undefined");
return js_tostring(J, idx);
}
int js_runeat(js_State *J, const char *s, int i)
{
Rune rune = EOF;
while (i-- >= 0) {
rune = *(unsigned char*)s;
if (rune < Runeself) {
if (rune == 0)
return EOF;
++s;
} else
s += chartorune(&rune, s);
}
return rune;
}
const char *js_utfidxtoptr(const char *s, int i)
{
Rune rune;
while (i-- > 0) {
rune = *(unsigned char*)s;
if (rune < Runeself) {
if (rune == 0)
return NULL;
++s;
} else
s += chartorune(&rune, s);
}
return s;
}
int js_utfptrtoidx(const char *s, const char *p)
{
Rune rune;
int i = 0;
while (s < p) {
if (*(unsigned char *)s < Runeself)
++s;
else
s += chartorune(&rune, s);
++i;
}
return i;
}
static void jsB_new_String(js_State *J)
{
js_newstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : "");
}
static void jsB_String(js_State *J)
{
js_pushstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : "");
}
static void Sp_toString(js_State *J)
{
js_Object *self = js_toobject(J, 0);
if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
js_pushliteral(J, self->u.s.string);
}
static void Sp_valueOf(js_State *J)
{
js_Object *self = js_toobject(J, 0);
if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
js_pushliteral(J, self->u.s.string);
}
static void Sp_charAt(js_State *J)
{
char buf[UTFmax + 1];
const char *s = checkstring(J, 0);
int pos = js_tointeger(J, 1);
Rune rune = js_runeat(J, s, pos);
if (rune >= 0) {
buf[runetochar(buf, &rune)] = 0;
js_pushstring(J, buf);
} else {
js_pushliteral(J, "");
}
}
static void Sp_charCodeAt(js_State *J)
{
const char *s = checkstring(J, 0);
int pos = js_tointeger(J, 1);
Rune rune = js_runeat(J, s, pos);
if (rune >= 0)
js_pushnumber(J, rune);
else
js_pushnumber(J, NAN);
}
static void Sp_concat(js_State *J)
{
int i, top = js_gettop(J);
int n;
char * volatile out = NULL;
const char *s;
if (top == 1)
return;
s = checkstring(J, 0);
n = strlen(s);
if (js_try(J)) {
js_free(J, out);
js_throw(J);
}
out = js_malloc(J, n + 1);
strcpy(out, s);
for (i = 1; i < top; ++i) {
s = js_tostring(J, i);
n += strlen(s);
out = js_realloc(J, out, n + 1);
strcat(out, s);
}
js_pushstring(J, out);
js_endtry(J);
js_free(J, out);
}
static void Sp_indexOf(js_State *J)
{
const char *haystack = checkstring(J, 0);
const char *needle = js_tostring(J, 1);
int pos = js_tointeger(J, 2);
int len = strlen(needle);
int k = 0;
Rune rune;
while (*haystack) {
if (k >= pos && !strncmp(haystack, needle, len)) {
js_pushnumber(J, k);
return;
}
haystack += chartorune(&rune, haystack);
++k;
}
js_pushnumber(J, -1);
}
static void Sp_lastIndexOf(js_State *J)
{
const char *haystack = checkstring(J, 0);
const char *needle = js_tostring(J, 1);
int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : (int)strlen(haystack);
int len = strlen(needle);
int k = 0, last = -1;
Rune rune;
while (*haystack && k <= pos) {
if (!strncmp(haystack, needle, len))
last = k;
haystack += chartorune(&rune, haystack);
++k;
}
js_pushnumber(J, last);
}
static void Sp_localeCompare(js_State *J)
{
const char *a = checkstring(J, 0);
const char *b = js_tostring(J, 1);
js_pushnumber(J, strcmp(a, b));
}
static void Sp_slice(js_State *J)
{
const char *str = checkstring(J, 0);
const char *ss, *ee;
int len = utflen(str);
int s = js_tointeger(J, 1);
int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
s = s < 0 ? s + len : s;
e = e < 0 ? e + len : e;
s = s < 0 ? 0 : s > len ? len : s;
e = e < 0 ? 0 : e > len ? len : e;
if (s < e) {
ss = js_utfidxtoptr(str, s);
ee = js_utfidxtoptr(ss, e - s);
} else {
ss = js_utfidxtoptr(str, e);
ee = js_utfidxtoptr(ss, s - e);
}
js_pushlstring(J, ss, ee - ss);
}
static void Sp_substring(js_State *J)
{
const char *str = checkstring(J, 0);
const char *ss, *ee;
int len = utflen(str);
int s = js_tointeger(J, 1);
int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
s = s < 0 ? 0 : s > len ? len : s;
e = e < 0 ? 0 : e > len ? len : e;
if (s < e) {
ss = js_utfidxtoptr(str, s);
ee = js_utfidxtoptr(ss, e - s);
} else {
ss = js_utfidxtoptr(str, e);
ee = js_utfidxtoptr(ss, s - e);
}
js_pushlstring(J, ss, ee - ss);
}
static void Sp_toLowerCase(js_State *J)
{
const char *s = checkstring(J, 0);
char * volatile dst = NULL;
char *d;
Rune rune;
if (js_try(J)) {
js_free(J, dst);
js_throw(J);
}
d = dst = js_malloc(J, UTFmax * strlen(s) + 1);
while (*s) {
s += chartorune(&rune, s);
rune = tolowerrune(rune);
d += runetochar(d, &rune);
}
*d = 0;
js_pushstring(J, dst);
js_endtry(J);
js_free(J, dst);
}
static void Sp_toUpperCase(js_State *J)
{
const char *s = checkstring(J, 0);
char * volatile dst = NULL;
char *d;
Rune rune;
if (js_try(J)) {
js_free(J, dst);
js_throw(J);
}
d = dst = js_malloc(J, UTFmax * strlen(s) + 1);
while (*s) {
s += chartorune(&rune, s);
rune = toupperrune(rune);
d += runetochar(d, &rune);
}
*d = 0;
js_pushstring(J, dst);
js_endtry(J);
js_free(J, dst);
}
static int istrim(int c)
{
return c == 0x9 || c == 0xB || c == 0xC || c == 0x20 || c == 0xA0 || c == 0xFEFF ||
c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
}
static void Sp_trim(js_State *J)
{
const char *s, *e;
s = checkstring(J, 0);
while (istrim(*s))
++s;
e = s + strlen(s);
while (e > s && istrim(e[-1]))
--e;
js_pushlstring(J, s, e - s);
}
static void S_fromCharCode(js_State *J)
{
int i, top = js_gettop(J);
char * volatile s = NULL;
char *p;
Rune c;
if (js_try(J)) {
js_free(J, s);
js_throw(J);
}
s = p = js_malloc(J, (top-1) * UTFmax + 1);
for (i = 1; i < top; ++i) {
c = js_touint32(J, i);
p += runetochar(p, &c);
}
*p = 0;
js_pushstring(J, s);
js_endtry(J);
js_free(J, s);
}
static void Sp_match(js_State *J)
{
js_Regexp *re;
const char *text;
int len;
const char *a, *b, *c, *e;
Resub m;
text = checkstring(J, 0);
if (js_isregexp(J, 1))
js_copy(J, 1);
else if (js_isundefined(J, 1))
js_newregexp(J, "", 0);
else
js_newregexp(J, js_tostring(J, 1), 0);
re = js_toregexp(J, -1);
if (!(re->flags & JS_REGEXP_G)) {
js_RegExp_prototype_exec(J, re, text);
return;
}
re->last = 0;
js_newarray(J);
len = 0;
a = text;
e = text + strlen(text);
while (a <= e) {
if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
break;
b = m.sub[0].sp;
c = m.sub[0].ep;
js_pushlstring(J, b, c - b);
js_setindex(J, -2, len++);
a = c;
if (c - b == 0)
++a;
}
if (len == 0) {
js_pop(J, 1);
js_pushnull(J);
}
}
static void Sp_search(js_State *J)
{
js_Regexp *re;
const char *text;
Resub m;
text = checkstring(J, 0);
if (js_isregexp(J, 1))
js_copy(J, 1);
else if (js_isundefined(J, 1))
js_newregexp(J, "", 0);
else
js_newregexp(J, js_tostring(J, 1), 0);
re = js_toregexp(J, -1);
if (!js_doregexec(J, re->prog, text, &m, 0))
js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp));
else
js_pushnumber(J, -1);
}
static void Sp_replace_regexp(js_State *J)
{
js_Regexp *re;
const char *source, *s, *r;
js_Buffer *sb = NULL;
int n, x;
Resub m;
source = checkstring(J, 0);
re = js_toregexp(J, 1);
if (js_doregexec(J, re->prog, source, &m, 0)) {
js_copy(J, 0);
return;
}
re->last = 0;
loop:
s = m.sub[0].sp;
n = m.sub[0].ep - m.sub[0].sp;
if (js_iscallable(J, 2)) {
js_copy(J, 2);
js_pushundefined(J);
for (x = 0; m.sub[x].sp; ++x) /* arg 0..x: substring and subexps that matched */
js_pushlstring(J, m.sub[x].sp, m.sub[x].ep - m.sub[x].sp);
js_pushnumber(J, s - source); /* arg x+2: offset within search string */
js_copy(J, 0); /* arg x+3: search string */
js_call(J, 2 + x);
r = js_tostring(J, -1);
js_putm(J, &sb, source, s);
js_puts(J, &sb, r);
js_pop(J, 1);
} else {
r = js_tostring(J, 2);
js_putm(J, &sb, source, s);
while (*r) {
if (*r == '$') {
switch (*(++r)) {
case 0: --r; /* end of string; back up */
/* fallthrough */
case '$': js_putc(J, &sb, '$'); break;
case '`': js_putm(J, &sb, source, s); break;
case '\'': js_puts(J, &sb, s + n); break;
case '&':
js_putm(J, &sb, s, s + n);
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
x = *r - '0';
if (r[1] >= '0' && r[1] <= '9')
x = x * 10 + *(++r) - '0';
if (x > 0 && x < m.nsub) {
js_putm(J, &sb, m.sub[x].sp, m.sub[x].ep);
} else {
js_putc(J, &sb, '$');
if (x > 10) {
js_putc(J, &sb, '0' + x / 10);
js_putc(J, &sb, '0' + x % 10);
} else {
js_putc(J, &sb, '0' + x);
}
}
break;
default:
js_putc(J, &sb, '$');
js_putc(J, &sb, *r);
break;
}
++r;
} else {
js_putc(J, &sb, *r++);
}
}
}
if (re->flags & JS_REGEXP_G) {
source = m.sub[0].ep;
if (n == 0) {
if (*source)
js_putc(J, &sb, *source++);
else
goto end;
}
if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL))
goto loop;
}
end:
js_puts(J, &sb, s + n);
js_putc(J, &sb, 0);
if (js_try(J)) {
js_free(J, sb);
js_throw(J);
}
js_pushstring(J, sb ? sb->s : "");
js_endtry(J);
js_free(J, sb);
}
static void Sp_replace_string(js_State *J)
{
const char *source, *needle, *s, *r;
js_Buffer *sb = NULL;
int n;
source = checkstring(J, 0);
needle = js_tostring(J, 1);
s = strstr(source, needle);
if (!s) {
js_copy(J, 0);
return;
}
n = strlen(needle);
if (js_iscallable(J, 2)) {
js_copy(J, 2);
js_pushundefined(J);
js_pushlstring(J, s, n); /* arg 1: substring that matched */
js_pushnumber(J, s - source); /* arg 2: offset within search string */
js_copy(J, 0); /* arg 3: search string */
js_call(J, 3);
r = js_tostring(J, -1);
js_putm(J, &sb, source, s);
js_puts(J, &sb, r);
js_puts(J, &sb, s + n);
js_putc(J, &sb, 0);
js_pop(J, 1);
} else {
r = js_tostring(J, 2);
js_putm(J, &sb, source, s);
while (*r) {
if (*r == '$') {
switch (*(++r)) {
case 0: --r; /* end of string; back up */
/* fallthrough */
case '$': js_putc(J, &sb, '$'); break;
case '&': js_putm(J, &sb, s, s + n); break;
case '`': js_putm(J, &sb, source, s); break;
case '\'': js_puts(J, &sb, s + n); break;
default: js_putc(J, &sb, '$'); js_putc(J, &sb, *r); break;
}
++r;
} else {
js_putc(J, &sb, *r++);
}
}
js_puts(J, &sb, s + n);
js_putc(J, &sb, 0);
}
if (js_try(J)) {
js_free(J, sb);
js_throw(J);
}
js_pushstring(J, sb ? sb->s : "");
js_endtry(J);
js_free(J, sb);
}
static void Sp_replace(js_State *J)
{
if (js_isregexp(J, 1))
Sp_replace_regexp(J);
else
Sp_replace_string(J);
}
static void Sp_split_regexp(js_State *J)
{
js_Regexp *re;
const char *text;
int limit, len, k;
const char *p, *a, *b, *c, *e;
Resub m;
text = checkstring(J, 0);
re = js_toregexp(J, 1);
limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
js_newarray(J);
len = 0;
e = text + strlen(text);
/* splitting the empty string */
if (e == text) {
if (js_doregexec(J, re->prog, text, &m, 0)) {
if (len == limit) return;
js_pushliteral(J, "");
js_setindex(J, -2, 0);
}
return;
}
p = a = text;
while (a < e) {
if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
break; /* no match */
b = m.sub[0].sp;
c = m.sub[0].ep;
/* empty string at end of last match */
if (b == p) {
++a;
continue;
}
if (len == limit) return;
js_pushlstring(J, p, b - p);
js_setindex(J, -2, len++);
for (k = 1; k < m.nsub; ++k) {
if (len == limit) return;
js_pushlstring(J, m.sub[k].sp, m.sub[k].ep - m.sub[k].sp);
js_setindex(J, -2, len++);
}
a = p = c;
}
if (len == limit) return;
js_pushstring(J, p);
js_setindex(J, -2, len);
}
static void Sp_split_string(js_State *J)
{
const char *str = checkstring(J, 0);
const char *sep = js_tostring(J, 1);
int limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
int i, n;
js_newarray(J);
n = strlen(sep);
/* empty string */
if (n == 0) {
Rune rune;
for (i = 0; *str && i < limit; ++i) {
n = chartorune(&rune, str);
js_pushlstring(J, str, n);
js_setindex(J, -2, i);
str += n;
}
return;
}
for (i = 0; str && i < limit; ++i) {
const char *s = strstr(str, sep);
if (s) {
js_pushlstring(J, str, s-str);
js_setindex(J, -2, i);
str = s + n;
} else {
js_pushstring(J, str);
js_setindex(J, -2, i);
str = NULL;
}
}
}
static void Sp_split(js_State *J)
{
if (js_isundefined(J, 1)) {
js_newarray(J);
js_pushstring(J, js_tostring(J, 0));
js_setindex(J, -2, 0);
} else if (js_isregexp(J, 1)) {
Sp_split_regexp(J);
} else {
Sp_split_string(J);
}
}
void jsB_initstring(js_State *J)
{
J->String_prototype->u.s.string = "";
J->String_prototype->u.s.length = 0;
js_pushobject(J, J->String_prototype);
{
jsB_propf(J, "String.prototype.toString", Sp_toString, 0);
jsB_propf(J, "String.prototype.valueOf", Sp_valueOf, 0);
jsB_propf(J, "String.prototype.charAt", Sp_charAt, 1);
jsB_propf(J, "String.prototype.charCodeAt", Sp_charCodeAt, 1);
jsB_propf(J, "String.prototype.concat", Sp_concat, 0); /* 1 */
jsB_propf(J, "String.prototype.indexOf", Sp_indexOf, 1);
jsB_propf(J, "String.prototype.lastIndexOf", Sp_lastIndexOf, 1);
jsB_propf(J, "String.prototype.localeCompare", Sp_localeCompare, 1);
jsB_propf(J, "String.prototype.match", Sp_match, 1);
jsB_propf(J, "String.prototype.replace", Sp_replace, 2);
jsB_propf(J, "String.prototype.search", Sp_search, 1);
jsB_propf(J, "String.prototype.slice", Sp_slice, 2);
jsB_propf(J, "String.prototype.split", Sp_split, 2);
jsB_propf(J, "String.prototype.substring", Sp_substring, 2);
jsB_propf(J, "String.prototype.toLowerCase", Sp_toLowerCase, 0);
jsB_propf(J, "String.prototype.toLocaleLowerCase", Sp_toLowerCase, 0);
jsB_propf(J, "String.prototype.toUpperCase", Sp_toUpperCase, 0);
jsB_propf(J, "String.prototype.toLocaleUpperCase", Sp_toUpperCase, 0);
/* ES5 */
jsB_propf(J, "String.prototype.trim", Sp_trim, 0);
}
js_newcconstructor(J, jsB_String, jsB_new_String, "String", 0); /* 1 */
{
jsB_propf(J, "String.fromCharCode", S_fromCharCode, 0); /* 1 */
}
js_defglobal(J, "String", JS_DONTENUM);
}