From adbbeec67255f04049dbf28a3fe7573e8248f97d Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Sat, 28 Dec 2013 18:22:09 +0100 Subject: [PATCH] String interning table. --- Makefile | 2 +- js-string.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++ js.h | 7 ++++ 3 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 js-string.c diff --git a/Makefile b/Makefile index 9e836d1..ded872f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -SRCS := js-state.c js-load.c js-lex.c js-parse.c +SRCS := js-state.c js-string.c js-load.c js-lex.c js-parse.c HDRS := js.h js-parse.h OBJS := $(SRCS:%.c=build/%.o) diff --git a/js-string.c b/js-string.c new file mode 100644 index 0000000..b6d61a6 --- /dev/null +++ b/js-string.c @@ -0,0 +1,107 @@ +#include "js.h" + +/* Use an AA-tree to quickly look up interned strings. */ + +struct js_StringNode +{ + const char *string; + js_StringNode *left, *right; + int level; +}; + +static js_StringNode sentinel = { "", &sentinel, &sentinel, 0 }; + +static js_StringNode *makestringnode(const char *string, const char **out) +{ + js_StringNode *node = malloc(sizeof(js_StringNode)); + node->string = *out = strdup(string); + node->left = node->right = &sentinel; + node->level = 1; + return node; +} + +static const char *lookup(js_StringNode *node, const char *string) +{ + if (node && node != &sentinel) { + int c = strcmp(string, node->string); + if (c == 0) + return node->string; + else if (c < 0) + return lookup(node->left, string); + else + return lookup(node->right, string); + } + return NULL; +} + +static js_StringNode *skew(js_StringNode *node) +{ + if (node->level != 0) { + if (node->left->level == node->level) { + js_StringNode *save = node; + node = node->left; + save->left = node->right; + node->right = save; + } + node->right = skew(node->right); + } + return node; +} + +static js_StringNode *split(js_StringNode *node) +{ + if (node->level != 0 && node->right->right->level == node->level) { + js_StringNode *save = node; + node = node->right; + save->right = node->left; + node->left = save; + node->level++; + node->right = split(node->right); + } + return node; +} + +static js_StringNode *insert(js_StringNode *node, const char *string, const char **out) +{ + if (node && node != &sentinel) { + int c = strcmp(string, node->string); + if (c < 0) + node->left = insert(node->left, string, out); + else + node->right = insert(node->right, string, out); + node = skew(node); + node = split(node); + return node; + } else { + return makestringnode(string, out); + } +} + +static void printstringnode(js_StringNode *node, int level) +{ + int i; + if (node->left != &sentinel) + printstringnode(node->left, level + 1); + for (i = 0; i < level; i++) + putchar(' '); + printf("'%s' (%d)\n", node->string, node->level); + if (node->right != &sentinel) + printstringnode(node->right, level + 1); +} + +void js_printstringtree(js_State *J) +{ + js_StringNode *root = J->strings; + printf("--- string dump ---\n"); + if (root && root != &sentinel) + printstringnode(root, 0); + printf("---\n"); +} + +const char *js_intern(js_State *J, const char *s) +{ + const char *a = lookup(J->strings, s); + if (!a) + J->strings = insert(J->strings, s, &a); + return a; +} diff --git a/js.h b/js.h index 6e33654..b2a85b2 100644 --- a/js.h +++ b/js.h @@ -9,6 +9,7 @@ #include typedef struct js_State js_State; +typedef struct js_StringNode js_StringNode; typedef int (*js_CFunction)(js_State *J); @@ -20,12 +21,16 @@ int js_error(js_State *J, const char *fmt, ...); int js_loadstring(js_State *J, const char *s); int js_loadfile(js_State *J, const char *filename); +const char *js_intern(js_State *J, const char *s); + /* private */ void jsP_initlex(js_State *J, const char *source); int jsP_lex(js_State *J); int jsP_parse(js_State *J); +void js_printstringtree(js_State *J); + struct js_State { const char *yysource; @@ -37,6 +42,8 @@ struct js_State int lasttoken; int newline; int strict; + + js_StringNode *strings; }; #endif