Implement inline packed symbols.

Small symbols with all alphanumeric characters (<5) are packed in 32bit
symbol integer a la base64. This means those small symbols are not
listed in `Symbol.all_symbols`.
parent 3a95bf0d
......@@ -242,6 +242,9 @@ typedef struct mrb_state {
struct symbol_name *symtbl; /* symbol table */
mrb_sym symhash[256];
size_t symcapa;
#ifndef MRB_ENABLE_SYMBOLL_ALL
char symbuf[8]; /* buffer for small symbol names */
#endif
#ifdef MRB_ENABLE_DEBUG_HOOK
void (*code_fetch_hook)(struct mrb_state* mrb, struct mrb_irep *irep, mrb_code *pc, mrb_value *regs);
......
......@@ -30,7 +30,8 @@ mrb_sym_all_symbols(mrb_state *mrb, mrb_value self)
mrb_value ary = mrb_ary_new_capa(mrb, mrb->symidx);
for (i=1, lim=mrb->symidx+1; i<lim; i++) {
mrb_ary_push(mrb, ary, mrb_symbol_value(i));
mrb_sym sym = i<<1;
mrb_ary_push(mrb, ary, mrb_symbol_value(sym));
}
return ary;
......
......@@ -28,6 +28,79 @@ sym_validate_len(mrb_state *mrb, size_t len)
}
}
static char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
static mrb_sym
sym_inline_pack(const char *name, uint16_t len)
{
char c;
char *p;
int i;
mrb_sym sym = 0;
int lower = 1;
if (len > 6) return 0; /* too long */
for (i=0; i<len; i++) {
uint32_t bits;
c = name[i];
if (c == 0) return 0; /* NUL in name */
p = strchr(pack_table, (int)c);
if (p == 0) return 0; /* non alnum char */
bits = (uint32_t)(p - pack_table)+1;
if (bits > 27) lower = 0;
sym |= bits<<(i*6+2);
}
if (lower) {
sym = 0;
for (i=0; i<len; i++) {
uint32_t bits;
c = name[i];
p = strchr(pack_table, (int)c);
bits = (uint32_t)(p - pack_table)+1;
sym |= bits<<(i*5+2);
}
return sym | 3;
}
if (len == 6) return 0;
return sym | 1;
}
static const char*
sym_inline_unpack(mrb_sym sym, char *buf)
{
int i;
if (sym == 0) return NULL;
if ((sym&1) == 0) return NULL; /* need to be inline sym */
if (sym&2) { /* all lower case (5bits/char) */
for (i=0; i<6; i++) {
uint32_t bits;
char c;
bits = sym>>(i*5+2) & 31;
if (bits == 0) break;
c = pack_table[bits-1];
buf[i] = c;
}
buf[i] = '\0';
return buf;
}
for (i=0; i<5; i++) {
uint32_t bits;
char c;
bits = sym>>(i*6+2) & 63;
if (bits == 0) break;
c = pack_table[bits-1];
buf[i] = c;
}
buf[i] = '\0';
return buf;
}
uint8_t
symhash(const char *key, size_t len)
{
......@@ -50,19 +123,23 @@ find_symbol(mrb_state *mrb, const char *name, uint16_t len, uint8_t hash)
mrb_sym i;
symbol_name *sname;
/* inline symbol */
i = sym_inline_pack(name, len);
if (i > 0) return i;
i = mrb->symhash[hash];
if (i == 0) return 0;
do {
sname = &mrb->symtbl[i];
if (sname->len == len && memcmp(sname->name, name, len) == 0) {
return i;
return i<<1;
}
if (sname->prev == 0xff) {
i -= 0xff;
sname = &mrb->symtbl[i];
while (mrb->symtbl < sname) {
if (sname->len == len && memcmp(sname->name, name, len) == 0) {
return (mrb_sym)(sname - mrb->symtbl);
return (mrb_sym)(sname - mrb->symtbl)<<1;
}
sname--;
}
......@@ -117,7 +194,7 @@ sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
}
mrb->symhash[hash] = sym;
return sym;
return sym<<1;
}
MRB_API mrb_sym
......@@ -170,6 +247,13 @@ mrb_check_intern_str(mrb_state *mrb, mrb_value str)
MRB_API const char*
mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
{
if (sym & 1) { /* inline packed symbol */
sym_inline_unpack(sym, mrb->symbuf);
if (lenp) *lenp = strlen(mrb->symbuf);
return mrb->symbuf;
}
sym >>= 1;
if (sym == 0 || mrb->symidx < sym) {
if (lenp) *lenp = 0;
return NULL;
......@@ -446,6 +530,9 @@ mrb_sym2str(mrb_state *mrb, mrb_sym sym)
const char *name = mrb_sym2name_len(mrb, sym, &len);
if (!name) return mrb_undef_value(); /* can't happen */
if (sym&1) { /* inline symbol */
return mrb_str_new(mrb, name, len);
}
return mrb_str_new_static(mrb, name, len);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment