Abandon packing of all-lowercase symbols with 6 characters.

This keeps packed inline symbols within 31 bits, because the new method
hash tables allow only 31 bits for symbols; they use the top bit to mark
unused slots.
parent 99ef00fe
...@@ -73,20 +73,15 @@ typedef struct symbol_name { ...@@ -73,20 +73,15 @@ typedef struct symbol_name {
const char *name; const char *name;
} symbol_name; } symbol_name;
#define SYMBOL_INLINE_BIT_POS 1
#define SYMBOL_INLINE_LOWER_BIT_POS 2
#define SYMBOL_INLINE (1 << (SYMBOL_INLINE_BIT_POS - 1))
#define SYMBOL_INLINE_LOWER (1 << (SYMBOL_INLINE_LOWER_BIT_POS - 1))
#define SYMBOL_NORMAL_SHIFT SYMBOL_INLINE_BIT_POS
#define SYMBOL_INLINE_SHIFT SYMBOL_INLINE_LOWER_BIT_POS
#ifdef MRB_ENABLE_ALL_SYMBOLS #ifdef MRB_ENABLE_ALL_SYMBOLS
#define SYMBOL_SHIFT 0
# define SYMBOL_INLINE_P(sym) FALSE # define SYMBOL_INLINE_P(sym) FALSE
# define SYMBOL_INLINE_LOWER_P(sym) FALSE
# define sym_inline_pack(name, len) 0 # define sym_inline_pack(name, len) 0
# define sym_inline_unpack(sym, buf, lenp) NULL # define sym_inline_unpack(sym, buf, lenp) NULL
#else #else
#define SYMBOL_INLINE 1
#define SYMBOL_SHIFT 1
# define SYMBOL_INLINE_P(sym) ((sym) & SYMBOL_INLINE) # define SYMBOL_INLINE_P(sym) ((sym) & SYMBOL_INLINE)
# define SYMBOL_INLINE_LOWER_P(sym) ((sym) & SYMBOL_INLINE_LOWER)
#endif #endif
static void static void
...@@ -103,16 +98,14 @@ static const char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS ...@@ -103,16 +98,14 @@ static const char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS
static mrb_sym static mrb_sym
sym_inline_pack(const char *name, size_t len) sym_inline_pack(const char *name, size_t len)
{ {
const size_t lower_length_max = (MRB_SYMBOL_BIT - 2) / 5; const size_t pack_length_max = (MRB_SYMBOL_BIT - 2) / 6;
const size_t mix_length_max = (MRB_SYMBOL_BIT - 2) / 6;
char c; char c;
const char *p; const char *p;
size_t i; size_t i;
mrb_sym sym = 0; mrb_sym sym = 0;
mrb_bool lower = TRUE;
if (len > lower_length_max) return 0; /* too long */ if (len > pack_length_max) return 0; /* too long */
for (i=0; i<len; i++) { for (i=0; i<len; i++) {
uint32_t bits; uint32_t bits;
...@@ -121,36 +114,21 @@ sym_inline_pack(const char *name, size_t len) ...@@ -121,36 +114,21 @@ sym_inline_pack(const char *name, size_t len)
p = strchr(pack_table, (int)c); p = strchr(pack_table, (int)c);
if (p == 0) return 0; /* non alnum char */ if (p == 0) return 0; /* non alnum char */
bits = (uint32_t)(p - pack_table)+1; bits = (uint32_t)(p - pack_table)+1;
if (bits > 27) lower = FALSE; if (i >= pack_length_max) break;
if (i >= mix_length_max) break; sym |= bits<<(i*6+SYMBOL_SHIFT);
sym |= bits<<(i*6+SYMBOL_INLINE_SHIFT);
} }
if (lower) {
sym = 0;
for (i=0; i<len; i++) {
uint32_t bits;
c = name[i];
p = strchr(pack_table, (int)c);
bits = (uint32_t)(p - pack_table)+1;
sym |= bits<<(i*5+SYMBOL_INLINE_SHIFT);
}
return sym | SYMBOL_INLINE | SYMBOL_INLINE_LOWER;
}
if (len > mix_length_max) return 0;
return sym | SYMBOL_INLINE; return sym | SYMBOL_INLINE;
} }
static const char* static const char*
sym_inline_unpack(mrb_sym sym, char *buf, mrb_int *lenp) sym_inline_unpack(mrb_sym sym, char *buf, mrb_int *lenp)
{ {
int bit_per_char = SYMBOL_INLINE_LOWER_P(sym) ? 5 : 6;
int i; int i;
mrb_assert(SYMBOL_INLINE_P(sym)); mrb_assert(SYMBOL_INLINE_P(sym));
for (i=0; i<30/bit_per_char; i++) { for (i=0; i<5; i++) {
uint32_t bits = sym>>(i*bit_per_char+SYMBOL_INLINE_SHIFT) & ((1<<bit_per_char)-1); uint32_t bits = sym>>(i*6+SYMBOL_SHIFT) & ((1<<6)-1);
if (bits == 0) break; if (bits == 0) break;
buf[i] = pack_table[bits-1];; buf[i] = pack_table[bits-1];;
} }
...@@ -185,7 +163,7 @@ find_symbol(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp) ...@@ -185,7 +163,7 @@ find_symbol(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp)
/* presym */ /* presym */
i = presym_find(name, len); i = presym_find(name, len);
if (i > 0) return i<<SYMBOL_NORMAL_SHIFT; if (i > 0) return i<<SYMBOL_SHIFT;
/* inline symbol */ /* inline symbol */
i = sym_inline_pack(name, len); i = sym_inline_pack(name, len);
...@@ -199,14 +177,14 @@ find_symbol(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp) ...@@ -199,14 +177,14 @@ find_symbol(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp)
do { do {
sname = &mrb->symtbl[i]; sname = &mrb->symtbl[i];
if (sname->len == len && memcmp(sname->name, name, len) == 0) { if (sname->len == len && memcmp(sname->name, name, len) == 0) {
return (i+MRB_PRESYM_MAX)<<SYMBOL_NORMAL_SHIFT; return (i+MRB_PRESYM_MAX)<<SYMBOL_SHIFT;
} }
if (sname->prev == 0xff) { if (sname->prev == 0xff) {
i -= 0xff; i -= 0xff;
sname = &mrb->symtbl[i]; sname = &mrb->symtbl[i];
while (mrb->symtbl < sname) { while (mrb->symtbl < sname) {
if (sname->len == len && memcmp(sname->name, name, len) == 0) { if (sname->len == len && memcmp(sname->name, name, len) == 0) {
return (mrb_sym)(sname - mrb->symtbl)<<SYMBOL_NORMAL_SHIFT; return (mrb_sym)(sname - mrb->symtbl)<<SYMBOL_SHIFT;
} }
sname--; sname--;
} }
...@@ -262,7 +240,7 @@ sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit) ...@@ -262,7 +240,7 @@ sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
} }
mrb->symhash[hash] = mrb->symidx = sym; mrb->symhash[hash] = mrb->symidx = sym;
return (sym+MRB_PRESYM_MAX)<<SYMBOL_NORMAL_SHIFT; return (sym+MRB_PRESYM_MAX)<<SYMBOL_SHIFT;
} }
MRB_API mrb_sym MRB_API mrb_sym
...@@ -317,7 +295,7 @@ sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp) ...@@ -317,7 +295,7 @@ sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp)
{ {
if (SYMBOL_INLINE_P(sym)) return sym_inline_unpack(sym, buf, lenp); if (SYMBOL_INLINE_P(sym)) return sym_inline_unpack(sym, buf, lenp);
sym >>= SYMBOL_NORMAL_SHIFT; sym >>= SYMBOL_SHIFT;
{ {
const char *name = presym_sym2name(sym, lenp); const char *name = presym_sym2name(sym, lenp);
if (name) return name; if (name) return name;
...@@ -343,7 +321,7 @@ mrb_bool ...@@ -343,7 +321,7 @@ mrb_bool
mrb_sym_static_p(mrb_state *mrb, mrb_sym sym) mrb_sym_static_p(mrb_state *mrb, mrb_sym sym)
{ {
if (SYMBOL_INLINE_P(sym)) return TRUE; if (SYMBOL_INLINE_P(sym)) return TRUE;
sym >>= SYMBOL_NORMAL_SHIFT; sym >>= SYMBOL_SHIFT;
if (sym > MRB_PRESYM_MAX) return FALSE; if (sym > MRB_PRESYM_MAX) return FALSE;
return TRUE; return TRUE;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment