Add index to larger segment lists for performance

parent 8ffd4e47
# coding: cp932
class Array class Array
## ##
# call-seq: # call-seq:
...@@ -41,26 +42,19 @@ class Array ...@@ -41,26 +42,19 @@ class Array
# c.uniq! { |s| s.first } # => [["student", "sam"], ["teacher", "matz"]] # c.uniq! { |s| s.first } # => [["student", "sam"], ["teacher", "matz"]]
# #
def uniq!(&block) def uniq!(&block)
if block
hash = {} hash = {}
if block
self.each do |val| self.each do |val|
key = block.call(val) key = block.call(val)
hash[key] = val unless hash.key?(key) hash[key] = val unless hash.key?(key)
end end
result = hash.values result = hash.values
elsif self.size > 20 else
hash = {} hash = {}
self.each do |val| self.each do |val|
hash[val] = val hash[val] = val
end end
result = hash.values result = hash.keys
else
ary = self.dup
result = []
while ary.size > 0
result << ary.shift
ary.delete(result.last)
end
end end
if result.size == self.size if result.size == self.size
nil nil
......
...@@ -334,7 +334,7 @@ class Hash ...@@ -334,7 +334,7 @@ class Hash
# h["AA"] #=> "b" # h["AA"] #=> "b"
# #
def rehash def rehash
self.size # do nothing (for now)
self self
end end
end end
......
...@@ -16,14 +16,48 @@ ...@@ -16,14 +16,48 @@
mrb_int mrb_float_id(mrb_float f); mrb_int mrb_float_id(mrb_float f);
#endif #endif
static inline khint_t /* return non zero to break the loop */
mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key, mrb_value val, void *data);
/* Number of key/value slots stored in each segment.
   Only defined here when the build has not already provided a value. */
#ifndef MRB_SG_SEGMENT_SIZE
#define MRB_SG_SEGMENT_SIZE 5
#endif
/* One key/value slot of a segment.
   A slot whose key is the undef value is treated as deleted by the
   lookup, insert and iteration code in this file. */
struct segkv {
mrb_value key;
mrb_value val;
};
/* A node of the singly linked segment list: a fixed-size array of
   key/value slots plus a link to the following segment. */
typedef struct segment {
struct segment *next;                /* following segment, NULL for the tail */
struct segkv e[MRB_SG_SEGMENT_SIZE]; /* slots held by this segment */
} segment;
/* Open-addressing lookup index over the slots of a segment list.
   The table is a flexible array member, so the structure is allocated
   with room for `capa` slot pointers after the header. */
typedef struct segindex {
size_t size;           /* entries entered into the index so far */
size_t capa;           /* number of table slots; (capa - 1) is used as the probe mask */
struct segkv *table[]; /* pointers into segment slots; NULL marks an empty table slot */
} segindex;
/* Segment list: key/value slots kept in insertion order in a chain of
   fixed-size segments, with an optional index for faster lookup. */
typedef struct seglist {
segment *rootseg;  /* first segment, NULL while the list is empty */
segment *lastseg;  /* tail segment, where new entries are appended */
mrb_int size;      /* number of entries; incremented on insert, decremented on delete */
mrb_int last_len;  /* number of slots in use in the tail segment */
segindex *index;   /* lookup index, or NULL when no index has been built */
} seglist;
static /* inline */ size_t
sg_hash_func(mrb_state *mrb, seglist *t, mrb_value key)
{ {
enum mrb_vtype t = mrb_type(key); enum mrb_vtype tt = mrb_type(key);
mrb_value hv; mrb_value hv;
khint_t h; size_t h;
segindex *index = t->index;
size_t capa = index ? index->capa : 0;
switch (t) { switch (tt) {
case MRB_TT_STRING: case MRB_TT_STRING:
h = mrb_str_hash(mrb, key); h = mrb_str_hash(mrb, key);
break; break;
...@@ -35,23 +69,26 @@ mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key) ...@@ -35,23 +69,26 @@ mrb_hash_ht_hash_func(mrb_state *mrb, mrb_value key)
#ifndef MRB_WITHOUT_FLOAT #ifndef MRB_WITHOUT_FLOAT
case MRB_TT_FLOAT: case MRB_TT_FLOAT:
#endif #endif
h = (khint_t)mrb_obj_id(key); h = (size_t)mrb_obj_id(key);
break; break;
default: default:
hv = mrb_funcall(mrb, key, "hash", 0); hv = mrb_funcall(mrb, key, "hash", 0);
h = (khint_t)t ^ (khint_t)mrb_fixnum(hv); h = (size_t)t ^ (size_t)mrb_fixnum(hv);
break; break;
} }
return kh_int_hash_func(mrb, h); if (index && (index != t->index || capa != index->capa)) {
mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified");
}
return ((h)^((h)<<2)^((h)>>2));
} }
static inline mrb_bool static inline mrb_bool
mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) sg_hash_equal(mrb_state *mrb, seglist *t, mrb_value a, mrb_value b)
{ {
enum mrb_vtype t = mrb_type(a); enum mrb_vtype tt = mrb_type(a);
switch (t) { switch (tt) {
case MRB_TT_STRING: case MRB_TT_STRING:
return mrb_str_equal(mrb, a, b); return mrb_str_equal(mrb, a, b);
...@@ -84,32 +121,18 @@ mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b) ...@@ -84,32 +121,18 @@ mrb_hash_ht_hash_equal(mrb_state *mrb, mrb_value a, mrb_value b)
#endif #endif
default: default:
return mrb_eql(mrb, a, b); {
segindex *index = t->index;
size_t capa = index ? index->capa : 0;
mrb_bool eql = mrb_eql(mrb, a, b);
if (index && (index != t->index || capa != index->capa)) {
mrb_raise(mrb, E_RUNTIME_ERROR, "hash modified");
}
return eql;
}
} }
} }
/* return non zero to break the loop */
typedef int (sg_foreach_func)(mrb_state *mrb,mrb_value key,mrb_value val, void *data);
#ifndef MRB_SG_SEGMENT_SIZE
#define MRB_SG_SEGMENT_SIZE 5
#endif
typedef struct segment {
struct segment *next;
struct {
mrb_value key;
mrb_value val;
} e[MRB_SG_SEGMENT_SIZE];
} segment;
/* Instance variable table structure */
typedef struct seglist {
segment *rootseg;
mrb_int size;
mrb_int last_len;
} seglist;
/* Creates the instance variable table. */ /* Creates the instance variable table. */
static seglist* static seglist*
sg_new(mrb_state *mrb) sg_new(mrb_state *mrb)
...@@ -119,87 +142,87 @@ sg_new(mrb_state *mrb) ...@@ -119,87 +142,87 @@ sg_new(mrb_state *mrb)
t = (seglist*)mrb_malloc(mrb, sizeof(seglist)); t = (seglist*)mrb_malloc(mrb, sizeof(seglist));
t->size = 0; t->size = 0;
t->rootseg = NULL; t->rootseg = NULL;
t->lastseg = NULL;
t->last_len = 0; t->last_len = 0;
t->index = NULL;
return t; return t;
} }
/* Set the value for the symbol in the instance variable table. */ #define power2(v) do { \
v--;\
v |= v >> 1;\
v |= v >> 2;\
v |= v >> 4;\
v |= v >> 8;\
v |= v >> 16;\
v++;\
} while (0)
#ifndef UPPER_BOUND
#define UPPER_BOUND(x) ((x)>>2|(x)>>1)
#endif
#define SG_MASK(index) ((index->capa)-1)
/* Build index for the segment list */
static void static void
sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val) sg_index(mrb_state *mrb, seglist *t)
{ {
size_t size = (size_t)t->size;
size_t mask;
segindex *index = t->index;
segment *seg; segment *seg;
segment *prev = NULL; size_t i;
mrb_int i;
if (t == NULL) return; if (size < MRB_SG_SEGMENT_SIZE) {
seg = t->rootseg; if (index) {
while (seg) { failed:
for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { mrb_free(mrb, index);
mrb_value k = seg->e[i].key; t->index = NULL;
/* Found room in last segment after last_len */
if (!seg->next && i >= t->last_len) {
seg->e[i].key = key;
seg->e[i].val = val;
t->last_len = i+1;
if (t->size >= 0) t->size++;
return;
} }
if (mrb_undef_p(k)) continue;
if (mrb_hash_ht_hash_equal(mrb, k, key)) {
seg->e[i].val = val;
return; return;
} }
/* allocate index table */
if (index && index->size >= UPPER_BOUND(index->capa)) {
size = index->capa+1;
} }
prev = seg; power2(size);
seg = seg->next; if (!index || index->capa < size) {
} index = (segindex*)mrb_realloc_simple(mrb, index, sizeof(segindex)+sizeof(struct segkv*)*size);
if (index == NULL) goto failed;
/* Not found */ t->index = index;
if (t->size >= 0) t->size++;
seg = (segment*)mrb_malloc(mrb, sizeof(segment));
if (!seg) return;
seg->next = NULL;
seg->e[0].key = key;
seg->e[0].val = val;
t->last_len = 1;
if (prev) {
prev->next = seg;
} }
else { index->size = t->size;
t->rootseg = seg; index->capa = size;
for (i=0; i<size; i++) {
index->table[i] = NULL;
} }
}
/* Get a value for a symbol from the instance variable table. */
static mrb_bool
sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp)
{
segment *seg;
mrb_int i;
if (t == NULL) return FALSE; /* rebuild index */
mask = SG_MASK(index);
seg = t->rootseg; seg = t->rootseg;
while (seg) { while (seg) {
for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) {
mrb_value k = seg->e[i].key; mrb_value key;
size_t k, step = 0;
if (!seg->next && i >= t->last_len) { if (!seg->next && i >= (size_t)t->last_len) {
return FALSE; return;
} }
if (mrb_undef_p(k)) continue; key = seg->e[i].key;
if (mrb_hash_ht_hash_equal(mrb, k, key)) { if (mrb_undef_p(key)) continue;
if (vp) *vp = seg->e[i].val; k = sg_hash_func(mrb, t, key) & mask;
return TRUE; while (index->table[k]) {
k = (k+(++step)) & mask;
} }
index->table[k] = &seg->e[i];
} }
seg = seg->next; seg = seg->next;
} }
return FALSE;
} }
/* Compacts the hash removing delete entries. */ /* Compacts the segment list removing deleted entries. */
static void static void
sg_compact(mrb_state *mrb, seglist *t) sg_compact(mrb_state *mrb, seglist *t)
{ {
...@@ -209,6 +232,10 @@ sg_compact(mrb_state *mrb, seglist *t) ...@@ -209,6 +232,10 @@ sg_compact(mrb_state *mrb, seglist *t)
mrb_int i2; mrb_int i2;
mrb_int size = 0; mrb_int size = 0;
if (t->index && (size_t)t->size == t->index->size) {
sg_index(mrb, t);
return;
}
while (seg) { while (seg) {
for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) {
mrb_value k = seg->e[i].key; mrb_value k = seg->e[i].key;
...@@ -238,16 +265,179 @@ sg_compact(mrb_state *mrb, seglist *t) ...@@ -238,16 +265,179 @@ sg_compact(mrb_state *mrb, seglist *t)
exit: exit:
/* reached at end */ /* reached at end */
t->size = size; t->size = size;
t->last_len = i2; if (seg2) {
if (seg != seg2) {
seg = seg2->next; seg = seg2->next;
seg2->next = NULL; seg2->next = NULL;
t->last_len = i2;
t->lastseg = seg2;
while (seg) { while (seg) {
seg2 = seg->next; seg2 = seg->next;
mrb_free(mrb, seg); mrb_free(mrb, seg);
seg = seg2; seg = seg2;
} }
} }
if (t->index) {
sg_index(mrb, t);
}
}
/* Forward declaration: used as the fallback when the index disappears. */
static void sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val);

/*
 * Set the value for the key in the indexed segment list.
 *
 * The caller must have checked that t->index is not NULL.
 * If the key is already present its value is overwritten in place.
 * Otherwise the pair is appended to the tail segment (allocating a new
 * segment when the tail is full) and a pointer to the new slot is stored
 * in the index, reusing the first index slot that refers to a deleted
 * entry when one was passed during probing.
 */
static void
sg_index_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val)
{
  segindex *index = t->index;
  size_t k, sp, step = 0, mask;
  segment *seg;

  if (index->size >= UPPER_BOUND(index->capa)) {
    /* need to expand table */
    sg_compact(mrb, t);
    index = t->index;
    if (index == NULL) {
      /* Rebuilding the index during compaction can drop it (too few
         live entries left, or the reallocation failed).  Without an
         index sg_put() takes the linear path, so delegate to it
         instead of dereferencing a NULL index below. */
      sg_put(mrb, t, key, val);
      return;
    }
  }
  mask = SG_MASK(index);
  sp = index->capa;             /* capa means "no reusable slot seen yet" */
  k = sg_hash_func(mrb, t, key) & mask;
  while (index->table[k]) {
    mrb_value key2 = index->table[k]->key;
    if (mrb_undef_p(key2)) {
      /* remember the first index slot that points at a deleted entry */
      if (sp == index->capa) sp = k;
    }
    else if (sg_hash_equal(mrb, t, key, key2)) {
      index->table[k]->val = val;
      return;
    }
    k = (k+(++step)) & mask;
  }
  if (sp < index->capa) {
    k = sp;
  }

  /* put the value at the last */
  seg = t->lastseg;
  if (t->last_len < MRB_SG_SEGMENT_SIZE) {
    index->table[k] = &seg->e[t->last_len++];
  }
  else {                        /* append a new segment */
    seg->next = (segment*)mrb_malloc(mrb, sizeof(segment));
    seg = seg->next;
    seg->next = NULL;
    t->lastseg = seg;
    t->last_len = 1;
    index->table[k] = &seg->e[0];
  }
  index->table[k]->key = key;
  index->table[k]->val = val;
  index->size++;
  t->size++;
}
/*
 * Set the value for the key in the segment list.
 *
 * Does nothing when t is NULL.  When the list has an index the work is
 * delegated to sg_index_put().  Otherwise the segments are scanned in
 * order: an existing key gets its value overwritten, and a new key is
 * stored in the first free slot of the tail segment.  When no free slot
 * is found, the list is compacted first if more than
 * MRB_SG_SEGMENT_SIZE deleted slots were seen, and the pair is then
 * stored in the tail (if compaction left room) or in a new segment.
 * An index is built once the list grows past MRB_SG_SEGMENT_SIZE*4
 * entries.
 */
static void
sg_put(mrb_state *mrb, seglist *t, mrb_value key, mrb_value val)
{
  segment *seg;
  mrb_int i, deleted = 0;

  if (t == NULL) return;
  if (t->index) {
    sg_index_put(mrb, t, key, val);
    return;
  }
  seg = t->rootseg;
  while (seg) {
    for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) {
      mrb_value k = seg->e[i].key;
      /* Found room in last segment after last_len */
      if (!seg->next && i >= t->last_len) {
        seg->e[i].key = key;
        seg->e[i].val = val;
        t->last_len = i+1;
        t->size++;
        return;
      }
      if (mrb_undef_p(k)) {
        deleted++;
        continue;
      }
      if (sg_hash_equal(mrb, t, k, key)) {
        seg->e[i].val = val;
        return;
      }
    }
    seg = seg->next;
  }

  /* Not found */
  if (deleted > MRB_SG_SEGMENT_SIZE) {
    sg_compact(mrb, t);
  }
  t->size++;

  if (t->rootseg != NULL && t->lastseg != NULL &&
      t->last_len < MRB_SG_SEGMENT_SIZE) {
    /* Compaction left free slots in the tail segment: use them.
       Appending a new segment here would turn the stale slots past
       last_len into slots of a non-tail segment, where every slot is
       scanned as if it were in use. */
    seg = t->lastseg;
    i = t->last_len;
  }
  else {
    /* append a new segment */
    seg = (segment*)mrb_malloc(mrb, sizeof(segment));
    seg->next = NULL;
    if (t->rootseg == NULL) {
      t->rootseg = seg;
    }
    else {
      t->lastseg->next = seg;
    }
    t->lastseg = seg;
    i = 0;
  }
  seg->e[i].key = key;
  seg->e[i].val = val;
  t->last_len = i+1;
  if (t->index == NULL && t->size > MRB_SG_SEGMENT_SIZE*4) {
    sg_index(mrb, t);
  }
}
/*
 * Get a value for a key from the indexed segment list.
 *
 * The caller must have checked that t->index is not NULL.
 * Probes the index starting at the slot selected by the key's hash and
 * stops at the first empty index slot.
 * Returns TRUE and stores the value through vp (when vp is not NULL) if
 * the key is found; returns FALSE otherwise.
 */
static mrb_bool
sg_index_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp)
{
  segindex *index = t->index;
  size_t mask = SG_MASK(index);
  size_t k = sg_hash_func(mrb, t, key) & mask;
  size_t step = 0;

  while (index->table[k]) {
    mrb_value key2 = index->table[k]->key;

    /* Deleting an entry only marks its key as undef; the index keeps
       pointing at the slot.  Skip such slots rather than comparing the
       lookup key against undef, as sg_index_put() and sg_del() do. */
    if (!mrb_undef_p(key2) && sg_hash_equal(mrb, t, key, key2)) {
      if (vp) *vp = index->table[k]->val;
      return TRUE;
    }
    k = (k+(++step)) & mask;
  }
  return FALSE;
}
/* Get a value for a key from the segment list.
   Returns FALSE when t is NULL or the key is not present.  On a hit,
   returns TRUE and stores the value through vp when vp is not NULL.
   Lists that have an index are looked up through sg_index_get();
   otherwise the segments are scanned linearly from the root. */
static mrb_bool
sg_get(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp)
{
segment *seg;
mrb_int i;
if (t == NULL) return FALSE;
if (t->index) {
return sg_index_get(mrb, t, key, vp);
}
seg = t->rootseg;
while (seg) {
for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) {
mrb_value k = seg->e[i].key;
/* slots at or past last_len in the tail segment are unused: end of data */
if (!seg->next && i >= t->last_len) {
return FALSE;
}
/* skip deleted slots */
if (mrb_undef_p(k)) continue;
if (sg_hash_equal(mrb, t, k, key)) {
if (vp) *vp = seg->e[i].val;
return TRUE;
}
}
seg = seg->next;
}
return FALSE;
} }
/* Deletes the value for the symbol from the instance variable table. */ /* Deletes the value for the symbol from the instance variable table. */
...@@ -262,18 +452,17 @@ sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp) ...@@ -262,18 +452,17 @@ sg_del(mrb_state *mrb, seglist *t, mrb_value key, mrb_value *vp)
seg = t->rootseg; seg = t->rootseg;
while (seg) { while (seg) {
for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) {
mrb_value k = seg->e[i].key; mrb_value key2;
if (!seg->next && i >= t->last_len) { if (!seg->next && i >= t->last_len) {
/* not found */ /* not found */
return FALSE; return FALSE;
} }
if (mrb_undef_p(k)) continue; key2 = seg->e[i].key;
if (mrb_hash_ht_hash_equal(mrb, k, key)) { if (!mrb_undef_p(key2) && sg_hash_equal(mrb, t, key, key2)) {
if (vp) *vp = k; if (vp) *vp = key2;
seg->e[i].key = mrb_undef_value(); seg->e[i].key = mrb_undef_value();
if (t->size > 0) t->size = -1; t->size--;
else t->size--; /* count number of deleted */
return TRUE; return TRUE;
} }
} }
...@@ -290,6 +479,9 @@ sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p) ...@@ -290,6 +479,9 @@ sg_foreach(mrb_state *mrb, seglist *t, sg_foreach_func *func, void *p)
mrb_int i; mrb_int i;
if (t == NULL) return; if (t == NULL) return;
if (t->index && t->index->size-(size_t)t->size > MRB_SG_SEGMENT_SIZE) {
sg_compact(mrb, t);
}
seg = t->rootseg; seg = t->rootseg;
while (seg) { while (seg) {
for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) { for (i=0; i<MRB_SG_SEGMENT_SIZE; i++) {
...@@ -310,9 +502,6 @@ static mrb_int ...@@ -310,9 +502,6 @@ static mrb_int
sg_size(mrb_state *mrb, seglist *t) sg_size(mrb_state *mrb, seglist *t)
{ {
if (t == NULL) return 0; if (t == NULL) return 0;
if (t->size < 0) {
sg_compact(mrb, t);
}
return t->size; return t->size;
} }
...@@ -355,13 +544,14 @@ sg_free(mrb_state *mrb, seglist *t) ...@@ -355,13 +544,14 @@ sg_free(mrb_state *mrb, seglist *t)
seg = seg->next; seg = seg->next;
mrb_free(mrb, p); mrb_free(mrb, p);
} }
if (t->index) mrb_free(mrb, t->index);
mrb_free(mrb, t); mrb_free(mrb, t);
} }
static void mrb_hash_modify(mrb_state *mrb, mrb_value hash); static void mrb_hash_modify(mrb_state *mrb, mrb_value hash);
static inline mrb_value static inline mrb_value
mrb_hash_ht_key(mrb_state *mrb, mrb_value key) ht_key(mrb_state *mrb, mrb_value key)
{ {
if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) { if (mrb_string_p(key) && !MRB_FROZEN_P(mrb_str_ptr(key))) {
key = mrb_str_dup(mrb, key); key = mrb_str_dup(mrb, key);
...@@ -370,7 +560,7 @@ mrb_hash_ht_key(mrb_state *mrb, mrb_value key) ...@@ -370,7 +560,7 @@ mrb_hash_ht_key(mrb_state *mrb, mrb_value key)
return key; return key;
} }
#define KEY(key) mrb_hash_ht_key(mrb, key) #define KEY(key) ht_key(mrb, key)
static int static int
hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p) hash_mark_i(mrb_state *mrb, mrb_value key, mrb_value val, void *p)
...@@ -489,15 +679,6 @@ mrb_hash_dup(mrb_state *mrb, mrb_value self) ...@@ -489,15 +679,6 @@ mrb_hash_dup(mrb_state *mrb, mrb_value self)
return mrb_obj_value(copy); return mrb_obj_value(copy);
} }
MRB_API mrb_bool
mrb_hash_has_key_p(mrb_state *mrb, mrb_value hash, mrb_value key)
{
if (sg_get(mrb, RHASH_TBL(hash), key, NULL)) {
return TRUE;
}
return FALSE;
}
MRB_API mrb_value MRB_API mrb_value
mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key) mrb_hash_get(mrb_state *mrb, mrb_value hash, mrb_value key)
{ {
...@@ -821,6 +1002,33 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self) ...@@ -821,6 +1002,33 @@ mrb_hash_delete(mrb_state *mrb, mrb_value self)
return mrb_hash_delete_key(mrb, self, key); return mrb_hash_delete_key(mrb, self, key);
} }
/*
 * Remove the first live entry of the segment list and hand it back.
 *
 * Walks the segments from the root, skipping slots whose key is undef.
 * The first remaining entry is copied to *kp / *vp, its key is set to
 * undef to mark it deleted, and the entry count is decremented.
 * When the list holds no live entry, *kp and *vp are left untouched.
 */
static void
sg_shift(mrb_state *mrb, seglist *t, mrb_value *kp, mrb_value *vp)
{
  segment *cur;

  for (cur = t->rootseg; cur != NULL; cur = cur->next) {
    /* only the tail segment can be partially filled */
    mrb_int used = MRB_SG_SEGMENT_SIZE;
    mrb_int pos;

    if (cur->next == NULL && t->last_len < used) {
      used = t->last_len;
    }
    for (pos = 0; pos < used; pos++) {
      if (!mrb_undef_p(cur->e[pos].key)) {
        *kp = cur->e[pos].key;
        *vp = cur->e[pos].val;
        /* mark the slot as deleted */
        cur->e[pos].key = mrb_undef_value();
        t->size--;
        return;
      }
    }
  }
}
/* 15.2.13.4.24 */ /* 15.2.13.4.24 */
/* /*
* call-seq: * call-seq:
...@@ -842,9 +1050,9 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash) ...@@ -842,9 +1050,9 @@ mrb_hash_shift(mrb_state *mrb, mrb_value hash)
mrb_hash_modify(mrb, hash); mrb_hash_modify(mrb, hash);
if (sg && sg_size(mrb, sg) > 0) { if (sg && sg_size(mrb, sg) > 0) {
mrb_value del_key = sg->rootseg->e[0].key; mrb_value del_key, del_val;
mrb_value del_val = sg->rootseg->e[0].val;
sg_del(mrb, sg, del_key, NULL); sg_shift(mrb, sg, &del_key, &del_val);
return mrb_assoc_new(mrb, del_key, del_val); return mrb_assoc_new(mrb, del_key, del_val);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment