Add a new type of shared string: `RSTR_FSHARED`.

`RSTR_FSHARED` uses a frozen string as the shared body instead of
a `struct mrb_shared_string`. This reduces allocations for
literal strings.
parent 473b7d0e
...@@ -52,7 +52,7 @@ mrb_class(mrb_state *mrb, mrb_value v) ...@@ -52,7 +52,7 @@ mrb_class(mrb_state *mrb, mrb_value v)
} }
/* TODO: figure out where to put user flags */ /* TODO: figure out where to put user flags */
#define MRB_FLAG_IS_FROZEN (1 << 18) /* flags bits >= 18 is reserved */
#define MRB_FLAG_IS_PREPENDED (1 << 19) #define MRB_FLAG_IS_PREPENDED (1 << 19)
#define MRB_FLAG_IS_ORIGIN (1 << 20) #define MRB_FLAG_IS_ORIGIN (1 << 20)
#define MRB_CLASS_ORIGIN(c) do {\ #define MRB_CLASS_ORIGIN(c) do {\
......
...@@ -22,6 +22,8 @@ struct RBasic { ...@@ -22,6 +22,8 @@ struct RBasic {
}; };
#define mrb_basic_ptr(v) ((struct RBasic*)(mrb_ptr(v))) #define mrb_basic_ptr(v) ((struct RBasic*)(mrb_ptr(v)))
/* flags bits >= 18 is reserved */
#define MRB_FLAG_IS_FROZEN (1 << 18)
#define MRB_FROZEN_P(o) ((o)->flags & MRB_FLAG_IS_FROZEN) #define MRB_FROZEN_P(o) ((o)->flags & MRB_FLAG_IS_FROZEN)
#define MRB_SET_FROZEN_FLAG(o) ((o)->flags |= MRB_FLAG_IS_FROZEN) #define MRB_SET_FROZEN_FLAG(o) ((o)->flags |= MRB_FLAG_IS_FROZEN)
#define MRB_UNSET_FROZEN_FLAG(o) ((o)->flags &= ~MRB_FLAG_IS_FROZEN) #define MRB_UNSET_FROZEN_FLAG(o) ((o)->flags &= ~MRB_FLAG_IS_FROZEN)
......
...@@ -26,6 +26,7 @@ struct RString { ...@@ -26,6 +26,7 @@ struct RString {
union { union {
mrb_int capa; mrb_int capa;
struct mrb_shared_string *shared; struct mrb_shared_string *shared;
struct RString *fshared;
} aux; } aux;
char *ptr; char *ptr;
} heap; } heap;
...@@ -59,6 +60,10 @@ struct RString { ...@@ -59,6 +60,10 @@ struct RString {
#define RSTR_SET_SHARED_FLAG(s) ((s)->flags |= MRB_STR_SHARED) #define RSTR_SET_SHARED_FLAG(s) ((s)->flags |= MRB_STR_SHARED)
#define RSTR_UNSET_SHARED_FLAG(s) ((s)->flags &= ~MRB_STR_SHARED) #define RSTR_UNSET_SHARED_FLAG(s) ((s)->flags &= ~MRB_STR_SHARED)
#define RSTR_FSHARED_P(s) ((s)->flags & MRB_STR_FSHARED)
#define RSTR_SET_FSHARED_FLAG(s) ((s)->flags |= MRB_STR_FSHARED)
#define RSTR_UNSET_FSHARED_FLAG(s) ((s)->flags &= ~MRB_STR_FSHARED)
#define RSTR_NOFREE_P(s) ((s)->flags & MRB_STR_NOFREE) #define RSTR_NOFREE_P(s) ((s)->flags & MRB_STR_NOFREE)
#define RSTR_SET_NOFREE_FLAG(s) ((s)->flags |= MRB_STR_NOFREE) #define RSTR_SET_NOFREE_FLAG(s) ((s)->flags |= MRB_STR_NOFREE)
#define RSTR_UNSET_NOFREE_FLAG(s) ((s)->flags &= ~MRB_STR_NOFREE) #define RSTR_UNSET_NOFREE_FLAG(s) ((s)->flags &= ~MRB_STR_NOFREE)
...@@ -76,7 +81,8 @@ struct RString { ...@@ -76,7 +81,8 @@ struct RString {
MRB_API mrb_int mrb_str_strlen(mrb_state*, struct RString*); MRB_API mrb_int mrb_str_strlen(mrb_state*, struct RString*);
#define MRB_STR_SHARED 1 #define MRB_STR_SHARED 1
#define MRB_STR_NOFREE 2 #define MRB_STR_FSHARED 2
#define MRB_STR_NOFREE 4
#define MRB_STR_NO_UTF 8 #define MRB_STR_NO_UTF 8
#define MRB_STR_EMBED 16 #define MRB_STR_EMBED 16
#define MRB_STR_EMBED_LEN_MASK 0x3e0 #define MRB_STR_EMBED_LEN_MASK 0x3e0
......
...@@ -698,6 +698,10 @@ gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) ...@@ -698,6 +698,10 @@ gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj)
break; break;
case MRB_TT_STRING: case MRB_TT_STRING:
if (RSTR_FSHARED_P(obj) && !RSTR_NOFREE_P(obj)) {
struct RString *s = (struct RString*)obj;
mrb_gc_mark(mrb, (struct RBasic*)s->as.heap.aux.fshared);
}
break; break;
case MRB_TT_RANGE: case MRB_TT_RANGE:
......
...@@ -237,6 +237,7 @@ mrb_str_pool(mrb_state *mrb, mrb_value str) ...@@ -237,6 +237,7 @@ mrb_str_pool(mrb_state *mrb, mrb_value str)
ns->as.heap.ptr[len] = '\0'; ns->as.heap.ptr[len] = '\0';
} }
} }
MRB_SET_FROZEN_FLAG(ns);
return mrb_obj_value(ns); return mrb_obj_value(ns);
} }
......
...@@ -205,7 +205,7 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str) ...@@ -205,7 +205,7 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str)
/* no code */; /* no code */;
else if (RSTR_SHARED_P(str)) else if (RSTR_SHARED_P(str))
str_decref(mrb, str->as.heap.aux.shared); str_decref(mrb, str->as.heap.aux.shared);
else if (!RSTR_NOFREE_P(str)) else if (!RSTR_NOFREE_P(str) && !RSTR_FSHARED_P(str))
mrb_free(mrb, str->as.heap.ptr); mrb_free(mrb, str->as.heap.ptr);
} }
...@@ -341,57 +341,62 @@ mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n) ...@@ -341,57 +341,62 @@ mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n); return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
} }
static void static mrb_bool
str_make_shared(mrb_state *mrb, struct RString *s) str_make_shared(mrb_state *mrb, struct RString *s)
{ {
if (!RSTR_SHARED_P(s)) { if (!RSTR_SHARED_P(s)) {
mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string)); if (MRB_FROZEN_P(s) || RSTR_FSHARED_P(s)) {
return FALSE;
shared->refcnt = 1;
if (RSTR_EMBED_P(s)) {
const mrb_int len = RSTR_EMBED_LEN(s);
char *const tmp = (char *)mrb_malloc(mrb, len+1);
memcpy(tmp, s->as.ary, len);
tmp[len] = '\0';
RSTR_UNSET_EMBED_FLAG(s);
s->as.heap.ptr = tmp;
s->as.heap.len = len;
shared->nofree = FALSE;
shared->ptr = s->as.heap.ptr;
}
else if (RSTR_NOFREE_P(s)) {
shared->nofree = TRUE;
shared->ptr = s->as.heap.ptr;
RSTR_UNSET_NOFREE_FLAG(s);
} }
else { else {
shared->nofree = FALSE; mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
if (s->as.heap.aux.capa > s->as.heap.len) {
s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1); shared->refcnt = 1;
if (RSTR_EMBED_P(s)) {
const mrb_int len = RSTR_EMBED_LEN(s);
char *const tmp = (char *)mrb_malloc(mrb, len+1);
memcpy(tmp, s->as.ary, len);
tmp[len] = '\0';
RSTR_UNSET_EMBED_FLAG(s);
s->as.heap.ptr = tmp;
s->as.heap.len = len;
shared->nofree = FALSE;
shared->ptr = s->as.heap.ptr;
} }
else { else if (RSTR_NOFREE_P(s)) {
shared->nofree = TRUE;
shared->ptr = s->as.heap.ptr; shared->ptr = s->as.heap.ptr;
RSTR_UNSET_NOFREE_FLAG(s);
} }
else {
shared->nofree = FALSE;
if (s->as.heap.aux.capa > s->as.heap.len) {
s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1);
}
else {
shared->ptr = s->as.heap.ptr;
}
}
shared->len = s->as.heap.len;
s->as.heap.aux.shared = shared;
RSTR_SET_SHARED_FLAG(s);
} }
shared->len = s->as.heap.len;
s->as.heap.aux.shared = shared;
RSTR_SET_SHARED_FLAG(s);
} }
return TRUE;
} }
static mrb_value static mrb_value
byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{ {
struct RString *orig, *s; struct RString *orig, *s;
mrb_shared_string *shared;
orig = mrb_str_ptr(str); orig = mrb_str_ptr(str);
if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0) { if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0) {
s = str_new(mrb, orig->as.ary+beg, len); s = str_new(mrb, orig->as.ary+beg, len);
} }
else { else if (str_make_shared(mrb, orig)) {
str_make_shared(mrb, orig); mrb_shared_string *shared = orig->as.heap.aux.shared;
shared = orig->as.heap.aux.shared;
s = mrb_obj_alloc_string(mrb); s = mrb_obj_alloc_string(mrb);
s->as.heap.ptr = orig->as.heap.ptr + beg; s->as.heap.ptr = orig->as.heap.ptr + beg;
s->as.heap.len = len; s->as.heap.len = len;
...@@ -399,6 +404,18 @@ byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) ...@@ -399,6 +404,18 @@ byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
RSTR_SET_SHARED_FLAG(s); RSTR_SET_SHARED_FLAG(s);
shared->refcnt++; shared->refcnt++;
} }
else {
s = mrb_obj_alloc_string(mrb);
s->as.heap.ptr = orig->as.heap.ptr + beg;
s->as.heap.len = len;
if (MRB_FROZEN_P(orig)) {
s->as.heap.aux.fshared = orig;
}
else {
s->as.heap.aux.fshared = orig->as.heap.aux.fshared;
}
RSTR_SET_FSHARED_FLAG(s);
}
return mrb_obj_value(s); return mrb_obj_value(s);
} }
...@@ -497,25 +514,39 @@ str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2) ...@@ -497,25 +514,39 @@ str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
len = RSTR_LEN(s2); len = RSTR_LEN(s2);
if (RSTR_SHARED_P(s1)) { if (RSTR_SHARED_P(s1)) {
str_decref(mrb, s1->as.heap.aux.shared); str_decref(mrb, s1->as.heap.aux.shared);
RSTR_UNSET_SHARED_FLAG(s1);
} }
else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) { else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1)
&& s1->as.heap.ptr) {
mrb_free(mrb, s1->as.heap.ptr); mrb_free(mrb, s1->as.heap.ptr);
} }
RSTR_UNSET_FSHARED_FLAG(s1);
RSTR_UNSET_NOFREE_FLAG(s1); RSTR_UNSET_NOFREE_FLAG(s1);
RSTR_UNSET_EMBED_FLAG(s1);
if (RSTR_SHARED_P(s2)) { if (MRB_FROZEN_P(s2)) {
RSTR_SET_FSHARED_FLAG(s1);
s1->as.heap.ptr = RSTR_PTR(s2);
s1->as.heap.len = len;
s1->as.heap.aux.fshared = s2;
}
else if (RSTR_FSHARED_P(s2)) {
RSTR_SET_FSHARED_FLAG(s1);
s1->as.heap.ptr = s2->as.heap.ptr;
s1->as.heap.len = len;
s1->as.heap.aux.fshared = s2->as.heap.aux.fshared;
}
else if (RSTR_SHARED_P(s2)) {
L_SHARE: L_SHARE:
RSTR_UNSET_EMBED_FLAG(s1); RSTR_SET_SHARED_FLAG(s1);
s1->as.heap.ptr = s2->as.heap.ptr; s1->as.heap.ptr = s2->as.heap.ptr;
s1->as.heap.len = len; s1->as.heap.len = len;
s1->as.heap.aux.shared = s2->as.heap.aux.shared; s1->as.heap.aux.shared = s2->as.heap.aux.shared;
RSTR_SET_SHARED_FLAG(s1);
s1->as.heap.aux.shared->refcnt++; s1->as.heap.aux.shared->refcnt++;
} }
else { else {
if (len <= RSTRING_EMBED_LEN_MAX) { if (len <= RSTRING_EMBED_LEN_MAX) {
RSTR_UNSET_SHARED_FLAG(s1);
RSTR_SET_EMBED_FLAG(s1); RSTR_SET_EMBED_FLAG(s1);
memcpy(s1->as.ary, RSTR_PTR(s2), len); memcpy(s1->as.ary, RSTR_PTR(s2), len);
RSTR_SET_EMBED_LEN(s1, len); RSTR_SET_EMBED_LEN(s1, len);
...@@ -675,11 +706,12 @@ mrb_str_modify(mrb_state *mrb, struct RString *s) ...@@ -675,11 +706,12 @@ mrb_str_modify(mrb_state *mrb, struct RString *s)
RSTR_UNSET_SHARED_FLAG(s); RSTR_UNSET_SHARED_FLAG(s);
return; return;
} }
if (RSTR_NOFREE_P(s)) { if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) {
char *p = s->as.heap.ptr; char *p = s->as.heap.ptr;
mrb_int len = s->as.heap.len; mrb_int len = s->as.heap.len;
RSTR_UNSET_NOFREE_FLAG(s); RSTR_UNSET_NOFREE_FLAG(s);
RSTR_UNSET_FSHARED_FLAG(s);
if (len < RSTRING_EMBED_LEN_MAX) { if (len < RSTRING_EMBED_LEN_MAX) {
RSTR_SET_EMBED_FLAG(s); RSTR_SET_EMBED_FLAG(s);
RSTR_SET_EMBED_LEN(s, len); RSTR_SET_EMBED_LEN(s, len);
...@@ -1219,8 +1251,8 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str) ...@@ -1219,8 +1251,8 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
mrb_int argc; mrb_int argc;
struct RString *s = mrb_str_ptr(str); struct RString *s = mrb_str_ptr(str);
mrb_str_modify(mrb, s);
argc = mrb_get_args(mrb, "|S", &rs); argc = mrb_get_args(mrb, "|S", &rs);
mrb_str_modify(mrb, s);
len = RSTR_LEN(s); len = RSTR_LEN(s);
if (argc == 0) { if (argc == 0) {
if (len == 0) return mrb_nil_value(); if (len == 0) return mrb_nil_value();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment