Add new type of shared string: `RSTR_FSHARED`.

`RSTR_FSHARED` use frozen strings as shared body instead of
`struct mrb_shared_string`. This reduces allocation from
literal strings.
parent 473b7d0e
......@@ -52,7 +52,7 @@ mrb_class(mrb_state *mrb, mrb_value v)
}
/* TODO: figure out where to put user flags */
#define MRB_FLAG_IS_FROZEN (1 << 18)
/* flags bits >= 18 is reserved */
#define MRB_FLAG_IS_PREPENDED (1 << 19)
#define MRB_FLAG_IS_ORIGIN (1 << 20)
#define MRB_CLASS_ORIGIN(c) do {\
......
......@@ -22,6 +22,8 @@ struct RBasic {
};
#define mrb_basic_ptr(v) ((struct RBasic*)(mrb_ptr(v)))
/* flags bits >= 18 is reserved */
#define MRB_FLAG_IS_FROZEN (1 << 18)
#define MRB_FROZEN_P(o) ((o)->flags & MRB_FLAG_IS_FROZEN)
#define MRB_SET_FROZEN_FLAG(o) ((o)->flags |= MRB_FLAG_IS_FROZEN)
#define MRB_UNSET_FROZEN_FLAG(o) ((o)->flags &= ~MRB_FLAG_IS_FROZEN)
......
......@@ -26,6 +26,7 @@ struct RString {
union {
mrb_int capa;
struct mrb_shared_string *shared;
struct RString *fshared;
} aux;
char *ptr;
} heap;
......@@ -59,6 +60,10 @@ struct RString {
#define RSTR_SET_SHARED_FLAG(s) ((s)->flags |= MRB_STR_SHARED)
#define RSTR_UNSET_SHARED_FLAG(s) ((s)->flags &= ~MRB_STR_SHARED)
#define RSTR_FSHARED_P(s) ((s)->flags & MRB_STR_FSHARED)
#define RSTR_SET_FSHARED_FLAG(s) ((s)->flags |= MRB_STR_FSHARED)
#define RSTR_UNSET_FSHARED_FLAG(s) ((s)->flags &= ~MRB_STR_FSHARED)
#define RSTR_NOFREE_P(s) ((s)->flags & MRB_STR_NOFREE)
#define RSTR_SET_NOFREE_FLAG(s) ((s)->flags |= MRB_STR_NOFREE)
#define RSTR_UNSET_NOFREE_FLAG(s) ((s)->flags &= ~MRB_STR_NOFREE)
......@@ -76,7 +81,8 @@ struct RString {
MRB_API mrb_int mrb_str_strlen(mrb_state*, struct RString*);
#define MRB_STR_SHARED 1
#define MRB_STR_NOFREE 2
#define MRB_STR_FSHARED 2
#define MRB_STR_NOFREE 4
#define MRB_STR_NO_UTF 8
#define MRB_STR_EMBED 16
#define MRB_STR_EMBED_LEN_MASK 0x3e0
......
......@@ -698,6 +698,10 @@ gc_mark_children(mrb_state *mrb, mrb_gc *gc, struct RBasic *obj)
break;
case MRB_TT_STRING:
if (RSTR_FSHARED_P(obj) && !RSTR_NOFREE_P(obj)) {
struct RString *s = (struct RString*)obj;
mrb_gc_mark(mrb, (struct RBasic*)s->as.heap.aux.fshared);
}
break;
case MRB_TT_RANGE:
......
......@@ -237,6 +237,7 @@ mrb_str_pool(mrb_state *mrb, mrb_value str)
ns->as.heap.ptr[len] = '\0';
}
}
MRB_SET_FROZEN_FLAG(ns);
return mrb_obj_value(ns);
}
......
......@@ -205,7 +205,7 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str)
/* no code */;
else if (RSTR_SHARED_P(str))
str_decref(mrb, str->as.heap.aux.shared);
else if (!RSTR_NOFREE_P(str))
else if (!RSTR_NOFREE_P(str) && !RSTR_FSHARED_P(str))
mrb_free(mrb, str->as.heap.ptr);
}
......@@ -341,57 +341,62 @@ mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
}
static void
static mrb_bool
str_make_shared(mrb_state *mrb, struct RString *s)
{
if (!RSTR_SHARED_P(s)) {
mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
shared->refcnt = 1;
if (RSTR_EMBED_P(s)) {
const mrb_int len = RSTR_EMBED_LEN(s);
char *const tmp = (char *)mrb_malloc(mrb, len+1);
memcpy(tmp, s->as.ary, len);
tmp[len] = '\0';
RSTR_UNSET_EMBED_FLAG(s);
s->as.heap.ptr = tmp;
s->as.heap.len = len;
shared->nofree = FALSE;
shared->ptr = s->as.heap.ptr;
}
else if (RSTR_NOFREE_P(s)) {
shared->nofree = TRUE;
shared->ptr = s->as.heap.ptr;
RSTR_UNSET_NOFREE_FLAG(s);
if (MRB_FROZEN_P(s) || RSTR_FSHARED_P(s)) {
return FALSE;
}
else {
shared->nofree = FALSE;
if (s->as.heap.aux.capa > s->as.heap.len) {
s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1);
mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
shared->refcnt = 1;
if (RSTR_EMBED_P(s)) {
const mrb_int len = RSTR_EMBED_LEN(s);
char *const tmp = (char *)mrb_malloc(mrb, len+1);
memcpy(tmp, s->as.ary, len);
tmp[len] = '\0';
RSTR_UNSET_EMBED_FLAG(s);
s->as.heap.ptr = tmp;
s->as.heap.len = len;
shared->nofree = FALSE;
shared->ptr = s->as.heap.ptr;
}
else {
else if (RSTR_NOFREE_P(s)) {
shared->nofree = TRUE;
shared->ptr = s->as.heap.ptr;
RSTR_UNSET_NOFREE_FLAG(s);
}
else {
shared->nofree = FALSE;
if (s->as.heap.aux.capa > s->as.heap.len) {
s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1);
}
else {
shared->ptr = s->as.heap.ptr;
}
}
shared->len = s->as.heap.len;
s->as.heap.aux.shared = shared;
RSTR_SET_SHARED_FLAG(s);
}
shared->len = s->as.heap.len;
s->as.heap.aux.shared = shared;
RSTR_SET_SHARED_FLAG(s);
}
return TRUE;
}
static mrb_value
byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
struct RString *orig, *s;
mrb_shared_string *shared;
orig = mrb_str_ptr(str);
if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0) {
s = str_new(mrb, orig->as.ary+beg, len);
}
else {
str_make_shared(mrb, orig);
shared = orig->as.heap.aux.shared;
else if (str_make_shared(mrb, orig)) {
mrb_shared_string *shared = orig->as.heap.aux.shared;
s = mrb_obj_alloc_string(mrb);
s->as.heap.ptr = orig->as.heap.ptr + beg;
s->as.heap.len = len;
......@@ -399,6 +404,18 @@ byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
RSTR_SET_SHARED_FLAG(s);
shared->refcnt++;
}
else {
s = mrb_obj_alloc_string(mrb);
s->as.heap.ptr = orig->as.heap.ptr + beg;
s->as.heap.len = len;
if (MRB_FROZEN_P(orig)) {
s->as.heap.aux.fshared = orig;
}
else {
s->as.heap.aux.fshared = orig->as.heap.aux.fshared;
}
RSTR_SET_FSHARED_FLAG(s);
}
return mrb_obj_value(s);
}
......@@ -497,25 +514,39 @@ str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
len = RSTR_LEN(s2);
if (RSTR_SHARED_P(s1)) {
str_decref(mrb, s1->as.heap.aux.shared);
RSTR_UNSET_SHARED_FLAG(s1);
}
else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) {
else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1)
&& s1->as.heap.ptr) {
mrb_free(mrb, s1->as.heap.ptr);
}
RSTR_UNSET_FSHARED_FLAG(s1);
RSTR_UNSET_NOFREE_FLAG(s1);
RSTR_UNSET_EMBED_FLAG(s1);
if (RSTR_SHARED_P(s2)) {
if (MRB_FROZEN_P(s2)) {
RSTR_SET_FSHARED_FLAG(s1);
s1->as.heap.ptr = RSTR_PTR(s2);
s1->as.heap.len = len;
s1->as.heap.aux.fshared = s2;
}
else if (RSTR_FSHARED_P(s2)) {
RSTR_SET_FSHARED_FLAG(s1);
s1->as.heap.ptr = s2->as.heap.ptr;
s1->as.heap.len = len;
s1->as.heap.aux.fshared = s2->as.heap.aux.fshared;
}
else if (RSTR_SHARED_P(s2)) {
L_SHARE:
RSTR_UNSET_EMBED_FLAG(s1);
RSTR_SET_SHARED_FLAG(s1);
s1->as.heap.ptr = s2->as.heap.ptr;
s1->as.heap.len = len;
s1->as.heap.aux.shared = s2->as.heap.aux.shared;
RSTR_SET_SHARED_FLAG(s1);
s1->as.heap.aux.shared->refcnt++;
}
else {
if (len <= RSTRING_EMBED_LEN_MAX) {
RSTR_UNSET_SHARED_FLAG(s1);
RSTR_SET_EMBED_FLAG(s1);
memcpy(s1->as.ary, RSTR_PTR(s2), len);
RSTR_SET_EMBED_LEN(s1, len);
......@@ -675,11 +706,12 @@ mrb_str_modify(mrb_state *mrb, struct RString *s)
RSTR_UNSET_SHARED_FLAG(s);
return;
}
if (RSTR_NOFREE_P(s)) {
if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) {
char *p = s->as.heap.ptr;
mrb_int len = s->as.heap.len;
RSTR_UNSET_NOFREE_FLAG(s);
RSTR_UNSET_FSHARED_FLAG(s);
if (len < RSTRING_EMBED_LEN_MAX) {
RSTR_SET_EMBED_FLAG(s);
RSTR_SET_EMBED_LEN(s, len);
......@@ -1219,8 +1251,8 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
mrb_int argc;
struct RString *s = mrb_str_ptr(str);
mrb_str_modify(mrb, s);
argc = mrb_get_args(mrb, "|S", &rs);
mrb_str_modify(mrb, s);
len = RSTR_LEN(s);
if (argc == 0) {
if (len == 0) return mrb_nil_value();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment