Commit db0a4d90 authored by KOBAYASHI Shuji

Optimize `chars2bytes` for ASCII-only strings when `MRB_UTF8_STRING` is enabled

### Benchmark (with `MRB_UTF8_STRING`)

  ```
  $ mruby -e '
    COUNT = 150000
    SIZE = 10000
    strs = Array.new(COUNT) do
      s = "a" * SIZE
      s.size  # set `MRB_STR_ASCII` flag
      s
    end
    i = 0
    t = Time.now
    while i < COUNT
      strs[i][-2..-1] = ""
      i += 1
    end
    printf "%.2f sec\n", Time.now - t
  '

  1.10 sec  # before
  0.07 sec  # after
  ```
parent 9c994516
......@@ -330,16 +330,21 @@ utf8_strlen(mrb_value str)
/*
 * Map a character count to a byte count.
 *
 * Starting `off` bytes into `s`, return the number of bytes covered by the
 * next `idx` characters.
 *
 * Fast path: when the string is flagged ASCII-only (RSTR_ASCII_P), `idx` is
 * returned as-is and the string contents are never scanned.
 * Otherwise the string is walked with utf8len(), accumulating the length
 * reported for each step; the walk stops at RSTRING_END(s), so the result
 * never reaches past the end of the string.
 *
 * NOTE(review): the fast path returns `idx` unclamped, whereas the scanning
 * path stops at the end of the string. Presumably callers bound `idx`
 * themselves -- confirm.
 */
static mrb_int
chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
{
  if (RSTR_ASCII_P(mrb_str_ptr(s))) {
    /* no per-character scan needed for a string flagged ASCII-only */
    return idx;
  }
  else {
    mrb_int i, b, n;
    const char *p = RSTRING_PTR(s) + off;
    const char *e = RSTRING_END(s);

    /* i counts characters consumed, b counts the bytes they occupy */
    for (b=i=0; p<e && i<idx; i++) {
      n = utf8len(p, e);
      b += n;
      p += n;
    }
    return b;
  }
}
/* map byte offset to character index */
......@@ -603,7 +608,7 @@ str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len)
/*
 * Return the part of `str` selected by `beg` and `len`, which are given in
 * characters.
 *
 * The range is rewritten in place by str_range_to_bytes() (presumably into
 * byte units -- confirm against its definition) and then handed to
 * mrb_str_byte_subseq().
 *
 * The conversion must run exactly once: running it a second time would
 * treat the already-converted values as character indices again. No ASCII
 * check is made here; chars2bytes() performs its own RSTR_ASCII_P test.
 */
static inline mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  str_range_to_bytes(str, &beg, &len);
  return mrb_str_byte_subseq(mrb, str, beg, len);
}
#else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment