Commit 75df13a9 authored by KOBAYASHI Shuji

Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases

Example:

  $ bin/mruby -e '
    p "あa".byteslice(1)
    p "bar".byteslice(3)
    p "bar".byteslice(4..0)
  '

  Before this patch:

    "a"
    ""
    RangeError (4..0 out of range)

  After this patch (same as Ruby):

    "\x81"
    nil
    nil
parent cb3ee2d0
......@@ -438,6 +438,9 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str);
/* Aliases: these two names expand directly to mrb_str_cat / mrb_str_cat_str. */
#define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len)
#define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2)
/*
 * Validates a (beg, len) pair against a sequence of str_len elements and
 * normalizes it in place through begp and lenp. Returns FALSE when the pair
 * is out of range, TRUE otherwise.
 */
mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp);
/*
 * Byte-indexed subsequence of str. Exported (previously a file-static helper
 * named byte_subseq) so that String#byteslice can call it.
 * NOTE(review): the body is not visible in this diff; presumably beg and len
 * must already be validated by the caller -- confirm against string.c.
 */
mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
/* Only declared when mruby is built with MRB_UTF8_STRING. */
#ifdef MRB_UTF8_STRING
mrb_int mrb_utf8_len(const char *str, mrb_int byte_len);
#endif
......
......@@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str)
/*
 * String#byteslice: slices the receiver by byte offsets.
 *
 * NOTE(review): this hunk is an unmarked diff. Removed (pre-patch) and added
 * (post-patch) lines are interleaved, so the text is not compilable as shown.
 *
 * Post-patch flow (the lines that use a2, str_len, empty and the subseq label):
 *   - two arguments: both are converted with mrb_to_int() and used as
 *     (beg, len);
 *   - a single Range argument: mrb_range_beg_len() resolves it against the
 *     byte length; per the commit message an out-of-range Range now yields
 *     nil instead of raising RangeError;
 *   - any other single argument: converted with mrb_to_int() and used as beg
 *     with len = 1.
 * The final (beg, len) pair is checked by mrb_str_beg_len() against
 * RSTRING_LEN(str) and the bytes are taken with mrb_str_byte_subseq().
 */
static mrb_value
mrb_str_byteslice(mrb_state *mrb, mrb_value str)
{
mrb_value a1;
mrb_int len;
if (mrb_get_argc(mrb) == 2) {
mrb_int pos;
mrb_get_args(mrb, "ii", &pos, &len);
return mrb_str_substr(mrb, str, pos, len);
mrb_value a1, a2;
mrb_int str_len = RSTRING_LEN(str), beg, len;
/* empty: TRUE when a zero-length result may be returned as an empty string;
   FALSE when a zero-length result must be reported as nil (see subseq). */
mrb_bool empty = TRUE;
if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) {
beg = mrb_fixnum(mrb_to_int(mrb, a1));
len = mrb_fixnum(mrb_to_int(mrb, a2));
goto subseq;
}
mrb_get_args(mrb, "o|i", &a1, &len);
switch (mrb_type(a1)) {
case MRB_TT_RANGE:
{
mrb_int beg;
len = RSTRING_LEN(str);
switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) {
case MRB_RANGE_TYPE_MISMATCH:
break;
case MRB_RANGE_OK:
return mrb_str_substr(mrb, str, beg, len);
case MRB_RANGE_OUT:
mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1);
break;
}
return mrb_nil_value();
if (mrb_type(a1) == MRB_TT_RANGE) {
if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) == MRB_RANGE_OK) {
goto subseq;
}
#ifndef MRB_WITHOUT_FLOAT
case MRB_TT_FLOAT:
a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));
/* fall through */
#endif
case MRB_TT_FIXNUM:
return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1);
default:
mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument");
return mrb_nil_value();
}
beg = mrb_fixnum(mrb_to_int(mrb, a1));
len = 1;
/* Single-index form: a zero-length slice means the index sits at the end of
   the string, which must give nil rather than an empty string. */
empty = FALSE;
subseq:
/* mrb_str_beg_len() rejects an out-of-range pair and clamps len to the bytes
   actually available from beg. */
if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) {
return mrb_str_byte_subseq(mrb, str, beg, len);
}
else {
return mrb_nil_value();
}
/* not reached */
return mrb_nil_value();
}
/*
......
......@@ -26,10 +26,61 @@ end
# Checks String#byteslice against a pure-ASCII string (str1, 5 bytes) and a
# string that starts with a 3-byte UTF-8 character (str2, also 5 bytes), so
# that every result is expressed in bytes rather than characters.
assert('String#byteslice') do
str1 = "hello"
str2 = "\u3042ab" # "\xE3\x81\x82ab"
# Single index: one byte is returned; an index equal to the byte length or
# before the start of the string gives nil.
assert_equal("h", str1.byteslice(0))
assert_equal("e", str1.byteslice(1))
assert_equal(nil, str1.byteslice(5))
assert_equal("o", str1.byteslice(-1))
assert_equal(nil, str1.byteslice(-6))
assert_equal("\xE3", str2.byteslice(0))
assert_equal("\x81", str2.byteslice(1))
assert_equal(nil, str2.byteslice(5))
assert_equal("b", str2.byteslice(-1))
assert_equal(nil, str2.byteslice(-6))
# (start, length): the length is clamped to the bytes available, a start equal
# to the byte length gives an empty string, and a negative length or a start
# before the beginning gives nil.
assert_equal("", str1.byteslice(0, 0))
assert_equal(str1, str1.byteslice(0, 6))
assert_equal("el", str1.byteslice(1, 2))
assert_equal("", str1.byteslice(5, 1))
assert_equal("o", str1.byteslice(-1, 6))
assert_equal(nil, str1.byteslice(-6, 1))
assert_equal(nil, str1.byteslice(0, -1))
assert_equal("", str2.byteslice(0, 0))
assert_equal(str2, str2.byteslice(0, 6))
assert_equal("\x81\x82", str2.byteslice(1, 2))
assert_equal("", str2.byteslice(5, 1))
assert_equal("b", str2.byteslice(-1, 6))
assert_equal(nil, str2.byteslice(-6, 1))
assert_equal(nil, str2.byteslice(0, -1))
# Range: a range that begins at the byte length gives an empty string, one
# that begins beyond it gives nil (no RangeError).
assert_equal("ell", str1.byteslice(1..3))
assert_equal("el", str1.byteslice(1...3))
assert_equal("h", str1.byteslice(0..0))
assert_equal("", str1.byteslice(5..0))
assert_equal("o", str1.byteslice(4..5))
assert_equal(nil, str1.byteslice(6..0))
assert_equal("", str1.byteslice(-1..0))
assert_equal("llo", str1.byteslice(-3..5))
assert_equal("\x81\x82a", str2.byteslice(1..3))
assert_equal("\x81\x82", str2.byteslice(1...3))
assert_equal("\xE3", str2.byteslice(0..0))
assert_equal("", str2.byteslice(5..0))
assert_equal("b", str2.byteslice(4..5))
assert_equal(nil, str2.byteslice(6..0))
assert_equal("", str2.byteslice(-1..0))
assert_equal("\x82ab", str2.byteslice(-3..5))
# Wrong argument count and non-integer arguments (including a Range combined
# with a length) must raise.
assert_raise(ArgumentError) { str1.byteslice }
assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) }
assert_raise(TypeError) { str1.byteslice("1") }
assert_raise(TypeError) { str1.byteslice("1", 2) }
assert_raise(TypeError) { str1.byteslice(1, "2") }
assert_raise(TypeError) { str1.byteslice(1..2, 3) }
# The remaining checks need Float, which may be absent from the build.
skip unless Object.const_defined?(:Float)
# Float arguments behave like the corresponding integers.
assert_equal("o", str1.byteslice(4.0))
assert_equal("\x82ab", str2.byteslice(2.0, 3.0))
end
assert('String#dump') do
......
......@@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s)
}
}
static mrb_value
byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
mrb_value
mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
struct RString *orig, *s;
......@@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
beg = chars2bytes(str, 0, beg);
len = chars2bytes(str, beg, len);
return byte_subseq(mrb, str, beg, len);
return mrb_str_byte_subseq(mrb, str, beg, len);
}
#else
#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)
#define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len)
#endif
static mrb_value
str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
/*
 * mrb_str_beg_len: validates a (beg, len) pair against a sequence of str_len
 * elements and normalizes it in place through begp and lenp.
 *
 * NOTE(review): unmarked diff. The lines that use beg, len and clen and
 * return mrb_nil_value() are the removed body of the old str_substr; the
 * lines that use begp, lenp and str_len are the added function.
 *
 * Added function: returns FALSE when the start is greater than str_len, when
 * the length is negative, or when a negative start is still negative after
 * str_len has been added to it. Otherwise a negative start has been rebased
 * from the end, the length has been clamped to at most str_len minus the
 * start (and never below 0), and TRUE is returned.
 */
mrb_bool
mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp)
{
mrb_int clen = RSTRING_CHAR_LEN(str);
if (len < 0) return mrb_nil_value();
if (clen == 0) {
len = 0;
if (str_len < *begp || *lenp < 0) return FALSE;
if (*begp < 0) {
*begp += str_len;
if (*begp < 0) return FALSE;
}
if (beg > clen) return mrb_nil_value();
if (beg < 0) {
beg += clen;
if (beg < 0) return mrb_nil_value();
if (*lenp > str_len - *begp)
*lenp = str_len - *begp;
if (*lenp <= 0) {
*lenp = 0;
}
if (len > clen - beg)
len = clen - beg;
if (len <= 0) {
len = 0;
}
return str_subseq(mrb, str, beg, len);
return TRUE;
}
/*
 * Returns the substring of `str` selected by the (beg, len) pair, or nil.
 *
 * The pair is first validated and normalized by mrb_str_beg_len() against
 * RSTRING_CHAR_LEN(str); when that check fails the result is nil, otherwise
 * the normalized pair is handed to str_subseq().
 */
static mrb_value
str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  mrb_int char_len = RSTRING_CHAR_LEN(str);

  if (!mrb_str_beg_len(char_len, &beg, &len)) {
    return mrb_nil_value();
  }
  return str_subseq(mrb, str, beg, len);
}
MRB_API mrb_int
......@@ -1917,7 +1918,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
}
else if (ISSPACE(c)) {
mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));
mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg));
mrb_gc_arena_restore(mrb, ai);
skip = TRUE;
beg = idx;
......@@ -1942,7 +1943,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
else {
end = chars2bytes(str, idx, 1);
}
mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));
mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end));
mrb_gc_arena_restore(mrb, ai);
idx += end + pat_len;
if (lim_p && lim <= ++i) break;
......@@ -1954,7 +1955,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
tmp = mrb_str_new_empty(mrb, str);
}
else {
tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
}
mrb_ary_push(mrb, result, tmp);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment