Commit 75df13a9 authored by KOBAYASHI Shuji

Fix `String#byteslice` with `MRB_UTF8_STRING` and some edge cases

Example:

  $ bin/mruby -e '
    p "あa".byteslice(1)
    p "bar".byteslice(3)
    p "bar".byteslice(4..0)
  '

  Before this patch:

    "a"
    ""
    RangeError (4..0 out of range)

  After this patch (same as Ruby):

    "\x81"
    nil
    nil
parent cb3ee2d0
...@@ -438,6 +438,9 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str); ...@@ -438,6 +438,9 @@ mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str);
#define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len) #define mrb_str_buf_cat(mrb, str, ptr, len) mrb_str_cat(mrb, str, ptr, len)
#define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2) #define mrb_str_buf_append(mrb, str, str2) mrb_str_cat_str(mrb, str, str2)
mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp);
mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
#ifdef MRB_UTF8_STRING #ifdef MRB_UTF8_STRING
mrb_int mrb_utf8_len(const char *str, mrb_int byte_len); mrb_int mrb_utf8_len(const char *str, mrb_int byte_len);
#endif #endif
......
...@@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str) ...@@ -42,44 +42,32 @@ mrb_str_setbyte(mrb_state *mrb, mrb_value str)
static mrb_value static mrb_value
mrb_str_byteslice(mrb_state *mrb, mrb_value str) mrb_str_byteslice(mrb_state *mrb, mrb_value str)
{ {
mrb_value a1; mrb_value a1, a2;
mrb_int len; mrb_int str_len = RSTRING_LEN(str), beg, len;
mrb_bool empty = TRUE;
if (mrb_get_argc(mrb) == 2) {
mrb_int pos; if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) {
mrb_get_args(mrb, "ii", &pos, &len); beg = mrb_fixnum(mrb_to_int(mrb, a1));
return mrb_str_substr(mrb, str, pos, len); len = mrb_fixnum(mrb_to_int(mrb, a2));
goto subseq;
} }
mrb_get_args(mrb, "o|i", &a1, &len); if (mrb_type(a1) == MRB_TT_RANGE) {
switch (mrb_type(a1)) { if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) == MRB_RANGE_OK) {
case MRB_TT_RANGE: goto subseq;
{
mrb_int beg;
len = RSTRING_LEN(str);
switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) {
case MRB_RANGE_TYPE_MISMATCH:
break;
case MRB_RANGE_OK:
return mrb_str_substr(mrb, str, beg, len);
case MRB_RANGE_OUT:
mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1);
break;
}
return mrb_nil_value();
} }
#ifndef MRB_WITHOUT_FLOAT return mrb_nil_value();
case MRB_TT_FLOAT: }
a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));
/* fall through */ beg = mrb_fixnum(mrb_to_int(mrb, a1));
#endif len = 1;
case MRB_TT_FIXNUM: empty = FALSE;
return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1); subseq:
default: if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) {
mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument"); return mrb_str_byte_subseq(mrb, str, beg, len);
}
else {
return mrb_nil_value();
} }
/* not reached */
return mrb_nil_value();
} }
/* /*
......
...@@ -26,10 +26,61 @@ end ...@@ -26,10 +26,61 @@ end
assert('String#byteslice') do assert('String#byteslice') do
str1 = "hello" str1 = "hello"
str2 = "\u3042ab" # "\xE3\x81\x82ab"
assert_equal("h", str1.byteslice(0))
assert_equal("e", str1.byteslice(1)) assert_equal("e", str1.byteslice(1))
assert_equal(nil, str1.byteslice(5))
assert_equal("o", str1.byteslice(-1)) assert_equal("o", str1.byteslice(-1))
assert_equal(nil, str1.byteslice(-6))
assert_equal("\xE3", str2.byteslice(0))
assert_equal("\x81", str2.byteslice(1))
assert_equal(nil, str2.byteslice(5))
assert_equal("b", str2.byteslice(-1))
assert_equal(nil, str2.byteslice(-6))
assert_equal("", str1.byteslice(0, 0))
assert_equal(str1, str1.byteslice(0, 6))
assert_equal("el", str1.byteslice(1, 2))
assert_equal("", str1.byteslice(5, 1))
assert_equal("o", str1.byteslice(-1, 6))
assert_equal(nil, str1.byteslice(-6, 1))
assert_equal(nil, str1.byteslice(0, -1))
assert_equal("", str2.byteslice(0, 0))
assert_equal(str2, str2.byteslice(0, 6))
assert_equal("\x81\x82", str2.byteslice(1, 2))
assert_equal("", str2.byteslice(5, 1))
assert_equal("b", str2.byteslice(-1, 6))
assert_equal(nil, str2.byteslice(-6, 1))
assert_equal(nil, str2.byteslice(0, -1))
assert_equal("ell", str1.byteslice(1..3)) assert_equal("ell", str1.byteslice(1..3))
assert_equal("el", str1.byteslice(1...3)) assert_equal("el", str1.byteslice(1...3))
assert_equal("h", str1.byteslice(0..0))
assert_equal("", str1.byteslice(5..0))
assert_equal("o", str1.byteslice(4..5))
assert_equal(nil, str1.byteslice(6..0))
assert_equal("", str1.byteslice(-1..0))
assert_equal("llo", str1.byteslice(-3..5))
assert_equal("\x81\x82a", str2.byteslice(1..3))
assert_equal("\x81\x82", str2.byteslice(1...3))
assert_equal("\xE3", str2.byteslice(0..0))
assert_equal("", str2.byteslice(5..0))
assert_equal("b", str2.byteslice(4..5))
assert_equal(nil, str2.byteslice(6..0))
assert_equal("", str2.byteslice(-1..0))
assert_equal("\x82ab", str2.byteslice(-3..5))
assert_raise(ArgumentError) { str1.byteslice }
assert_raise(ArgumentError) { str1.byteslice(1, 2, 3) }
assert_raise(TypeError) { str1.byteslice("1") }
assert_raise(TypeError) { str1.byteslice("1", 2) }
assert_raise(TypeError) { str1.byteslice(1, "2") }
assert_raise(TypeError) { str1.byteslice(1..2, 3) }
skip unless Object.const_defined?(:Float)
assert_equal("o", str1.byteslice(4.0))
assert_equal("\x82ab", str2.byteslice(2.0, 3.0))
end end
assert('String#dump') do assert('String#dump') do
......
...@@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s) ...@@ -410,8 +410,8 @@ str_make_shared(mrb_state *mrb, struct RString *orig, struct RString *s)
} }
} }
static mrb_value mrb_value
byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{ {
struct RString *orig, *s; struct RString *orig, *s;
...@@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) ...@@ -434,32 +434,33 @@ str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
beg = chars2bytes(str, 0, beg); beg = chars2bytes(str, 0, beg);
len = chars2bytes(str, beg, len); len = chars2bytes(str, beg, len);
return byte_subseq(mrb, str, beg, len); return mrb_str_byte_subseq(mrb, str, beg, len);
} }
#else #else
#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len) #define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len)
#endif #endif
static mrb_value mrb_bool
str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp)
{ {
mrb_int clen = RSTRING_CHAR_LEN(str); if (str_len < *begp || *lenp < 0) return FALSE;
if (*begp < 0) {
if (len < 0) return mrb_nil_value(); *begp += str_len;
if (clen == 0) { if (*begp < 0) return FALSE;
len = 0;
} }
if (beg > clen) return mrb_nil_value(); if (*lenp > str_len - *begp)
if (beg < 0) { *lenp = str_len - *begp;
beg += clen; if (*lenp <= 0) {
if (beg < 0) return mrb_nil_value(); *lenp = 0;
} }
if (len > clen - beg) return TRUE;
len = clen - beg; }
if (len <= 0) {
len = 0; static mrb_value
} str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
return str_subseq(mrb, str, beg, len); {
return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ?
str_subseq(mrb, str, beg, len) : mrb_nil_value();
} }
MRB_API mrb_int MRB_API mrb_int
...@@ -1917,7 +1918,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) ...@@ -1917,7 +1918,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
} }
} }
else if (ISSPACE(c)) { else if (ISSPACE(c)) {
mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg)); mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg));
mrb_gc_arena_restore(mrb, ai); mrb_gc_arena_restore(mrb, ai);
skip = TRUE; skip = TRUE;
beg = idx; beg = idx;
...@@ -1942,7 +1943,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) ...@@ -1942,7 +1943,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
else { else {
end = chars2bytes(str, idx, 1); end = chars2bytes(str, idx, 1);
} }
mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end)); mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end));
mrb_gc_arena_restore(mrb, ai); mrb_gc_arena_restore(mrb, ai);
idx += end + pat_len; idx += end + pat_len;
if (lim_p && lim <= ++i) break; if (lim_p && lim <= ++i) break;
...@@ -1954,7 +1955,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) ...@@ -1954,7 +1955,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
tmp = mrb_str_new_empty(mrb, str); tmp = mrb_str_new_empty(mrb, str);
} }
else { else {
tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
} }
mrb_ary_push(mrb, result, tmp); mrb_ary_push(mrb, result, tmp);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment