Add boundary check to utf8len() to avoid out-of-bounds memory access

parent 798ec3af
...@@ -250,14 +250,13 @@ static const char utf8len_codepage[256] = ...@@ -250,14 +250,13 @@ static const char utf8len_codepage[256] =
}; };
static mrb_int static mrb_int
utf8len(unsigned char* p) utf8len(const char* p, const char* e)
{ {
mrb_int len; mrb_int len;
mrb_int i; mrb_int i;
if (*p == 0) len = utf8len_codepage[(unsigned char)*p];
return 1; if (p + len > e) return 1;
len = utf8len_codepage[*p];
for (i = 1; i < len; ++i) for (i = 1; i < len; ++i)
if ((p[i] & 0xc0) != 0x80) if ((p[i] & 0xc0) != 0x80)
return 1; return 1;
...@@ -268,11 +267,11 @@ static mrb_int ...@@ -268,11 +267,11 @@ static mrb_int
utf8_strlen(mrb_value str, mrb_int len) utf8_strlen(mrb_value str, mrb_int len)
{ {
mrb_int total = 0; mrb_int total = 0;
unsigned char* p = (unsigned char*) RSTRING_PTR(str); char* p = RSTRING_PTR(str);
unsigned char* e = p; char* e = p;
e += len < 0 ? RSTRING_LEN(str) : len; e += len < 0 ? RSTRING_LEN(str) : len;
while (p<e) { while (p<e) {
p += utf8len(p); p += utf8len(p, e);
total++; total++;
} }
return total; return total;
...@@ -282,12 +281,14 @@ utf8_strlen(mrb_value str, mrb_int len) ...@@ -282,12 +281,14 @@ utf8_strlen(mrb_value str, mrb_int len)
/* map character index to byte offset index */ /* map character index to byte offset index */
static mrb_int static mrb_int
chars2bytes(char *p, mrb_int idx) chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
{ {
mrb_int i, b, n; mrb_int i, b, n;
const char *p = RSTRING_PTR(s) + off;
const char *e = RSTRING_END(s);
for (b=i=0; i<idx; i++) { for (b=i=0; p<e && i<idx; i++) {
n = utf8len((unsigned char*)p); n = utf8len(p, e);
b += n; b += n;
p += n; p += n;
} }
...@@ -301,7 +302,7 @@ bytes2chars(char *p, mrb_int bi) ...@@ -301,7 +302,7 @@ bytes2chars(char *p, mrb_int bi)
mrb_int i, b, n; mrb_int i, b, n;
for (b=i=0; b<bi; i++) { for (b=i=0; b<bi; i++) {
n = utf8len((unsigned char*)p); n = utf8len(p, p+bi);
b += n; b += n;
p += n; p += n;
} }
...@@ -310,7 +311,7 @@ bytes2chars(char *p, mrb_int bi) ...@@ -310,7 +311,7 @@ bytes2chars(char *p, mrb_int bi)
#else #else
#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s) #define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
#define chars2bytes(p, ci) (ci) #define chars2bytes(p, off, ci) (ci)
#define bytes2chars(p, bi) (bi) #define bytes2chars(p, bi) (bi)
#endif #endif
...@@ -422,8 +423,8 @@ byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) ...@@ -422,8 +423,8 @@ byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
static inline mrb_value static inline mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{ {
beg = chars2bytes(RSTRING_PTR(str), beg); beg = chars2bytes(str, 0, beg);
len = chars2bytes(RSTRING_PTR(str)+beg, len); len = chars2bytes(str, beg, len);
return byte_subseq(mrb, str, beg, len); return byte_subseq(mrb, str, beg, len);
} }
...@@ -1565,7 +1566,7 @@ mrb_str_index(mrb_state *mrb, mrb_value str) ...@@ -1565,7 +1566,7 @@ mrb_str_index(mrb_state *mrb, mrb_value str)
} }
} }
if (pos >= clen) return mrb_nil_value(); if (pos >= clen) return mrb_nil_value();
pos = chars2bytes(RSTRING_PTR(str), pos); pos = chars2bytes(str, 0, pos);
switch (mrb_type(sub)) { switch (mrb_type(sub)) {
default: { default: {
...@@ -1738,7 +1739,7 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) ...@@ -1738,7 +1739,7 @@ mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
r = RSTRING_PTR(str) + len; r = RSTRING_PTR(str) + len;
while (p<e) { while (p<e) {
mrb_int clen = utf8len((unsigned char*)p); mrb_int clen = utf8len(p, e);
r -= clen; r -= clen;
memcpy(r, p, clen); memcpy(r, p, clen);
p += clen; p += clen;
...@@ -1835,8 +1836,8 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str) ...@@ -1835,8 +1836,8 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str)
else else
sub = mrb_nil_value(); sub = mrb_nil_value();
} }
pos = chars2bytes(RSTRING_PTR(str), pos); pos = chars2bytes(str, 0, pos);
len = chars2bytes(RSTRING_PTR(str)+pos, len); len = chars2bytes(str, pos, len);
mrb_regexp_check(mrb, sub); mrb_regexp_check(mrb, sub);
switch (mrb_type(sub)) { switch (mrb_type(sub)) {
...@@ -1986,7 +1987,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str) ...@@ -1986,7 +1987,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx); end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx);
if (end < 0) break; if (end < 0) break;
} else { } else {
end = chars2bytes(RSTRING_PTR(str)+idx, 1); end = chars2bytes(str, idx, 1);
} }
mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end)); mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));
mrb_gc_arena_restore(mrb, ai); mrb_gc_arena_restore(mrb, ai);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment