Commit 8fe6a190 (unverified), authored by Yukihiro "Matz" Matsumoto, committed by GitHub

Merge pull request #4764 from shuujii/integrate-mrb_str_inspect-and-mrb_str_dump

Integrate `mrb_str_inspect` and `mrb_str_dump`
parents 20aab8a1 7ce5d339
......@@ -13,6 +13,7 @@ end
assert('String#dump') do
  # Bytes that are not printable and have no short escape come out as \xNN.
  assert_equal('"\x00"', "\0".dump)
  # Plain printable text is only wrapped in double quotes.
  assert_equal('"foo"', "foo".dump)
  # dump escapes a multi-byte character byte by byte.
  assert_equal("\"\\xe3\\x82\\x8b\"", "る".dump)
  # regress #1210
  assert_nothing_raised do
    ("\1" * 100).dump
  end
end
......
......@@ -1318,6 +1318,84 @@ str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb
return src;
}
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
/*
 * True when p is still before e and *p is '$', '@' or '{'.
 * str_escape applies it to the byte following a '#' to decide whether that
 * '#' has to be backslash-escaped.
 * Note: p is evaluated several times, so pass an expression without side
 * effects.
 */
#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
/*
 * Build a new double-quoted, escaped copy of `str`.
 *
 * Shared implementation of mrb_str_inspect (inspect == TRUE) and
 * mrb_str_dump (inspect == FALSE).
 *
 * Escaping rules, applied byte by byte:
 *   - `"` and `\` are prefixed with a backslash; `#` is prefixed only when
 *     the next byte is `$`, `@` or `{` (see IS_EVSTR);
 *   - bytes accepted by ISPRINT are copied unchanged;
 *   - \n \r \t \f \v \b \a \e are written with their mnemonic escape;
 *   - any other byte is written as \x followed by two digits taken from
 *     mrb_digitmap.
 *
 * With MRB_UTF8_STRING and inspect == TRUE, a sequence for which utf8len()
 * reports more than one byte is copied verbatim instead of being escaped.
 *
 * Side effect (MRB_UTF8_STRING, inspect == TRUE only): when the scan finds
 * no multi-byte sequence, MRB_STR_ASCII is OR-ed into the flags of `str`.
 * In dump mode the scan is skipped, so `str` is left untouched; marking it
 * there would label a string that may contain multi-byte characters.
 *
 * Returns the newly built string.
 */
static mrb_value
str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect)
{
  const char *p, *pend;
  char buf[CHAR_ESC_LEN + 1];
  mrb_value result = mrb_str_new_lit(mrb, "\"");
#ifdef MRB_UTF8_STRING
  /* cleared as soon as a multi-byte sequence is copied verbatim */
  uint32_t ascii_flag = MRB_STR_ASCII;
#endif

  p = RSTRING_PTR(str); pend = RSTRING_END(str);
  for (;p < pend; p++) {
    unsigned char c, cc;
#ifdef MRB_UTF8_STRING
    if (inspect) {
      mrb_int clen = utf8len(p, pend);
      if (clen > 1) {
        /* result is a different string, so the bytes can be appended
           straight from the source buffer */
        mrb_str_cat(mrb, result, p, clen);
        p += clen-1;  /* the loop increment consumes the last byte */
        ascii_flag = 0;
        continue;
      }
    }
#endif
    c = *p;
    if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) {
      buf[0] = '\\'; buf[1] = c;
      mrb_str_cat(mrb, result, buf, 2);
      continue;
    }
    if (ISPRINT(c)) {
      buf[0] = c;
      mrb_str_cat(mrb, result, buf, 1);
      continue;
    }
    switch (c) {
      case '\n': cc = 'n'; break;
      case '\r': cc = 'r'; break;
      case '\t': cc = 't'; break;
      case '\f': cc = 'f'; break;
      case '\013': cc = 'v'; break;
      case '\010': cc = 'b'; break;
      case '\007': cc = 'a'; break;
      case 033: cc = 'e'; break;
      default: cc = 0; break;
    }
    if (cc) {
      buf[0] = '\\';
      buf[1] = (char)cc;
      mrb_str_cat(mrb, result, buf, 2);
    }
    else {
      /* low nibble first, then high nibble, so `c` can be divided in place */
      buf[0] = '\\';
      buf[1] = 'x';
      buf[3] = mrb_digitmap[c % 16]; c /= 16;
      buf[2] = mrb_digitmap[c % 16];
      mrb_str_cat(mrb, result, buf, 4);
    }
  }
  mrb_str_cat_lit(mrb, result, "\"");
#ifdef MRB_UTF8_STRING
  if (inspect) {
    /* only inspect mode actually examined `str` for multi-byte sequences */
    mrb_str_ptr(str)->flags |= ascii_flag;
  }
  /* in dump mode nothing is copied verbatim, so ascii_flag is still set */
  mrb_str_ptr(result)->flags |= ascii_flag;
#endif

  return result;
}
static void
mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace)
{
......@@ -2574,8 +2652,6 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
return str;
}
#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
/*
* call-seq:
* str.dump -> new_str
......@@ -2586,113 +2662,7 @@ mrb_str_upcase(mrb_state *mrb, mrb_value self)
mrb_value
mrb_str_dump(mrb_state *mrb, mrb_value str)
{
/*
 * Escapes `str` into a new double-quoted string in two passes:
 * pass 1 computes the exact output length, pass 2 writes the bytes.
 *
 * NOTE(review): control leaves at `return mrb_obj_value(result);`, so the
 * final `return str_escape(mrb, str, FALSE);` is unreachable as written.
 * This looks like an old and a new implementation merged together --
 * confirm which one is intended and drop the other.
 */
mrb_int len;
const char *p, *pend;
char *q;
struct RString *result;
/* pass 1: compute the length of the escaped text */
len = 2; /* "" */
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
while (p < pend) {
unsigned char c = *p++;
switch (c) {
case '"': case '\\':
case '\n': case '\r':
case '\t': case '\f':
case '\013': case '\010': case '\007': case '\033':
/* backslash plus one character */
len += 2;
break;
case '#':
/* p already points at the byte after '#' */
len += IS_EVSTR(p, pend) ? 2 : 1;
break;
default:
if (ISPRINT(c)) {
len++;
}
else {
len += 4; /* \xNN */
}
break;
}
}
/* allocate the result with the computed length */
result = str_new(mrb, 0, len);
str_with_class(result, str);
/* pass 2: write the escaped text; must mirror the sizing rules above */
p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
q = RSTR_PTR(result);
*q++ = '"';
while (p < pend) {
unsigned char c = *p++;
switch (c) {
case '"':
case '\\':
*q++ = '\\';
*q++ = c;
break;
case '\n':
*q++ = '\\';
*q++ = 'n';
break;
case '\r':
*q++ = '\\';
*q++ = 'r';
break;
case '\t':
*q++ = '\\';
*q++ = 't';
break;
case '\f':
*q++ = '\\';
*q++ = 'f';
break;
case '\013':
*q++ = '\\';
*q++ = 'v';
break;
case '\010':
*q++ = '\\';
*q++ = 'b';
break;
case '\007':
*q++ = '\\';
*q++ = 'a';
break;
case '\033':
*q++ = '\\';
*q++ = 'e';
break;
case '#':
/* escape '#' only when '$', '@' or '{' follows */
if (IS_EVSTR(p, pend)) *q++ = '\\';
*q++ = '#';
break;
default:
if (ISPRINT(c)) {
*q++ = c;
}
else {
*q++ = '\\';
*q++ = 'x';
/* low nibble is stored first so that c can be divided in place */
q[1] = mrb_digitmap[c % 16]; c /= 16;
q[0] = mrb_digitmap[c % 16];
q += 2;
}
}
}
/* closing quote */
*q = '"';
return mrb_obj_value(result);
/* NOTE(review): unreachable -- see the note at the top of the function */
return str_escape(mrb, str, FALSE);
}
MRB_API mrb_value
......@@ -2762,8 +2732,6 @@ mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
return mrb_str_cat_str(mrb, str1, str2);
}
#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
/*
* call-seq:
* str.inspect -> string
......@@ -2778,76 +2746,7 @@ mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
mrb_value
mrb_str_inspect(mrb_state *mrb, mrb_value str)
{
  /*
   * Delegate to the shared escaper in inspect mode (third argument TRUE).
   * The loop that used to live here was a line-for-line copy of what
   * str_escape does when `inspect` is TRUE, followed by an unreachable
   * second return; keeping a single implementation avoids the two copies
   * drifting apart.
   */
  return str_escape(mrb, str, TRUE);
}
/*
......
......@@ -748,12 +748,18 @@ assert('String#upcase!', '15.2.10.5.43') do
end
assert('String#inspect', '15.2.10.5.46') do
  assert_equal "\"\\x00\"", "\0".inspect
  assert_equal "\"foo\"", "foo".inspect
  # The expected text for a multi-byte character depends on UTF8STRING:
  # kept verbatim when it is set, hex-escaped byte by byte otherwise.
  if UTF8STRING
    assert_equal '"る"', "る".inspect
  else
    assert_equal '"\xe3\x82\x8b"', "る".inspect
  end

  # should not raise an exception - regress #1210
  assert_nothing_raised do
    ("\1" * 100).inspect
  end
end
# Not ISO specified
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment