Commit 5e5ee841 authored by chasonr

Implement sprintf("%c") for UTF-8.

* sprintf("%c") is changed to accept a string for which String#size returns
  1, even if it is longer than one byte, and to convert a Fixnum via
  Fixnum#chr (possibly returning more than one byte).  Thus, if the UTF-8
  gem is in use, a character will be understood as a single UTF-8 character.

* The change to sprintf depends on the implementation of Fixnum#chr added
  to mrbgems/mruby-string-utf8/src/string.c.

This should work with any other gem that implements a multibyte encoding, as
long as it implements String#size and Fixnum#chr as appropriate.
parent 313f6b59
...@@ -666,38 +666,37 @@ retry: ...@@ -666,38 +666,37 @@ retry:
case 'c': { case 'c': {
mrb_value val = GETARG(); mrb_value val = GETARG();
mrb_value tmp; mrb_value tmp;
unsigned int c; char *c;
tmp = mrb_check_string_type(mrb, val); tmp = mrb_check_string_type(mrb, val);
if (!mrb_nil_p(tmp)) { if (!mrb_nil_p(tmp)) {
if (RSTRING_LEN(tmp) != 1 ) { if (mrb_fixnum(mrb_funcall(mrb, tmp, "size", 0)) != 1 ) {
mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character"); mrb_raise(mrb, E_ARGUMENT_ERROR, "%c requires a character");
} }
c = RSTRING_PTR(tmp)[0];
n = 1;
} }
else { else if (mrb_fixnum_p(val)) {
c = mrb_fixnum(val); tmp = mrb_funcall(mrb, val, "chr", 0);
n = 1;
} }
if (n <= 0) { else {
mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character"); mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid character");
} }
c = RSTRING_PTR(tmp);
n = RSTRING_LEN(tmp);
if (!(flags & FWIDTH)) { if (!(flags & FWIDTH)) {
CHECK(n); CHECK(n);
buf[blen] = c; memcpy(buf+blen, c, n);
blen += n; blen += n;
} }
else if ((flags & FMINUS)) { else if ((flags & FMINUS)) {
CHECK(n); CHECK(n);
buf[blen] = c; memcpy(buf+blen, c, n);
blen += n; blen += n;
FILL(' ', width-1); FILL(' ', width-1);
} }
else { else {
FILL(' ', width-1); FILL(' ', width-1);
CHECK(n); CHECK(n);
buf[blen] = c; memcpy(buf+blen, c, n);
blen += n; blen += n;
} }
} }
......
...@@ -279,6 +279,41 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str) ...@@ -279,6 +279,41 @@ mrb_str_reverse(mrb_state *mrb, mrb_value str)
return mrb_str_reverse_bang(mrb, mrb_str_dup(mrb, str)); return mrb_str_reverse_bang(mrb, mrb_str_dup(mrb, str));
} }
/*
 * Fixnum#chr for the UTF-8 string gem.
 *
 * Treats the receiver as a Unicode code point and returns a new String
 * holding its UTF-8 encoding (1 to 4 bytes).
 *
 * Raises RangeError when the value is negative, greater than 0x10FFFF, or
 * inside the surrogate range 0xD800..0xDFFF.  Surrogates are not Unicode
 * scalar values and UTF-8 has no valid encoding for them, so emitting the
 * naive 3-byte form would produce an ill-formed string.
 */
static mrb_value
mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
{
  mrb_int cp = mrb_fixnum(num);
  char utf8[4];
  int len;

  if (cp < 0 || 0x10FFFF < cp || (0xD800 <= cp && cp <= 0xDFFF)) {
    mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
  }

  if (cp < 0x80) {
    /* 7 bits: 0xxxxxxx */
    utf8[0] = (char)cp;
    len = 1;
  }
  else if (cp < 0x800) {
    /* 11 bits: 110xxxxx 10xxxxxx */
    utf8[0] = (char)(0xC0 | (cp >> 6));
    utf8[1] = (char)(0x80 | (cp & 0x3F));
    len = 2;
  }
  else if (cp < 0x10000) {
    /* 16 bits: 1110xxxx 10xxxxxx 10xxxxxx */
    utf8[0] = (char)(0xE0 |  (cp >> 12));
    utf8[1] = (char)(0x80 | ((cp >>  6) & 0x3F));
    utf8[2] = (char)(0x80 | ( cp        & 0x3F));
    len = 3;
  }
  else {
    /* 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    utf8[0] = (char)(0xF0 |  (cp >> 18));
    utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
    utf8[2] = (char)(0x80 | ((cp >>  6) & 0x3F));
    utf8[3] = (char)(0x80 | ( cp        & 0x3F));
    len = 4;
  }
  return mrb_str_new(mrb, utf8, len);
}
void void
mrb_mruby_string_utf8_gem_init(mrb_state* mrb) mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
{ {
...@@ -290,6 +325,8 @@ mrb_mruby_string_utf8_gem_init(mrb_state* mrb) ...@@ -290,6 +325,8 @@ mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY());
mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE());
mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE());
mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE());
} }
void void
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment