Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mruby
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Libraries
mruby
Commits
e968bdf6
Unverified
Commit
e968bdf6
authored
Jul 23, 2019
by
Yukihiro "Matz" Matsumoto
Committed by
GitHub
Jul 23, 2019
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #4593 from shuujii/add-encoding-argument-to-Integral-chr
Add encoding argument to `Integral#chr`
parents
5166e316
e86aa61f
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
156 additions
and
45 deletions
+156
-45
mrbgems/mruby-string-ext/mrblib/string.rb
mrbgems/mruby-string-ext/mrblib/string.rb
+1
-1
mrbgems/mruby-string-ext/src/string.c
mrbgems/mruby-string-ext/src/string.c
+123
-43
mrbgems/mruby-string-ext/test/numeric.rb
mrbgems/mruby-string-ext/test/numeric.rb
+24
-0
mrbgems/mruby-string-ext/test/string.rb
mrbgems/mruby-string-ext/test/string.rb
+8
-1
No files found.
mrbgems/mruby-string-ext/mrblib/string.rb
View file @
e968bdf6
...
...
@@ -414,7 +414,7 @@ class String
e
=
max
.
ord
while
c
<=
e
break
if
exclusive
and
c
==
e
yield
c
.
chr
yield
c
.
chr
(
__ENCODING__
)
c
+=
1
end
return
self
...
...
mrbgems/mruby-string-ext/src/string.c
View file @
e968bdf6
...
...
@@ -5,6 +5,91 @@
#include <mruby/string.h>
#include <mruby/range.h>
/* Encoding names accepted by the chr implementation in this file. */
#define ENC_ASCII_8BIT "ASCII-8BIT"
#define ENC_BINARY "BINARY"
#define ENC_UTF8 "UTF-8"
/*
 * Case-insensitively compares the mruby string value `enc` with the string
 * literal `enc_lit` via str_casecmp_p().  The `enc_lit""` concatenation only
 * compiles when `enc_lit` is a string literal, so sizeof() yields its length
 * including the terminating NUL; the -1 drops that terminator.
 */
#define ENC_COMP_P(enc, enc_lit) \
str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1)
/* With MRB_WITHOUT_FLOAT defined, mrb_float_p() always evaluates to FALSE. */
#ifdef MRB_WITHOUT_FLOAT
# define mrb_float_p(o) FALSE
#endif
/*
 * Case-insensitive equality test for two byte ranges.
 *
 * s1/len1 and s2/len2 each describe a range of bytes.  Returns TRUE when
 * both ranges have the same length and every pair of corresponding bytes is
 * either identical or equal after TOUPPER(); returns FALSE otherwise.
 */
static mrb_bool
str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2)
{
  mrb_int i;

  if (len1 != len2) return FALSE;
  for (i = 0; i < len1; i++) {
    const char a = s1[i];
    const char b = s2[i];

    if (a == b) continue;                        /* exact match, no folding needed */
    if (TOUPPER(a) != TOUPPER(b)) return FALSE;  /* differs even ignoring case */
  }
  return TRUE;
}
/*
 * Creates a one-byte string holding the byte value given by `num`.
 *
 * `num` is converted with mrb_int(); a range error (E_RANGE_ERROR) is raised
 * unless the result lies in 0..0xff.  The new string is tagged through
 * RSTR_SET_ASCII_FLAG() before it is returned.
 */
static mrb_value
int_chr_binary(mrb_state *mrb, mrb_value num)
{
  const mrb_int code = mrb_int(mrb, num);
  mrb_value result;
  char byte;

  if (code < 0 || code > 0xff) {
    mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
  }
  byte = (char)code;
  result = mrb_str_new(mrb, &byte, 1);
  RSTR_SET_ASCII_FLAG(mrb_str_ptr(result));
  return result;
}
#ifdef MRB_UTF8_STRING
/*
 * Creates a string holding the UTF-8 encoding of the code point `num`.
 *
 * `num` is converted with mrb_int().  A range error (E_RANGE_ERROR) is raised
 * when the code point is negative, greater than 0x10FFFF, or lies in the
 * surrogate range 0xD800..0xDFFF: surrogates are not Unicode scalar values
 * and have no well-formed UTF-8 representation, so encoding them would
 * produce an invalid string.
 *
 * The result is 1 to 4 bytes long.  Only the 1-byte (cp < 0x80) case sets
 * MRB_STR_ASCII on the returned string.
 */
static mrb_value
int_chr_utf8(mrb_state *mrb, mrb_value num)
{
  mrb_int cp = mrb_int(mrb, num);
  char utf8[4];
  mrb_int len;
  mrb_value str;
  uint32_t ascii_flag = 0;

  if (cp < 0 || 0x10FFFF < cp || (0xD800 <= cp && cp <= 0xDFFF)) {
    mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
  }
  if (cp < 0x80) {
    /* 0xxxxxxx */
    utf8[0] = (char)cp;
    len = 1;
    ascii_flag = MRB_STR_ASCII;
  }
  else if (cp < 0x800) {
    /* 110xxxxx 10xxxxxx */
    utf8[0] = (char)(0xC0 | (cp >> 6));
    utf8[1] = (char)(0x80 | (cp & 0x3F));
    len = 2;
  }
  else if (cp < 0x10000) {
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    utf8[0] = (char)(0xE0 |  (cp >> 12));
    utf8[1] = (char)(0x80 | ((cp >>  6) & 0x3F));
    utf8[2] = (char)(0x80 | ( cp        & 0x3F));
    len = 3;
  }
  else {
    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    utf8[0] = (char)(0xF0 |  (cp >> 18));
    utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
    utf8[2] = (char)(0x80 | ((cp >>  6) & 0x3F));
    utf8[3] = (char)(0x80 | ( cp        & 0x3F));
    len = 4;
  }
  str = mrb_str_new(mrb, utf8, len);
  mrb_str_ptr(str)->flags |= ascii_flag;
  return str;
}
#endif
static
mrb_value
mrb_str_getbyte
(
mrb_state
*
mrb
,
mrb_value
str
)
{
...
...
@@ -125,8 +210,6 @@ mrb_str_swapcase(mrb_state *mrb, mrb_value self)
return
str
;
}
static
mrb_value
mrb_int_chr
(
mrb_state
*
mrb
,
mrb_value
num
);
/*
* call-seq:
* str << integer -> str
...
...
@@ -136,7 +219,8 @@ static mrb_value mrb_int_chr(mrb_state *mrb, mrb_value num);
*
* Append---Concatenates the given object to <i>str</i>. If the object is a
* <code>Integer</code>, it is considered as a codepoint, and is converted
* to a character before concatenation.
* to a character before concatenation
* (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>).
*
* a = "hello "
* a << "world" #=> "hello world"
...
...
@@ -148,8 +232,12 @@ mrb_str_concat_m(mrb_state *mrb, mrb_value self)
mrb_value
str
;
mrb_get_args
(
mrb
,
"o"
,
&
str
);
if
(
mrb_fixnum_p
(
str
))
str
=
mrb_int_chr
(
mrb
,
str
);
if
(
mrb_fixnum_p
(
str
)
||
mrb_float_p
(
str
))
#ifdef MRB_UTF8_STRING
str
=
int_chr_utf8
(
mrb
,
str
);
#else
str
=
int_chr_binary
(
mrb
,
str
);
#endif
else
str
=
mrb_ensure_string_type
(
mrb
,
str
);
mrb_str_concat
(
mrb
,
self
,
str
);
...
...
@@ -800,7 +888,7 @@ mrb_str_count(mrb_state *mrb, mrb_value str)
tr_parse_pattern
(
mrb
,
&
pat
,
v_pat
,
TRUE
);
tr_compile_pattern
(
&
pat
,
v_pat
,
bitmap
);
tr_free_pattern
(
mrb
,
&
pat
);
s
=
RSTRING_PTR
(
str
);
len
=
RSTRING_LEN
(
str
);
for
(
i
=
0
;
i
<
len
;
i
++
)
{
...
...
@@ -836,49 +924,40 @@ mrb_str_chr(mrb_state *mrb, mrb_value self)
return
mrb_str_substr
(
mrb
,
self
,
0
,
1
);
}
/*
* call-seq:
* int.chr([encoding]) -> string
*
* Returns a string containing the character represented by the +int+'s value
* according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only
* with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is
* +"ASCII-8BIT"+).
*
* 65.chr #=> "A"
* 230.chr #=> "\xE6"
* 230.chr("ASCII-8BIT") #=> "\xE6"
* 230.chr("UTF-8") #=> "\u00E6"
*/
static
mrb_value
mrb_int_chr
(
mrb_state
*
mrb
,
mrb_value
num
)
{
mrb_int
cp
=
mrb_fixnum
(
num
);
#ifdef MRB_UTF8_STRING
char
utf8
[
4
];
mrb_int
len
;
if
(
cp
<
0
||
0x10FFFF
<
cp
)
{
mrb_raisef
(
mrb
,
E_RANGE_ERROR
,
"%S out of char range"
,
num
);
}
if
(
cp
<
0x80
)
{
utf8
[
0
]
=
(
char
)
cp
;
len
=
1
;
mrb_value
enc
;
mrb_bool
enc_given
;
mrb_get_args
(
mrb
,
"|S?"
,
&
enc
,
&
enc_given
);
if
(
!
enc_given
||
ENC_COMP_P
(
enc
,
ENC_ASCII_8BIT
)
||
ENC_COMP_P
(
enc
,
ENC_BINARY
))
{
return
int_chr_binary
(
mrb
,
num
);
}
else
if
(
cp
<
0x800
)
{
utf8
[
0
]
=
(
char
)(
0xC0
|
(
cp
>>
6
));
utf8
[
1
]
=
(
char
)(
0x80
|
(
cp
&
0x3F
));
len
=
2
;
}
else
if
(
cp
<
0x10000
)
{
utf8
[
0
]
=
(
char
)(
0xE0
|
(
cp
>>
12
));
utf8
[
1
]
=
(
char
)(
0x80
|
((
cp
>>
6
)
&
0x3F
));
utf8
[
2
]
=
(
char
)(
0x80
|
(
cp
&
0x3F
));
len
=
3
;
#ifdef MRB_UTF8_STRING
else
if
(
ENC_COMP_P
(
enc
,
ENC_UTF8
))
{
return
int_chr_utf8
(
mrb
,
num
);
}
#endif
else
{
utf8
[
0
]
=
(
char
)(
0xF0
|
(
cp
>>
18
));
utf8
[
1
]
=
(
char
)(
0x80
|
((
cp
>>
12
)
&
0x3F
));
utf8
[
2
]
=
(
char
)(
0x80
|
((
cp
>>
6
)
&
0x3F
));
utf8
[
3
]
=
(
char
)(
0x80
|
(
cp
&
0x3F
));
len
=
4
;
}
return
mrb_str_new
(
mrb
,
utf8
,
len
);
#else
char
c
;
if
(
cp
<
0
||
0xff
<
cp
)
{
mrb_raisef
(
mrb
,
E_RANGE_ERROR
,
"%S out of char range"
,
num
);
mrb_raisef
(
mrb
,
E_ARGUMENT_ERROR
,
"unknown encoding name - %S"
,
enc
);
}
c
=
(
char
)
cp
;
return
mrb_str_new
(
mrb
,
&
c
,
1
);
#endif
}
/*
...
...
@@ -1219,7 +1298,8 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
mrb_define_method
(
mrb
,
s
,
"delete_suffix"
,
mrb_str_del_suffix
,
MRB_ARGS_REQ
(
1
));
mrb_define_method
(
mrb
,
s
,
"__lines"
,
mrb_str_lines
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
mrb_module_get
(
mrb
,
"Integral"
),
"chr"
,
mrb_int_chr
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
mrb_module_get
(
mrb
,
"Integral"
),
"chr"
,
mrb_int_chr
,
MRB_ARGS_OPT
(
1
));
}
void
...
...
mrbgems/mruby-string-ext/test/numeric.rb
View file @
e968bdf6
# coding: utf-8
assert('Integer#chr') do
  # Default encoding (no argument) behaves as ASCII-8BIT.
  assert_equal("A", 65.chr)
  assert_equal("B", 0x42.chr)
  assert_equal("\xab", 171.chr)
  assert_raise(RangeError) { -1.chr }
  assert_raise(RangeError) { 256.chr }

  # Explicit binary encoding; the name is matched case-insensitively.
  assert_equal("A", 65.chr("ASCII-8BIT"))
  assert_equal("B", 0x42.chr("BINARY"))
  assert_equal("\xab", 171.chr("ascii-8bit"))
  assert_raise(RangeError) { -1.chr("binary") }
  assert_raise(RangeError) { 256.chr("Ascii-8bit") }
  assert_raise(ArgumentError) { 65.chr("ASCII") }
  assert_raise(ArgumentError) { 65.chr("ASCII-8BIT", 2) }
  assert_raise(TypeError) { 65.chr(:BINARY) }

  if __ENCODING__ == "ASCII-8BIT"
    assert_raise(ArgumentError) { 65.chr("UTF-8") }
  else
    assert_equal("A", 65.chr("UTF-8"))
    assert_equal("B", 0x42.chr("UTF-8"))
    assert_equal("«", 171.chr("utf-8"))
    assert_equal("あ", 12354.chr("Utf-8"))
    assert_raise(RangeError) { -1.chr("utf-8") }
    # Call chr("UTF-8") directly on the integer: going through a plain
    # `.chr` first would raise from the binary path (value > 0xff) and never
    # exercise the UTF-8 upper bound.
    assert_raise(RangeError) { 0x110000.chr("UTF-8") }
  end
end
mrbgems/mruby-string-ext/test/string.rb
View file @
e968bdf6
...
...
@@ -167,8 +167,15 @@ end
assert
(
'String#concat'
)
do
assert_equal
"Hello World!"
,
"Hello "
<<
"World"
<<
33
assert_equal
"Hello World!"
,
"Hello "
.
concat
(
"World"
).
concat
(
33
)
assert_raise
(
TypeError
)
{
""
.
concat
(
Object
.
new
)
}
if
UTF8STRING
assert_equal
"H«"
,
"H"
<<
0xab
assert_equal
"Hは"
,
"H"
<<
12399
else
assert_equal
"H
\xab
"
,
"H"
<<
0xab
assert_raise
(
RangeError
)
{
"H"
<<
12399
}
end
end
assert
(
'String#casecmp'
)
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment