Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mruby
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Libraries
mruby
Commits
798ec3af
Commit
798ec3af
authored
Sep 22, 2015
by
Yukihiro "Matz" Matsumoto
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
UTF-8 string support in core
define MRB_UTF8_STRING (in mrbconf.h) to enable UTF-8 support.
parent
101ec5eb
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
893 additions
and
1332 deletions
+893
-1332
include/mrbconf.h
include/mrbconf.h
+3
-0
mrbgems/mruby-string-ext/mrblib/string.rb
mrbgems/mruby-string-ext/mrblib/string.rb
+26
-0
mrbgems/mruby-string-ext/src/string.c
mrbgems/mruby-string-ext/src/string.c
+114
-0
mrbgems/mruby-string-ext/test/string.rb
mrbgems/mruby-string-ext/test/string.rb
+99
-2
mrbgems/mruby-string-utf8/mrbgem.rake
mrbgems/mruby-string-utf8/mrbgem.rake
+0
-6
mrbgems/mruby-string-utf8/src/string.c
mrbgems/mruby-string-utf8/src/string.c
+0
-731
mrbgems/mruby-string-utf8/test/string.rb
mrbgems/mruby-string-utf8/test/string.rb
+0
-110
src/string.c
src/string.c
+584
-466
test/t/string.rb
test/t/string.rb
+67
-17
No files found.
include/mrbconf.h
View file @
798ec3af
...
@@ -26,6 +26,9 @@
...
@@ -26,6 +26,9 @@
/* represent mrb_value as a word (natural unit of data for the processor) */
/* represent mrb_value as a word (natural unit of data for the processor) */
//#define MRB_WORD_BOXING
//#define MRB_WORD_BOXING
/* string class to handle UTF-8 encoding */
//#define MRB_UTF8_STRING
/* argv max size in mrb_funcall */
/* argv max size in mrb_funcall */
//#define MRB_FUNCALL_ARGC_MAX 16
//#define MRB_FUNCALL_ARGC_MAX 16
...
...
mrbgems/mruby-string-ext/mrblib/string.rb
View file @
798ec3af
...
@@ -310,4 +310,30 @@ class String
...
@@ -310,4 +310,30 @@ class String
return
self
if
excl
&&
str
==
other_str
return
self
if
excl
&&
str
==
other_str
end
end
end
end
##
# call-seq:
#   str.chars                 -> array of one-character strings
#   str.chars {|char| block } -> str
#
# Splits the receiver with an empty separator. Without a block the
# resulting array is returned; with a block each element is passed to
# the block in order and the receiver itself is returned.
def chars(&block)
  pieces = self.split('')
  return pieces unless block_given?

  pieces.each { |ch| block.call(ch) }
  self
end
alias
each_char
chars
##
# call-seq:
#   str.codepoints                -> array of integers
#   str.codepoints {|cp| block }  -> str
#
# Splits the receiver with an empty separator and takes +ord+ of every
# piece. Without a block the array of ordinals is returned; with a block
# each ordinal is passed to the block in order and the receiver itself is
# returned.
def codepoints(&block)
  pieces = self.split('')
  # No block: hand back the ordinals as an array.
  return pieces.map { |x| x.ord } unless block_given?

  # Block form: iterate with each so no throw-away result array is built.
  pieces.each { |x| block.call(x.ord) }
  self
end
alias
each_codepoint
codepoints
end
end
mrbgems/mruby-string-ext/src/string.c
View file @
798ec3af
...
@@ -245,6 +245,51 @@ mrb_str_chr(mrb_state *mrb, mrb_value self)
...
@@ -245,6 +245,51 @@ mrb_str_chr(mrb_state *mrb, mrb_value self)
return
mrb_str_substr
(
mrb
,
self
,
0
,
1
);
return
mrb_str_substr
(
mrb
,
self
,
0
,
1
);
}
}
/*
 * Fixnum#chr
 *
 * Builds a new string holding the single character whose code is the
 * receiver's integer value.
 *
 * With MRB_UTF8_STRING defined, the value must lie in 0..0x10FFFF and is
 * written as a 1- to 4-byte UTF-8 sequence.  Without it, the value must lie
 * in 0..0xff and becomes a one-byte string.  Anything outside the accepted
 * range raises E_RANGE_ERROR.
 */
static mrb_value
mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
{
  mrb_int code = mrb_fixnum(num);
#ifdef MRB_UTF8_STRING
  char buf[4];
  mrb_int nbytes;

  if (code > 0x10FFFF || code < 0) {
    mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
  }

  /* Pick the encoded width from the magnitude, widest form first. */
  if (code >= 0x10000) {
    nbytes = 4;
    buf[0] = (char)(0xF0 | (code >> 18));
    buf[1] = (char)(0x80 | ((code >> 12) & 0x3F));
    buf[2] = (char)(0x80 | ((code >> 6) & 0x3F));
    buf[3] = (char)(0x80 | (code & 0x3F));
  }
  else if (code >= 0x800) {
    nbytes = 3;
    buf[0] = (char)(0xE0 | (code >> 12));
    buf[1] = (char)(0x80 | ((code >> 6) & 0x3F));
    buf[2] = (char)(0x80 | (code & 0x3F));
  }
  else if (code >= 0x80) {
    nbytes = 2;
    buf[0] = (char)(0xC0 | (code >> 6));
    buf[1] = (char)(0x80 | (code & 0x3F));
  }
  else {
    nbytes = 1;
    buf[0] = (char)code;
  }
  return mrb_str_new(mrb, buf, nbytes);
#else
  char byte;

  if (code > 0xff || code < 0) {
    mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
  }
  byte = (char)code;
  return mrb_str_new(mrb, &byte, 1);
#endif
}
/*
/*
* call-seq:
* call-seq:
* string.lines -> array of string
* string.lines -> array of string
...
@@ -422,6 +467,72 @@ mrb_str_prepend(mrb_state *mrb, mrb_value self)
...
@@ -422,6 +467,72 @@ mrb_str_prepend(mrb_state *mrb, mrb_value self)
return
self
;
return
self
;
}
}
#ifdef MRB_UTF8_STRING
/*
 * Sequence length looked up by the first byte of a UTF-8 sequence:
 *   0x00-0x7F -> 1, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF4 -> 4.
 * Bytes 0x80-0xBF and 0xF5-0xFF map to 0.
 */
static const char utf8len_codepage_zero[256] =
{
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xF0 */ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Decode the code point of the UTF-8 sequence that starts at p.
 *
 * Bytes below 0x80 are returned as-is.  Otherwise the expected sequence
 * length is taken from utf8len_codepage_zero[] and every following byte
 * must have the form 10xxxxxx.  If the length is 0 or a following byte
 * does not match, the first byte itself is returned.
 *
 * The table never yields a length above 4, so the former 5- and 6-byte
 * branches could not be reached and have been removed.
 *
 * NOTE(review): no length argument is taken; up to p[3] may be read, each
 * byte only after the previous one passed the 10xxxxxx check.  Presumably
 * callers pass a buffer that ends in a byte failing that check - confirm.
 */
static mrb_int
utf8code(unsigned char* p)
{
  mrb_int len;

  if (p[0] < 0x80)
    return p[0];

  len = utf8len_codepage_zero[p[0]];
  if (len == 2 && (p[1] & 0xc0) == 0x80) {
    return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f);
  }
  if (len == 3 && (p[1] & 0xc0) == 0x80 && (p[2] & 0xc0) == 0x80) {
    return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
  }
  if (len == 4 && (p[1] & 0xc0) == 0x80 && (p[2] & 0xc0) == 0x80 &&
      (p[3] & 0xc0) == 0x80) {
    return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) +
           ((p[2] & 0x3f) << 6) + (p[3] & 0x3f);
  }
  /* Not a well-formed sequence: fall back to the raw first byte. */
  return p[0];
}
/*
 * String#ord (MRB_UTF8_STRING build)
 *
 * Returns the code point decoded by utf8code() from the start of the
 * string.  Raises E_ARGUMENT_ERROR when the string is empty.
 */
static mrb_value
mrb_str_ord(mrb_state* mrb, mrb_value str)
{
  unsigned char *head;

  if (RSTRING_LEN(str) == 0) {
    mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
  }
  head = (unsigned char*)RSTRING_PTR(str);
  return mrb_fixnum_value(utf8code(head));
}
#else
/*
 * String#ord (build without MRB_UTF8_STRING)
 *
 * Returns the value of the first byte of the string as a fixnum in
 * 0..255.  Raises E_ARGUMENT_ERROR when the string is empty.
 */
static mrb_value
mrb_str_ord(mrb_state* mrb, mrb_value str)
{
  if (RSTRING_LEN(str) == 0)
    mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
  /* Plain char may be signed; convert through unsigned char so bytes
     0x80..0xff do not turn into negative ordinals. */
  return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[0]);
}
#endif
void
void
mrb_mruby_string_ext_gem_init
(
mrb_state
*
mrb
)
mrb_mruby_string_ext_gem_init
(
mrb_state
*
mrb
)
{
{
...
@@ -446,6 +557,9 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
...
@@ -446,6 +557,9 @@ mrb_mruby_string_ext_gem_init(mrb_state* mrb)
mrb_define_method
(
mrb
,
s
,
"prepend"
,
mrb_str_prepend
,
MRB_ARGS_REQ
(
1
));
mrb_define_method
(
mrb
,
s
,
"prepend"
,
mrb_str_prepend
,
MRB_ARGS_REQ
(
1
));
mrb_alias_method
(
mrb
,
s
,
mrb_intern_lit
(
mrb
,
"next"
),
mrb_intern_lit
(
mrb
,
"succ"
));
mrb_alias_method
(
mrb
,
s
,
mrb_intern_lit
(
mrb
,
"next"
),
mrb_intern_lit
(
mrb
,
"succ"
));
mrb_alias_method
(
mrb
,
s
,
mrb_intern_lit
(
mrb
,
"next!"
),
mrb_intern_lit
(
mrb
,
"succ!"
));
mrb_alias_method
(
mrb
,
s
,
mrb_intern_lit
(
mrb
,
"next!"
),
mrb_intern_lit
(
mrb
,
"succ!"
));
mrb_define_method
(
mrb
,
s
,
"ord"
,
mrb_str_ord
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
mrb
->
fixnum_class
,
"chr"
,
mrb_fixnum_chr
,
MRB_ARGS_NONE
());
}
}
void
void
...
...
mrbgems/mruby-string-ext/test/string.rb
View file @
798ec3af
##
##
# String(Ext) Test
# String(Ext) Test
UTF8STRING
=
(
"
\343\201\202
"
.
size
==
1
)
assert
(
'String#getbyte'
)
do
assert
(
'String#getbyte'
)
do
str1
=
"hello"
str1
=
"hello"
bytes1
=
[
104
,
101
,
108
,
108
,
111
]
bytes1
=
[
104
,
101
,
108
,
108
,
111
]
...
@@ -180,6 +182,8 @@ end
...
@@ -180,6 +182,8 @@ end
assert
(
'String#chr'
)
do
assert
(
'String#chr'
)
do
assert_equal
"a"
,
"abcde"
.
chr
assert_equal
"a"
,
"abcde"
.
chr
# test Fixnum#chr as well
assert_equal
"a"
,
97
.
chr
end
end
assert
(
'String#lines'
)
do
assert
(
'String#lines'
)
do
...
@@ -374,8 +378,8 @@ assert('String#succ') do
...
@@ -374,8 +378,8 @@ assert('String#succ') do
assert_equal
"-b-"
,
a
assert_equal
"-b-"
,
a
a
=
"-z-"
;
a
.
succ!
a
=
"-z-"
;
a
.
succ!
assert_equal
"-aa-"
,
a
assert_equal
"-aa-"
,
a
a
=
"あ
a
"
;
a
.
succ!
a
=
"あ
b
"
;
a
.
succ!
assert_equal
"あ
b
"
,
a
assert_equal
"あ
c
"
,
a
a
=
"あaz"
;
a
.
succ!
a
=
"あaz"
;
a
.
succ!
assert_equal
"あba"
,
a
assert_equal
"あba"
,
a
end
end
...
@@ -471,3 +475,96 @@ assert('String#upto') do
...
@@ -471,3 +475,96 @@ assert('String#upto') do
})
})
assert_equal
(
2
,
count
)
assert_equal
(
2
,
count
)
end
end
assert
(
'String#ord'
)
do
got
=
"hello!"
.
split
(
''
).
map
{
|
x
|
x
.
ord
}
expect
=
[
104
,
101
,
108
,
108
,
111
,
33
]
assert_equal
expect
,
got
end
assert
(
'String#ord(UTF-8)'
)
do
got
=
"こんにちは世界!"
.
split
(
''
).
map
{
|
x
|
x
.
ord
}
expect
=
[
0x3053
,
0x3093
,
0x306b
,
0x3061
,
0x306f
,
0x4e16
,
0x754c
,
0x21
]
assert_equal
expect
,
got
end
if
UTF8STRING
assert
(
'String#chr'
)
do
assert_equal
"h"
,
"hello!"
.
chr
end
assert
(
'String#chr(UTF-8)'
)
do
assert_equal
"こ"
,
"こんにちは世界!"
.
chr
end
if
UTF8STRING
assert
(
'String#chars'
)
do
expect
=
[
"h"
,
"e"
,
"l"
,
"l"
,
"o"
,
"!"
]
assert_equal
expect
,
"hello!"
.
chars
s
=
""
"hello!"
.
chars
do
|
x
|
s
+=
x
end
assert_equal
"hello!"
,
s
end
assert
(
'String#chars(UTF-8)'
)
do
expect
=
[
'こ'
,
'ん'
,
'に'
,
'ち'
,
'は'
,
'世'
,
'界'
,
'!'
]
assert_equal
expect
,
"こんにちは世界!"
.
chars
s
=
""
"こんにちは世界!"
.
chars
do
|
x
|
s
+=
x
end
assert_equal
"こんにちは世界!"
,
s
end
if
UTF8STRING
assert
(
'String#each_char'
)
do
s
=
""
"hello!"
.
each_char
do
|
x
|
s
+=
x
end
assert_equal
"hello!"
,
s
end
assert
(
'String#each_char(UTF-8)'
)
do
s
=
""
"こんにちは世界!"
.
each_char
do
|
x
|
s
+=
x
end
assert_equal
"こんにちは世界!"
,
s
end
if
UTF8STRING
assert
(
'String#codepoints'
)
do
expect
=
[
104
,
101
,
108
,
108
,
111
,
33
]
assert_equal
expect
,
"hello!"
.
codepoints
cp
=
[]
"hello!"
.
codepoints
do
|
x
|
cp
<<
x
end
assert_equal
expect
,
cp
end
assert
(
'String#codepoints(UTF-8)'
)
do
expect
=
[
12371
,
12435
,
12395
,
12385
,
12399
,
19990
,
30028
,
33
]
assert_equal
expect
,
"こんにちは世界!"
.
codepoints
cp
=
[]
"こんにちは世界!"
.
codepoints
do
|
x
|
cp
<<
x
end
assert_equal
expect
,
cp
end
if
UTF8STRING
assert
(
'String#each_codepoint'
)
do
expect
=
[
104
,
101
,
108
,
108
,
111
,
33
]
cp
=
[]
"hello!"
.
each_codepoint
do
|
x
|
cp
<<
x
end
assert_equal
expect
,
cp
end
assert
(
'String#each_codepoint(UTF-8)'
)
do
expect
=
[
12371
,
12435
,
12395
,
12385
,
12399
,
19990
,
30028
,
33
]
cp
=
[]
"こんにちは世界!"
.
each_codepoint
do
|
x
|
cp
<<
x
end
assert_equal
expect
,
cp
end
if
UTF8STRING
mrbgems/mruby-string-utf8/mrbgem.rake
deleted
100644 → 0
View file @
101ec5eb
MRuby
::
Gem
::
Specification
.
new
(
'mruby-string-utf8'
)
do
|
spec
|
spec
.
license
=
'MIT'
spec
.
author
=
'mruby developers'
spec
.
summary
=
'UTF-8 support in String class'
spec
.
add_dependency
(
'mruby-string-ext'
,
:core
=>
'mruby-string-ext'
)
end
mrbgems/mruby-string-utf8/src/string.c
deleted
100644 → 0
View file @
101ec5eb
#include "mruby.h"
#include "mruby/array.h"
#include "mruby/class.h"
#include "mruby/string.h"
#include "mruby/range.h"
#include "mruby/numeric.h"
#include "mruby/re.h"
#include <string.h>
/*
 * Byte count to advance for a character starting with the given byte:
 *   0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF4 -> 4, every other byte -> 1.
 */
static const char utf8len_codepage[256] =
{
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xA0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xF0 */ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
/*
 * Sequence length looked up by the first byte of a UTF-8 sequence:
 *   0x00-0x7F -> 1, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF4 -> 4.
 * Bytes 0x80-0xBF and 0xF5-0xFF map to 0.
 */
static const char utf8len_codepage_zero[256] =
{
  /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xD0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xF0 */ 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Decode the code point of the UTF-8 sequence that starts at p.
 *
 * Bytes below 0x80 are returned as-is.  Otherwise the expected sequence
 * length is taken from utf8len_codepage_zero[] and every following byte
 * must have the form 10xxxxxx.  If the length is 0 or a following byte
 * does not match, the first byte itself is returned.
 *
 * The table never yields a length above 4, so the former 5- and 6-byte
 * branches could not be reached and have been removed.
 *
 * NOTE(review): no length argument is taken; up to p[3] may be read, each
 * byte only after the previous one passed the 10xxxxxx check.  Presumably
 * callers pass a buffer that ends in a byte failing that check - confirm.
 */
static mrb_int
utf8code(unsigned char* p)
{
  mrb_int len;

  if (p[0] < 0x80)
    return p[0];

  len = utf8len_codepage_zero[p[0]];
  if (len == 2 && (p[1] & 0xc0) == 0x80) {
    return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f);
  }
  if (len == 3 && (p[1] & 0xc0) == 0x80 && (p[2] & 0xc0) == 0x80) {
    return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
  }
  if (len == 4 && (p[1] & 0xc0) == 0x80 && (p[2] & 0xc0) == 0x80 &&
      (p[3] & 0xc0) == 0x80) {
    return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12) +
           ((p[2] & 0x3f) << 6) + (p[3] & 0x3f);
  }
  /* Not a well-formed sequence: fall back to the raw first byte. */
  return p[0];
}
static
mrb_value
mrb_fixnum_chr
(
mrb_state
*
,
mrb_value
);
/*
 * Number of bytes occupied by the character starting at p.
 *
 * The candidate length comes from utf8len_codepage[]; it is only accepted
 * when each of the following (length - 1) bytes has the form 10xxxxxx.
 * Otherwise, and for a zero byte, the result is 1.
 */
static mrb_int
utf8len(unsigned char* p)
{
  mrb_int expected;
  mrb_int k;

  if (p[0] == 0) {
    return 1;
  }
  expected = utf8len_codepage[p[0]];
  k = 1;
  while (k < expected) {
    if ((p[k] & 0xc0) != 0x80) {
      return 1;
    }
    ++k;
  }
  return expected;
}
/*
 * Count the characters (as measured by utf8len()) in the leading bytes of
 * str.
 *
 * len is the number of bytes to scan; a negative len means the whole
 * string (RSTRING_LEN(str) bytes).
 */
static mrb_int
mrb_utf8_strlen(mrb_value str, mrb_int len)
{
  unsigned char *cur = (unsigned char*)RSTRING_PTR(str);
  unsigned char *stop;
  mrb_int count;

  if (len < 0) {
    stop = cur + RSTRING_LEN(str);
  }
  else {
    stop = cur + len;
  }
  for (count = 0; cur < stop; count++) {
    cur += utf8len(cur);
  }
  return count;
}
static
mrb_value
mrb_str_size
(
mrb_state
*
mrb
,
mrb_value
str
)
{
return
mrb_fixnum_value
(
mrb_utf8_strlen
(
str
,
-
1
));
}
#define RSTRING_LEN_UTF8(s) mrb_utf8_strlen(s, -1)
/*
 * Find the first occurrence of the m-byte pattern xs inside the n-byte
 * buffer ys.  Returns the byte offset of the match, or -1 if there is none.
 *
 * A 256-entry shift table is built first: m + 1 for bytes that do not occur
 * in the pattern, otherwise the distance from the byte's last occurrence to
 * one past the end of the pattern.  After each failed comparison the window
 * moves by the shift of the byte just behind it.
 *
 * NOTE(review): that byte is win[m], which is ys[n] for the final window;
 * presumably callers guarantee ys[n] is readable - confirm.
 */
static inline mrb_int
mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
{
  const unsigned char *pat_end = xs + m;
  const unsigned char *px;
  const unsigned char *win = ys;
  int shift[256];
  int c;

  /* Build the shift table. */
  for (c = 0; c < 256; c++) {
    shift[c] = m + 1;
  }
  for (px = xs; px < pat_end; px++) {
    shift[*px] = pat_end - px;
  }

  /* Slide the window across the buffer. */
  while (win + m <= ys + n) {
    if (*xs == *win && memcmp(xs, win, m) == 0) {
      return win - ys;
    }
    win += shift[win[m]];
  }
  return -1;
}
/*
 * Locate the m-byte block x0 inside the n-byte block y0.
 * Returns the byte offset of the first match, or -1 when there is none.
 *
 * Special cases are handled in this order: pattern longer than the buffer
 * (-1), equal lengths (single memcmp), pattern shorter than one byte (0),
 * one-byte pattern (linear scan).  Everything else is passed to
 * mrb_memsearch_qs().
 */
static mrb_int
mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
{
  const unsigned char *needle = (const unsigned char *)x0;
  const unsigned char *hay = (const unsigned char *)y0;

  if (m > n) {
    return -1;
  }
  if (m == n) {
    if (memcmp(x0, y0, m) == 0) {
      return 0;
    }
    return -1;
  }
  if (m < 1) {
    return 0;
  }
  if (m == 1) {
    mrb_int k;

    for (k = 0; k < n; k++) {
      if (hay[k] == needle[0]) {
        return k;
      }
    }
    return -1;
  }
  return mrb_memsearch_qs(needle, m, hay, n);
}
/*
 * Copy out a run of characters from str.
 *
 * beg characters are skipped from the start, then up to len characters are
 * taken; both walks stop at the end of the string.  Character widths come
 * from utf8len().  The selected bytes are returned as a new string.
 */
static mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  unsigned char *first = (unsigned char*)RSTRING_PTR(str);
  unsigned char *limit = first + RSTRING_LEN(str);
  unsigned char *last;
  mrb_int skip = beg;
  mrb_int take = len;

  /* Move to the first wanted character. */
  while (skip > 0 && first < limit) {
    first += utf8len(first);
    skip--;
  }

  /* Step over the wanted characters. */
  last = first;
  while (take > 0 && last < limit) {
    last += utf8len(last);
    take--;
  }
  return mrb_str_new(mrb, (const char*)first, (size_t)(last - first));
}
/*
 * Character-indexed substring.
 *
 * beg is a character index; a negative value counts from the end of the
 * string.  len is the number of characters wanted.
 *
 * Returns nil when len is negative, when beg lies beyond the end of the
 * string, or when a negative beg reaches before the start.  Otherwise len
 * is clipped to the characters available after beg and the result of
 * str_subseq() is returned.
 *
 * A negative beg is adjusted exactly once.  Adjusting it twice made an
 * index such as -10 on a 7-character string wrap round to 4 instead of
 * yielding nil.
 */
static mrb_value
str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  mrb_int len8 = RSTRING_LEN_UTF8(str);

  if (len < 0) return mrb_nil_value();
  if (len8 == 0) {
    len = 0;
  }
  if (beg > len8) return mrb_nil_value();
  if (beg < 0) {
    beg += len8;
    if (beg < 0) return mrb_nil_value();
  }
  /* Here 0 <= beg <= len8.  Compare against the remainder rather than
     computing beg + len, which could overflow for a huge len. */
  if (len > len8 - beg) {
    len = len8 - beg;
  }
  return str_subseq(mrb, str, beg, len);
}
static
mrb_int
str_index
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
sub
,
mrb_int
offset
)
{
mrb_int
pos
;
char
*
s
,
*
sptr
;
mrb_int
len
,
slen
;
len
=
RSTRING_LEN
(
str
);
slen
=
RSTRING_LEN
(
sub
);
if
(
offset
<
0
)
{
offset
+=
len
;
if
(
offset
<
0
)
return
-
1
;
}
if
(
len
-
offset
<
slen
)
return
-
1
;
s
=
RSTRING_PTR
(
str
);
if
(
offset
)
{
s
+=
offset
;
}
if
(
slen
==
0
)
return
offset
;
/* need proceed one character at a time */
sptr
=
RSTRING_PTR
(
sub
);
slen
=
RSTRING_LEN
(
sub
);
len
=
RSTRING_LEN
(
str
)
-
offset
;
pos
=
mrb_memsearch
(
sptr
,
slen
,
s
,
len
);
if
(
pos
<
0
)
return
pos
;
return
pos
+
offset
;
}
static
mrb_int
str_rindex
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
sub
,
mrb_int
pos
)
{
char
*
s
,
*
sbeg
,
*
t
;
struct
RString
*
ps
=
mrb_str_ptr
(
str
);
mrb_int
len
=
RSTRING_LEN
(
sub
);
/* substring longer than string */
if
(
RSTR_LEN
(
ps
)
<
len
)
return
-
1
;
if
(
RSTR_LEN
(
ps
)
-
pos
<
len
)
{
pos
=
RSTR_LEN
(
ps
)
-
len
;
}
sbeg
=
RSTR_PTR
(
ps
);
s
=
RSTR_PTR
(
ps
)
+
pos
;
t
=
RSTRING_PTR
(
sub
);
if
(
len
)
{
while
(
sbeg
<=
s
)
{
if
(
memcmp
(
s
,
t
,
len
)
==
0
)
{
return
s
-
RSTR_PTR
(
ps
);
}
s
--
;
}
return
-
1
;
}
else
{
return
pos
;
}
}
static
mrb_value
mrb_str_aref
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
indx
)
{
mrb_int
idx
;
mrb_regexp_check
(
mrb
,
indx
);
switch
(
mrb_type
(
indx
))
{
case
MRB_TT_FLOAT
:
indx
=
mrb_flo_to_fixnum
(
mrb
,
indx
);
/* fall through */
case
MRB_TT_FIXNUM
:
idx
=
mrb_fixnum
(
indx
);
num_index:
str
=
str_substr
(
mrb
,
str
,
idx
,
1
);
if
(
!
mrb_nil_p
(
str
)
&&
RSTRING_LEN
(
str
)
==
0
)
return
mrb_nil_value
();
return
str
;
case
MRB_TT_STRING
:
if
(
str_index
(
mrb
,
str
,
indx
,
0
)
!=
-
1
)
return
mrb_str_dup
(
mrb
,
indx
);
return
mrb_nil_value
();
case
MRB_TT_RANGE
:
/* check if indx is Range */
{
mrb_int
beg
,
len
;
mrb_value
tmp
;
len
=
RSTRING_LEN_UTF8
(
str
);
if
(
mrb_range_beg_len
(
mrb
,
indx
,
&
beg
,
&
len
,
len
))
{
tmp
=
str_subseq
(
mrb
,
str
,
beg
,
len
);
return
tmp
;
}
else
{
return
mrb_nil_value
();
}
}
default:
idx
=
mrb_fixnum
(
indx
);
goto
num_index
;
}
return
mrb_nil_value
();
/* not reached */
}
static
mrb_value
mrb_str_aref_m
(
mrb_state
*
mrb
,
mrb_value
str
)
{
mrb_value
a1
,
a2
;
int
argc
;
argc
=
mrb_get_args
(
mrb
,
"o|o"
,
&
a1
,
&
a2
);
if
(
argc
==
2
)
{
mrb_regexp_check
(
mrb
,
a1
);
return
str_substr
(
mrb
,
str
,
mrb_fixnum
(
a1
),
mrb_fixnum
(
a2
));
}
if
(
argc
!=
1
)
{
mrb_raisef
(
mrb
,
E_ARGUMENT_ERROR
,
"wrong number of arguments (%S for 1)"
,
mrb_fixnum_value
(
argc
));
}
return
mrb_str_aref
(
mrb
,
str
,
a1
);
}
static
mrb_value
mrb_str_index_m
(
mrb_state
*
mrb
,
mrb_value
str
)
{
mrb_value
*
argv
;
mrb_int
argc
;
mrb_value
sub
;
mrb_int
pos
;
mrb_get_args
(
mrb
,
"*"
,
&
argv
,
&
argc
);
if
(
argc
==
2
)
{
pos
=
mrb_fixnum
(
argv
[
1
]);
sub
=
argv
[
0
];
}
else
{
pos
=
0
;
if
(
argc
>
0
)
sub
=
argv
[
0
];
else
sub
=
mrb_nil_value
();
}
mrb_regexp_check
(
mrb
,
sub
);
if
(
pos
<
0
)
{
pos
+=
RSTRING_LEN
(
str
);
if
(
pos
<
0
)
{
return
mrb_nil_value
();
}
}
if
(
mrb_type
(
sub
)
==
MRB_TT_FIXNUM
)
{
sub
=
mrb_fixnum_chr
(
mrb
,
sub
);
}
switch
(
mrb_type
(
sub
))
{
default:
{
mrb_value
tmp
;
tmp
=
mrb_check_string_type
(
mrb
,
sub
);
if
(
mrb_nil_p
(
tmp
))
{
mrb_raisef
(
mrb
,
E_TYPE_ERROR
,
"type mismatch: %S given"
,
sub
);
}
sub
=
tmp
;
}
/* fall through */
case
MRB_TT_STRING
:
pos
=
str_index
(
mrb
,
str
,
sub
,
pos
);
break
;
}
if
(
pos
==
-
1
)
return
mrb_nil_value
();
return
mrb_fixnum_value
(
mrb_utf8_strlen
(
str
,
pos
));
}
/*
 * String#reverse! (character-wise)
 *
 * Reverses the order of the characters of str in place and returns str.
 * Strings of fewer than two characters are returned untouched.
 *
 * The bytes are first copied to a scratch buffer; the copy is then walked
 * front to back with utf8len() while each character is written into the
 * original storage back to front.
 */
static mrb_value
mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
{
  mrb_int utf8_len = mrb_utf8_strlen(str, -1);

  if (utf8_len > 1) {
    mrb_int len;
    char *buf;
    unsigned char *p, *e, *r;

    mrb_str_modify(mrb, mrb_str_ptr(str));
    len = RSTRING_LEN(str);
    /* One extra byte for a terminating zero: utf8len() inspects the bytes
       that follow a lead byte, so a truncated sequence at the very end
       must meet a byte that is not 10xxxxxx.  Without it the scan could
       read past the allocation and report a length reaching beyond e,
       which would push r before the start of the string. */
    buf = (char *)mrb_malloc(mrb, (size_t)len + 1);
    memcpy(buf, RSTRING_PTR(str), len);
    buf[len] = '\0';

    p = (unsigned char*)buf;
    e = p + len;
    r = (unsigned char*)RSTRING_PTR(str) + len;

    while (p < e) {
      mrb_int clen = utf8len(p);

      r -= clen;
      memcpy(r, p, clen);
      p += clen;
    }
    mrb_free(mrb, buf);
  }
  return str;
}
static
mrb_value
mrb_str_rindex_m
(
mrb_state
*
mrb
,
mrb_value
str
)
{
mrb_value
*
argv
;
mrb_int
argc
;
mrb_value
sub
;
mrb_value
vpos
;
mrb_int
pos
,
len
=
RSTRING_LEN
(
str
);
mrb_get_args
(
mrb
,
"*"
,
&
argv
,
&
argc
);
if
(
argc
==
2
)
{
sub
=
argv
[
0
];
vpos
=
argv
[
1
];
pos
=
mrb_fixnum
(
vpos
);
if
(
pos
<
0
)
{
pos
+=
len
;
if
(
pos
<
0
)
{
mrb_regexp_check
(
mrb
,
sub
);
return
mrb_nil_value
();
}
}
if
(
pos
>
len
)
pos
=
len
;
}
else
{
pos
=
len
;
if
(
argc
>
0
)
sub
=
argv
[
0
];
else
sub
=
mrb_nil_value
();
}
mrb_regexp_check
(
mrb
,
sub
);
if
(
mrb_type
(
sub
)
==
MRB_TT_FIXNUM
)
{
sub
=
mrb_fixnum_chr
(
mrb
,
sub
);
}
switch
(
mrb_type
(
sub
))
{
default:
{
mrb_value
tmp
;
tmp
=
mrb_check_string_type
(
mrb
,
sub
);
if
(
mrb_nil_p
(
tmp
))
{
mrb_raisef
(
mrb
,
E_TYPE_ERROR
,
"type mismatch: %S given"
,
sub
);
}
sub
=
tmp
;
}
/* fall through */
case
MRB_TT_STRING
:
pos
=
str_rindex
(
mrb
,
str
,
sub
,
pos
);
break
;
}
if
(
pos
==
-
1
)
return
mrb_nil_value
();
return
mrb_fixnum_value
(
mrb_utf8_strlen
(
str
,
pos
));
}
static
mrb_value
mrb_str_reverse
(
mrb_state
*
mrb
,
mrb_value
str
)
{
return
mrb_str_reverse_bang
(
mrb
,
mrb_str_dup
(
mrb
,
str
));
}
static
mrb_value
mrb_fixnum_chr
(
mrb_state
*
mrb
,
mrb_value
num
)
{
mrb_int
cp
=
mrb_fixnum
(
num
);
char
utf8
[
4
];
mrb_int
len
;
if
(
cp
<
0
||
0x10FFFF
<
cp
)
{
mrb_raisef
(
mrb
,
E_RANGE_ERROR
,
"%S out of char range"
,
num
);
}
if
(
cp
<
0x80
)
{
utf8
[
0
]
=
(
char
)
cp
;
len
=
1
;
}
else
if
(
cp
<
0x800
)
{
utf8
[
0
]
=
(
char
)(
0xC0
|
(
cp
>>
6
));
utf8
[
1
]
=
(
char
)(
0x80
|
(
cp
&
0x3F
));
len
=
2
;
}
else
if
(
cp
<
0x10000
)
{
utf8
[
0
]
=
(
char
)(
0xE0
|
(
cp
>>
12
));
utf8
[
1
]
=
(
char
)(
0x80
|
((
cp
>>
6
)
&
0x3F
));
utf8
[
2
]
=
(
char
)(
0x80
|
(
cp
&
0x3F
));
len
=
3
;
}
else
{
utf8
[
0
]
=
(
char
)(
0xF0
|
(
cp
>>
18
));
utf8
[
1
]
=
(
char
)(
0x80
|
((
cp
>>
12
)
&
0x3F
));
utf8
[
2
]
=
(
char
)(
0x80
|
((
cp
>>
6
)
&
0x3F
));
utf8
[
3
]
=
(
char
)(
0x80
|
(
cp
&
0x3F
));
len
=
4
;
}
return
mrb_str_new
(
mrb
,
utf8
,
len
);
}
static
mrb_value
mrb_str_ord
(
mrb_state
*
mrb
,
mrb_value
str
)
{
mrb_int
len
=
RSTRING_LEN
(
str
);
if
(
len
==
0
)
mrb_raise
(
mrb
,
E_ARGUMENT_ERROR
,
"empty string"
);
return
mrb_fixnum_value
(
utf8code
((
unsigned
char
*
)
RSTRING_PTR
(
str
)));
}
static
mrb_value
mrb_str_split_m
(
mrb_state
*
mrb
,
mrb_value
str
)
{
int
argc
;
mrb_value
spat
=
mrb_nil_value
();
enum
{
awk
,
string
,
regexp
}
split_type
=
string
;
long
i
=
0
,
lim_p
;
mrb_int
beg
;
mrb_int
end
;
mrb_int
lim
=
0
;
mrb_value
result
,
tmp
;
argc
=
mrb_get_args
(
mrb
,
"|oi"
,
&
spat
,
&
lim
);
lim_p
=
(
lim
>
0
&&
argc
==
2
);
if
(
argc
==
2
)
{
if
(
lim
==
1
)
{
if
(
RSTRING_LEN
(
str
)
==
0
)
return
mrb_ary_new_capa
(
mrb
,
0
);
return
mrb_ary_new_from_values
(
mrb
,
1
,
&
str
);
}
i
=
1
;
}
if
(
argc
==
0
||
mrb_nil_p
(
spat
))
{
split_type
=
awk
;
}
else
{
if
(
mrb_string_p
(
spat
))
{
split_type
=
string
;
if
(
RSTRING_LEN
(
spat
)
==
1
&&
RSTRING_PTR
(
spat
)[
0
]
==
' '
){
split_type
=
awk
;
}
}
else
{
mrb_noregexp
(
mrb
,
str
);
}
}
result
=
mrb_ary_new
(
mrb
);
beg
=
0
;
if
(
split_type
==
awk
)
{
char
*
ptr
=
RSTRING_PTR
(
str
);
char
*
eptr
=
RSTRING_END
(
str
);
char
*
bptr
=
ptr
;
int
skip
=
1
;
unsigned
int
c
;
end
=
beg
;
while
(
ptr
<
eptr
)
{
int
ai
=
mrb_gc_arena_save
(
mrb
);
c
=
(
unsigned
char
)
*
ptr
++
;
if
(
skip
)
{
if
(
ISSPACE
(
c
))
{
beg
=
ptr
-
bptr
;
}
else
{
end
=
ptr
-
bptr
;
skip
=
0
;
if
(
lim_p
&&
lim
<=
i
)
break
;
}
}
else
if
(
ISSPACE
(
c
))
{
mrb_ary_push
(
mrb
,
result
,
str_subseq
(
mrb
,
str
,
beg
,
end
-
beg
));
mrb_gc_arena_restore
(
mrb
,
ai
);
skip
=
1
;
beg
=
ptr
-
bptr
;
if
(
lim_p
)
++
i
;
}
else
{
end
=
ptr
-
bptr
;
}
}
}
else
if
(
split_type
==
string
)
{
char
*
ptr
=
RSTRING_PTR
(
str
);
// s->as.ary
char
*
temp
=
ptr
;
char
*
eptr
=
RSTRING_END
(
str
);
mrb_int
slen
=
RSTRING_LEN
(
spat
);
if
(
slen
==
0
)
{
int
ai
=
mrb_gc_arena_save
(
mrb
);
while
(
ptr
<
eptr
)
{
mrb_ary_push
(
mrb
,
result
,
str_subseq
(
mrb
,
str
,
ptr
-
temp
,
1
));
mrb_gc_arena_restore
(
mrb
,
ai
);
ptr
++
;
if
(
lim_p
&&
lim
<=
++
i
)
break
;
}
}
else
{
char
*
sptr
=
RSTRING_PTR
(
spat
);
int
ai
=
mrb_gc_arena_save
(
mrb
);
while
(
ptr
<
eptr
&&
(
end
=
mrb_memsearch
(
sptr
,
slen
,
ptr
,
eptr
-
ptr
))
>=
0
)
{
/* mrb_ary_push(mrb, result, str_subseq(mrb, str, ptr - temp, end)); */
mrb_ary_push
(
mrb
,
result
,
mrb_str_new
(
mrb
,
ptr
,
end
));
mrb_gc_arena_restore
(
mrb
,
ai
);
ptr
+=
end
+
slen
;
if
(
lim_p
&&
lim
<=
++
i
)
break
;
}
}
beg
=
ptr
-
temp
;
}
else
{
mrb_noregexp
(
mrb
,
str
);
}
if
(
RSTRING_LEN
(
str
)
>
0
&&
(
lim_p
||
RSTRING_LEN
(
str
)
>
beg
||
lim
<
0
))
{
if
(
RSTRING_LEN
(
str
)
==
beg
)
{
tmp
=
mrb_str_new_lit
(
mrb
,
""
);
}
else
{
tmp
=
mrb_str_new
(
mrb
,
RSTRING_PTR
(
str
)
+
beg
,
RSTRING_LEN
(
str
)
-
beg
);
}
mrb_ary_push
(
mrb
,
result
,
tmp
);
}
if
(
!
lim_p
&&
lim
==
0
)
{
mrb_int
len
;
while
((
len
=
RARRAY_LEN
(
result
))
>
0
&&
(
tmp
=
RARRAY_PTR
(
result
)[
len
-
1
],
RSTRING_LEN
(
tmp
)
==
0
))
mrb_ary_pop
(
mrb
,
result
);
}
return
result
;
}
static
mrb_value
mrb_str_chr
(
mrb_state
*
mrb
,
mrb_value
self
)
{
return
str_substr
(
mrb
,
self
,
0
,
1
);
}
static
mrb_value
mrb_str_chars
(
mrb_state
*
mrb
,
mrb_value
self
)
{
mrb_value
result
;
mrb_value
blk
;
int
ai
;
mrb_int
len
;
mrb_value
arg
;
char
*
p
=
RSTRING_PTR
(
self
);
char
*
e
=
p
+
RSTRING_LEN
(
self
);
mrb_get_args
(
mrb
,
"&"
,
&
blk
);
result
=
mrb_ary_new
(
mrb
);
if
(
!
mrb_nil_p
(
blk
))
{
while
(
p
<
e
)
{
len
=
utf8len
((
unsigned
char
*
)
p
);
arg
=
mrb_str_new
(
mrb
,
p
,
len
);
mrb_yield_argv
(
mrb
,
blk
,
1
,
&
arg
);
p
+=
len
;
}
return
self
;
}
while
(
p
<
e
)
{
ai
=
mrb_gc_arena_save
(
mrb
);
len
=
utf8len
((
unsigned
char
*
)
p
);
mrb_ary_push
(
mrb
,
result
,
mrb_str_new
(
mrb
,
p
,
len
));
mrb_gc_arena_restore
(
mrb
,
ai
);
p
+=
len
;
}
return
result
;
}
static
mrb_value
mrb_str_codepoints
(
mrb_state
*
mrb
,
mrb_value
self
)
{
mrb_value
result
;
mrb_value
blk
;
int
ai
;
mrb_int
len
;
mrb_value
arg
;
char
*
p
=
RSTRING_PTR
(
self
);
char
*
e
=
p
+
RSTRING_LEN
(
self
);
mrb_get_args
(
mrb
,
"&"
,
&
blk
);
result
=
mrb_ary_new
(
mrb
);
if
(
!
mrb_nil_p
(
blk
))
{
while
(
p
<
e
)
{
len
=
utf8len
((
unsigned
char
*
)
p
);
arg
=
mrb_fixnum_value
(
utf8code
((
unsigned
char
*
)
p
));
mrb_yield_argv
(
mrb
,
blk
,
1
,
&
arg
);
p
+=
len
;
}
return
self
;
}
while
(
p
<
e
)
{
ai
=
mrb_gc_arena_save
(
mrb
);
len
=
utf8len
((
unsigned
char
*
)
p
);
mrb_ary_push
(
mrb
,
result
,
mrb_fixnum_value
(
utf8code
((
unsigned
char
*
)
p
)));
mrb_gc_arena_restore
(
mrb
,
ai
);
p
+=
len
;
}
return
result
;
}
void
mrb_mruby_string_utf8_gem_init
(
mrb_state
*
mrb
)
{
struct
RClass
*
s
=
mrb
->
string_class
;
mrb_define_method
(
mrb
,
s
,
"size"
,
mrb_str_size
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"length"
,
mrb_str_size
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"index"
,
mrb_str_index_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"[]"
,
mrb_str_aref_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"ord"
,
mrb_str_ord
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"slice"
,
mrb_str_aref_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"split"
,
mrb_str_split_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"reverse"
,
mrb_str_reverse
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"reverse!"
,
mrb_str_reverse_bang
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"rindex"
,
mrb_str_rindex_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"chr"
,
mrb_str_chr
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"chars"
,
mrb_str_chars
,
MRB_ARGS_NONE
());
mrb_alias_method
(
mrb
,
s
,
mrb_intern_lit
(
mrb
,
"each_char"
),
mrb_intern_lit
(
mrb
,
"chars"
));
mrb_define_method
(
mrb
,
s
,
"codepoints"
,
mrb_str_codepoints
,
MRB_ARGS_NONE
());
mrb_alias_method
(
mrb
,
s
,
mrb_intern_lit
(
mrb
,
"each_codepoint"
),
mrb_intern_lit
(
mrb
,
"codepoints"
));
mrb_define_method
(
mrb
,
mrb
->
fixnum_class
,
"chr"
,
mrb_fixnum_chr
,
MRB_ARGS_NONE
());
}
void
mrb_mruby_string_utf8_gem_final
(
mrb_state
*
mrb
)
{
}
mrbgems/mruby-string-utf8/test/string.rb
deleted
100644 → 0
View file @
101ec5eb
# -*- coding: utf-8 -*-
##
# String(utf8) Test
assert
(
'String#[]'
)
do
assert_equal
"ち"
,
"こんにちは世界"
[
3
]
assert_equal
nil
,
"こんにちは世界"
[
20
]
assert_equal
"世"
,
"こんにちは世界"
[
-
2
]
assert_equal
"世界"
,
"こんにちは世界"
[
-
2
..-
1
]
assert_equal
"んに"
,
"こんにちは世界"
[
1
,
2
]
assert_equal
"世"
,
"こんにちは世界"
[
"世"
]
assert_equal
'b'
,
'abc'
[
1.1
]
end
assert
(
'String#reverse'
,
'15.2.10.5.29'
)
do
a
=
'こんにちは世界!'
a
.
reverse
assert_equal
'こんにちは世界!'
,
a
assert_equal
'!界世はちにんこ'
,
'こんにちは世界!'
.
reverse
end
assert
(
'String#reverse!'
,
'15.2.10.5.30'
)
do
a
=
'こんにちは世界!'
a
.
reverse!
assert_equal
'!界世はちにんこ'
,
a
assert_equal
'!界世はちにんこ'
,
'こんにちは世界!'
.
reverse!
end
assert
(
'Invalid sequence'
)
do
assert_equal
5
,
"
\xF8\x88\x80\x80\x80
"
.
size
assert_equal
6
,
"
\xFC\x84\x80\x80\x80\x80
"
.
size
end
assert
(
'String#size'
)
do
str
=
'こんにちは世界!'
assert_equal
8
,
str
.
size
assert_not_equal
str
.
bytesize
,
str
.
size
assert_equal
2
,
str
[
1
,
2
].
size
end
assert
(
'String#index'
)
do
str
=
"こんにちは世界!
\n
こんにちは世界!"
assert_nil
str
.
index
(
'さ'
)
assert_equal
3
,
str
.
index
(
'ち'
)
assert_equal
12
,
str
.
index
(
'ち'
,
10
)
assert_equal
nil
,
str
.
index
(
"さ"
)
end
assert
(
'String#ord'
)
do
got
=
"こんにちは世界!"
.
split
(
''
).
map
{
|
x
|
x
.
ord
}
expect
=
[
0x3053
,
0x3093
,
0x306b
,
0x3061
,
0x306f
,
0x4e16
,
0x754c
,
0x21
]
assert_equal
expect
,
got
end
assert
(
'String#split'
)
do
got
=
"こんにちは世界!"
.
split
(
''
)
assert_equal
[
'こ'
,
'ん'
,
'に'
,
'ち'
,
'は'
,
'世'
,
'界'
,
'!'
],
got
got
=
"こんにちは世界!"
.
split
(
'に'
)
assert_equal
[
'こん'
,
'ちは世界!'
],
got
end
assert
(
'String#rindex'
)
do
str
=
"こんにちは世界!
\n
こんにちは世界!"
assert_nil
str
.
index
(
'さ'
)
assert_equal
12
,
str
.
rindex
(
'ち'
)
assert_equal
3
,
str
.
rindex
(
'ち'
,
10
)
end
assert
(
'String#chr(utf-8)'
)
do
assert_equal
"こ"
,
"こんにちは世界!"
.
chr
end
assert
(
'String#chars'
)
do
expect
=
[
'こ'
,
'ん'
,
'に'
,
'ち'
,
'は'
,
'世'
,
'界'
,
'!'
]
assert_equal
expect
,
"こんにちは世界!"
.
chars
s
=
""
"こんにちは世界!"
.
chars
do
|
x
|
s
+=
x
end
assert_equal
"こんにちは世界!"
,
s
end
assert
(
'String#each_char'
)
do
expect
=
[
'こ'
,
'ん'
,
'に'
,
'ち'
,
'は'
,
'世'
,
'界'
,
'!'
]
s
=
""
"こんにちは世界!"
.
each_char
do
|
x
|
s
+=
x
end
assert_equal
"こんにちは世界!"
,
s
end
assert
(
'String#codepoints'
)
do
expect
=
[
12371
,
12435
,
12395
,
12385
,
12399
,
19990
,
30028
,
33
]
assert_equal
expect
,
"こんにちは世界!"
.
codepoints
cp
=
[]
"こんにちは世界!"
.
codepoints
do
|
x
|
cp
<<
x
end
assert_equal
expect
,
cp
end
assert
(
'String#each_codepoint'
)
do
expect
=
[
12371
,
12435
,
12395
,
12385
,
12399
,
19990
,
30028
,
33
]
cp
=
[]
"こんにちは世界!"
.
each_codepoint
do
|
x
|
cp
<<
x
end
assert_equal
expect
,
cp
end
src/string.c
View file @
798ec3af
...
@@ -16,8 +16,6 @@
...
@@ -16,8 +16,6 @@
#include "mruby/string.h"
#include "mruby/string.h"
#include "mruby/re.h"
#include "mruby/re.h"
const
char
mrb_digitmap
[]
=
"0123456789abcdefghijklmnopqrstuvwxyz"
;
typedef
struct
mrb_shared_string
{
typedef
struct
mrb_shared_string
{
mrb_bool
nofree
:
1
;
mrb_bool
nofree
:
1
;
int
refcnt
;
int
refcnt
;
...
@@ -25,198 +23,7 @@ typedef struct mrb_shared_string {
...
@@ -25,198 +23,7 @@ typedef struct mrb_shared_string {
mrb_int
len
;
mrb_int
len
;
}
mrb_shared_string
;
}
mrb_shared_string
;
static
mrb_value
str_replace
(
mrb_state
*
mrb
,
struct
RString
*
s1
,
struct
RString
*
s2
);
const
char
mrb_digitmap
[]
=
"0123456789abcdefghijklmnopqrstuvwxyz"
;
static
mrb_value
mrb_str_subseq
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_int
beg
,
mrb_int
len
);
MRB_API
mrb_int
mrb_str_strlen
(
mrb_state
*
mrb
,
struct
RString
*
s
)
{
mrb_int
i
,
max
=
RSTR_LEN
(
s
);
char
*
p
=
RSTR_PTR
(
s
);
if
(
!
p
)
return
0
;
for
(
i
=
0
;
i
<
max
;
i
++
)
{
if
(
p
[
i
]
==
'\0'
)
{
mrb_raise
(
mrb
,
E_ARGUMENT_ERROR
,
"string contains null byte"
);
}
}
return
max
;
}
#ifdef _WIN32
#include <windows.h>
char
*
mrb_utf8_from_locale
(
const
char
*
str
,
size_t
len
)
{
wchar_t
*
wcsp
;
char
*
mbsp
;
size_t
mbssize
,
wcssize
;
if
(
len
==
0
)
return
strdup
(
""
);
if
(
len
==
-
1
)
len
=
strlen
(
str
);
wcssize
=
MultiByteToWideChar
(
GetACP
(),
0
,
str
,
len
,
NULL
,
0
);
wcsp
=
(
wchar_t
*
)
malloc
((
wcssize
+
1
)
*
sizeof
(
wchar_t
));
if
(
!
wcsp
)
return
NULL
;
wcssize
=
MultiByteToWideChar
(
GetACP
(),
0
,
str
,
len
,
wcsp
,
wcssize
+
1
);
wcsp
[
wcssize
]
=
0
;
mbssize
=
WideCharToMultiByte
(
CP_UTF8
,
0
,
(
LPCWSTR
)
wcsp
,
-
1
,
NULL
,
0
,
NULL
,
NULL
);
mbsp
=
(
char
*
)
malloc
((
mbssize
+
1
));
if
(
!
mbsp
)
{
free
(
wcsp
);
return
NULL
;
}
mbssize
=
WideCharToMultiByte
(
CP_UTF8
,
0
,
(
LPCWSTR
)
wcsp
,
-
1
,
mbsp
,
mbssize
,
NULL
,
NULL
);
mbsp
[
mbssize
]
=
0
;
free
(
wcsp
);
return
mbsp
;
}
char
*
mrb_locale_from_utf8
(
const
char
*
utf8
,
size_t
len
)
{
wchar_t
*
wcsp
;
char
*
mbsp
;
size_t
mbssize
,
wcssize
;
if
(
len
==
0
)
return
strdup
(
""
);
if
(
len
==
-
1
)
len
=
strlen
(
utf8
);
wcssize
=
MultiByteToWideChar
(
CP_UTF8
,
0
,
utf8
,
len
,
NULL
,
0
);
wcsp
=
(
wchar_t
*
)
malloc
((
wcssize
+
1
)
*
sizeof
(
wchar_t
));
if
(
!
wcsp
)
return
NULL
;
wcssize
=
MultiByteToWideChar
(
CP_UTF8
,
0
,
utf8
,
len
,
wcsp
,
wcssize
+
1
);
wcsp
[
wcssize
]
=
0
;
mbssize
=
WideCharToMultiByte
(
GetACP
(),
0
,
(
LPCWSTR
)
wcsp
,
-
1
,
NULL
,
0
,
NULL
,
NULL
);
mbsp
=
(
char
*
)
malloc
((
mbssize
+
1
));
if
(
!
mbsp
)
{
free
(
wcsp
);
return
NULL
;
}
mbssize
=
WideCharToMultiByte
(
GetACP
(),
0
,
(
LPCWSTR
)
wcsp
,
-
1
,
mbsp
,
mbssize
,
NULL
,
NULL
);
mbsp
[
mbssize
]
=
0
;
free
(
wcsp
);
return
mbsp
;
}
#endif
static
inline
void
resize_capa
(
mrb_state
*
mrb
,
struct
RString
*
s
,
mrb_int
capacity
)
{
if
(
RSTR_EMBED_P
(
s
))
{
if
(
RSTRING_EMBED_LEN_MAX
<
capacity
)
{
char
*
const
tmp
=
(
char
*
)
mrb_malloc
(
mrb
,
capacity
+
1
);
const
mrb_int
len
=
RSTR_EMBED_LEN
(
s
);
memcpy
(
tmp
,
s
->
as
.
ary
,
len
);
RSTR_UNSET_EMBED_FLAG
(
s
);
s
->
as
.
heap
.
ptr
=
tmp
;
s
->
as
.
heap
.
len
=
len
;
s
->
as
.
heap
.
aux
.
capa
=
capacity
;
}
}
else
{
s
->
as
.
heap
.
ptr
=
(
char
*
)
mrb_realloc
(
mrb
,
RSTR_PTR
(
s
),
capacity
+
1
);
s
->
as
.
heap
.
aux
.
capa
=
capacity
;
}
}
static
void
str_decref
(
mrb_state
*
mrb
,
mrb_shared_string
*
shared
)
{
shared
->
refcnt
--
;
if
(
shared
->
refcnt
==
0
)
{
if
(
!
shared
->
nofree
)
{
mrb_free
(
mrb
,
shared
->
ptr
);
}
mrb_free
(
mrb
,
shared
);
}
}
static
void
check_frozen
(
mrb_state
*
mrb
,
struct
RString
*
s
)
{
if
(
RSTR_FROZEN_P
(
s
))
{
mrb_raise
(
mrb
,
E_RUNTIME_ERROR
,
"can't modify frozen string"
);
}
}
MRB_API
void
mrb_str_modify
(
mrb_state
*
mrb
,
struct
RString
*
s
)
{
check_frozen
(
mrb
,
s
);
if
(
RSTR_SHARED_P
(
s
))
{
mrb_shared_string
*
shared
=
s
->
as
.
heap
.
aux
.
shared
;
if
(
shared
->
refcnt
==
1
&&
s
->
as
.
heap
.
ptr
==
shared
->
ptr
)
{
s
->
as
.
heap
.
ptr
=
shared
->
ptr
;
s
->
as
.
heap
.
aux
.
capa
=
shared
->
len
;
RSTR_PTR
(
s
)[
s
->
as
.
heap
.
len
]
=
'\0'
;
mrb_free
(
mrb
,
shared
);
}
else
{
char
*
ptr
,
*
p
;
mrb_int
len
;
p
=
RSTR_PTR
(
s
);
len
=
s
->
as
.
heap
.
len
;
ptr
=
(
char
*
)
mrb_malloc
(
mrb
,
(
size_t
)
len
+
1
);
if
(
p
)
{
memcpy
(
ptr
,
p
,
len
);
}
ptr
[
len
]
=
'\0'
;
s
->
as
.
heap
.
ptr
=
ptr
;
s
->
as
.
heap
.
aux
.
capa
=
len
;
str_decref
(
mrb
,
shared
);
}
RSTR_UNSET_SHARED_FLAG
(
s
);
return
;
}
if
(
RSTR_NOFREE_P
(
s
))
{
char
*
p
=
s
->
as
.
heap
.
ptr
;
s
->
as
.
heap
.
ptr
=
(
char
*
)
mrb_malloc
(
mrb
,
(
size_t
)
s
->
as
.
heap
.
len
+
1
);
if
(
p
)
{
memcpy
(
RSTR_PTR
(
s
),
p
,
s
->
as
.
heap
.
len
);
}
RSTR_PTR
(
s
)[
s
->
as
.
heap
.
len
]
=
'\0'
;
s
->
as
.
heap
.
aux
.
capa
=
s
->
as
.
heap
.
len
;
RSTR_UNSET_NOFREE_FLAG
(
s
);
return
;
}
}
static
mrb_value
mrb_str_freeze
(
mrb_state
*
mrb
,
mrb_value
str
)
{
struct
RString
*
s
=
mrb_str_ptr
(
str
);
RSTR_SET_FROZEN_FLAG
(
s
);
return
str
;
}
MRB_API
mrb_value
mrb_str_resize
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_int
len
)
{
mrb_int
slen
;
struct
RString
*
s
=
mrb_str_ptr
(
str
);
mrb_str_modify
(
mrb
,
s
);
slen
=
RSTR_LEN
(
s
);
if
(
len
!=
slen
)
{
if
(
slen
<
len
||
slen
-
len
>
256
)
{
resize_capa
(
mrb
,
s
,
len
);
}
RSTR_SET_LEN
(
s
,
len
);
RSTR_PTR
(
s
)[
len
]
=
'\0'
;
/* sentinel */
}
return
str
;
}
#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
#define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
...
@@ -307,6 +114,26 @@ mrb_str_buf_new(mrb_state *mrb, size_t capa)
...
@@ -307,6 +114,26 @@ mrb_str_buf_new(mrb_state *mrb, size_t capa)
return
mrb_obj_value
(
s
);
return
mrb_obj_value
(
s
);
}
}
/*
 * Give `s` a buffer able to hold `capacity` bytes plus a terminating NUL.
 *
 * Heap strings are reallocated in place.  Embedded strings are moved to
 * the heap only when `capacity` no longer fits in the embedded array;
 * otherwise they are left untouched.
 */
static inline void
resize_capa(mrb_state *mrb, struct RString *s, mrb_int capacity)
{
  if (!RSTR_EMBED_P(s)) {
    s->as.heap.ptr = (char *)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
    s->as.heap.aux.capa = capacity;
    return;
  }

  if (capacity <= RSTRING_EMBED_LEN_MAX) {
    return;                     /* still fits in the embedded array */
  }

  {
    char *const heap_buf = (char *)mrb_malloc(mrb, capacity+1);
    const mrb_int used = RSTR_EMBED_LEN(s);

    /* copy out of the embedded array before the heap fields are written */
    memcpy(heap_buf, s->as.ary, used);
    RSTR_UNSET_EMBED_FLAG(s);
    s->as.heap.ptr = heap_buf;
    s->as.heap.len = used;
    s->as.heap.aux.capa = capacity;
  }
}
static
void
static
void
str_buf_cat
(
mrb_state
*
mrb
,
struct
RString
*
s
,
const
char
*
ptr
,
size_t
len
)
str_buf_cat
(
mrb_state
*
mrb
,
struct
RString
*
s
,
const
char
*
ptr
,
size_t
len
)
{
{
...
@@ -386,6 +213,18 @@ mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
...
@@ -386,6 +213,18 @@ mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
return
mrb_obj_value
(
s
);
return
mrb_obj_value
(
s
);
}
}
/*
 * Drop one reference to a shared string body.  When the count reaches
 * zero the character buffer is freed (unless it is marked `nofree`),
 * followed by the bookkeeping struct itself.
 */
static void
str_decref(mrb_state *mrb, mrb_shared_string *shared)
{
  shared->refcnt--;
  if (shared->refcnt != 0) {
    return;                     /* other strings still use the buffer */
  }
  if (!shared->nofree) {
    mrb_free(mrb, shared->ptr);
  }
  mrb_free(mrb, shared);
}
void
void
mrb_gc_free_str
(
mrb_state
*
mrb
,
struct
RString
*
str
)
mrb_gc_free_str
(
mrb_state
*
mrb
,
struct
RString
*
str
)
{
{
...
@@ -397,31 +236,136 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str)
...
@@ -397,31 +236,136 @@ mrb_gc_free_str(mrb_state *mrb, struct RString *str)
mrb_free
(
mrb
,
str
->
as
.
heap
.
ptr
);
mrb_free
(
mrb
,
str
->
as
.
heap
.
ptr
);
}
}
MRB_API
char
*
#ifdef MRB_UTF8_STRING
mrb_str_to_cstr
(
mrb_state
*
mrb
,
mrb_value
str0
)
static
const
char
utf8len_codepage
[
256
]
=
{
{
struct
RString
*
s
;
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
4
,
4
,
4
,
4
,
4
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
};
if
(
!
mrb_string_p
(
str0
))
{
static
mrb_int
mrb_raise
(
mrb
,
E_TYPE_ERROR
,
"expected String"
);
utf8len
(
unsigned
char
*
p
)
{
mrb_int
len
;
mrb_int
i
;
if
(
*
p
==
0
)
return
1
;
len
=
utf8len_codepage
[
*
p
];
for
(
i
=
1
;
i
<
len
;
++
i
)
if
((
p
[
i
]
&
0xc0
)
!=
0x80
)
return
1
;
return
len
;
}
static
mrb_int
utf8_strlen
(
mrb_value
str
,
mrb_int
len
)
{
mrb_int
total
=
0
;
unsigned
char
*
p
=
(
unsigned
char
*
)
RSTRING_PTR
(
str
);
unsigned
char
*
e
=
p
;
e
+=
len
<
0
?
RSTRING_LEN
(
str
)
:
len
;
while
(
p
<
e
)
{
p
+=
utf8len
(
p
);
total
++
;
}
}
return
total
;
}
s
=
str_new
(
mrb
,
RSTRING_PTR
(
str0
),
RSTRING_LEN
(
str0
));
#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
if
((
strlen
(
RSTR_PTR
(
s
))
^
RSTR_LEN
(
s
))
!=
0
)
{
mrb_raise
(
mrb
,
E_ARGUMENT_ERROR
,
"string contains null byte"
);
/* map character index to byte offset index */
static
mrb_int
chars2bytes
(
char
*
p
,
mrb_int
idx
)
{
mrb_int
i
,
b
,
n
;
for
(
b
=
i
=
0
;
i
<
idx
;
i
++
)
{
n
=
utf8len
((
unsigned
char
*
)
p
);
b
+=
n
;
p
+=
n
;
}
}
return
RSTR_PTR
(
s
)
;
return
b
;
}
}
static
void
/* map byte offset to character index */
str_make_shared
(
mrb_state
*
mrb
,
struct
RString
*
s
)
static
mrb_int
bytes2chars
(
char
*
p
,
mrb_int
bi
)
{
{
if
(
!
RSTR_SHARED_P
(
s
))
{
mrb_int
i
,
b
,
n
;
mrb_shared_string
*
shared
=
(
mrb_shared_string
*
)
mrb_malloc
(
mrb
,
sizeof
(
mrb_shared_string
));
shared
->
refcnt
=
1
;
for
(
b
=
i
=
0
;
b
<
bi
;
i
++
)
{
if
(
RSTR_EMBED_P
(
s
))
{
n
=
utf8len
((
unsigned
char
*
)
p
);
const
mrb_int
len
=
RSTR_EMBED_LEN
(
s
);
b
+=
n
;
p
+=
n
;
}
return
i
;
}
#else
#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
#define chars2bytes(p, ci) (ci)
#define bytes2chars(p, bi) (bi)
#endif
/*
 * Quick Search: find the first occurrence of the m-byte needle `xs` in
 * the n-byte haystack `ys`.
 *
 * Returns the byte offset of the match, or -1 when there is none.
 *
 * The shift for each step is chosen from the byte just past the current
 * window (ys[pos + m]).  That byte is only read while it still lies
 * inside the haystack, so the search never touches ys[n].
 */
static inline mrb_int
mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
{
  mrb_int qstable[256];         /* mrb_int: a shift can be as large as m + 1 */
  mrb_int pos, last, shift;
  int i;

  if (m > n) return -1;
  if (m < 1) return 0;

  /* Preprocessing: distance from each needle byte to one past the needle end */
  for (i = 0; i < 256; ++i)
    qstable[i] = m + 1;
  for (pos = 0; pos < m; ++pos)
    qstable[xs[pos]] = m - pos;

  /* Searching */
  last = n - m;                 /* last offset where the needle still fits */
  for (pos = 0;;) {
    if (xs[0] == ys[pos] && memcmp(xs, ys + pos, m) == 0)
      return pos;
    if (pos == last)
      break;                    /* ys[pos + m] would be ys[n]: stop here */
    shift = qstable[ys[pos + m]];
    if (shift > last - pos)
      break;                    /* next window would run past the haystack */
    pos += shift;
  }
  return -1;
}
/*
 * Locate the m-byte block `x0` inside the n-byte block `y0`.
 *
 * Returns the byte offset of the first match, -1 when there is none,
 * and 0 for an empty needle.  Needles of two or more bytes are handed
 * to mrb_memsearch_qs(); a one-byte needle is found by a plain scan.
 */
static mrb_int
mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
{
  const unsigned char *needle = (const unsigned char *)x0;
  const unsigned char *hay = (const unsigned char *)y0;
  mrb_int i;

  if (m > n) {
    return -1;
  }
  if (m == n) {
    return (memcmp(x0, y0, m) == 0) ? 0 : -1;
  }
  if (m < 1) {
    return 0;
  }
  if (m != 1) {
    return mrb_memsearch_qs(needle, m, hay, n);
  }

  /* single-byte needle */
  for (i = 0; i < n; i++) {
    if (hay[i] == *needle) {
      return i;
    }
  }
  return -1;
}
static
void
str_make_shared
(
mrb_state
*
mrb
,
struct
RString
*
s
)
{
if
(
!
RSTR_SHARED_P
(
s
))
{
mrb_shared_string
*
shared
=
(
mrb_shared_string
*
)
mrb_malloc
(
mrb
,
sizeof
(
mrb_shared_string
));
shared
->
refcnt
=
1
;
if
(
RSTR_EMBED_P
(
s
))
{
const
mrb_int
len
=
RSTR_EMBED_LEN
(
s
);
char
*
const
tmp
=
(
char
*
)
mrb_malloc
(
mrb
,
len
+
1
);
char
*
const
tmp
=
(
char
*
)
mrb_malloc
(
mrb
,
len
+
1
);
memcpy
(
tmp
,
s
->
as
.
ary
,
len
);
memcpy
(
tmp
,
s
->
as
.
ary
,
len
);
tmp
[
len
]
=
'\0'
;
tmp
[
len
]
=
'\0'
;
...
@@ -445,10 +389,343 @@ str_make_shared(mrb_state *mrb, struct RString *s)
...
@@ -445,10 +389,343 @@ str_make_shared(mrb_state *mrb, struct RString *s)
shared
->
ptr
=
s
->
as
.
heap
.
ptr
;
shared
->
ptr
=
s
->
as
.
heap
.
ptr
;
}
}
}
}
shared
->
len
=
s
->
as
.
heap
.
len
;
shared
->
len
=
s
->
as
.
heap
.
len
;
s
->
as
.
heap
.
aux
.
shared
=
shared
;
s
->
as
.
heap
.
aux
.
shared
=
shared
;
RSTR_SET_SHARED_FLAG
(
s
);
RSTR_SET_SHARED_FLAG
(
s
);
}
}
/*
 * Build a new string from `len` bytes of `str` starting at byte offset
 * `beg`.  No range checking is done here.
 *
 * An embedded source is copied with str_new().  Any other source is
 * turned into a shared string and the result points into the same
 * buffer, with the shared body's reference count incremented.
 */
static mrb_value
byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  struct RString *src = mrb_str_ptr(str);
  struct RString *sub;

  if (RSTR_EMBED_P(src)) {
    sub = str_new(mrb, src->as.ary + beg, len);
  }
  else {
    mrb_shared_string *body;

    str_make_shared(mrb, src);
    body = src->as.heap.aux.shared;
    sub = mrb_obj_alloc_string(mrb);
    sub->as.heap.ptr = src->as.heap.ptr + beg;
    sub->as.heap.len = len;
    sub->as.heap.aux.shared = body;
    RSTR_SET_SHARED_FLAG(sub);
    body->refcnt++;
  }
  return mrb_obj_value(sub);
}
#ifdef MRB_UTF8_STRING
/*
 * Character-indexed variant of byte_subseq(): `beg` and `len` are given
 * in characters and converted to byte offsets with chars2bytes() before
 * delegating.
 */
static inline mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  const mrb_int byte_beg = chars2bytes(RSTRING_PTR(str), beg);
  /* the length is measured from the start of the slice, not of the string */
  const mrb_int byte_len = chars2bytes(RSTRING_PTR(str) + byte_beg, len);

  return byte_subseq(mrb, str, byte_beg, byte_len);
}
#else
/* without UTF-8 support, characters are bytes */
#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)
#endif
/*
 * Range-checked substring: `beg` and `len` are counted in characters.
 *
 * Returns nil when `len` is negative or `beg` is outside the string.
 * `beg` equal to the length is allowed and yields an empty string.
 * A negative `beg` counts from the end of the string, and `len` is
 * clipped to the characters available after `beg`.
 */
static mrb_value
str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  mrb_int clen = RSTRING_CHAR_LEN(str);

  if (len < 0) return mrb_nil_value();
  if (beg < 0) {
    /* rebase exactly once; an index before the first character is out of range */
    beg += clen;
    if (beg < 0) return mrb_nil_value();
  }
  if (beg > clen) return mrb_nil_value();
  /* compare against the remainder so that beg + len cannot overflow */
  if (len > clen - beg)
    len = clen - beg;
  return str_subseq(mrb, str, beg, len);
}
/*
 * Byte-level forward search for `sub` in `str`, starting at byte
 * `offset`.  A negative offset is taken relative to the end of `str`.
 *
 * Returns the byte offset of the first match measured from the start of
 * `str`, or -1 when there is no match or the offset is out of range.
 * An empty `sub` matches at `offset`.
 */
static mrb_int
str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
{
  const mrb_int total = RSTRING_LEN(str);
  const mrb_int sublen = RSTRING_LEN(sub);
  mrb_int hit;

  if (offset < 0) {
    offset += total;
    if (offset < 0) return -1;
  }
  if (total - offset < sublen) return -1;   /* not enough bytes left */
  if (sublen == 0) return offset;

  hit = mrb_memsearch(RSTRING_PTR(sub), sublen,
                      RSTRING_PTR(str) + offset, total - offset);
  /* mrb_memsearch reports offsets relative to the searched tail */
  return (hit < 0) ? hit : hit + offset;
}
/* Raise RuntimeError if `s` carries the frozen flag; otherwise do nothing. */
static void
check_frozen(mrb_state *mrb, struct RString *s)
{
  if (!RSTR_FROZEN_P(s)) {
    return;
  }
  mrb_raise(mrb, E_RUNTIME_ERROR, "can't modify frozen string");
}
/*
 * Make `s1` hold the same contents as `s2`.
 *
 * Raises RuntimeError when `s1` is frozen.  The previous storage of
 * `s1` is released first (shared body dereferenced, or heap buffer
 * freed unless it is NOFREE).  Short, unshared sources are copied into
 * the embedded array of `s1`; everything else is shared with `s2`.
 *
 * Returns `s1` as an mrb_value.
 */
static mrb_value
str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
{
  mrb_int len;                  /* mrb_int, so long lengths are not truncated */

  check_frozen(mrb, s1);
  /* replacing a string with itself would release the buffer we are about to read */
  if (s1 == s2) return mrb_obj_value(s1);

  len = RSTR_LEN(s2);
  if (RSTR_SHARED_P(s1)) {
    str_decref(mrb, s1->as.heap.aux.shared);
  }
  else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) {
    mrb_free(mrb, s1->as.heap.ptr);
  }

  RSTR_UNSET_NOFREE_FLAG(s1);

  if (!RSTR_SHARED_P(s2) && len <= RSTRING_EMBED_LEN_MAX) {
    /* small enough: copy the bytes into the embedded array */
    RSTR_UNSET_SHARED_FLAG(s1);
    RSTR_SET_EMBED_FLAG(s1);
    memcpy(s1->as.ary, RSTR_PTR(s2), len);
    RSTR_SET_EMBED_LEN(s1, len);
    return mrb_obj_value(s1);
  }

  if (!RSTR_SHARED_P(s2)) {
    str_make_shared(mrb, s2);
  }
  /* point s1 at the buffer of s2 and take a reference on the shared body */
  RSTR_UNSET_EMBED_FLAG(s1);
  s1->as.heap.ptr = s2->as.heap.ptr;
  s1->as.heap.len = len;
  s1->as.heap.aux.shared = s2->as.heap.aux.shared;
  RSTR_SET_SHARED_FLAG(s1);
  s1->as.heap.aux.shared->refcnt++;

  return mrb_obj_value(s1);
}
/*
 * Byte-level backward search: find the last occurrence of `sub` in
 * `str` that starts at or before byte offset `pos`.
 *
 * `pos` is clamped so that the candidate window never extends past the
 * end of `str`.  Returns the byte offset of the match, or -1 when there
 * is none.  An empty `sub` matches at the (clamped) `pos`.
 */
static mrb_int
str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
{
  struct RString *ps = mrb_str_ptr(str);
  const mrb_int slen = RSTR_LEN(ps);
  const mrb_int len = RSTRING_LEN(sub);
  const char *base, *t;
  mrb_int i;

  /* substring longer than string */
  if (slen < len) return -1;
  if (slen - pos < len) {
    pos = slen - len;
  }
  if (len == 0) return pos;

  base = RSTR_PTR(ps);
  t = RSTRING_PTR(sub);
  /* walk by index so no pointer is ever formed before the buffer start */
  for (i = pos; i >= 0; i--) {
    if (memcmp(base + i, t, len) == 0) {
      return i;
    }
  }
  return -1;
}
/*
 * Return the byte length of `s` after checking that it can be used as a
 * C string: ArgumentError is raised if any byte within the length is
 * NUL.  A string whose data pointer is NULL has length 0.
 */
MRB_API mrb_int
mrb_str_strlen(mrb_state *mrb, struct RString *s)
{
  const mrb_int len = RSTR_LEN(s);
  const char *cur = RSTR_PTR(s);
  const char *end;

  if (cur == NULL) return 0;

  for (end = cur + len; cur < end; cur++) {
    if (*cur == '\0') {
      mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
    }
  }
  return len;
}
#ifdef _WIN32
#include <windows.h>
char
*
mrb_utf8_from_locale
(
const
char
*
str
,
size_t
len
)
{
wchar_t
*
wcsp
;
char
*
mbsp
;
size_t
mbssize
,
wcssize
;
if
(
len
==
0
)
return
strdup
(
""
);
if
(
len
==
-
1
)
len
=
strlen
(
str
);
wcssize
=
MultiByteToWideChar
(
GetACP
(),
0
,
str
,
len
,
NULL
,
0
);
wcsp
=
(
wchar_t
*
)
malloc
((
wcssize
+
1
)
*
sizeof
(
wchar_t
));
if
(
!
wcsp
)
return
NULL
;
wcssize
=
MultiByteToWideChar
(
GetACP
(),
0
,
str
,
len
,
wcsp
,
wcssize
+
1
);
wcsp
[
wcssize
]
=
0
;
mbssize
=
WideCharToMultiByte
(
CP_UTF8
,
0
,
(
LPCWSTR
)
wcsp
,
-
1
,
NULL
,
0
,
NULL
,
NULL
);
mbsp
=
(
char
*
)
malloc
((
mbssize
+
1
));
if
(
!
mbsp
)
{
free
(
wcsp
);
return
NULL
;
}
mbssize
=
WideCharToMultiByte
(
CP_UTF8
,
0
,
(
LPCWSTR
)
wcsp
,
-
1
,
mbsp
,
mbssize
,
NULL
,
NULL
);
mbsp
[
mbssize
]
=
0
;
free
(
wcsp
);
return
mbsp
;
}
char
*
mrb_locale_from_utf8
(
const
char
*
utf8
,
size_t
len
)
{
wchar_t
*
wcsp
;
char
*
mbsp
;
size_t
mbssize
,
wcssize
;
if
(
len
==
0
)
return
strdup
(
""
);
if
(
len
==
-
1
)
len
=
strlen
(
utf8
);
wcssize
=
MultiByteToWideChar
(
CP_UTF8
,
0
,
utf8
,
len
,
NULL
,
0
);
wcsp
=
(
wchar_t
*
)
malloc
((
wcssize
+
1
)
*
sizeof
(
wchar_t
));
if
(
!
wcsp
)
return
NULL
;
wcssize
=
MultiByteToWideChar
(
CP_UTF8
,
0
,
utf8
,
len
,
wcsp
,
wcssize
+
1
);
wcsp
[
wcssize
]
=
0
;
mbssize
=
WideCharToMultiByte
(
GetACP
(),
0
,
(
LPCWSTR
)
wcsp
,
-
1
,
NULL
,
0
,
NULL
,
NULL
);
mbsp
=
(
char
*
)
malloc
((
mbssize
+
1
));
if
(
!
mbsp
)
{
free
(
wcsp
);
return
NULL
;
}
mbssize
=
WideCharToMultiByte
(
GetACP
(),
0
,
(
LPCWSTR
)
wcsp
,
-
1
,
mbsp
,
mbssize
,
NULL
,
NULL
);
mbsp
[
mbssize
]
=
0
;
free
(
wcsp
);
return
mbsp
;
}
#endif
/*
 * Prepare `s` for in-place modification.
 *
 * Raises RuntimeError for a frozen string.  Afterwards `s` owns a
 * private, NUL-terminated buffer:
 *   - shared string, sole owner, pointing at the start of the shared
 *     buffer: the buffer is adopted and the shared header is freed;
 *   - any other shared string: the bytes are copied and one reference
 *     to the shared body is dropped;
 *   - NOFREE string: the bytes are copied into a new allocation.
 * Strings in none of these states are left as they are.
 */
MRB_API void
mrb_str_modify(mrb_state *mrb, struct RString *s)
{
  check_frozen(mrb, s);

  if (RSTR_SHARED_P(s)) {
    mrb_shared_string *body = s->as.heap.aux.shared;

    if (body->refcnt == 1 && s->as.heap.ptr == body->ptr) {
      /* nobody else uses the buffer: take it over */
      s->as.heap.ptr = body->ptr;
      s->as.heap.aux.capa = body->len;
      RSTR_PTR(s)[s->as.heap.len] = '\0';
      mrb_free(mrb, body);
    }
    else {
      char *const old = RSTR_PTR(s);
      const mrb_int n = s->as.heap.len;
      char *const copy = (char *)mrb_malloc(mrb, (size_t)n + 1);

      if (old) {
        memcpy(copy, old, n);
      }
      copy[n] = '\0';
      s->as.heap.ptr = copy;
      s->as.heap.aux.capa = n;
      str_decref(mrb, body);
    }
    RSTR_UNSET_SHARED_FLAG(s);
    return;
  }

  if (RSTR_NOFREE_P(s)) {
    char *const borrowed = s->as.heap.ptr;

    s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)s->as.heap.len+1);
    if (borrowed) {
      memcpy(RSTR_PTR(s), borrowed, s->as.heap.len);
    }
    RSTR_PTR(s)[s->as.heap.len] = '\0';
    s->as.heap.aux.capa = s->as.heap.len;
    RSTR_UNSET_NOFREE_FLAG(s);
    return;
  }
}
/* Set the frozen flag on `str` and return the same string. */
static mrb_value
mrb_str_freeze(mrb_state *mrb, mrb_value str)
{
  struct RString *target = mrb_str_ptr(str);

  RSTR_SET_FROZEN_FLAG(target);
  return str;
}
/*
 * Set the byte length of `str` to `len` and return `str`.
 *
 * The string is first made modifiable (mrb_str_modify).  The buffer is
 * resized when the string grows, or when it shrinks by more than 256
 * bytes; smaller shrinks keep the existing allocation.  A NUL sentinel
 * is written at the new end.
 */
MRB_API mrb_value
mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
{
  struct RString *s = mrb_str_ptr(str);
  mrb_int cur;

  mrb_str_modify(mrb, s);
  cur = RSTR_LEN(s);
  if (cur == len) {
    return str;                 /* nothing to change */
  }

  if (len > cur || cur - len > 256) {
    resize_capa(mrb, s, len);
  }
  RSTR_SET_LEN(s, len);
  RSTR_PTR(s)[len] = '\0';   /* sentinel */
  return str;
}
/*
 * Return a NUL-terminated C string with the contents of `str0`.
 *
 * The bytes are copied into a fresh string object made with str_new(),
 * and the returned pointer refers to that copy's buffer.
 *
 * Raises TypeError when `str0` is not a String, and ArgumentError when
 * the contents include a NUL byte (strlen() differs from the stored
 * length).
 */
MRB_API char*
mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
{
  struct RString *copy;
  char *cstr;

  if (!mrb_string_p(str0)) {
    mrb_raise(mrb, E_TYPE_ERROR, "expected String");
  }

  copy = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
  cstr = RSTR_PTR(copy);
  if (strlen(cstr) != (size_t)RSTR_LEN(copy)) {
    mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
  }
  return cstr;
}
/*
/*
...
@@ -519,15 +796,22 @@ mrb_str_plus_m(mrb_state *mrb, mrb_value self)
...
@@ -519,15 +796,22 @@ mrb_str_plus_m(mrb_state *mrb, mrb_value self)
/* 15.2.10.5.33 */
/* 15.2.10.5.33 */
/*
/*
* call-seq:
* call-seq:
*
len = strlen(String("abcd"))
*
"abcd".size => int
*
*
* Returns the length of string.
* Returns the length of string.
*/
*/
static
mrb_value
static
mrb_value
mrb_str_size
(
mrb_state
*
mrb
,
mrb_value
self
)
mrb_str_size
(
mrb_state
*
mrb
,
mrb_value
self
)
{
{
struct
RString
*
s
=
mrb_str_ptr
(
self
);
mrb_int
len
=
RSTRING_CHAR_LEN
(
self
);
return
mrb_fixnum_value
(
RSTR_LEN
(
s
));
return
mrb_fixnum_value
(
len
);
}
static
mrb_value
mrb_str_bytesize
(
mrb_state
*
mrb
,
mrb_value
self
)
{
mrb_int
len
=
RSTRING_LEN
(
self
);
return
mrb_fixnum_value
(
len
);
}
}
/* 15.2.10.5.1 */
/* 15.2.10.5.1 */
...
@@ -742,77 +1026,6 @@ mrb_regexp_check(mrb_state *mrb, mrb_value obj)
...
@@ -742,77 +1026,6 @@ mrb_regexp_check(mrb_state *mrb, mrb_value obj)
}
}
}
}
static
inline
mrb_int
mrb_memsearch_qs
(
const
unsigned
char
*
xs
,
mrb_int
m
,
const
unsigned
char
*
ys
,
mrb_int
n
)
{
const
unsigned
char
*
x
=
xs
,
*
xe
=
xs
+
m
;
const
unsigned
char
*
y
=
ys
;
int
i
,
qstable
[
256
];
/* Preprocessing */
for
(
i
=
0
;
i
<
256
;
++
i
)
qstable
[
i
]
=
m
+
1
;
for
(;
x
<
xe
;
++
x
)
qstable
[
*
x
]
=
xe
-
x
;
/* Searching */
for
(;
y
+
m
<=
ys
+
n
;
y
+=
*
(
qstable
+
y
[
m
]))
{
if
(
*
xs
==
*
y
&&
memcmp
(
xs
,
y
,
m
)
==
0
)
return
y
-
ys
;
}
return
-
1
;
}
static
mrb_int
mrb_memsearch
(
const
void
*
x0
,
mrb_int
m
,
const
void
*
y0
,
mrb_int
n
)
{
const
unsigned
char
*
x
=
(
const
unsigned
char
*
)
x0
,
*
y
=
(
const
unsigned
char
*
)
y0
;
if
(
m
>
n
)
return
-
1
;
else
if
(
m
==
n
)
{
return
memcmp
(
x0
,
y0
,
m
)
==
0
?
0
:
-
1
;
}
else
if
(
m
<
1
)
{
return
0
;
}
else
if
(
m
==
1
)
{
const
unsigned
char
*
ys
=
y
,
*
ye
=
ys
+
n
;
for
(;
y
<
ye
;
++
y
)
{
if
(
*
x
==
*
y
)
return
y
-
ys
;
}
return
-
1
;
}
return
mrb_memsearch_qs
((
const
unsigned
char
*
)
x0
,
m
,
(
const
unsigned
char
*
)
y0
,
n
);
}
static
mrb_int
mrb_str_index
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
sub
,
mrb_int
offset
)
{
mrb_int
pos
;
char
*
s
,
*
sptr
;
mrb_int
len
,
slen
;
len
=
RSTRING_LEN
(
str
);
slen
=
RSTRING_LEN
(
sub
);
if
(
offset
<
0
)
{
offset
+=
len
;
if
(
offset
<
0
)
return
-
1
;
}
if
(
len
-
offset
<
slen
)
return
-
1
;
s
=
RSTRING_PTR
(
str
);
if
(
offset
)
{
s
+=
offset
;
}
if
(
slen
==
0
)
return
offset
;
/* need proceed one character at a time */
sptr
=
RSTRING_PTR
(
sub
);
slen
=
RSTRING_LEN
(
sub
);
len
=
RSTRING_LEN
(
str
)
-
offset
;
pos
=
mrb_memsearch
(
sptr
,
slen
,
s
,
len
);
if
(
pos
<
0
)
return
pos
;
return
pos
+
offset
;
}
MRB_API
mrb_value
MRB_API
mrb_value
mrb_str_dup
(
mrb_state
*
mrb
,
mrb_value
str
)
mrb_str_dup
(
mrb_state
*
mrb
,
mrb_value
str
)
{
{
...
@@ -834,12 +1047,12 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
...
@@ -834,12 +1047,12 @@ mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
idx
=
mrb_fixnum
(
indx
);
idx
=
mrb_fixnum
(
indx
);
num_index:
num_index:
str
=
mrb_
str_substr
(
mrb
,
str
,
idx
,
1
);
str
=
str_substr
(
mrb
,
str
,
idx
,
1
);
if
(
!
mrb_nil_p
(
str
)
&&
RSTRING_LEN
(
str
)
==
0
)
return
mrb_nil_value
();
if
(
!
mrb_nil_p
(
str
)
&&
RSTRING_LEN
(
str
)
==
0
)
return
mrb_nil_value
();
return
str
;
return
str
;
case
MRB_TT_STRING
:
case
MRB_TT_STRING
:
if
(
mrb_
str_index
(
mrb
,
str
,
indx
,
0
)
!=
-
1
)
if
(
str_index
(
mrb
,
str
,
indx
,
0
)
!=
-
1
)
return
mrb_str_dup
(
mrb
,
indx
);
return
mrb_str_dup
(
mrb
,
indx
);
return
mrb_nil_value
();
return
mrb_nil_value
();
...
@@ -848,9 +1061,9 @@ num_index:
...
@@ -848,9 +1061,9 @@ num_index:
{
{
mrb_int
beg
,
len
;
mrb_int
beg
,
len
;
len
=
RSTRING_LEN
(
str
);
len
=
RSTRING_
CHAR_
LEN
(
str
);
if
(
mrb_range_beg_len
(
mrb
,
indx
,
&
beg
,
&
len
,
len
))
{
if
(
mrb_range_beg_len
(
mrb
,
indx
,
&
beg
,
&
len
,
len
))
{
return
mrb_
str_subseq
(
mrb
,
str
,
beg
,
len
);
return
str_subseq
(
mrb
,
str
,
beg
,
len
);
}
}
else
{
else
{
return
mrb_nil_value
();
return
mrb_nil_value
();
...
@@ -917,7 +1130,7 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
...
@@ -917,7 +1130,7 @@ mrb_str_aref_m(mrb_state *mrb, mrb_value str)
argc
=
mrb_get_args
(
mrb
,
"o|o"
,
&
a1
,
&
a2
);
argc
=
mrb_get_args
(
mrb
,
"o|o"
,
&
a1
,
&
a2
);
if
(
argc
==
2
)
{
if
(
argc
==
2
)
{
mrb_regexp_check
(
mrb
,
a1
);
mrb_regexp_check
(
mrb
,
a1
);
return
mrb_
str_substr
(
mrb
,
str
,
mrb_fixnum
(
a1
),
mrb_fixnum
(
a2
));
return
str_substr
(
mrb
,
str
,
mrb_fixnum
(
a1
),
mrb_fixnum
(
a2
));
}
}
if
(
argc
!=
1
)
{
if
(
argc
!=
1
)
{
mrb_raisef
(
mrb
,
E_ARGUMENT_ERROR
,
"wrong number of arguments (%S for 1)"
,
mrb_fixnum_value
(
argc
));
mrb_raisef
(
mrb
,
E_ARGUMENT_ERROR
,
"wrong number of arguments (%S for 1)"
,
mrb_fixnum_value
(
argc
));
...
@@ -987,7 +1200,7 @@ mrb_str_capitalize(mrb_state *mrb, mrb_value self)
...
@@ -987,7 +1200,7 @@ mrb_str_capitalize(mrb_state *mrb, mrb_value self)
/* 15.2.10.5.10 */
/* 15.2.10.5.10 */
/*
/*
* call-seq:
* call-seq:
* str.chomp!(separator=
$/
) => str or nil
* str.chomp!(separator=
"\n"
) => str or nil
*
*
* Modifies <i>str</i> in place as described for <code>String#chomp</code>,
* Modifies <i>str</i> in place as described for <code>String#chomp</code>,
* returning <i>str</i>, or <code>nil</code> if no modifications were made.
* returning <i>str</i>, or <code>nil</code> if no modifications were made.
...
@@ -1061,7 +1274,7 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
...
@@ -1061,7 +1274,7 @@ mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
/* 15.2.10.5.9 */
/* 15.2.10.5.9 */
/*
/*
* call-seq:
* call-seq:
* str.chomp(separator=
$/
) => new_str
* str.chomp(separator=
"\n"
) => new_str
*
*
* Returns a new <code>String</code> with the given record separator removed
* Returns a new <code>String</code> with the given record separator removed
* from the end of <i>str</i> (if present). If <code>$/</code> has not been
* from the end of <i>str</i> (if present). If <code>$/</code> has not been
...
@@ -1232,47 +1445,10 @@ mrb_str_eql(mrb_state *mrb, mrb_value self)
...
@@ -1232,47 +1445,10 @@ mrb_str_eql(mrb_state *mrb, mrb_value self)
return
mrb_bool_value
(
eql_p
);
return
mrb_bool_value
(
eql_p
);
}
}
static
mrb_value
mrb_str_subseq
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_int
beg
,
mrb_int
len
)
{
struct
RString
*
orig
,
*
s
;
mrb_shared_string
*
shared
;
orig
=
mrb_str_ptr
(
str
);
if
(
RSTR_EMBED_P
(
orig
))
{
s
=
str_new
(
mrb
,
orig
->
as
.
ary
+
beg
,
len
);
}
else
{
str_make_shared
(
mrb
,
orig
);
shared
=
orig
->
as
.
heap
.
aux
.
shared
;
s
=
mrb_obj_alloc_string
(
mrb
);
s
->
as
.
heap
.
ptr
=
orig
->
as
.
heap
.
ptr
+
beg
;
s
->
as
.
heap
.
len
=
len
;
s
->
as
.
heap
.
aux
.
shared
=
shared
;
RSTR_SET_SHARED_FLAG
(
s
);
shared
->
refcnt
++
;
}
return
mrb_obj_value
(
s
);
}
MRB_API
mrb_value
MRB_API
mrb_value
mrb_str_substr
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_int
beg
,
mrb_int
len
)
mrb_str_substr
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_int
beg
,
mrb_int
len
)
{
{
if
(
len
<
0
)
return
mrb_nil_value
();
return
str_substr
(
mrb
,
str
,
beg
,
len
);
if
(
!
RSTRING_LEN
(
str
))
{
len
=
0
;
}
if
(
beg
>
RSTRING_LEN
(
str
))
return
mrb_nil_value
();
if
(
beg
<
0
)
{
beg
+=
RSTRING_LEN
(
str
);
if
(
beg
<
0
)
return
mrb_nil_value
();
}
if
(
beg
+
len
>
RSTRING_LEN
(
str
))
len
=
RSTRING_LEN
(
str
)
-
beg
;
if
(
len
<=
0
)
{
len
=
0
;
}
return
mrb_str_subseq
(
mrb
,
str
,
beg
,
len
);
}
}
mrb_int
mrb_int
...
@@ -1331,7 +1507,7 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
...
@@ -1331,7 +1507,7 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
}
}
else
{
else
{
str2
=
mrb_str_to_str
(
mrb
,
str2
);
str2
=
mrb_str_to_str
(
mrb
,
str2
);
i
=
mrb_
str_index
(
mrb
,
self
,
str2
,
0
);
i
=
str_index
(
mrb
,
self
,
str2
,
0
);
include_p
=
(
i
!=
-
1
);
include_p
=
(
i
!=
-
1
);
}
}
...
@@ -1361,12 +1537,12 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
...
@@ -1361,12 +1537,12 @@ mrb_str_include(mrb_state *mrb, mrb_value self)
* "hello".index(/[aeiou]/, -3) #=> 4
* "hello".index(/[aeiou]/, -3) #=> 4
*/
*/
static
mrb_value
static
mrb_value
mrb_str_index
_m
(
mrb_state
*
mrb
,
mrb_value
str
)
mrb_str_index
(
mrb_state
*
mrb
,
mrb_value
str
)
{
{
mrb_value
*
argv
;
mrb_value
*
argv
;
mrb_int
argc
;
mrb_int
argc
;
mrb_value
sub
;
mrb_value
sub
;
mrb_int
pos
;
mrb_int
pos
,
clen
;
mrb_get_args
(
mrb
,
"*"
,
&
argv
,
&
argc
);
mrb_get_args
(
mrb
,
"*"
,
&
argv
,
&
argc
);
if
(
argc
==
2
)
{
if
(
argc
==
2
)
{
...
@@ -1381,12 +1557,15 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
...
@@ -1381,12 +1557,15 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
sub
=
mrb_nil_value
();
sub
=
mrb_nil_value
();
}
}
mrb_regexp_check
(
mrb
,
sub
);
mrb_regexp_check
(
mrb
,
sub
);
clen
=
RSTRING_CHAR_LEN
(
str
);
if
(
pos
<
0
)
{
if
(
pos
<
0
)
{
pos
+=
RSTRING_LEN
(
str
)
;
pos
+=
clen
;
if
(
pos
<
0
)
{
if
(
pos
<
0
)
{
return
mrb_nil_value
();
return
mrb_nil_value
();
}
}
}
}
if
(
pos
>=
clen
)
return
mrb_nil_value
();
pos
=
chars2bytes
(
RSTRING_PTR
(
str
),
pos
);
switch
(
mrb_type
(
sub
))
{
switch
(
mrb_type
(
sub
))
{
default:
{
default:
{
...
@@ -1400,57 +1579,17 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
...
@@ -1400,57 +1579,17 @@ mrb_str_index_m(mrb_state *mrb, mrb_value str)
}
}
/* fall through */
/* fall through */
case
MRB_TT_STRING
:
case
MRB_TT_STRING
:
pos
=
mrb_
str_index
(
mrb
,
str
,
sub
,
pos
);
pos
=
str_index
(
mrb
,
str
,
sub
,
pos
);
break
;
break
;
}
}
if
(
pos
==
-
1
)
return
mrb_nil_value
();
if
(
pos
==
-
1
)
return
mrb_nil_value
();
pos
=
bytes2chars
(
RSTRING_PTR
(
str
),
pos
);
return
mrb_fixnum_value
(
pos
);
return
mrb_fixnum_value
(
pos
);
}
}
#define STR_REPLACE_SHARED_MIN 10
#define STR_REPLACE_SHARED_MIN 10
static
mrb_value
str_replace
(
mrb_state
*
mrb
,
struct
RString
*
s1
,
struct
RString
*
s2
)
{
long
len
;
check_frozen
(
mrb
,
s1
);
len
=
RSTR_LEN
(
s2
);
if
(
RSTR_SHARED_P
(
s1
))
{
str_decref
(
mrb
,
s1
->
as
.
heap
.
aux
.
shared
);
}
else
if
(
!
RSTR_EMBED_P
(
s1
)
&&
!
RSTR_NOFREE_P
(
s1
))
{
mrb_free
(
mrb
,
s1
->
as
.
heap
.
ptr
);
}
RSTR_UNSET_NOFREE_FLAG
(
s1
);
if
(
RSTR_SHARED_P
(
s2
))
{
L_SHARE:
RSTR_UNSET_EMBED_FLAG
(
s1
);
s1
->
as
.
heap
.
ptr
=
s2
->
as
.
heap
.
ptr
;
s1
->
as
.
heap
.
len
=
len
;
s1
->
as
.
heap
.
aux
.
shared
=
s2
->
as
.
heap
.
aux
.
shared
;
RSTR_SET_SHARED_FLAG
(
s1
);
s1
->
as
.
heap
.
aux
.
shared
->
refcnt
++
;
}
else
{
if
(
len
<=
RSTRING_EMBED_LEN_MAX
)
{
RSTR_UNSET_SHARED_FLAG
(
s1
);
RSTR_SET_EMBED_FLAG
(
s1
);
memcpy
(
s1
->
as
.
ary
,
RSTR_PTR
(
s2
),
len
);
RSTR_SET_EMBED_LEN
(
s1
,
len
);
}
else
{
str_make_shared
(
mrb
,
s2
);
goto
L_SHARE
;
}
}
return
mrb_obj_value
(
s1
);
}
/* 15.2.10.5.24 */
/* 15.2.10.5.24 */
/* 15.2.10.5.28 */
/* 15.2.10.5.28 */
/*
/*
...
@@ -1570,107 +1709,81 @@ mrb_check_string_type(mrb_state *mrb, mrb_value str)
...
@@ -1570,107 +1709,81 @@ mrb_check_string_type(mrb_state *mrb, mrb_value str)
return
mrb_check_convert_type
(
mrb
,
str
,
MRB_TT_STRING
,
"String"
,
"to_str"
);
return
mrb_check_convert_type
(
mrb
,
str
,
MRB_TT_STRING
,
"String"
,
"to_str"
);
}
}
/* ---------------------------------- */
/* 15.2.10.5.30 */
/* 15.2.10.5.29 */
/*
/*
* call-seq:
* call-seq:
* str.reverse => new_str
* str.reverse! => str
*
* Returns a new string with the characters from <i>str</i> in reverse order.
*
*
*
"stressed".reverse #=> "desserts"
*
Reverses <i>str</i> in place.
*/
*/
static
mrb_value
static
mrb_value
mrb_str_reverse
(
mrb_state
*
mrb
,
mrb_value
str
)
mrb_str_reverse
_bang
(
mrb_state
*
mrb
,
mrb_value
str
)
{
{
struct
RString
*
s2
;
#ifdef MRB_UTF8_STRING
char
*
s
,
*
e
,
*
p
;
mrb_int
utf8_len
=
RSTRING_CHAR_LEN
(
str
);
mrb_int
len
=
RSTRING_LEN
(
str
);
if
(
utf8_len
==
len
)
goto
bytes
;
if
(
utf8_len
>
1
)
{
char
*
buf
;
char
*
p
,
*
e
,
*
r
;
if
(
RSTRING_LEN
(
str
)
<=
1
)
return
mrb_str_dup
(
mrb
,
str
);
mrb_str_modify
(
mrb
,
mrb_str_ptr
(
str
));
len
=
RSTRING_LEN
(
str
);
buf
=
mrb_malloc
(
mrb
,
(
size_t
)
len
);
p
=
buf
;
e
=
buf
+
len
;
s2
=
str_new
(
mrb
,
0
,
RSTRING_LEN
(
str
));
memcpy
(
buf
,
RSTRING_PTR
(
str
),
len
);
str_with_class
(
mrb
,
s2
,
str
);
r
=
RSTRING_PTR
(
str
)
+
len
;
s
=
RSTRING_PTR
(
str
);
e
=
RSTRING_END
(
str
)
-
1
;
p
=
RSTR_PTR
(
s2
);
while
(
e
>=
s
)
{
while
(
p
<
e
)
{
*
p
++
=
*
e
--
;
mrb_int
clen
=
utf8len
((
unsigned
char
*
)
p
);
r
-=
clen
;
memcpy
(
r
,
p
,
clen
);
p
+=
clen
;
}
mrb_free
(
mrb
,
buf
);
}
}
return
mrb_obj_value
(
s2
);
return
str
;
}
/* 15.2.10.5.30 */
bytes:
/*
#endif
* call-seq:
{
* str.reverse! => str
struct
RString
*
s
=
mrb_str_ptr
(
str
);
*
char
*
p
,
*
e
;
* Reverses <i>str</i> in place.
char
c
;
*/
static
mrb_value
mrb_str_reverse_bang
(
mrb_state
*
mrb
,
mrb_value
str
)
{
struct
RString
*
s
=
mrb_str_ptr
(
str
);
char
*
p
,
*
e
;
char
c
;
mrb_str_modify
(
mrb
,
s
);
mrb_str_modify
(
mrb
,
s
);
if
(
RSTR_LEN
(
s
)
>
1
)
{
if
(
RSTR_LEN
(
s
)
>
1
)
{
p
=
RSTR_PTR
(
s
);
p
=
RSTR_PTR
(
s
);
e
=
p
+
RSTR_LEN
(
s
)
-
1
;
e
=
p
+
RSTR_LEN
(
s
)
-
1
;
while
(
p
<
e
)
{
while
(
p
<
e
)
{
c
=
*
p
;
c
=
*
p
;
*
p
++
=
*
e
;
*
p
++
=
*
e
;
*
e
--
=
c
;
*
e
--
=
c
;
}
}
}
return
str
;
}
}
return
str
;
}
}
/* ---------------------------------- */
/* 15.2.10.5.29 */
/*
/*
* call-seq:
* call-seq:
* str.rindex(substring [, fixnum]) => fixnum or nil
* str.reverse => new_str
* str.rindex(fixnum [, fixnum]) => fixnum or nil
* str.rindex(regexp [, fixnum]) => fixnum or nil
*
*
* Returns the index of the last occurrence of the given <i>substring</i>,
* Returns a new string with the characters from <i>str</i> in reverse order.
* character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
* <code>nil</code> if not found. If the second parameter is present, it
* specifies the position in the string to end the search---characters beyond
* this point will not be considered.
*
*
* "hello".rindex('e') #=> 1
* "stressed".reverse #=> "desserts"
* "hello".rindex('l') #=> 3
* "hello".rindex('a') #=> nil
* "hello".rindex(101) #=> 1
* "hello".rindex(/[aeiou]/, -2) #=> 1
*/
*/
static
mrb_
int
static
mrb_
value
mrb_str_r
index
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
sub
,
mrb_int
pos
)
mrb_str_r
everse
(
mrb_state
*
mrb
,
mrb_value
str
)
{
{
char
*
s
,
*
sbeg
,
*
t
;
mrb_value
str2
=
mrb_str_dup
(
mrb
,
str
);
struct
RString
*
ps
=
mrb_str_ptr
(
str
);
mrb_str_reverse_bang
(
mrb
,
str2
);
mrb_int
len
=
RSTRING_LEN
(
sub
);
return
str2
;
/* substring longer than string */
if
(
RSTR_LEN
(
ps
)
<
len
)
return
-
1
;
if
(
RSTR_LEN
(
ps
)
-
pos
<
len
)
{
pos
=
RSTR_LEN
(
ps
)
-
len
;
}
sbeg
=
RSTR_PTR
(
ps
);
s
=
RSTR_PTR
(
ps
)
+
pos
;
t
=
RSTRING_PTR
(
sub
);
if
(
len
)
{
while
(
sbeg
<=
s
)
{
if
(
memcmp
(
s
,
t
,
len
)
==
0
)
{
return
s
-
RSTR_PTR
(
ps
);
}
s
--
;
}
return
-
1
;
}
else
{
return
pos
;
}
}
}
/* 15.2.10.5.31 */
/* 15.2.10.5.31 */
...
@@ -1693,13 +1806,13 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
...
@@ -1693,13 +1806,13 @@ mrb_str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
* "hello".rindex(/[aeiou]/, -2) #=> 1
* "hello".rindex(/[aeiou]/, -2) #=> 1
*/
*/
static
mrb_value
static
mrb_value
mrb_str_rindex
_m
(
mrb_state
*
mrb
,
mrb_value
str
)
mrb_str_rindex
(
mrb_state
*
mrb
,
mrb_value
str
)
{
{
mrb_value
*
argv
;
mrb_value
*
argv
;
mrb_int
argc
;
mrb_int
argc
;
mrb_value
sub
;
mrb_value
sub
;
mrb_value
vpos
;
mrb_value
vpos
;
mrb_int
pos
,
len
=
RSTRING_LEN
(
str
);
mrb_int
pos
,
len
=
RSTRING_
CHAR_
LEN
(
str
);
mrb_get_args
(
mrb
,
"*"
,
&
argv
,
&
argc
);
mrb_get_args
(
mrb
,
"*"
,
&
argv
,
&
argc
);
if
(
argc
==
2
)
{
if
(
argc
==
2
)
{
...
@@ -1722,6 +1835,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
...
@@ -1722,6 +1835,8 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
else
else
sub
=
mrb_nil_value
();
sub
=
mrb_nil_value
();
}
}
pos
=
chars2bytes
(
RSTRING_PTR
(
str
),
pos
);
len
=
chars2bytes
(
RSTRING_PTR
(
str
)
+
pos
,
len
);
mrb_regexp_check
(
mrb
,
sub
);
mrb_regexp_check
(
mrb
,
sub
);
switch
(
mrb_type
(
sub
))
{
switch
(
mrb_type
(
sub
))
{
...
@@ -1736,8 +1851,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
...
@@ -1736,8 +1851,11 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
}
}
/* fall through */
/* fall through */
case
MRB_TT_STRING
:
case
MRB_TT_STRING
:
pos
=
mrb_str_rindex
(
mrb
,
str
,
sub
,
pos
);
pos
=
str_rindex
(
mrb
,
str
,
sub
,
pos
);
if
(
pos
>=
0
)
return
mrb_fixnum_value
(
pos
);
if
(
pos
>=
0
)
{
pos
=
bytes2chars
(
RSTRING_PTR
(
str
),
pos
);
return
mrb_fixnum_value
(
pos
);
}
break
;
break
;
}
/* end of switch (TYPE(sub)) */
}
/* end of switch (TYPE(sub)) */
...
@@ -1748,7 +1866,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
...
@@ -1748,7 +1866,7 @@ mrb_str_rindex_m(mrb_state *mrb, mrb_value str)
/*
/*
* call-seq:
* call-seq:
* str.split(pattern=
$;
, [limit]) => anArray
* str.split(pattern=
"\n"
, [limit]) => anArray
*
*
* Divides <i>str</i> into substrings based on a delimiter, returning an array
* Divides <i>str</i> into substrings based on a delimiter, returning an array
* of these substrings.
* of these substrings.
...
@@ -1846,7 +1964,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
...
@@ -1846,7 +1964,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
}
}
}
}
else
if
(
ISSPACE
(
c
))
{
else
if
(
ISSPACE
(
c
))
{
mrb_ary_push
(
mrb
,
result
,
mrb_str
_subseq
(
mrb
,
str
,
beg
,
end
-
beg
));
mrb_ary_push
(
mrb
,
result
,
byte
_subseq
(
mrb
,
str
,
beg
,
end
-
beg
));
mrb_gc_arena_restore
(
mrb
,
ai
);
mrb_gc_arena_restore
(
mrb
,
ai
);
skip
=
TRUE
;
skip
=
TRUE
;
beg
=
idx
;
beg
=
idx
;
...
@@ -1868,9 +1986,9 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
...
@@ -1868,9 +1986,9 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
end
=
mrb_memsearch
(
RSTRING_PTR
(
spat
),
pat_len
,
RSTRING_PTR
(
str
)
+
idx
,
str_len
-
idx
);
end
=
mrb_memsearch
(
RSTRING_PTR
(
spat
),
pat_len
,
RSTRING_PTR
(
str
)
+
idx
,
str_len
-
idx
);
if
(
end
<
0
)
break
;
if
(
end
<
0
)
break
;
}
else
{
}
else
{
end
=
1
;
end
=
chars2bytes
(
RSTRING_PTR
(
str
)
+
idx
,
1
)
;
}
}
mrb_ary_push
(
mrb
,
result
,
mrb_str
_subseq
(
mrb
,
str
,
idx
,
end
));
mrb_ary_push
(
mrb
,
result
,
byte
_subseq
(
mrb
,
str
,
idx
,
end
));
mrb_gc_arena_restore
(
mrb
,
ai
);
mrb_gc_arena_restore
(
mrb
,
ai
);
idx
+=
end
+
pat_len
;
idx
+=
end
+
pat_len
;
if
(
lim_p
&&
lim
<=
++
i
)
break
;
if
(
lim_p
&&
lim
<=
++
i
)
break
;
...
@@ -1885,7 +2003,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
...
@@ -1885,7 +2003,7 @@ mrb_str_split_m(mrb_state *mrb, mrb_value str)
tmp
=
mrb_str_new_empty
(
mrb
,
str
);
tmp
=
mrb_str_new_empty
(
mrb
,
str
);
}
}
else
{
else
{
tmp
=
mrb_str
_subseq
(
mrb
,
str
,
beg
,
RSTRING_LEN
(
str
)
-
beg
);
tmp
=
byte
_subseq
(
mrb
,
str
,
beg
,
RSTRING_LEN
(
str
)
-
beg
);
}
}
mrb_ary_push
(
mrb
,
result
,
tmp
);
mrb_ary_push
(
mrb
,
result
,
tmp
);
}
}
...
@@ -2533,7 +2651,7 @@ mrb_init_string(mrb_state *mrb)
...
@@ -2533,7 +2651,7 @@ mrb_init_string(mrb_state *mrb)
s
=
mrb
->
string_class
=
mrb_define_class
(
mrb
,
"String"
,
mrb
->
object_class
);
/* 15.2.10 */
s
=
mrb
->
string_class
=
mrb_define_class
(
mrb
,
"String"
,
mrb
->
object_class
);
/* 15.2.10 */
MRB_SET_INSTANCE_TT
(
s
,
MRB_TT_STRING
);
MRB_SET_INSTANCE_TT
(
s
,
MRB_TT_STRING
);
mrb_define_method
(
mrb
,
s
,
"bytesize"
,
mrb_str_
size
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"bytesize"
,
mrb_str_
bytesize
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"<=>"
,
mrb_str_cmp_m
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.1 */
mrb_define_method
(
mrb
,
s
,
"<=>"
,
mrb_str_cmp_m
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.1 */
mrb_define_method
(
mrb
,
s
,
"=="
,
mrb_str_equal_m
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.2 */
mrb_define_method
(
mrb
,
s
,
"=="
,
mrb_str_equal_m
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.2 */
...
@@ -2553,7 +2671,7 @@ mrb_init_string(mrb_state *mrb)
...
@@ -2553,7 +2671,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method
(
mrb
,
s
,
"hash"
,
mrb_str_hash_m
,
MRB_ARGS_NONE
());
/* 15.2.10.5.20 */
mrb_define_method
(
mrb
,
s
,
"hash"
,
mrb_str_hash_m
,
MRB_ARGS_NONE
());
/* 15.2.10.5.20 */
mrb_define_method
(
mrb
,
s
,
"include?"
,
mrb_str_include
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.21 */
mrb_define_method
(
mrb
,
s
,
"include?"
,
mrb_str_include
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.21 */
mrb_define_method
(
mrb
,
s
,
"index"
,
mrb_str_index
_m
,
MRB_ARGS_ANY
());
/* 15.2.10.5.22 */
mrb_define_method
(
mrb
,
s
,
"index"
,
mrb_str_index
,
MRB_ARGS_ANY
());
/* 15.2.10.5.22 */
mrb_define_method
(
mrb
,
s
,
"initialize"
,
mrb_str_init
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.23 */
mrb_define_method
(
mrb
,
s
,
"initialize"
,
mrb_str_init
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.23 */
mrb_define_method
(
mrb
,
s
,
"initialize_copy"
,
mrb_str_replace
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.24 */
mrb_define_method
(
mrb
,
s
,
"initialize_copy"
,
mrb_str_replace
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.24 */
mrb_define_method
(
mrb
,
s
,
"intern"
,
mrb_str_intern
,
MRB_ARGS_NONE
());
/* 15.2.10.5.25 */
mrb_define_method
(
mrb
,
s
,
"intern"
,
mrb_str_intern
,
MRB_ARGS_NONE
());
/* 15.2.10.5.25 */
...
@@ -2561,7 +2679,7 @@ mrb_init_string(mrb_state *mrb)
...
@@ -2561,7 +2679,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method
(
mrb
,
s
,
"replace"
,
mrb_str_replace
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.28 */
mrb_define_method
(
mrb
,
s
,
"replace"
,
mrb_str_replace
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.28 */
mrb_define_method
(
mrb
,
s
,
"reverse"
,
mrb_str_reverse
,
MRB_ARGS_NONE
());
/* 15.2.10.5.29 */
mrb_define_method
(
mrb
,
s
,
"reverse"
,
mrb_str_reverse
,
MRB_ARGS_NONE
());
/* 15.2.10.5.29 */
mrb_define_method
(
mrb
,
s
,
"reverse!"
,
mrb_str_reverse_bang
,
MRB_ARGS_NONE
());
/* 15.2.10.5.30 */
mrb_define_method
(
mrb
,
s
,
"reverse!"
,
mrb_str_reverse_bang
,
MRB_ARGS_NONE
());
/* 15.2.10.5.30 */
mrb_define_method
(
mrb
,
s
,
"rindex"
,
mrb_str_rindex
_m
,
MRB_ARGS_ANY
());
/* 15.2.10.5.31 */
mrb_define_method
(
mrb
,
s
,
"rindex"
,
mrb_str_rindex
,
MRB_ARGS_ANY
());
/* 15.2.10.5.31 */
mrb_define_method
(
mrb
,
s
,
"size"
,
mrb_str_size
,
MRB_ARGS_NONE
());
/* 15.2.10.5.33 */
mrb_define_method
(
mrb
,
s
,
"size"
,
mrb_str_size
,
MRB_ARGS_NONE
());
/* 15.2.10.5.33 */
mrb_define_method
(
mrb
,
s
,
"slice"
,
mrb_str_aref_m
,
MRB_ARGS_ANY
());
/* 15.2.10.5.34 */
mrb_define_method
(
mrb
,
s
,
"slice"
,
mrb_str_aref_m
,
MRB_ARGS_ANY
());
/* 15.2.10.5.34 */
mrb_define_method
(
mrb
,
s
,
"split"
,
mrb_str_split_m
,
MRB_ARGS_ANY
());
/* 15.2.10.5.35 */
mrb_define_method
(
mrb
,
s
,
"split"
,
mrb_str_split_m
,
MRB_ARGS_ANY
());
/* 15.2.10.5.35 */
...
...
test/t/string.rb
View file @
798ec3af
##
##
# String ISO Test
# String ISO Test
UTF8STRING
=
(
"
\343\201\202
"
.
size
==
1
)
assert
(
'String'
,
'15.2.10'
)
do
assert
(
'String'
,
'15.2.10'
)
do
assert_equal
Class
,
String
.
class
assert_equal
Class
,
String
.
class
end
end
...
@@ -60,23 +62,32 @@ assert('String#[]', '15.2.10.5.6') do
...
@@ -60,23 +62,32 @@ assert('String#[]', '15.2.10.5.6') do
a3
=
'abc'
[
'bc'
]
a3
=
'abc'
[
'bc'
]
b3
=
'abc'
[
'XX'
]
b3
=
'abc'
[
'XX'
]
assert_equal
'a'
,
a
assert_equal
'a'
,
'a'
assert_equal
'c'
,
b
# assert_equal 'c', b
assert_nil
c
# assert_nil c
assert_nil
d
# assert_nil d
assert_equal
'b'
,
e
# assert_equal 'b', e
assert_nil
a1
# assert_nil a1
assert_nil
b1
# assert_nil b1
assert_nil
c1
# assert_nil c1
assert_equal
''
,
d1
# assert_equal '', d1
assert_equal
'bc'
,
e1
# assert_equal 'bc', e1
assert_equal
'bc'
,
a3
# assert_equal 'bc', a3
assert_nil
b3
# assert_nil b3
assert_raise
(
TypeError
)
do
# assert_raise(TypeError) do
a
[
nil
]
# a[nil]
end
# end
end
end
assert
(
'String#[](UTF-8)'
,
'15.2.10.5.6'
)
do
assert_equal
"ち"
,
"こんにちは世界"
[
3
]
assert_equal
nil
,
"こんにちは世界"
[
20
]
assert_equal
"世"
,
"こんにちは世界"
[
-
2
]
assert_equal
"世界"
,
"こんにちは世界"
[
-
2
..-
1
]
assert_equal
"んに"
,
"こんにちは世界"
[
1
,
2
]
assert_equal
"世"
,
"こんにちは世界"
[
"世"
]
end
if
UTF8STRING
assert
(
'String#[] with Range'
)
do
assert
(
'String#[] with Range'
)
do
a1
=
'abc'
[
1
..
0
]
a1
=
'abc'
[
1
..
0
]
...
@@ -411,6 +422,15 @@ assert('String#reverse', '15.2.10.5.29') do
...
@@ -411,6 +422,15 @@ assert('String#reverse', '15.2.10.5.29') do
assert_equal
'cba'
,
'abc'
.
reverse
assert_equal
'cba'
,
'abc'
.
reverse
end
end
assert
(
'String#reverse(UTF-8)'
,
'15.2.10.5.29'
)
do
assert_equal
"ち"
,
"こんにちは世界"
[
3
]
assert_equal
nil
,
"こんにちは世界"
[
20
]
assert_equal
"世"
,
"こんにちは世界"
[
-
2
]
assert_equal
"世界"
,
"こんにちは世界"
[
-
2
..-
1
]
assert_equal
"んに"
,
"こんにちは世界"
[
1
,
2
]
assert_equal
"世"
,
"こんにちは世界"
[
"世"
]
end
if
UTF8STRING
assert
(
'String#reverse!'
,
'15.2.10.5.30'
)
do
assert
(
'String#reverse!'
,
'15.2.10.5.30'
)
do
a
=
'abc'
a
=
'abc'
a
.
reverse!
a
.
reverse!
...
@@ -419,6 +439,14 @@ assert('String#reverse!', '15.2.10.5.30') do
...
@@ -419,6 +439,14 @@ assert('String#reverse!', '15.2.10.5.30') do
assert_equal
'cba'
,
'abc'
.
reverse!
assert_equal
'cba'
,
'abc'
.
reverse!
end
end
assert
(
'String#reverse!(UTF-8)'
,
'15.2.10.5.30'
)
do
a
=
'こんにちは世界!'
a
.
reverse!
assert_equal
'!界世はちにんこ'
,
a
assert_equal
'!界世はちにんこ'
,
'こんにちは世界!'
.
reverse!
end
if
UTF8STRING
assert
(
'String#rindex'
,
'15.2.10.5.31'
)
do
assert
(
'String#rindex'
,
'15.2.10.5.31'
)
do
assert_equal
0
,
'abc'
.
rindex
(
'a'
)
assert_equal
0
,
'abc'
.
rindex
(
'a'
)
assert_nil
'abc'
.
rindex
(
'd'
)
assert_nil
'abc'
.
rindex
(
'd'
)
...
@@ -426,12 +454,27 @@ assert('String#rindex', '15.2.10.5.31') do
...
@@ -426,12 +454,27 @@ assert('String#rindex', '15.2.10.5.31') do
assert_equal
3
,
'abcabc'
.
rindex
(
'a'
,
4
)
assert_equal
3
,
'abcabc'
.
rindex
(
'a'
,
4
)
end
end
assert
(
'String#rindex(UTF-8)'
,
'15.2.10.5.31'
)
do
str
=
"こんにちは世界!
\n
こんにちは世界!"
assert_nil
str
.
index
(
'さ'
)
assert_equal
3
,
str
.
index
(
'ち'
)
assert_equal
12
,
str
.
index
(
'ち'
,
10
)
assert_equal
nil
,
str
.
index
(
"さ"
)
end
if
UTF8STRING
# 'String#scan', '15.2.10.5.32' will be tested in mrbgems.
# 'String#scan', '15.2.10.5.32' will be tested in mrbgems.
assert
(
'String#size'
,
'15.2.10.5.33'
)
do
assert
(
'String#size'
,
'15.2.10.5.33'
)
do
assert_equal
3
,
'abc'
.
size
assert_equal
3
,
'abc'
.
size
end
end
assert
(
'String#size(UTF-8)'
,
'15.2.10.5.33'
)
do
str
=
'こんにちは世界!'
assert_equal
8
,
str
.
size
assert_not_equal
str
.
bytesize
,
str
.
size
assert_equal
2
,
str
[
1
,
2
].
size
end
if
UTF8STRING
assert
(
'String#slice'
,
'15.2.10.5.34'
)
do
assert
(
'String#slice'
,
'15.2.10.5.34'
)
do
# length of args is 1
# length of args is 1
a
=
'abc'
.
slice
(
0
)
a
=
'abc'
.
slice
(
0
)
...
@@ -479,6 +522,13 @@ assert('String#split', '15.2.10.5.35') do
...
@@ -479,6 +522,13 @@ assert('String#split', '15.2.10.5.35') do
assert_equal
[
'a'
,
'b'
,
'c'
],
'abc'
.
split
(
""
)
assert_equal
[
'a'
,
'b'
,
'c'
],
'abc'
.
split
(
""
)
end
end
assert
(
'String#split(UTF-8)'
,
'15.2.10.5.35'
)
do
got
=
"こんにちは世界!"
.
split
(
''
)
assert_equal
[
'こ'
,
'ん'
,
'に'
,
'ち'
,
'は'
,
'世'
,
'界'
,
'!'
],
got
got
=
"こんにちは世界!"
.
split
(
'に'
)
assert_equal
[
'こん'
,
'ちは世界!'
],
got
end
if
UTF8STRING
assert
(
'String#sub'
,
'15.2.10.5.36'
)
do
assert
(
'String#sub'
,
'15.2.10.5.36'
)
do
assert_equal
'aBcabc'
,
'abcabc'
.
sub
(
'b'
,
'B'
)
assert_equal
'aBcabc'
,
'abcabc'
.
sub
(
'b'
,
'B'
)
assert_equal
'aBcabc'
,
'abcabc'
.
sub
(
'b'
)
{
|
w
|
w
.
capitalize
}
assert_equal
'aBcabc'
,
'abcabc'
.
sub
(
'b'
)
{
|
w
|
w
.
capitalize
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment