Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mruby
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Libraries
mruby
Commits
ff5ec824
Unverified
Commit
ff5ec824
authored
Jun 29, 2019
by
Yukihiro "Matz" Matsumoto
Committed by
GitHub
Jun 29, 2019
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #4541 from dearblue/replace-string-aset
Replace `String#[]=` method by C implements
parents
4f4e2400
0ad1cacf
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
236 additions
and
96 deletions
+236
-96
mrblib/string.rb
mrblib/string.rb
+0
-54
src/string.c
src/string.c
+186
-42
test/t/string.rb
test/t/string.rb
+50
-0
No files found.
mrblib/string.rb
View file @
ff5ec824
...
...
@@ -177,60 +177,6 @@ class String
self
end
##
# Modify +self+ by replacing the content of +self+.
# The portion of the string affected is determined using the same criteria as +String#[]+.
def
[]=
(
*
args
)
anum
=
args
.
size
if
anum
==
2
pos
,
value
=
args
[
0
],
args
[
1
].
__to_str
case
pos
when
String
posnum
=
self
.
index
(
pos
)
if
posnum
b
=
self
[
0
,
posnum
]
a
=
self
[(
posnum
+
pos
.
length
)
..-
1
]
self
.
replace
([
b
,
value
,
a
].
join
(
''
))
else
raise
IndexError
,
"string not matched"
end
when
Range
head
=
pos
.
begin
tail
=
pos
.
end
tail
+=
self
.
length
if
tail
<
0
unless
pos
.
exclude_end?
tail
+=
1
end
return
self
[
head
,
tail
-
head
]
=
value
else
pos
=
pos
.
__to_int
pos
+=
self
.
length
if
pos
<
0
if
pos
<
0
||
pos
>
self
.
length
raise
IndexError
,
"index
#{
args
[
0
]
}
out of string"
end
b
=
self
[
0
,
pos
]
a
=
self
[
pos
+
1
..-
1
]
self
.
replace
([
b
,
value
,
a
].
join
(
''
))
end
return
value
elsif
anum
==
3
pos
,
len
,
value
=
args
[
0
].
__to_int
,
args
[
1
].
__to_int
,
args
[
2
].
__to_str
pos
+=
self
.
length
if
pos
<
0
if
pos
<
0
||
pos
>
self
.
length
raise
IndexError
,
"index
#{
args
[
0
]
}
out of string"
end
if
len
<
0
raise
IndexError
,
"negative length
#{
len
}
"
end
b
=
self
[
0
,
pos
]
a
=
self
[
pos
+
len
..-
1
]
self
.
replace
([
b
,
value
,
a
].
join
(
''
))
return
value
else
raise
ArgumentError
,
"wrong number of arguments (
#{
anum
}
for 2..3)"
end
end
# those two methods requires Regexp that is optional in mruby
##
# ISO 15.2.10.5.3
...
...
src/string.c
View file @
ff5ec824
...
...
@@ -427,13 +427,18 @@ mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
}
return
mrb_obj_value
(
s
);
}
static
void
str_range_to_bytes
(
mrb_value
str
,
mrb_int
*
pos
,
mrb_int
*
len
)
{
*
pos
=
chars2bytes
(
str
,
0
,
*
pos
);
*
len
=
chars2bytes
(
str
,
*
pos
,
*
len
);
}
#ifdef MRB_UTF8_STRING
static
inline
mrb_value
str_subseq
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_int
beg
,
mrb_int
len
)
{
beg
=
chars2bytes
(
str
,
0
,
beg
);
len
=
chars2bytes
(
str
,
beg
,
len
);
str_range_to_bytes
(
str
,
&
beg
,
&
len
);
return
mrb_str_byte_subseq
(
mrb
,
str
,
beg
,
len
);
}
#else
...
...
@@ -1010,51 +1015,91 @@ mrb_str_dup(mrb_state *mrb, mrb_value str)
return
str_replace
(
mrb
,
dup
,
s
);
}
static
mrb_value
mrb_str_aref
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
indx
)
{
mrb_int
idx
;
enum
str_convert_range
{
/* `beg` and `len` are byte unit in `0 ... str.bytesize` */
STR_BYTE_RANGE_CORRECTED
=
1
,
switch
(
mrb_type
(
indx
))
{
case
MRB_TT_FIXNUM
:
idx
=
mrb_fixnum
(
indx
);
/* `beg` and `len` are char unit in any range */
STR_CHAR_RANGE
=
2
,
num_index:
str
=
str_substr
(
mrb
,
str
,
idx
,
1
);
if
(
!
mrb_nil_p
(
str
)
&&
RSTRING_LEN
(
str
)
==
0
)
return
mrb_nil_value
();
return
str
;
/* `beg` and `len` are char unit in `0 ... str.size` */
STR_CHAR_RANGE_CORRECTED
=
3
,
case
MRB_TT_STRING
:
if
(
str_index_str
(
mrb
,
str
,
indx
,
0
)
!=
-
1
)
return
mrb_str_dup
(
mrb
,
indx
);
return
mrb_nil_value
();
/* `beg` is out of range */
STR_OUT_OF_RANGE
=
-
1
};
static
enum
str_convert_range
str_convert_range
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
indx
,
mrb_value
alen
,
mrb_int
*
beg
,
mrb_int
*
len
)
{
if
(
!
mrb_undef_p
(
alen
))
{
*
beg
=
mrb_int
(
mrb
,
indx
);
*
len
=
mrb_int
(
mrb
,
alen
);
return
STR_CHAR_RANGE
;
}
else
{
switch
(
mrb_type
(
indx
))
{
case
MRB_TT_FIXNUM
:
*
beg
=
mrb_fixnum
(
indx
);
*
len
=
1
;
return
STR_CHAR_RANGE
;
case
MRB_TT_RANGE
:
goto
range_arg
;
case
MRB_TT_STRING
:
*
beg
=
str_index_str
(
mrb
,
str
,
indx
,
0
);
if
(
*
beg
<
0
)
{
break
;
}
*
len
=
RSTRING_LEN
(
indx
);
return
STR_BYTE_RANGE_CORRECTED
;
default:
indx
=
mrb_Integer
(
mrb
,
indx
);
if
(
mrb_nil_p
(
indx
))
{
range_arg:
{
mrb_int
beg
,
len
;
len
=
RSTRING_CHAR_LEN
(
str
);
switch
(
mrb_range_beg_len
(
mrb
,
indx
,
&
beg
,
&
len
,
len
,
TRUE
))
{
case
MRB_TT_RANGE
:
goto
range_arg
;
default:
indx
=
mrb_to_int
(
mrb
,
indx
);
if
(
mrb_fixnum_p
(
indx
))
{
*
beg
=
mrb_fixnum
(
indx
);
*
len
=
1
;
return
STR_CHAR_RANGE
;
}
range_arg:
*
len
=
RSTRING_CHAR_LEN
(
str
);
switch
(
mrb_range_beg_len
(
mrb
,
indx
,
beg
,
len
,
*
len
,
TRUE
))
{
case
MRB_RANGE_OK
:
return
str_subseq
(
mrb
,
str
,
beg
,
len
)
;
return
STR_CHAR_RANGE_CORRECTED
;
case
MRB_RANGE_OUT
:
return
mrb_nil_value
()
;
return
STR_OUT_OF_RANGE
;
default:
break
;
}
}
mrb_raise
(
mrb
,
E_TYPE_ERROR
,
"can't convert to Fixnum"
);
}
}
return
STR_OUT_OF_RANGE
;
}
static
mrb_value
mrb_str_aref
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
indx
,
mrb_value
alen
)
{
mrb_int
beg
,
len
;
switch
(
str_convert_range
(
mrb
,
str
,
indx
,
alen
,
&
beg
,
&
len
))
{
case
STR_CHAR_RANGE_CORRECTED
:
return
str_subseq
(
mrb
,
str
,
beg
,
len
);
case
STR_CHAR_RANGE
:
str
=
str_substr
(
mrb
,
str
,
beg
,
len
);
if
(
mrb_undef_p
(
alen
)
&&
!
mrb_nil_p
(
str
)
&&
RSTRING_LEN
(
str
)
==
0
)
return
mrb_nil_value
();
return
str
;
case
STR_BYTE_RANGE_CORRECTED
:
if
(
mrb_string_p
(
indx
))
{
return
mrb_str_dup
(
mrb
,
indx
);
}
idx
=
mrb_fixnum
(
indx
);
goto
num_index
;
else
{
return
mrb_str_byte_subseq
(
mrb
,
str
,
beg
,
len
);
}
case
STR_OUT_OF_RANGE
:
default:
return
mrb_nil_value
();
}
return
mrb_nil_value
();
/* not reached */
}
/* 15.2.10.5.6 */
...
...
@@ -1101,16 +1146,114 @@ static mrb_value
mrb_str_aref_m
(
mrb_state
*
mrb
,
mrb_value
str
)
{
mrb_value
a1
,
a2
;
mrb_int
argc
;
argc
=
mrb_get_args
(
mrb
,
"o|o"
,
&
a1
,
&
a2
);
if
(
argc
==
2
)
{
mrb_int
n1
,
n2
;
if
(
mrb_get_args
(
mrb
,
"o|o"
,
&
a1
,
&
a2
)
==
1
)
{
a2
=
mrb_undef_value
();
}
mrb_get_args
(
mrb
,
"ii"
,
&
n1
,
&
n2
);
return
str_substr
(
mrb
,
str
,
n1
,
n2
);
return
mrb_str_aref
(
mrb
,
str
,
a1
,
a2
);
}
static
mrb_noreturn
void
str_out_of_index
(
mrb_state
*
mrb
,
mrb_value
index
)
{
mrb_raisef
(
mrb
,
E_INDEX_ERROR
,
"index %S out of string"
,
index
);
}
static
mrb_value
str_replace_partial
(
mrb_state
*
mrb
,
mrb_value
src
,
mrb_int
pos
,
mrb_int
end
,
mrb_value
rep
)
{
const
mrb_int
shrink_threshold
=
256
;
struct
RString
*
str
=
mrb_str_ptr
(
src
);
mrb_int
len
=
RSTR_LEN
(
str
);
mrb_int
replen
,
newlen
;
char
*
strp
;
if
(
end
>
len
)
{
end
=
len
;
}
if
(
pos
<
0
||
pos
>
len
)
{
str_out_of_index
(
mrb
,
mrb_fixnum_value
(
pos
));
}
replen
=
(
mrb_nil_p
(
rep
)
?
0
:
RSTRING_LEN
(
rep
));
newlen
=
replen
+
len
-
(
end
-
pos
);
if
(
newlen
>=
MRB_INT_MAX
||
newlen
<
replen
/* overflowed */
)
{
mrb_raise
(
mrb
,
E_RUNTIME_ERROR
,
"string size too big"
);
}
mrb_str_modify
(
mrb
,
str
);
if
(
len
<
newlen
||
len
-
newlen
>=
shrink_threshold
)
{
resize_capa
(
mrb
,
str
,
newlen
);
}
strp
=
RSTR_PTR
(
str
);
memmove
(
strp
+
newlen
-
(
len
-
end
),
strp
+
end
,
len
-
end
);
if
(
!
mrb_nil_p
(
rep
))
{
memcpy
(
strp
+
pos
,
RSTRING_PTR
(
rep
),
replen
);
}
return
mrb_str_aref
(
mrb
,
str
,
a1
);
RSTR_SET_LEN
(
str
,
newlen
);
strp
[
newlen
]
=
'\0'
;
return
src
;
}
static
void
mrb_str_aset
(
mrb_state
*
mrb
,
mrb_value
str
,
mrb_value
indx
,
mrb_value
alen
,
mrb_value
replace
)
{
mrb_int
beg
,
len
,
charlen
;
replace
=
mrb_to_str
(
mrb
,
replace
);
switch
(
str_convert_range
(
mrb
,
str
,
indx
,
alen
,
&
beg
,
&
len
))
{
case
STR_OUT_OF_RANGE
:
default:
mrb_raise
(
mrb
,
E_INDEX_ERROR
,
"string not matched"
);
case
STR_CHAR_RANGE
:
if
(
len
<
0
)
{
mrb_raisef
(
mrb
,
E_INDEX_ERROR
,
"negative length %S"
,
alen
);
}
charlen
=
RSTRING_CHAR_LEN
(
str
);
if
(
beg
<
0
)
{
beg
+=
charlen
;
}
if
(
beg
<
0
||
beg
>
charlen
)
{
str_out_of_index
(
mrb
,
indx
);
}
/* fall through */
case
STR_CHAR_RANGE_CORRECTED
:
str_range_to_bytes
(
str
,
&
beg
,
&
len
);
/* fall through */
case
STR_BYTE_RANGE_CORRECTED
:
str_replace_partial
(
mrb
,
str
,
beg
,
beg
+
len
,
replace
);
}
}
/*
* call-seq:
* str[fixnum] = replace
* str[fixnum, fixnum] = replace
* str[range] = replace
* str[regexp] = replace
* str[regexp, fixnum] = replace
* str[other_str] = replace
*
* Modify +self+ by replacing the content of +self+.
* The portion of the string affected is determined using the same criteria as +String#[]+.
*/
static
mrb_value
mrb_str_aset_m
(
mrb_state
*
mrb
,
mrb_value
str
)
{
mrb_value
indx
,
alen
,
replace
;
switch
(
mrb_get_args
(
mrb
,
"oo|S!"
,
&
indx
,
&
alen
,
&
replace
))
{
case
2
:
replace
=
alen
;
alen
=
mrb_undef_value
();
break
;
case
3
:
break
;
}
mrb_str_aset
(
mrb
,
str
,
indx
,
alen
,
replace
);
return
str
;
}
/* 15.2.10.5.8 */
...
...
@@ -2678,6 +2821,7 @@ mrb_init_string(mrb_state *mrb)
mrb_define_method
(
mrb
,
s
,
"+"
,
mrb_str_plus_m
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.4 */
mrb_define_method
(
mrb
,
s
,
"*"
,
mrb_str_times
,
MRB_ARGS_REQ
(
1
));
/* 15.2.10.5.5 */
mrb_define_method
(
mrb
,
s
,
"[]"
,
mrb_str_aref_m
,
MRB_ARGS_ANY
());
/* 15.2.10.5.6 */
mrb_define_method
(
mrb
,
s
,
"[]="
,
mrb_str_aset_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"capitalize"
,
mrb_str_capitalize
,
MRB_ARGS_NONE
());
/* 15.2.10.5.7 */
mrb_define_method
(
mrb
,
s
,
"capitalize!"
,
mrb_str_capitalize_bang
,
MRB_ARGS_NONE
());
/* 15.2.10.5.8 */
mrb_define_method
(
mrb
,
s
,
"chomp"
,
mrb_str_chomp
,
MRB_ARGS_ANY
());
/* 15.2.10.5.9 */
...
...
test/t/string.rb
View file @
ff5ec824
...
...
@@ -209,6 +209,56 @@ assert('String#[]=') do
assert_raise
(
TypeError
)
{
'a'
[
0
,
1
]
=
1
}
end
assert
(
'String[]=(UTF-8)'
)
do
a
=
"➀➁➂➃➄"
a
[
3
]
=
"⚃"
assert_equal
"➀➁➂⚃➄"
,
a
b
=
"➀➁➂➃➄"
b
[
3
,
0
]
=
"⛄"
assert_equal
"➀➁➂⛄➃➄"
,
b
c
=
"➀➁➂➃➄"
c
[
3
,
2
]
=
"⚃⚄"
assert_equal
"➀➁➂⚃⚄"
,
c
d
=
"➀➁➂➃➄"
d
[
5
]
=
"⛄"
assert_equal
"➀➁➂➃➄⛄"
,
d
e
=
"➀➁➂➃➄"
e
[
5
,
0
]
=
"⛄"
assert_equal
"➀➁➂➃➄⛄"
,
e
f
=
"➀➁➂➃➄"
f
[
5
,
2
]
=
"⛄"
assert_equal
"➀➁➂➃➄⛄"
,
f
g
=
"➀➁➂➃➄"
assert_raise
(
IndexError
)
{
g
[
6
]
=
"⛄"
}
h
=
"➀➁➂➃➄"
assert_raise
(
IndexError
)
{
h
[
6
,
0
]
=
"⛄"
}
i
=
"➀➁➂➃➄"
assert_raise
(
IndexError
)
{
i
[
6
,
2
]
=
"⛄"
}
j
=
"➀➁➂➃➄"
j
[
"➃"
]
=
"⚃"
assert_equal
"➀➁➂⚃➄"
,
j
k
=
"➀➁➂➃➄"
assert_raise
(
IndexError
)
{
k
[
"⛄"
]
=
"⛇"
}
l
=
"➀➁➂➃➄"
assert_nothing_raised
{
l
[
"➂"
]
=
""
}
assert_equal
"➀➁➃➄"
,
l
m
=
"➀➁➂➃➄"
assert_raise
(
TypeError
)
{
m
[
"➂"
]
=
nil
}
assert_equal
"➀➁➂➃➄"
,
m
end
if
UTF8STRING
assert
(
'String#capitalize'
,
'15.2.10.5.7'
)
do
a
=
'abc'
a
.
capitalize
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment