Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mruby
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Libraries
mruby
Commits
4918e5ec
Commit
4918e5ec
authored
Apr 18, 2014
by
Yukihiro "Matz" Matsumoto
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2075 from mattn/utf8-string-ord
Implement String#ord, String#split for mruby-string-utf8
parents
7c1b3f8a
d72b8c20
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
205 additions
and
0 deletions
+205
-0
mrbgems/mruby-string-utf8/src/string.c
mrbgems/mruby-string-utf8/src/string.c
+205
-0
No files found.
mrbgems/mruby-string-utf8/src/string.c
View file @
4918e5ec
#include "mruby.h"
#include "mruby/array.h"
#include "mruby/string.h"
#include "mruby/range.h"
#include "mruby/re.h"
...
...
@@ -23,6 +24,75 @@ static const char utf8len_codepage[256] =
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
4
,
4
,
4
,
4
,
4
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
};
static
char
utf8len_codepage_zero
[
256
]
=
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
2
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
3
,
4
,
4
,
4
,
4
,
4
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
};
static
const
char
isspacetable
[
256
]
=
{
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
1
,
1
,
1
,
1
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
1
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
static
mrb_int
utf8code
(
unsigned
char
*
p
)
{
mrb_int
len
;
if
(
p
[
0
]
<
0x80
)
return
p
[
0
];
len
=
utf8len_codepage_zero
[
p
[
0
]];
if
(
len
>
1
&&
(
p
[
1
]
&
0xc0
)
==
0x80
)
{
if
(
len
==
2
)
return
((
p
[
0
]
&
0x1f
)
<<
6
)
+
(
p
[
1
]
&
0x3f
);
if
((
p
[
2
]
&
0xc0
)
==
0x80
)
{
if
(
len
==
3
)
return
((
p
[
0
]
&
0x0f
)
<<
12
)
+
((
p
[
1
]
&
0x3f
)
<<
6
)
+
(
p
[
2
]
&
0x3f
);
if
((
p
[
3
]
&
0xc0
)
==
0x80
)
{
if
(
len
==
4
)
return
((
p
[
0
]
&
0x07
)
<<
18
)
+
((
p
[
1
]
&
0x3f
)
<<
12
)
+
((
p
[
2
]
&
0x3f
)
<<
6
)
+
(
p
[
3
]
&
0x3f
);
if
((
p
[
4
]
&
0xc0
)
==
0x80
)
{
if
(
len
==
5
)
return
((
p
[
0
]
&
0x03
)
<<
24
)
+
((
p
[
1
]
&
0x3f
)
<<
18
)
+
((
p
[
2
]
&
0x3f
)
<<
12
)
+
((
p
[
3
]
&
0x3f
)
<<
6
)
+
(
p
[
4
]
&
0x3f
);
if
((
p
[
5
]
&
0xc0
)
==
0x80
&&
len
==
6
)
return
((
p
[
0
]
&
0x01
)
<<
30
)
+
((
p
[
1
]
&
0x3f
)
<<
24
)
+
((
p
[
2
]
&
0x3f
)
<<
18
)
+
((
p
[
3
]
&
0x3f
)
<<
12
)
+
((
p
[
4
]
&
0x3f
)
<<
6
)
+
(
p
[
5
]
&
0x3f
);
}
}
}
}
return
p
[
0
];
}
static
mrb_value
mrb_fixnum_chr
(
mrb_state
*
,
mrb_value
);
static
mrb_int
...
...
@@ -463,6 +533,139 @@ mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
return
mrb_str_new
(
mrb
,
utf8
,
len
);
}
static
mrb_value
mrb_str_ord
(
mrb_state
*
mrb
,
mrb_value
str
)
{
mrb_int
len
=
RSTRING_LEN
(
str
);
if
(
len
==
0
)
mrb_raise
(
mrb
,
E_ARGUMENT_ERROR
,
"empty string"
);
return
mrb_fixnum_value
(
utf8code
((
unsigned
char
*
)
RSTRING_PTR
(
str
)));
}
static
mrb_value
mrb_str_split_m
(
mrb_state
*
mrb
,
mrb_value
str
)
{
int
argc
;
mrb_value
spat
=
mrb_nil_value
();
enum
{
awk
,
string
,
regexp
}
split_type
=
string
;
long
i
=
0
,
lim_p
;
mrb_int
beg
;
mrb_int
end
;
mrb_int
lim
=
0
;
mrb_value
result
,
tmp
;
argc
=
mrb_get_args
(
mrb
,
"|oi"
,
&
spat
,
&
lim
);
lim_p
=
(
lim
>
0
&&
argc
==
2
);
if
(
argc
==
2
)
{
if
(
lim
==
1
)
{
if
(
RSTRING_LEN
(
str
)
==
0
)
return
mrb_ary_new_capa
(
mrb
,
0
);
return
mrb_ary_new_from_values
(
mrb
,
1
,
&
str
);
}
i
=
1
;
}
if
(
argc
==
0
||
mrb_nil_p
(
spat
))
{
split_type
=
awk
;
}
else
{
if
(
mrb_string_p
(
spat
))
{
split_type
=
string
;
if
(
RSTRING_LEN
(
spat
)
==
1
&&
RSTRING_PTR
(
spat
)[
0
]
==
' '
){
split_type
=
awk
;
}
}
else
{
noregexp
(
mrb
,
str
);
}
}
result
=
mrb_ary_new
(
mrb
);
beg
=
0
;
if
(
split_type
==
awk
)
{
char
*
ptr
=
RSTRING_PTR
(
str
);
char
*
eptr
=
RSTRING_END
(
str
);
char
*
bptr
=
ptr
;
int
skip
=
1
;
unsigned
int
c
;
end
=
beg
;
while
(
ptr
<
eptr
)
{
int
ai
=
mrb_gc_arena_save
(
mrb
);
c
=
(
unsigned
char
)
*
ptr
++
;
if
(
skip
)
{
if
(
ascii_isspace
(
c
))
{
beg
=
ptr
-
bptr
;
}
else
{
end
=
ptr
-
bptr
;
skip
=
0
;
if
(
lim_p
&&
lim
<=
i
)
break
;
}
}
else
if
(
ascii_isspace
(
c
))
{
mrb_ary_push
(
mrb
,
result
,
str_subseq
(
mrb
,
str
,
beg
,
end
-
beg
));
mrb_gc_arena_restore
(
mrb
,
ai
);
skip
=
1
;
beg
=
ptr
-
bptr
;
if
(
lim_p
)
++
i
;
}
else
{
end
=
ptr
-
bptr
;
}
}
}
else
if
(
split_type
==
string
)
{
char
*
ptr
=
RSTRING_PTR
(
str
);
// s->as.ary
char
*
temp
=
ptr
;
char
*
eptr
=
RSTRING_END
(
str
);
mrb_int
slen
=
RSTRING_LEN
(
spat
);
if
(
slen
==
0
)
{
int
ai
=
mrb_gc_arena_save
(
mrb
);
while
(
ptr
<
eptr
)
{
mrb_ary_push
(
mrb
,
result
,
str_subseq
(
mrb
,
str
,
ptr
-
temp
,
1
));
mrb_gc_arena_restore
(
mrb
,
ai
);
ptr
++
;
if
(
lim_p
&&
lim
<=
++
i
)
break
;
}
}
else
{
char
*
sptr
=
RSTRING_PTR
(
spat
);
int
ai
=
mrb_gc_arena_save
(
mrb
);
while
(
ptr
<
eptr
&&
(
end
=
mrb_memsearch
(
sptr
,
slen
,
ptr
,
eptr
-
ptr
))
>=
0
)
{
mrb_ary_push
(
mrb
,
result
,
str_subseq
(
mrb
,
str
,
ptr
-
temp
,
end
));
mrb_gc_arena_restore
(
mrb
,
ai
);
ptr
+=
end
+
slen
;
if
(
lim_p
&&
lim
<=
++
i
)
break
;
}
}
beg
=
ptr
-
temp
;
}
else
{
noregexp
(
mrb
,
str
);
}
if
(
RSTRING_LEN
(
str
)
>
0
&&
(
lim_p
||
RSTRING_LEN
(
str
)
>
beg
||
lim
<
0
))
{
if
(
RSTRING_LEN
(
str
)
==
beg
)
{
tmp
=
mrb_str_new_lit
(
mrb
,
""
);
}
else
{
tmp
=
str_subseq
(
mrb
,
str
,
beg
,
RSTRING_LEN
(
str
)
-
beg
);
}
mrb_ary_push
(
mrb
,
result
,
tmp
);
}
if
(
!
lim_p
&&
lim
==
0
)
{
mrb_int
len
;
while
((
len
=
RARRAY_LEN
(
result
))
>
0
&&
(
tmp
=
RARRAY_PTR
(
result
)[
len
-
1
],
RSTRING_LEN
(
tmp
)
==
0
))
mrb_ary_pop
(
mrb
,
result
);
}
return
result
;
}
void
mrb_mruby_string_utf8_gem_init
(
mrb_state
*
mrb
)
{
...
...
@@ -472,7 +675,9 @@ mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
mrb_define_method
(
mrb
,
s
,
"length"
,
mrb_str_size
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"index"
,
mrb_str_index_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"[]"
,
mrb_str_aref_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"ord"
,
mrb_str_ord
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"slice"
,
mrb_str_aref_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"split"
,
mrb_str_split_m
,
MRB_ARGS_ANY
());
mrb_define_method
(
mrb
,
s
,
"reverse"
,
mrb_str_reverse
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"reverse!"
,
mrb_str_reverse_bang
,
MRB_ARGS_NONE
());
mrb_define_method
(
mrb
,
s
,
"rindex"
,
mrb_str_rindex_m
,
MRB_ARGS_ANY
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment