Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mruby
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Libraries
mruby
Commits
0f7d771a
Commit
0f7d771a
authored
Jan 07, 2014
by
Yukihiro "Matz" Matsumoto
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'mruby-string-utf8' of
https://github.com/mattn/mruby
into mattn-mruby-string-utf8
parents
65f6b105
055468bf
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
265 additions
and
0 deletions
+265
-0
mrbgems/mruby-string-utf8/mrbgem.rake
mrbgems/mruby-string-utf8/mrbgem.rake
+4
-0
mrbgems/mruby-string-utf8/src/string.c
mrbgems/mruby-string-utf8/src/string.c
+249
-0
mrbgems/mruby-string-utf8/test/string.rb
mrbgems/mruby-string-utf8/test/string.rb
+12
-0
No files found.
mrbgems/mruby-string-utf8/mrbgem.rake
0 → 100644
View file @
0f7d771a
# Gem specification for mruby-string-utf8: declares the gem name, its
# license and its author.
MRuby::Gem::Specification.new('mruby-string-utf8') do |gem|
  gem.license = 'MIT'
  gem.author = 'mruby developers'
end
mrbgems/mruby-string-utf8/src/string.c
0 → 100644
View file @
0f7d771a
#include "mruby.h"
#include "mruby/string.h"
#include "mruby/range.h"
#include <ctype.h>
#include <string.h>
/* TODO: duplicate definition in src/re.h */
#define REGEXP_CLASS "Regexp"
/*
 * Sequence length keyed by the first byte of a sequence.
 *
 * utf8len() indexes this table with the byte it is looking at to learn how
 * many bytes the sequence claims to occupy.  Bytes 0x00-0xBF (ASCII and
 * 10xxxxxx bytes) and the bytes 0xFE/0xFF map to 1, so a stray or invalid
 * byte is stepped over one byte at a time.
 *
 * The table is read-only, hence `const`.
 */
static const size_t utf8len_tab[256] = {
  /* 0x00-0x0F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x10-0x1F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x20-0x2F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30-0x3F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x40-0x4F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50-0x5F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60-0x6F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70-0x7F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80-0x8F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90-0x9F */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xA0-0xAF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xB0-0xBF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xC0-0xCF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xD0-0xDF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0-0xEF */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xF0-0xFF */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
};
/*
 * Byte length of the character that starts at `p`.
 *
 * Returns 0 when `p` points at a NUL byte.  Otherwise the length claimed by
 * the first byte is looked up in utf8len_tab; if any of the following bytes
 * of that claimed sequence is not of the form 10xxxxxx, the sequence is
 * treated as broken and 1 is returned so the caller advances a single byte.
 */
static size_t
utf8len(unsigned char* p)
{
  size_t expected;
  int k = 1;

  if (*p == 0) {
    return 0;
  }

  expected = utf8len_tab[*p];
  while (k < expected) {
    /* every trailing byte must carry the 10xxxxxx marker */
    if ((p[k] & 0xc0) != 0x80) {
      return 1;
    }
    ++k;
  }
  return expected;
}
/*
 * Count the characters in `str`, stepping with utf8len().
 *
 * The scan runs from RSTRING_PTR(str) up to the first NUL byte, so a string
 * with an embedded NUL is only counted up to that byte.
 */
static size_t
mrb_utf8_strlen(mrb_value str)
{
  size_t count = 0;
  unsigned char *cursor = (unsigned char*)RSTRING_PTR(str);

  for (; *cursor; count++) {
    cursor += utf8len(cursor);
  }
  return count;
}

/* Character-count counterpart of RSTRING_LEN (which is used for byte counts). */
#define RSTRING_LEN_UTF8(s) mrb_utf8_strlen(s)
/*
 * Raise E_NOTIMP_ERROR with the message "Regexp class not implemented".
 *
 * `self` is not used.  The nil return after the raise is presumably never
 * reached (assumes mrb_raise does not return -- confirm against the mruby
 * API); it keeps the function well-formed for its mrb_value return type.
 */
static mrb_value
noregexp(mrb_state *mrb, mrb_value self)
{
  (void)self;

  mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
  return mrb_nil_value();
}
/*
 * Reject Regexp arguments: if the class name of `obj` starts with
 * REGEXP_CLASS, hand over to noregexp(), which raises.
 *
 * strncmp() is used rather than memcmp() because memcmp() is entitled to
 * read all sizeof(REGEXP_CLASS) - 1 bytes of both operands, which runs past
 * the terminator of any class name shorter than "Regexp".  strncmp() stops
 * at the NUL and yields the same prefix-match result.
 */
static void
regexp_check(mrb_state *mrb, mrb_value obj)
{
  const char *classname = mrb_obj_classname(mrb, obj);

  if (strncmp(classname, REGEXP_CLASS, sizeof(REGEXP_CLASS) - 1) == 0) {
    noregexp(mrb, obj);
  }
}
/*
 * Find the first occurrence of the m-byte pattern `xs` inside the n-byte
 * buffer `ys` using a 256-entry shift table (the original names suggest the
 * "Quick Search" algorithm).
 *
 * The shift for a window is chosen by the byte immediately after it:
 * bytes that occur in the pattern shift so their last occurrence lines up,
 * every other byte shifts by m + 1.
 *
 * Returns the byte offset of the match within `ys`, or -1 when there is none.
 * The caller (mrb_memsearch) only calls this with 1 < m < n.
 *
 * The search is index based and stops as soon as the last possible window
 * has failed, so `ys[n]` is never read and no pointer is advanced beyond the
 * end of the buffer.
 */
static inline mrb_int
mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
{
  mrb_int shift[256];
  mrb_int i, pos;

  /* Preprocessing */
  for (i = 0; i < 256; ++i) {
    shift[i] = m + 1;
  }
  for (i = 0; i < m; ++i) {
    shift[xs[i]] = m - i;
  }

  /* Searching */
  pos = 0;
  while (pos <= n - m) {
    const unsigned char *window = ys + pos;

    if (*xs == *window && memcmp(xs, window, m) == 0) {
      return pos;
    }
    if (pos == n - m) {
      /* last window: there is no byte after it to key the shift on */
      break;
    }
    pos += shift[window[m]];
  }
  return -1;
}
/*
 * Locate the m-byte block `x0` inside the n-byte block `y0`.
 *
 * Returns the byte offset of the first match, or -1 if there is none.
 * Special cases, checked in this order:
 *   - pattern longer than the buffer: -1
 *   - equal lengths: a single memcmp decides
 *   - pattern shorter than one byte: matches at offset 0
 *   - one-byte pattern: plain byte scan
 * Everything else is delegated to mrb_memsearch_qs().
 */
static mrb_int
mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
{
  const unsigned char *needle = (const unsigned char *)x0;
  const unsigned char *hay = (const unsigned char *)y0;

  if (m > n) {
    return -1;
  }
  if (m == n) {
    return memcmp(x0, y0, m) == 0 ? 0 : -1;
  }
  if (m < 1) {
    return 0;
  }
  if (m == 1) {
    const unsigned char *hit = memchr(hay, *needle, n);

    return hit ? (mrb_int)(hit - hay) : -1;
  }
  return mrb_memsearch_qs(needle, m, hay, n);
}
/*
 * Build a new string from `len` characters of `str`, starting at character
 * index `beg`.  Both values are counted in characters as measured by
 * utf8len(), not in bytes.
 *
 * Either walk stops early at the first NUL byte, so asking for more than is
 * available yields whatever characters exist (possibly none).  Callers are
 * expected to pass non-negative `beg` and `len`.
 */
static mrb_value
str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  mrb_int i;
  unsigned char *start = (unsigned char*)RSTRING_PTR(str);
  unsigned char *end;

  /* skip `beg` characters */
  for (i = 0; i < beg && *start; i++) {
    start += utf8len(start);
  }

  /* span `len` characters; test the pointer that is actually advancing */
  end = start;
  for (i = 0; i < len && *end; i++) {
    end += utf8len(end);
  }

  return mrb_str_new(mrb, (const char*)start, (int)(end - start));
}
/*
 * Character-based substring: `len` characters of `str` starting at character
 * index `beg`.
 *
 * Returns nil when
 *   - `len` is negative,
 *   - `beg` is negative and reaches before the start of the string even
 *     after counting back from the end, or
 *   - `beg` is greater than the character count.
 * `beg` equal to the character count yields an empty string.  `len` is
 * clamped to the characters available after `beg`.
 *
 * A negative `beg` is translated exactly once.  Adding the length a second
 * time made far-out-of-range negatives (e.g. beg = -5 on a 3-character
 * string) wrap onto a valid index instead of producing nil.
 */
static mrb_value
str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
{
  mrb_int len8 = (mrb_int)RSTRING_LEN_UTF8(str);

  if (len < 0) {
    return mrb_nil_value();
  }
  if (beg < 0) {
    beg += len8;
    if (beg < 0) {
      return mrb_nil_value();
    }
  }
  if (beg > len8) {
    return mrb_nil_value();
  }
  /* written as a subtraction so that beg + len cannot overflow */
  if (len > len8 - beg) {
    len = len8 - beg;
  }
  return str_subseq(mrb, str, beg, len);
}
/*
 * Byte-wise search for `sub` inside `str`, starting at byte `offset`.
 *
 * A negative `offset` counts back from the end of `str`.  Returns the byte
 * position of the first match (relative to the start of `str`), or -1 when
 * `offset` is out of range, the remaining bytes are fewer than `sub` needs,
 * or no match exists.  An empty `sub` matches at `offset`.
 *
 * Note that offsets and the result are in bytes, not characters.
 */
static mrb_int
str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
{
  const mrb_int hay_len = RSTRING_LEN(str);
  const mrb_int needle_len = RSTRING_LEN(sub);
  mrb_int remaining;
  mrb_int found;

  if (offset < 0) {
    offset += hay_len;
    if (offset < 0) {
      return -1;
    }
  }

  remaining = hay_len - offset;
  if (remaining < needle_len) {
    return -1;
  }
  if (needle_len == 0) {
    return offset;
  }

  found = mrb_memsearch(RSTRING_PTR(sub), needle_len,
                        RSTRING_PTR(str) + offset, remaining);
  return found < 0 ? found : found + offset;
}
/*
 * Single-argument form of String#[] / String#slice.
 *
 * After regexp_check() has rejected Regexp arguments, `indx` is handled by
 * type:
 *   - String: returns a copy of `indx` if it occurs in `str`, otherwise nil.
 *   - Range:  returns the characters selected by the range, or nil when
 *             mrb_range_beg_len() reports failure.
 *   - Fixnum and every remaining type: `indx` is read with mrb_fixnum() and
 *             the one character at that index is returned; an index that
 *             selects nothing yields nil rather than an empty string.
 */
static mrb_value
mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
{
  mrb_value piece;

  regexp_check(mrb, indx);

  switch (mrb_type(indx)) {
    case MRB_TT_STRING:
      if (str_index(mrb, str, indx, 0) == -1) {
        return mrb_nil_value();
      }
      return mrb_str_dup(mrb, indx);

    case MRB_TT_RANGE:
      {
        mrb_int beg, len;

        /* len goes in as the character count and comes back as the span */
        len = RSTRING_LEN_UTF8(str);
        if (!mrb_range_beg_len(mrb, indx, &beg, &len, len)) {
          return mrb_nil_value();
        }
        return str_subseq(mrb, str, beg, len);
      }

    default:
      /* MRB_TT_FIXNUM and all other types share the numeric path below */
      break;
  }

  piece = str_substr(mrb, str, mrb_fixnum(indx), 1);
  if (!mrb_nil_p(piece) && RSTRING_LEN(piece) == 0) {
    return mrb_nil_value();
  }
  return piece;
}
/*
 * Method body for String#[] and String#slice.
 *
 * Accepts one required and one optional argument ("o|o"):
 *   - two arguments: the first is checked by regexp_check(), then both are
 *     read with mrb_fixnum() as (start, length) for str_substr();
 *   - one argument: dispatched to mrb_str_aref();
 *   - any other count: E_ARGUMENT_ERROR is raised.
 */
static mrb_value
mrb_str_aref_m(mrb_state *mrb, mrb_value str)
{
  mrb_value first, second;
  int nargs = mrb_get_args(mrb, "o|o", &first, &second);

  switch (nargs) {
    case 2:
      regexp_check(mrb, first);
      return str_substr(mrb, str, mrb_fixnum(first), mrb_fixnum(second));

    case 1:
      break;

    default:
      mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(nargs));
      break;
  }
  return mrb_str_aref(mrb, str, first);
}
/*
 * Gem initialisation: defines "[]" and "slice" on the String class of `mrb`,
 * both implemented by mrb_str_aref_m.
 */
void
mrb_mruby_string_utf8_gem_init(mrb_state* mrb)
{
  static const char *const method_names[] = { "[]", "slice" };
  struct RClass *string_class = mrb->string_class;
  size_t k;

  for (k = 0; k < sizeof(method_names) / sizeof(method_names[0]); k++) {
    mrb_define_method(mrb, string_class, method_names[k], mrb_str_aref_m, MRB_ARGS_ANY());
  }
}
/*
 * Gem finalisation hook.  Nothing is done here.
 */
void
mrb_mruby_string_utf8_gem_final(mrb_state* mrb)
{
  (void)mrb;
}
mrbgems/mruby-string-utf8/test/string.rb
0 → 100644
View file @
0f7d771a
##
# String(utf8) Test

assert('String#[]') do
  # Integer index: counted in characters, negative counts from the end,
  # out-of-range gives nil.
  assert_equal "ち", "こんにちわ世界"[3]
  assert_equal nil, "こんにちわ世界"[20]
  assert_equal "世", "こんにちわ世界"[-2]

  # Range and (start, length) forms.
  assert_equal "世界", "こんにちわ世界"[-2..-1]
  assert_equal "んに", "こんにちわ世界"[1,2]

  # Substring lookup: returns the substring when present, nil otherwise.
  assert_equal "世", "こんにちわ世界"["世"]
  assert_equal nil, "こんにちわ世界"["界世"]
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment