Commit f1cf6ef8 authored by Yukihiro "Matz" Matsumoto, committed by GitHub

Merge pull request #3312 from nobu/feature/multi-unicode-escape

Feature/multi unicode escape
parents 4d807fc6 0f08914a
......@@ -3752,6 +3752,44 @@ scan_hex(const int *start, int len, int *retlen)
return retval;
}
/*
 * Read the hexadecimal digits of a Unicode escape from the parser input
 * and return the code point they spell.
 *
 * p      parser state the characters are read from (via nextc/pushback).
 * limit  maximum number of hex digits to consume; the callers visible in
 *        this file pass 4 (bare form) and 8 (braced form).  Values larger
 *        than the internal buffer are clamped to the buffer size.
 *
 * Returns the code point (0 .. 0x10FFFF, surrogates excluded) on success.
 * On failure an error is reported through yyerror() and -1 is returned.
 * The first non-hex character encountered is pushed back for the caller.
 */
static int32_t
read_escape_unicode(parser_state *p, size_t limit)
{
  int buf[9];
  const int capacity = (int)(sizeof(buf) / sizeof(buf[0]));
  /* Clamp the digit count so an oversized limit can never index past buf;
     keeping it as int also avoids a signed/unsigned loop comparison. */
  const int max_digits = (limit < (size_t)capacity) ? (int)limit : capacity;
  int32_t c;
  int ndigits = 0;

  buf[0] = nextc(p);
  if (buf[0] < 0) goto bad_syntax;      /* input ended right after the escape */
  if (ISXDIGIT(buf[0])) {
    /* Collect the remaining hex digits, stopping at the first non-digit. */
    for (ndigits = 1; ndigits < max_digits; ndigits++) {
      buf[ndigits] = nextc(p);
      if (buf[ndigits] < 0) goto bad_syntax;
      if (!ISXDIGIT(buf[ndigits])) {
        pushback(p, buf[ndigits]);
        break;
      }
    }
  }
  else {
    /* Not a digit at all: leave it in the input; ndigits stays 0. */
    pushback(p, buf[0]);
  }

  /* scan_hex writes a count back through its last argument; zero is
     treated as "no digits were present". */
  c = scan_hex(buf, ndigits, &ndigits);
  if (ndigits == 0) goto bad_syntax;

  /* Reject values above U+10FFFF and the surrogate range U+D800..U+DFFF. */
  if (c < 0 || c > 0x10FFFF || (c & 0xFFFFF800) == 0xD800) {
    yyerror(p, "Invalid Unicode code point");
    return -1;
  }
  return c;

bad_syntax:
  yyerror(p, "Invalid escape character syntax");
  return -1;
}
/* Return negative to indicate Unicode code point */
static int32_t
read_escape(parser_state *p)
......@@ -3824,53 +3862,17 @@ read_escape(parser_state *p)
return c;
case 'u': /* Unicode */
{
int buf[9];
int i;
/* Look for opening brace */
i = 0;
buf[0] = nextc(p);
if (buf[0] < 0) goto eof;
if (buf[0] == '{') {
if (peek(p, '{')) {
/* \u{xxxxxxxx} form */
for (i=0; i<9; i++) {
buf[i] = nextc(p);
if (buf[i] < 0) goto eof;
if (buf[i] == '}') {
break;
}
else if (!ISXDIGIT(buf[i])) {
yyerror(p, "Invalid escape character syntax");
pushback(p, buf[i]);
return 0;
}
}
}
else if (ISXDIGIT(buf[0])) {
/* \uxxxx form */
for (i=1; i<4; i++) {
buf[i] = nextc(p);
if (buf[i] < 0) goto eof;
if (!ISXDIGIT(buf[i])) {
pushback(p, buf[i]);
break;
}
}
nextc(p);
c = read_escape_unicode(p, 8);
if (c < 0) return 0;
if (nextc(p) != '}') goto eof;
}
else {
pushback(p, buf[0]);
}
c = scan_hex(buf, i, &i);
if (i == 0) {
yyerror(p, "Invalid escape character syntax");
return 0;
c = read_escape_unicode(p, 4);
if (c < 0) return 0;
}
if (c < 0 || c > 0x10FFFF || (c & 0xFFFFF800) == 0xD800) {
yyerror(p, "Invalid Unicode code point");
return 0;
}
}
return -c;
case 'b':/* backspace */
......@@ -3993,6 +3995,20 @@ parse_string(parser_state *p)
tokadd(p, '\\');
tokadd(p, c);
}
else if (c == 'u' && peek(p, '{')) {
/* \u{xxxx xxxx xxxx} form */
nextc(p);
while (1) {
do c = nextc(p); while (ISSPACE(c));
if (c == '}') break;
pushback(p, c);
c = read_escape_unicode(p, 8);
if (c < 0) break;
tokadd(p, -c);
}
if (hinf)
hinf->line_head = FALSE;
}
else {
pushback(p, c);
tokadd(p, read_escape(p));
......
......@@ -2,34 +2,38 @@
# Checks that bare \uXXXX escapes in string literals equal the corresponding
# \x byte sequences at the one-, two- and three-byte boundaries.
# Each pair is listed in both argument orders.
assert('bare \u notation test') do
# Minimum and maximum one byte characters
assert_equal("\u0000", "\x00")
assert_equal("\u007F", "\x7F")
assert_equal("\x00", "\u0000")
assert_equal("\x7F", "\u007F")
# Minimum and maximum two byte characters
assert_equal("\u0080", "\xC2\x80")
assert_equal("\u07FF", "\xDF\xBF")
assert_equal("\xC2\x80", "\u0080")
assert_equal("\xDF\xBF", "\u07FF")
# Minimum and maximum three byte characters
assert_equal("\u0800", "\xE0\xA0\x80")
assert_equal("\uFFFF", "\xEF\xBF\xBF")
assert_equal("\xE0\xA0\x80", "\u0800")
assert_equal("\xEF\xBF\xBF", "\uFFFF")
# Four byte characters require the \U notation
end
# Checks that braced \u{...} escapes in string literals equal the
# corresponding \x byte sequences at the one- to four-byte boundaries.
# Each pair is listed in both argument orders.
assert('braced \u notation test') do
# Minimum and maximum one byte characters
assert_equal("\u{0000}", "\x00")
assert_equal("\u{007F}", "\x7F")
assert_equal("\x00", "\u{0000}")
assert_equal("\x7F", "\u{007F}")
# Minimum and maximum two byte characters
assert_equal("\u{0080}", "\xC2\x80")
assert_equal("\u{07FF}", "\xDF\xBF")
assert_equal("\xC2\x80", "\u{0080}")
assert_equal("\xDF\xBF", "\u{07FF}")
# Minimum and maximum three byte characters
assert_equal("\u{0800}", "\xE0\xA0\x80")
assert_equal("\u{FFFF}", "\xEF\xBF\xBF")
assert_equal("\xE0\xA0\x80", "\u{0800}")
assert_equal("\xEF\xBF\xBF", "\u{FFFF}")
# Minimum and maximum four byte characters
assert_equal("\u{10000}", "\xF0\x90\x80\x80")
assert_equal("\u{10FFFF}", "\xF4\x8F\xBF\xBF")
assert_equal("\xF0\x90\x80\x80", "\u{10000}")
assert_equal("\xF4\x8F\xBF\xBF", "\u{10FFFF}")
end
# Checks that one braced escape listing several space-separated code points
# yields the string made of those characters in order.
assert('braced multiple \u notation test') do
assert_equal("ABC", "\u{41 42 43}")
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment