relax string length limitation to 64KB; fix #2725

parent 2e74a931
...@@ -102,7 +102,8 @@ struct mrb_parser_heredoc_info { ...@@ -102,7 +102,8 @@ struct mrb_parser_heredoc_info {
mrb_ast_node *doc; mrb_ast_node *doc;
}; };
#define MRB_PARSER_BUF_SIZE 1024 #define MRB_PARSER_TOKBUF_MAX 65536
#define MRB_PARSER_TOKBUF_SIZE 256
/* parser structure */ /* parser structure */
struct mrb_parser_state { struct mrb_parser_state {
...@@ -130,8 +131,10 @@ struct mrb_parser_state { ...@@ -130,8 +131,10 @@ struct mrb_parser_state {
mrb_ast_node *locals; mrb_ast_node *locals;
mrb_ast_node *pb; mrb_ast_node *pb;
char buf[MRB_PARSER_BUF_SIZE]; char *tokbuf;
int bidx; char buf[MRB_PARSER_TOKBUF_SIZE];
int tidx;
int tsiz;
mrb_ast_node *all_heredocs; /* list of mrb_parser_heredoc_info* */ mrb_ast_node *all_heredocs; /* list of mrb_parser_heredoc_info* */
mrb_ast_node *heredocs_from_nextline; mrb_ast_node *heredocs_from_nextline;
......
...@@ -3573,7 +3573,12 @@ skips(parser_state *p, const char *s) ...@@ -3573,7 +3573,12 @@ skips(parser_state *p, const char *s)
static int static int
newtok(parser_state *p) newtok(parser_state *p)
{ {
p->bidx = 0; if (p->tokbuf != p->buf) {
mrb_free(p->mrb, p->tokbuf);
p->tokbuf = p->buf;
p->tsiz = MRB_PARSER_TOKBUF_SIZE;
}
p->tidx = 0;
return p->column - 1; return p->column - 1;
} }
...@@ -3581,7 +3586,7 @@ static void ...@@ -3581,7 +3586,7 @@ static void
tokadd(parser_state *p, int32_t c) tokadd(parser_state *p, int32_t c)
{ {
char utf8[4]; char utf8[4];
unsigned len; int i, len;
/* mrb_assert(-0x10FFFF <= c && c <= 0xFF); */ /* mrb_assert(-0x10FFFF <= c && c <= 0xFF); */
if (c >= 0) { if (c >= 0) {
...@@ -3615,42 +3620,51 @@ tokadd(parser_state *p, int32_t c) ...@@ -3615,42 +3620,51 @@ tokadd(parser_state *p, int32_t c)
len = 4; len = 4;
} }
} }
if (p->bidx+len <= MRB_PARSER_BUF_SIZE) { if (p->tidx+len >= p->tsiz) {
unsigned i; if (p->tsiz >= MRB_PARSER_TOKBUF_MAX) {
for (i = 0; i < len; i++) { p->tidx += len;
p->buf[p->bidx++] = utf8[i]; return;
}
p->tsiz *= 2;
if (p->tokbuf == p->buf) {
p->tokbuf = (char*)mrb_malloc(p->mrb, p->tsiz);
memcpy(p->tokbuf, p->buf, MRB_PARSER_TOKBUF_SIZE);
}
else {
p->tokbuf = (char*)mrb_realloc(p->mrb, p->tokbuf, p->tsiz);
}
} }
for (i = 0; i < len; i++) {
p->tokbuf[p->tidx++] = utf8[i];
} }
} }
static int static int
toklast(parser_state *p) toklast(parser_state *p)
{ {
return p->buf[p->bidx-1]; return p->tokbuf[p->tidx-1];
} }
static void static void
tokfix(parser_state *p) tokfix(parser_state *p)
{ {
int i = p->bidx, imax = MRB_PARSER_BUF_SIZE - 1; if (p->tidx >= MRB_PARSER_TOKBUF_MAX) {
p->tidx = MRB_PARSER_TOKBUF_MAX-1;
if (i > imax) {
i = imax;
yyerror(p, "string too long (truncated)"); yyerror(p, "string too long (truncated)");
} }
p->buf[i] = '\0'; p->tokbuf[p->tidx] = '\0';
} }
static const char* static const char*
tok(parser_state *p) tok(parser_state *p)
{ {
return p->buf; return p->tokbuf;
} }
static int static int
toklen(parser_state *p) toklen(parser_state *p)
{ {
return p->bidx; return p->tidx;
} }
#define IS_ARG() (p->lstate == EXPR_ARG || p->lstate == EXPR_CMDARG) #define IS_ARG() (p->lstate == EXPR_ARG || p->lstate == EXPR_CMDARG)
...@@ -5196,7 +5210,7 @@ parser_yylex(parser_state *p) ...@@ -5196,7 +5210,7 @@ parser_yylex(parser_state *p)
c = nextc(p); c = nextc(p);
} }
if (c < 0) { if (c < 0) {
if (p->bidx == 1) { if (p->tidx == 1) {
yyerror(p, "incomplete instance variable syntax"); yyerror(p, "incomplete instance variable syntax");
} }
else { else {
...@@ -5205,7 +5219,7 @@ parser_yylex(parser_state *p) ...@@ -5205,7 +5219,7 @@ parser_yylex(parser_state *p)
return 0; return 0;
} }
else if (isdigit(c)) { else if (isdigit(c)) {
if (p->bidx == 1) { if (p->tidx == 1) {
yyerror_i(p, "'@%c' is not allowed as an instance variable name", c); yyerror_i(p, "'@%c' is not allowed as an instance variable name", c);
} }
else { else {
...@@ -5486,6 +5500,8 @@ mrb_parser_new(mrb_state *mrb) ...@@ -5486,6 +5500,8 @@ mrb_parser_new(mrb_state *mrb)
#if defined(PARSER_TEST) || defined(PARSER_DEBUG) #if defined(PARSER_TEST) || defined(PARSER_DEBUG)
yydebug = 1; yydebug = 1;
#endif #endif
p->tsiz = MRB_PARSER_TOKBUF_SIZE;
p->tokbuf = p->buf;
p->lex_strterm = NULL; p->lex_strterm = NULL;
p->all_heredocs = p->parsing_heredoc = NULL; p->all_heredocs = p->parsing_heredoc = NULL;
...@@ -5501,6 +5517,9 @@ mrb_parser_new(mrb_state *mrb) ...@@ -5501,6 +5517,9 @@ mrb_parser_new(mrb_state *mrb)
MRB_API void MRB_API void
mrb_parser_free(parser_state *p) { mrb_parser_free(parser_state *p) {
mrb_pool_close(p->pool); mrb_pool_close(p->pool);
if (p->tokbuf != p->buf) {
mrb_free(p->mrb, p->tokbuf);
}
} }
MRB_API mrbc_context* MRB_API mrbc_context*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment