Commit 546e148b authored by Perry Kundert's avatar Perry Kundert

Further performance improvements, and corrections in get_token_string

o An (-'ve valued, typically -1) EOF must never be allowed in
  token_string, as it be converted to 255 -- a legitimate value.
o Comparing against a specific eof() (-1, typically) is more costly than
  detecting +'ve/-'ve.  Since EOF is the only non-positive value allowed
  we can use the simpler test.
o Removed unnecessary test for token_string size, as it is already
  tested in the method, and must never occur in correct code; used an
  assert instead.
parent 8665e259
...@@ -1397,8 +1397,8 @@ constexpr T static_const<T>::value; ...@@ -1397,8 +1397,8 @@ constexpr T static_const<T>::value;
/// abstract input adapter interface /// abstract input adapter interface
struct input_adapter_protocol struct input_adapter_protocol
{ {
virtual int get_character() = 0; virtual int get_character() = 0; // returns characters in range [0,255], or eof() (a -'ve value)
virtual void unget_character() = 0; virtual void unget_character() = 0; // restore the last non-eof() character to input
virtual ~input_adapter_protocol() = default; virtual ~input_adapter_protocol() = default;
}; };
...@@ -1449,7 +1449,7 @@ class input_stream_adapter : public input_adapter_protocol ...@@ -1449,7 +1449,7 @@ class input_stream_adapter : public input_adapter_protocol
int get_character() override int get_character() override
{ {
int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get(); int c = is.rdbuf()->sbumpc(); // Avoided for performance: int c = is.get();
return c == std::char_traits<char>::eof() ? c : ( c & 0xFF ); return c < 0 ? c : ( c & 0xFF ); // faster than == std::char_traits<char>::eof()
} }
void unget_character() override void unget_character() override
...@@ -2652,12 +2652,24 @@ scan_number_done: ...@@ -2652,12 +2652,24 @@ scan_number_done:
token_string.push_back(static_cast<char>(current)); token_string.push_back(static_cast<char>(current));
} }
/// get a character from the input /*
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a -'ve valued
`std::char_traits<char>::eof()` in that case. Stores the scanned characters
for use in error messages.
@return character read from the input
*/
int get() int get()
{ {
++chars_read; ++chars_read;
current = ia->get_character(); current = ia->get_character();
token_string.push_back(static_cast<char>(current)); if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits<char>::eof()))
{
token_string.push_back(static_cast<char>(current));
}
return current; return current;
} }
...@@ -2665,12 +2677,12 @@ scan_number_done: ...@@ -2665,12 +2677,12 @@ scan_number_done:
void unget() void unget()
{ {
--chars_read; --chars_read;
if (JSON_LIKELY(current != std::char_traits<char>::eof())) if (JSON_LIKELY(current >= 0)) // faster than: != std::char_traits<char>::eof()))
{ {
ia->unget_character(); ia->unget_character();
} assert(token_string.size() != 0);
if (! token_string.empty())
token_string.pop_back(); token_string.pop_back();
}
} }
/// add a character to yytext /// add a character to yytext
...@@ -2718,19 +2730,16 @@ scan_number_done: ...@@ -2718,19 +2730,16 @@ scan_number_done:
return chars_read; return chars_read;
} }
/// return the last read token (for errors only) /// return the last read token (for errors only). Will never contain EOF
/// (a -'ve value), because 255 may legitimately occur. May contain NUL, which
/// should be escaped.
std::string get_token_string() const std::string get_token_string() const
{ {
// escape control characters // escape control characters
std::string result; std::string result;
for (auto c : token_string) for (auto c : token_string)
{ {
if (c == '\0' or c == std::char_traits<char>::eof()) if ('\x00' <= c and c <= '\x1f')
{
// ignore EOF
continue;
}
else if ('\x00' <= c and c <= '\x1f')
{ {
// escape control characters // escape control characters
std::stringstream ss; std::stringstream ss;
...@@ -5144,7 +5153,7 @@ class binary_reader ...@@ -5144,7 +5153,7 @@ class binary_reader
@brief get next character from the input @brief get next character from the input
This function provides the interface to the used input adapter. It does This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns not throw in case the input reached EOF, but returns a -'ve valued
`std::char_traits<char>::eof()` in that case. `std::char_traits<char>::eof()` in that case.
@return character read from the input @return character read from the input
...@@ -5448,14 +5457,14 @@ class binary_reader ...@@ -5448,14 +5457,14 @@ class binary_reader
{ {
if (expect_eof) if (expect_eof)
{ {
if (JSON_UNLIKELY(current != std::char_traits<char>::eof())) if (JSON_UNLIKELY(current >= 0 )) // faster than: != std::char_traits<char>::eof()))
{ {
JSON_THROW(parse_error::create(110, chars_read, "expected end of input")); JSON_THROW(parse_error::create(110, chars_read, "expected end of input"));
} }
} }
else else
{ {
if (JSON_UNLIKELY(current == std::char_traits<char>::eof())) if (JSON_UNLIKELY(current < 0)) // faster than: == std::char_traits<char>::eof()))
{ {
JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input")); JSON_THROW(parse_error::create(110, chars_read, "unexpected end of input"));
} }
......
...@@ -215,7 +215,7 @@ TEST_CASE("parser class") ...@@ -215,7 +215,7 @@ TEST_CASE("parser class")
std::string s = "\"1\""; std::string s = "\"1\"";
s[1] = '\0'; s[1] = '\0';
CHECK_THROWS_AS(json::parse(s.begin(), s.end()), json::parse_error&); CHECK_THROWS_AS(json::parse(s.begin(), s.end()), json::parse_error&);
CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"'"); CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"<U+0000>'");
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment