added a SAX parser #971

parent 8968adcd
......@@ -52,6 +52,53 @@ class parser
value
};
/*!
@brief interface for receivers of SAX-style parse events

A receiver derives from this struct and implements one callback per event.
Every callback returns a bool; the SAX parser in this file stops and returns
false as soon as a callback returns false.
*/
struct SAX
{
    /// receivers are used through SAX pointers, so destruction via the base
    /// must be well-defined
    virtual ~SAX() = default;

    /// a null value was read
    virtual bool null() = 0;

    /// a boolean value was read
    virtual bool boolean(bool) = 0;

    /// an integer number was read
    virtual bool number_integer(number_integer_t) = 0;

    /// an unsigned integer number was read
    virtual bool number_unsigned(number_unsigned_t) = 0;

    /// a floating-point number was read
    /// the string parameter contains the raw number value
    virtual bool number_float(number_float_t, const std::string&) = 0;

    /// a string value was read
    virtual bool string(const std::string&) = 0;

    /// the beginning of an object was read
    /// binary formats may report the number of elements
    virtual bool start_object(std::size_t elements) = 0;

    /// an object key was read
    virtual bool key(const std::string&) = 0;

    /// the end of an object was read
    virtual bool end_object() = 0;

    /// the beginning of an array was read
    /// binary formats may report the number of elements
    virtual bool start_array(std::size_t elements) = 0;

    /// the end of an array was read
    virtual bool end_array() = 0;

    /// a binary value was read
    /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array<uint8_t>
    virtual bool binary(const std::vector<uint8_t>& vec) = 0;

    /// a parse error occurred
    /// the byte position and the last token are reported
    virtual bool parse_error(int position, const std::string& last_token) = 0;
};
/// type of the per-event parser callback: it receives the current depth, the
/// parse event and the value parsed so far, and returns a bool
/// NOTE(review): the meaning of the returned bool is not visible in this
/// excerpt - confirm against the callback-based parse implementation
using parser_callback_t =
std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
......@@ -62,6 +109,10 @@ class parser
: callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_)
{}
/// construct a parser that reports parse events to a SAX receiver
/// @param adapter  input source that is handed to the lexer
/// @param s        receiver of the parse events; the pointer is stored as
///                 given and later dereferenced without a null check by
///                 sax_parse_internal()
/// allow_exceptions is not initialized here and keeps its in-class default
parser(detail::input_adapter_t adapter, SAX* s)
: m_lexer(adapter), sax(s)
{}
/*!
@brief public parser interface
......@@ -122,6 +173,14 @@ class parser
return not strict or (get_token() == token_type::end_of_input);
}
/*!
@brief parse the input and report every event to the SAX receiver

@return the result of sax_parse_internal() for the top-level value
*/
bool sax_parse()
{
    // sax_parse_internal() dispatches on last_token, so the first token
    // has to be requested from the lexer before handing over
    get_token();

    const bool completed = sax_parse_internal();
    return completed;
}
private:
/*!
@brief the actual parser
......@@ -520,6 +579,168 @@ class parser
}
}
/*!
@brief recursive worker of the SAX parser

Dispatches on last_token, so the caller is expected to have requested the
first token of the value via get_token() beforehand. Objects and arrays are
processed by reading further tokens and recursing for every contained value.
For each recognized construct the matching callback of the SAX receiver is
invoked.

@return false as soon as a SAX callback returns false; for unexpected tokens
        the return value of SAX::parse_error(); otherwise the return value
        of the last callback invoked for this value

@note The sax pointer is dereferenced without a null check.
*/
bool sax_parse_internal()
{
    switch (last_token)
    {
        case token_type::begin_object:
        {
            // the number of members is not known at this point, so the
            // largest std::size_t value (-1) is reported
            if (not sax->start_object(static_cast<std::size_t>(-1)))
            {
                return false;
            }

            // read next token
            get_token();

            // closing } -> we are done
            if (last_token == token_type::end_object)
            {
                return sax->end_object();
            }

            // parse members
            while (true)
            {
                // parse key: anything but a string is a syntax error
                if (last_token != token_type::value_string)
                {
                    return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                }

                // report the key to the receiver
                if (not sax->key(m_lexer.move_string()))
                {
                    return false;
                }

                // parse separator (:)
                get_token();
                if (last_token != token_type::name_separator)
                {
                    return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                }

                // parse value
                get_token();
                if (not sax_parse_internal())
                {
                    return false;
                }

                // comma -> next member
                get_token();
                if (last_token == token_type::value_separator)
                {
                    // read the next key before starting the next iteration
                    get_token();
                    continue;
                }

                // closing }
                if (last_token == token_type::end_object)
                {
                    return sax->end_object();
                }

                // neither a comma nor a closing brace
                return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
            }
        }

        case token_type::begin_array:
        {
            // the number of elements is not known at this point, so the
            // largest std::size_t value (-1) is reported
            if (not sax->start_array(static_cast<std::size_t>(-1)))
            {
                return false;
            }

            // read next token
            get_token();

            // closing ] -> we are done
            if (last_token == token_type::end_array)
            {
                return sax->end_array();
            }

            // parse elements
            while (true)
            {
                // parse value (its first token is already in last_token)
                if (not sax_parse_internal())
                {
                    return false;
                }

                // comma -> next element
                get_token();
                if (last_token == token_type::value_separator)
                {
                    // read the first token of the next element
                    get_token();
                    continue;
                }

                // closing ]
                if (last_token == token_type::end_array)
                {
                    return sax->end_array();
                }

                // neither a comma nor a closing bracket
                return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
            }
        }

        case token_type::value_float:
        {
            const auto res = m_lexer.get_number_float();

            // non-finite results are reported as parse errors
            if (JSON_UNLIKELY(not std::isfinite(res)))
            {
                return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
            }

            return sax->number_float(res, m_lexer.move_string());
        }

        case token_type::literal_false:
        {
            return sax->boolean(false);
        }

        case token_type::literal_null:
        {
            return sax->null();
        }

        case token_type::literal_true:
        {
            return sax->boolean(true);
        }

        case token_type::value_integer:
        {
            return sax->number_integer(m_lexer.get_number_integer());
        }

        case token_type::value_string:
        {
            return sax->string(m_lexer.move_string());
        }

        case token_type::value_unsigned:
        {
            return sax->number_unsigned(m_lexer.get_number_unsigned());
        }

        default: // the last token was unexpected
        {
            return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
        }
    }
}
/// get next token from lexer
token_type get_token()
{
......@@ -584,6 +805,8 @@ class parser
token_type expected = token_type::uninitialized;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
/// associated SAX parse event receiver; assigned by the constructor taking a
/// SAX pointer and nullptr by default (sax_parse_internal() dereferences it
/// without a null check)
SAX* sax = nullptr;
};
}
}
......@@ -1054,6 +1054,8 @@ class basic_json
*/
using parse_event_t = typename parser::parse_event_t;
/// SAX event receiver interface (alias for the SAX struct nested in the parser)
using SAX = typename parser::SAX;
/*!
@brief per-element parser callback type
......@@ -5925,6 +5927,16 @@ class basic_json
return parser(i).accept(true);
}
/*!
@brief parse the given input and report the parse events to a SAX receiver

@param[in] i    input to read from
@param[in] sax  receiver of the parse events
@return the result of the parser's sax_parse() run
*/
static bool sax_parse(detail::input_adapter i, SAX* sax)
{
    // build the SAX-driven parser first, then run it
    parser sax_driver(i, sax);
    return sax_driver.sax_parse();
}
/*!
@brief parse the given input and report the parse events to a SAX receiver

Overload taking the input adapter by lvalue reference.

@param[in] i    input to read from
@param[in] sax  receiver of the parse events
@return the result of the parser's sax_parse() run
*/
static bool sax_parse(detail::input_adapter& i, SAX* sax)
{
    // build the SAX-driven parser first, then run it
    parser sax_driver(i, sax);
    return sax_driver.sax_parse();
}
/*!
@brief deserialize from an iterator range with contiguous storage
......@@ -5994,6 +6006,15 @@ class basic_json
return parser(detail::input_adapter(first, last)).accept(true);
}
template<class IteratorType, typename std::enable_if<
std::is_base_of<
std::random_access_iterator_tag,
typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0>
static bool sax_parse(IteratorType first, IteratorType last, SAX* sax)
{
return parser(detail::input_adapter(first, last), sax).sax_parse();
}
/*!
@brief deserialize from stream
@deprecated This stream operator is deprecated and will be removed in
......
......@@ -3175,6 +3175,53 @@ class parser
value
};
/*!
@brief interface for receivers of SAX-style parse events

A receiver derives from this struct and implements one callback per event.
Every callback returns a bool; the SAX parser in this file stops and returns
false as soon as a callback returns false.
*/
struct SAX
{
    /// receivers are used through SAX pointers, so destruction via the base
    /// must be well-defined
    virtual ~SAX() = default;

    /// a null value was read
    virtual bool null() = 0;

    /// a boolean value was read
    virtual bool boolean(bool) = 0;

    /// an integer number was read
    virtual bool number_integer(number_integer_t) = 0;

    /// an unsigned integer number was read
    virtual bool number_unsigned(number_unsigned_t) = 0;

    /// a floating-point number was read
    /// the string parameter contains the raw number value
    virtual bool number_float(number_float_t, const std::string&) = 0;

    /// a string value was read
    virtual bool string(const std::string&) = 0;

    /// the beginning of an object was read
    /// binary formats may report the number of elements
    virtual bool start_object(std::size_t elements) = 0;

    /// an object key was read
    virtual bool key(const std::string&) = 0;

    /// the end of an object was read
    virtual bool end_object() = 0;

    /// the beginning of an array was read
    /// binary formats may report the number of elements
    virtual bool start_array(std::size_t elements) = 0;

    /// the end of an array was read
    virtual bool end_array() = 0;

    /// a binary value was read
    /// examples are CBOR type 2 strings, MessagePack bin, and maybe UBJSON array<uint8_t>
    virtual bool binary(const std::vector<uint8_t>& vec) = 0;

    /// a parse error occurred
    /// the byte position and the last token are reported
    virtual bool parse_error(int position, const std::string& last_token) = 0;
};
/// type of the per-event parser callback: it receives the current depth, the
/// parse event and the value parsed so far, and returns a bool
/// NOTE(review): the meaning of the returned bool is not visible in this
/// excerpt - confirm against the callback-based parse implementation
using parser_callback_t =
std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
......@@ -3185,6 +3232,10 @@ class parser
: callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_)
{}
/// construct a parser that reports parse events to a SAX receiver
/// @param adapter  input source that is handed to the lexer
/// @param s        receiver of the parse events; the pointer is stored as
///                 given and later dereferenced without a null check by
///                 sax_parse_internal()
/// allow_exceptions is not initialized here and keeps its in-class default
parser(detail::input_adapter_t adapter, SAX* s)
: m_lexer(adapter), sax(s)
{}
/*!
@brief public parser interface
......@@ -3245,6 +3296,14 @@ class parser
return not strict or (get_token() == token_type::end_of_input);
}
/*!
@brief parse the input and report every event to the SAX receiver

@return the result of sax_parse_internal() for the top-level value
*/
bool sax_parse()
{
    // sax_parse_internal() dispatches on last_token, so the first token
    // has to be requested from the lexer before handing over
    get_token();

    const bool completed = sax_parse_internal();
    return completed;
}
private:
/*!
@brief the actual parser
......@@ -3643,6 +3702,168 @@ class parser
}
}
/*!
@brief recursive worker of the SAX parser

Dispatches on last_token, so the caller is expected to have requested the
first token of the value via get_token() beforehand. Objects and arrays are
processed by reading further tokens and recursing for every contained value.
For each recognized construct the matching callback of the SAX receiver is
invoked.

@return false as soon as a SAX callback returns false; for unexpected tokens
        the return value of SAX::parse_error(); otherwise the return value
        of the last callback invoked for this value

@note The sax pointer is dereferenced without a null check.
*/
bool sax_parse_internal()
{
    switch (last_token)
    {
        case token_type::begin_object:
        {
            // the number of members is not known at this point, so the
            // largest std::size_t value (-1) is reported
            if (not sax->start_object(static_cast<std::size_t>(-1)))
            {
                return false;
            }

            // read next token
            get_token();

            // closing } -> we are done
            if (last_token == token_type::end_object)
            {
                return sax->end_object();
            }

            // parse members
            while (true)
            {
                // parse key: anything but a string is a syntax error
                if (last_token != token_type::value_string)
                {
                    return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                }

                // report the key to the receiver
                if (not sax->key(m_lexer.move_string()))
                {
                    return false;
                }

                // parse separator (:)
                get_token();
                if (last_token != token_type::name_separator)
                {
                    return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
                }

                // parse value
                get_token();
                if (not sax_parse_internal())
                {
                    return false;
                }

                // comma -> next member
                get_token();
                if (last_token == token_type::value_separator)
                {
                    // read the next key before starting the next iteration
                    get_token();
                    continue;
                }

                // closing }
                if (last_token == token_type::end_object)
                {
                    return sax->end_object();
                }

                // neither a comma nor a closing brace
                return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
            }
        }

        case token_type::begin_array:
        {
            // the number of elements is not known at this point, so the
            // largest std::size_t value (-1) is reported
            if (not sax->start_array(static_cast<std::size_t>(-1)))
            {
                return false;
            }

            // read next token
            get_token();

            // closing ] -> we are done
            if (last_token == token_type::end_array)
            {
                return sax->end_array();
            }

            // parse elements
            while (true)
            {
                // parse value (its first token is already in last_token)
                if (not sax_parse_internal())
                {
                    return false;
                }

                // comma -> next element
                get_token();
                if (last_token == token_type::value_separator)
                {
                    // read the first token of the next element
                    get_token();
                    continue;
                }

                // closing ]
                if (last_token == token_type::end_array)
                {
                    return sax->end_array();
                }

                // neither a comma nor a closing bracket
                return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
            }
        }

        case token_type::value_float:
        {
            const auto res = m_lexer.get_number_float();

            // non-finite results are reported as parse errors
            if (JSON_UNLIKELY(not std::isfinite(res)))
            {
                return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
            }

            return sax->number_float(res, m_lexer.move_string());
        }

        case token_type::literal_false:
        {
            return sax->boolean(false);
        }

        case token_type::literal_null:
        {
            return sax->null();
        }

        case token_type::literal_true:
        {
            return sax->boolean(true);
        }

        case token_type::value_integer:
        {
            return sax->number_integer(m_lexer.get_number_integer());
        }

        case token_type::value_string:
        {
            return sax->string(m_lexer.move_string());
        }

        case token_type::value_unsigned:
        {
            return sax->number_unsigned(m_lexer.get_number_unsigned());
        }

        default: // the last token was unexpected
        {
            return sax->parse_error(m_lexer.get_position(), m_lexer.get_token_string());
        }
    }
}
/// get next token from lexer
token_type get_token()
{
......@@ -3707,6 +3928,8 @@ class parser
token_type expected = token_type::uninitialized;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
/// associated SAX parse event receiver; assigned by the constructor taking a
/// SAX pointer and nullptr by default (sax_parse_internal() dereferences it
/// without a null check)
SAX* sax = nullptr;
};
}
}
......@@ -10652,6 +10875,8 @@ class basic_json
*/
using parse_event_t = typename parser::parse_event_t;
/// SAX event receiver interface (alias for the SAX struct nested in the parser)
using SAX = typename parser::SAX;
/*!
@brief per-element parser callback type
......@@ -15523,6 +15748,16 @@ class basic_json
return parser(i).accept(true);
}
/*!
@brief parse the given input and report the parse events to a SAX receiver

@param[in] i    input to read from
@param[in] sax  receiver of the parse events
@return the result of the parser's sax_parse() run
*/
static bool sax_parse(detail::input_adapter i, SAX* sax)
{
    // build the SAX-driven parser first, then run it
    parser sax_driver(i, sax);
    return sax_driver.sax_parse();
}
/*!
@brief parse the given input and report the parse events to a SAX receiver

Overload taking the input adapter by lvalue reference.

@param[in] i    input to read from
@param[in] sax  receiver of the parse events
@return the result of the parser's sax_parse() run
*/
static bool sax_parse(detail::input_adapter& i, SAX* sax)
{
    // build the SAX-driven parser first, then run it
    parser sax_driver(i, sax);
    return sax_driver.sax_parse();
}
/*!
@brief deserialize from an iterator range with contiguous storage
......@@ -15592,6 +15827,15 @@ class basic_json
return parser(detail::input_adapter(first, last)).accept(true);
}
template<class IteratorType, typename std::enable_if<
std::is_base_of<
std::random_access_iterator_tag,
typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0>
static bool sax_parse(IteratorType first, IteratorType last, SAX* sax)
{
return parser(detail::input_adapter(first, last), sax).sax_parse();
}
/*!
@brief deserialize from stream
@deprecated This stream operator is deprecated and will be removed in
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment