Unverified Commit be4fba7b authored by Niels Lohmann's avatar Niels Lohmann

Merge branch 'develop' of https://github.com/nlohmann/json into develop

parents f193427e 30946404
json_unit
json_benchmarks
json_benchmarks_simple
fuzz-testing
*.dSYM
......
all: json_benchmarks
./json_benchmarks
json_benchmarks: src/benchmarks.cpp ../src/json.hpp number_jsons
#
# Build/run json.hpp benchmarks, eg. CXX=g++-7 make
#
# The existing json_benchmarks did not allow optimization under some compilers
#
all: json_benchmarks json_benchmarks_simple number_jsons
bash -c 'time ./json_benchmarks'
bash -c 'time ./json_benchmarks_simple'
json_benchmarks: src/benchmarks.cpp ../src/json.hpp
$(CXX) -std=c++11 -pthread $(CXXFLAGS) -DNDEBUG -O3 -flto -I thirdparty/benchpress -I thirdparty/cxxopts -I../src src/benchmarks.cpp $(LDFLAGS) -o $@
json_benchmarks_simple: src/benchmarks_simple.cpp ../src/json.hpp
$(CXX) -std=c++11 $(CXXFLAGS) -DNDEBUG -O3 -flto -I../src $(<) $(LDFLAGS) -o $@
number_jsons:
(test -e files/numbers/floats.json -a -e files/numbers/signed_ints.json -a -e files/numbers/unsigned_ints.json) || (cd files/numbers ; python generate.py)
clean:
rm -f json_benchmarks files/numbers/*.json
rm -f json_benchmarks json_benchmarks_simple files/numbers/*.json
......@@ -34,6 +34,19 @@ static void bench(benchpress::context& ctx,
{
// using string streams for benchmarking to factor-out cold-cache disk
// access.
#if defined( FROMFILE )
std::ifstream istr;
{
istr.open( in_path, std::ifstream::in );
// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#else
std::stringstream istr;
{
// read file into string stream
......@@ -43,11 +56,12 @@ static void bench(benchpress::context& ctx,
// read the stream once
json j;
j << istr;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#endif
switch (mode)
{
......@@ -62,7 +76,7 @@ static void bench(benchpress::context& ctx,
istr.clear();
istr.seekg(0);
json j;
j << istr;
istr >> j;
}
break;
......@@ -74,7 +88,7 @@ static void bench(benchpress::context& ctx,
{
// create JSON value from input
json j;
j << istr;
istr >> j;
std::stringstream ostr;
ctx.reset_timer();
......
//
// benchmarks_simple.cpp -- a less complex version of benchmarks.cpp, that better reflects actual performance
//
// For some reason, the complexity of benchmarks.cpp doesn't allow
// the compiler to optimize code using json.hpp effectively. The
// exact same tests, without the use of benchpress and cxxopts, produce
// much faster code, at least under g++.
//
#include <fstream>
#include <iostream>
#include <chrono>
#include <list>
#include <tuple>
#include <json.hpp>
using json = nlohmann::json;
enum class EMode { input, output, indent };
/// Time `iters` repetitions of one benchmark scenario on the JSON document at
/// `in_path`.
///
/// One "transaction" is the complete batch of `iters` repetitions, so the
/// returned rate is simply 1 / (elapsed seconds for the whole batch).
///
/// @param mode     EMode::input times parsing from the stream;
///                 EMode::output times serialization without indentation;
///                 EMode::indent times serialization with std::setw(4).
/// @param iters    number of repetitions inside the timed region
/// @param in_path  path of the JSON file to load
/// @return transactions per second (1.0 / elapsed wall-clock seconds)
static double bench(const EMode mode, size_t iters, const std::string& in_path )
{
    // using string streams for benchmarking to factor-out cold-cache disk
    // access.  Define FROMFILE to use file I/O instead.
#if defined( FROMFILE )
    std::ifstream istr;
    {
        istr.open( in_path, std::ifstream::in );

        // read the stream once (warm-up; the parsed value is discarded)
        json j;
        istr >> j;

        // clear flags and rewind
        istr.clear();
        istr.seekg(0);
    }
#else
    std::stringstream istr;
    {
        // read file into string stream
        std::ifstream input_file(in_path);
        istr << input_file.rdbuf();
        input_file.close();

        // read the stream once (warm-up; the parsed value is discarded)
        json j;
        istr >> j;

        // clear flags and rewind
        istr.clear();
        istr.seekg(0);
    }
#endif

    // steady_clock is monotonic; system_clock may be adjusted while we measure
    using bench_clock = std::chrono::steady_clock;

    double tps = 0;
    switch (mode)
    {
        // benchmarking input
        case EMode::input:
        {
            const auto start = bench_clock::now();
            for (size_t i = 0; i < iters; ++i)
            {
                // clear flags and rewind
                istr.clear();
                istr.seekg(0);
                json j;
                istr >> j;
            }
            const auto ended = bench_clock::now();
            tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
            break;
        }

        // benchmarking output
        case EMode::output:
        case EMode::indent:
        {
            // create JSON value from input (outside the timed region)
            json j;
            istr >> j;
            std::stringstream ostr;

            const auto start = bench_clock::now();
            for (size_t i = 0; i < iters; ++i)
            {
                if (mode == EMode::indent)
                {
                    // indented dump: the stream width selects the indent
                    ostr << std::setw(4) << j;
                }
                else
                {
                    // compact dump
                    ostr << j;
                }

                // reset data so the buffer does not grow across iterations
                ostr.str(std::string());
            }
            const auto ended = bench_clock::now();
            tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
            break;
        }
    }
    return tps;
}
/// Running arithmetic mean of the values fed in through operator+=.
///
/// Converting the accumulator to T yields the mean of all samples added so
/// far, or zero when no sample has been added yet.
template <typename T>
struct average {
    T _sum { 0 };          ///< sum of all samples added so far
    size_t _count { 0 };   ///< number of samples added so far

    /// Add one sample; returns the sample that was added.
    T operator+=( const T &val_ ) { _sum += val_; ++_count; return val_; }

    /// Mean of the samples.  The count is converted to T before dividing so
    /// that a negative integral sum is not promoted to an unsigned type, and
    /// an empty accumulator yields zero instead of dividing by zero.
    operator T() const
    {
        return _count == 0 ? T { 0 } : _sum / static_cast<T>( _count );
    }
};
// Execute each test roughly enough times that the whole batch takes about one
// second (i.e. close to 1 "transaction" per second), print one line per test,
// and finally print the average TPS: a single aggregate number that gives a
// performance metric representing both parsing and output.
int main( int, char ** )
{
    // (label, mode, repetitions per timed batch, input file)
    std::list<std::tuple<std::string, EMode, size_t, std::string>> tests {
        { "parse jeopardy.json", EMode::input, 2, "files/jeopardy/jeopardy.json" },
        { "parse canada.json", EMode::input, 30, "files/nativejson-benchmark/canada.json" },
        { "parse citm_catalog.json", EMode::input, 120, "files/nativejson-benchmark/citm_catalog.json" },
        { "parse twitter.json", EMode::input, 225, "files/nativejson-benchmark/twitter.json" },
        { "parse floats.json", EMode::input, 5, "files/numbers/floats.json" },
        { "parse signed_ints.json", EMode::input, 6, "files/numbers/signed_ints.json" },
        { "parse unsigned_ints.json", EMode::input, 6, "files/numbers/unsigned_ints.json" },
        { "dump jeopardy.json", EMode::output, 5, "files/jeopardy/jeopardy.json" },
        { "dump jeopardy.json w/ind.", EMode::indent, 5, "files/jeopardy/jeopardy.json" },
        { "dump floats.json", EMode::output, 2, "files/numbers/floats.json" },
        { "dump signed_ints.json", EMode::output, 20, "files/numbers/signed_ints.json" },
    };

    average<double> avg;
    for ( const auto& t : tests ) {
        std::string name, path;
        EMode mode;
        size_t iters;
        std::tie(name, mode, iters, path) = t;

        const auto tps = bench( mode, iters, path );
        avg += tps;

        // tps is 1 / (seconds for the whole batch of `iters` operations), so
        // the time for a single operation is 1 / (tps * iters) seconds.
        const double ms_per_op = 1e3 / ( tps * static_cast<double>( iters ) );

        std::cout
            << std::left
            << std::setw( 30 ) << name
            << std::right
            << " x " << std::setw( 3 ) << iters
            << std::left
            << " == " << std::setw( 10 ) << tps
            << std::right
            << " TPS, " << std::setw( 8 ) << std::round( ms_per_op )
            << " ms/op"
            << std::endl;
    }
    std::cout << std::setw( 40 ) << "" << std::string( 10, '-' ) << std::endl;
    std::cout << std::setw( 40 ) << "" << std::setw( 10 ) << std::left << avg << " TPS Average" << std::endl;
    return 0;
}
......@@ -1394,123 +1394,97 @@ constexpr T static_const<T>::value;
// input adapters //
////////////////////
/// abstract input adapter interface
/*!
@brief abstract input adapter interface
Produces a stream of std::char_traits<char>::int_type characters from a
std::istream, a buffer, or some other input type. Accepts the return of exactly
one non-EOF character for future input. The int_type characters returned
consist of all valid char values as positive values (typically unsigned char),
plus an EOF value outside that range, specified by the value of the function
std::char_traits<char>::eof(). This value is typically -1, but could be any
arbitrary value which is not a valid char value.
@return Typically [0,255] plus std::char_traits<char>::eof().
*/
struct input_adapter_protocol
{
virtual int get_character() = 0;
virtual std::string read(std::size_t offset, std::size_t length) = 0;
virtual std::char_traits<char>::int_type get_character() = 0;
virtual void unget_character() = 0; // restore the last non-eof() character to input
virtual ~input_adapter_protocol() = default;
};
/// a type to simplify interfaces
using input_adapter_t = std::shared_ptr<input_adapter_protocol>;
/// input adapter for cached stream input
template<std::size_t BufferSize>
class cached_input_stream_adapter : public input_adapter_protocol
/// input adapter for a (caching) istream. Ignores a UTF-8 Byte Order Mark at
/// beginning of input. Does not support changing the underlying std::streambuf
/// in mid-input. Maintains underlying std::istream and std::streambuf to
/// support subsequent use of standard std::istream operations to process any
/// input characters following those used in parsing the JSON input. Clears the
/// std::istream flags; any input errors (eg. EOF) will be detected by the first
/// subsequent call for input from the std::istream.
class input_stream_adapter : public input_adapter_protocol
{
public:
explicit cached_input_stream_adapter(std::istream& i)
: is(i), start_position(is.tellg())
{
fill_buffer();
// skip byte order mark
if (fill_size >= 3 and buffer[0] == '\xEF' and buffer[1] == '\xBB' and buffer[2] == '\xBF')
{
buffer_pos += 3;
processed_chars += 3;
}
}
~cached_input_stream_adapter() override
~input_stream_adapter() override
{
// clear stream flags
is.clear();
// We initially read a lot of characters into the buffer, and we may
// not have processed all of them. Therefore, we need to "rewind" the
// stream after the last processed char.
is.seekg(start_position);
is.ignore(static_cast<std::streamsize>(processed_chars));
// clear stream flags
// clear stream flags; we use underlying streambuf I/O, do not maintain ifstream flags
is.clear();
}
int get_character() override
explicit input_stream_adapter(std::istream& i)
: is(i)
, sb(*i.rdbuf())
{
// check if refilling is necessary and possible
if (buffer_pos == fill_size and not eof)
// Ignore Byte Order Mark at start of input
std::char_traits<char>::int_type c;
if (( c = get_character() ) == 0xEF )
{
fill_buffer();
// check and remember that filling did not yield new input
if (fill_size == 0)
if (( c = get_character() ) == 0xBB )
{
eof = true;
return std::char_traits<char>::eof();
if (( c = get_character() ) == 0xBF )
{
return; // Ignore BOM
}
// the buffer is ready
buffer_pos = 0;
else if ( c != std::char_traits<char>::eof() )
{
is.unget();
}
++processed_chars;
assert(buffer_pos < buffer.size());
return buffer[buffer_pos++] & 0xFF;
is.putback( '\xBB' );
}
std::string read(std::size_t offset, std::size_t length) override
else if ( c != std::char_traits<char>::eof() )
{
// create buffer
std::string result(length, '\0');
// save stream position
const auto current_pos = is.tellg();
// save stream flags
const auto flags = is.rdstate();
// clear stream flags
is.clear();
// set stream position
is.seekg(static_cast<std::streamoff>(offset));
// read bytes
is.read(&result[0], static_cast<std::streamsize>(length));
is.unget();
}
is.putback( '\xEF' );
}
else if ( c != std::char_traits<char>::eof() )
{
is.unget(); // Not BOM. Process as usual.
}
}
// reset stream position
is.seekg(current_pos);
// reset stream flags
is.setstate(flags);
// delete because of pointer members
input_stream_adapter(const input_stream_adapter&) = delete;
input_stream_adapter& operator=(input_stream_adapter&) = delete;
return result;
// std::istream/std::streambuf use std::char_traits<char>::to_int_type, to
// ensure that std::char_traits<char>::eof() and the character 0xff do not
// end up as the same value, eg. 0xffffffff.
std::char_traits<char>::int_type get_character() override
{
return sb.sbumpc();
}
private:
void fill_buffer()
void unget_character() override
{
// fill
is.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));
// store number of bytes in the buffer
fill_size = static_cast<size_t>(is.gcount());
sb.sungetc(); // Avoided for performance: is.unget();
}
private:
/// the associated input stream
std::istream& is;
/// chars returned via get_character()
std::size_t processed_chars = 0;
/// chars processed in the current buffer
std::size_t buffer_pos = 0;
/// whether stream reached eof
bool eof = false;
/// how many chars have been copied to the buffer by last (re)fill
std::size_t fill_size = 0;
/// position of the stream when we started
const std::streampos start_position;
/// internal buffer
std::array<char, BufferSize> buffer{{}};
std::streambuf &sb;
};
/// input adapter for buffer input
......@@ -1531,21 +1505,22 @@ class input_buffer_adapter : public input_adapter_protocol
input_buffer_adapter(const input_buffer_adapter&) = delete;
input_buffer_adapter& operator=(input_buffer_adapter&) = delete;
int get_character() noexcept override
std::char_traits<char>::int_type get_character() noexcept override
{
if (JSON_LIKELY(cursor < limit))
{
return *(cursor++) & 0xFF;
return std::char_traits<char>::to_int_type(*(cursor++));
}
return std::char_traits<char>::eof();
}
std::string read(std::size_t offset, std::size_t length) override
void unget_character() noexcept override
{
if (JSON_LIKELY(cursor > start))
{
// avoid reading too many characters
const auto max_length = static_cast<size_t>(limit - start);
return std::string(start + offset, (std::min)(length, max_length - offset));
--cursor;
}
}
private:
......@@ -1564,11 +1539,11 @@ class input_adapter
/// input adapter for input stream
input_adapter(std::istream& i)
: ia(std::make_shared<cached_input_stream_adapter<16384>>(i)) {}
: ia(std::make_shared<input_stream_adapter>(i)) {}
/// input adapter for input stream
input_adapter(std::istream&& i)
: ia(std::make_shared<cached_input_stream_adapter<16384>>(i)) {}
: ia(std::make_shared<input_stream_adapter>(i)) {}
/// input adapter for buffer
template<typename CharT,
......@@ -1845,9 +1820,9 @@ class lexer
@brief scan a string literal
This function scans a string according to Sect. 7 of RFC 7159. While
scanning, bytes are escaped and copied into buffer yytext. Then the
function returns successfully, yytext is null-terminated and yylen
contains the number of bytes in the string.
scanning, bytes are escaped and copied into buffer yytext. When the function
returns successfully, yytext is *not* null-terminated (as it may contain \0
bytes), and yytext.size() is the number of bytes in the string.
@return token_type::value_string if string could be successfully scanned,
token_type::parse_error otherwise
......@@ -1878,9 +1853,6 @@ class lexer
// closing quote
case '\"':
{
// terminate yytext
add('\0');
--yylen;
return token_type::value_string;
}
......@@ -2624,12 +2596,7 @@ scan_number_any2:
scan_number_done:
// unget the character after the number (we only read it to know that
// we are done scanning a number)
--chars_read;
next_unget = true;
// terminate token
add('\0');
--yylen;
unget();
char* endptr = nullptr;
errno = 0;
......@@ -2640,7 +2607,7 @@ scan_number_done:
const auto x = std::strtoull(yytext.data(), &endptr, 10);
// we checked the number format before
assert(endptr == yytext.data() + yylen);
assert(endptr == yytext.data() + yytext.size());
if (errno == 0)
{
......@@ -2656,7 +2623,7 @@ scan_number_done:
const auto x = std::strtoll(yytext.data(), &endptr, 10);
// we checked the number format before
assert(endptr == yytext.data() + yylen);
assert(endptr == yytext.data() + yytext.size());
if (errno == 0)
{
......@@ -2673,7 +2640,7 @@ scan_number_done:
strtof(value_float, yytext.data(), &endptr);
// we checked the number format before
assert(endptr == yytext.data() + yylen);
assert(endptr == yytext.data() + yytext.size());
return token_type::value_float;
}
......@@ -2702,32 +2669,51 @@ scan_number_done:
// input management
/////////////////////
/// reset yytext
/// reset yytext; current character is beginning of token
void reset() noexcept
{
yylen = 0;
start_pos = chars_read - 1;
yytext.clear();
token_string.clear();
token_string.push_back(std::char_traits<char>::to_char_type(current));
}
/// get a character from the input
int get()
/*
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns a
`std::char_traits<char>::eof()` in that case. Stores the scanned characters
for use in error messages.
@return character read from the input
*/
std::char_traits<char>::int_type get()
{
++chars_read;
return next_unget ? (next_unget = false, current)
: (current = ia->get_character());
current = ia->get_character();
if (JSON_LIKELY( current != std::char_traits<char>::eof()))
{
token_string.push_back(std::char_traits<char>::to_char_type(current));
}
return current;
}
/// add a character to yytext
void add(int c)
/// unget current character (return it again on next get)
void unget()
{
// resize yytext if necessary; this condition is deemed unlikely,
// because we start with a 1024-byte buffer
if (JSON_UNLIKELY((yylen + 1 > yytext.capacity())))
--chars_read;
if (JSON_LIKELY(current != std::char_traits<char>::eof()))
{
yytext.resize(2 * yytext.capacity(), '\0');
ia->unget_character();
assert(token_string.size() != 0);
token_string.pop_back();
}
assert(yylen < yytext.size());
yytext[yylen++] = static_cast<char>(c);
}
/// add a character to yytext
void add(int c)
{
yytext.push_back(std::char_traits<char>::to_char_type(c));
}
public:
......@@ -2753,12 +2739,10 @@ scan_number_done:
return value_float;
}
/// return string value
const std::string get_string()
/// return current string value (implicitly resets the token; useful only once)
std::string move_string()
{
// yytext cannot be returned as char*, because it may contain a null
// byte (parsed as "\u0000")
return std::string(yytext.data(), yylen);
return std::move( yytext );
}
/////////////////////
......@@ -2771,22 +2755,16 @@ scan_number_done:
return chars_read;
}
/// return the last read token (for errors only)
/// return the last read token (for errors only). Will never contain EOF
/// (an arbitrary value that is not a valid char value, often -1), because
/// 255 may legitimately occur. May contain NUL, which should be escaped.
std::string get_token_string() const
{
// get the raw byte sequence of the last token
std::string s = ia->read(start_pos, chars_read - start_pos);
// escape control characters
std::string result;
for (auto c : s)
for (auto c : token_string)
{
if (c == '\0' or c == std::char_traits<char>::eof())
{
// ignore EOF
continue;
}
else if ('\x00' <= c and c <= '\x1f')
if ('\x00' <= c and c <= '\x1f')
{
// escape control characters
std::stringstream ss;
......@@ -2883,20 +2861,16 @@ scan_number_done:
detail::input_adapter_t ia = nullptr;
/// the current character
int current = std::char_traits<char>::eof();
/// whether get() should return the last character again
bool next_unget = false;
std::char_traits<char>::int_type current = std::char_traits<char>::eof();
/// the number of characters read
std::size_t chars_read = 0;
/// the start position of the current token
std::size_t start_pos = 0;
/// raw input token string (for error messages)
std::vector<char> token_string { };
/// buffer for variable-length tokens (numbers, strings)
std::vector<char> yytext = std::vector<char>(1024, '\0');
/// current index in yytext
std::size_t yylen = 0;
std::string yytext { };
/// a description of occurred lexer errors
const char* error_message = "";
......@@ -3034,12 +3008,20 @@ class parser
{
case token_type::begin_object:
{
if (keep and (not callback or ((keep = callback(depth++, parse_event_t::object_start, result)))))
if (keep)
{
if (callback)
{
keep = callback(depth++, parse_event_t::object_start, result);
}
if (not callback or keep)
{
// explicitly set result to object to cope with {}
result.m_type = value_t::object;
result.m_value = value_t::object;
}
}
// read next token
get_token();
......@@ -3065,7 +3047,7 @@ class parser
{
return;
}
key = m_lexer.get_string();
key = m_lexer.move_string();
bool keep_tag = false;
if (keep)
......@@ -3130,12 +3112,20 @@ class parser
case token_type::begin_array:
{
if (keep and (not callback or ((keep = callback(depth++, parse_event_t::array_start, result)))))
if (keep)
{
if (callback)
{
keep = callback(depth++, parse_event_t::array_start, result);
}
if (not callback or keep)
{
// explicitly set result to object to cope with []
// explicitly set result to array to cope with []
result.m_type = value_t::array;
result.m_value = value_t::array;
}
}
// read next token
get_token();
......@@ -3203,7 +3193,7 @@ class parser
case token_type::value_string:
{
result.m_type = value_t::string;
result.m_value = m_lexer.get_string();
result.m_value = m_lexer.move_string();
break;
}
......@@ -5205,7 +5195,7 @@ class binary_reader
@brief get next character from the input
This function provides the interface to the used input adapter. It does
not throw in case the input reached EOF, but returns
not throw in case the input reached EOF, but returns a negative-valued
`std::char_traits<char>::eof()` in that case.
@return character read from the input
......@@ -5276,7 +5266,7 @@ class binary_reader
{
get();
check_eof();
return current;
return static_cast<char>(current);
});
return result;
}
......
......@@ -66,6 +66,22 @@ set_target_properties(catch_main PROPERTIES
)
target_include_directories(catch_main PRIVATE "thirdparty/catch")
# MSVC only: raise the warning level to /W4 and silence a few specific warnings.
# Approach taken from
# https://stackoverflow.com/questions/2368811/how-to-set-warning-level-in-cmake
if(MSVC)
# Force to always compile with W4: if a /W<n> flag is already present in
# CMAKE_CXX_FLAGS, rewrite it to /W4 in place; otherwise append /W4.
if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
endif()
# Warnings disabled via the /wd flags below:
# Disable warning C4389: '==': signed/unsigned mismatch
# Disable warning C4309: 'static_cast': truncation of constant value
# Disable warning C4566: character represented by universal-character-name '\uFF01' cannot be represented in the current code page (1252)
# Disable warning C4996: 'nlohmann::basic_json<std::map,std::vector,std::string,bool,int64_t,uint64_t,double,std::allocator,nlohmann::adl_serializer>::operator <<': was declared deprecated
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4389 /wd4309 /wd4566 /wd4996")
endif()
#############################################################################
# one executable for each unit test file
#############################################################################
......
......@@ -215,7 +215,7 @@ TEST_CASE("parser class")
std::string s = "\"1\"";
s[1] = '\0';
CHECK_THROWS_AS(json::parse(s.begin(), s.end()), json::parse_error&);
CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"'");
CHECK_THROWS_WITH(json::parse(s.begin(), s.end()), "[json.exception.parse_error.101] parse error at 2: syntax error - invalid string: control character must be escaped; last read: '\"<U+0000>'");
}
}
......
......@@ -247,7 +247,7 @@ TEST_CASE("constructors")
SECTION("std::pair")
{
std::pair<float, std::string> p{1.0, "string"};
std::pair<float, std::string> p{1.0f, "string"};
json j(p);
CHECK(j.type() == json::value_t::array);
......
......@@ -38,6 +38,11 @@ using nlohmann::json;
#include <unordered_set>
#include <iostream>
#if defined(_MSC_VER)
#pragma warning (push)
#pragma warning (disable : 4189) // local variable is initialized but not referenced
#endif
TEST_CASE("README", "[hide]")
{
{
......@@ -298,3 +303,7 @@ TEST_CASE("README", "[hide]")
std::cout.rdbuf(old_cout_buffer);
}
}
#if defined(_MSC_VER)
#pragma warning (pop)
#endif
......@@ -1233,4 +1233,24 @@ TEST_CASE("regression tests")
"[json.exception.type_error.302] type must be array, but is null");
}
}
// Regression test for issue #367: extracting with operator>> must consume
// exactly one JSON value per call and leave the rest of the stream readable.
SECTION("issue #367 - Behavior of operator>> should more closely resemble that of built-in overloads.")
{
SECTION("example 1")
{
// three JSON values (two objects and a number) back to back in one stream,
// with no separator between them
std::istringstream i1_2_3( "{\"first\": \"one\" }{\"second\": \"two\"}3" );
json j1, j2, j3;
// each extraction is expected to read only the next value
i1_2_3 >> j1;
i1_2_3 >> j2;
i1_2_3 >> j3;
// convert to standard types so the comparisons below use plain containers
std::map<std::string,std::string> m1 = j1;
std::map<std::string,std::string> m2 = j2;
int i3 = j3;
CHECK( m1 == ( std::map<std::string,std::string> {{ "first", "one" }} ));
CHECK( m2 == ( std::map<std::string,std::string> {{ "second", "two" }} ));
CHECK( i3 == 3 );
}
}
}
......@@ -201,7 +201,7 @@ void from_json(const BasicJsonType& j, country& c)
{
{u8"中华人民共和国", country::china},
{"France", country::france},
{"Российская Федерация", country::russia}
{u8"Российская Федерация", country::russia}
};
const auto it = m.find(str);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment