Commit ebcef6dc authored by Wez Furlong's avatar Wez Furlong Committed by facebook-github-bot-0

folly: add bser encode/decode for dynamic

Summary:
To support consuming Watchman from within fbcode and hhvm
in particular, these functions add a BSER serialization for the
folly::dynamic data type.

Reviewed By: bhamiltoncx

Differential Revision: D2876539

fb-gh-sync-id: bc49d6bc453cc66cebda7185a5907a6f70970b24
parent 36c00bd2
......@@ -89,6 +89,7 @@ nobase_follyinclude_HEADERS = \
experimental/EliasFanoCoding.h \
experimental/EventCount.h \
experimental/Instructions.h \
experimental/bser/Bser.h \
experimental/fibers/AddTasks.h \
experimental/fibers/AddTasks-inl.h \
experimental/fibers/Baton.h \
......@@ -395,6 +396,8 @@ libfolly_la_SOURCES = \
TimeoutQueue.cpp \
Uri.cpp \
Version.cpp \
experimental/bser/Dump.cpp \
experimental/bser/Load.cpp \
experimental/fibers/Baton.cpp \
experimental/fibers/Fiber.cpp \
experimental/fibers/FiberManager.cpp \
......
/*
* Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <folly/Optional.h>
#include <folly/dynamic.h>
#include <folly/io/IOBuf.h>
#include <folly/io/IOBufQueue.h>
#include <unordered_map>
/* This is an implementation of the BSER binary serialization scheme.
* BSER was created as a binary, local-system-only representation of
* JSON values. It is more space efficient in its output text than JSON,
* and cheaper to decode.
* It has no requirement that string values be UTF-8.
* BSER was created for use with Watchman.
* https://facebook.github.io/watchman/docs/bser.html
*/
namespace folly {
namespace bser {
class BserDecodeError : public std::runtime_error {
public:
using std::runtime_error::runtime_error;
};
enum class BserType : int8_t {
Array = 0,
Object,
String,
Int8,
Int16,
Int32,
Int64,
Real,
True,
False,
Null,
Template,
Skip,
};
extern const uint8_t kMagic[2];
struct serialization_opts {
serialization_opts();
// Whether to sort keys of object values before serializing them.
// Note that this is potentially slow and that it does not apply
// to templated arrays defined via defineTemplate; its keys are always
// emitted in the order defined by the template.
bool sort_keys;
// incremental growth size for the underlying Appender when allocating
// storage for the encoded output
size_t growth_increment;
// BSER allows generating a more space efficient representation of a list of
// object values. These are stored as an "object template" listing the keys
// of the objects ahead of the objects themselves. The objects are then
// serialized without repeating the key string for each element.
//
// You may use the templates field to associate a template with an
// array. You should construct this map after all mutations have been
// performed on the dynamic instance that you intend to serialize as bser,
// as it captures the address of the dynamic to match at encoding time.
// https://facebook.github.io/watchman/docs/bser.html#array-of-templated-objects
using TemplateMap = std::unordered_map<const folly::dynamic*, folly::dynamic>;
folly::Optional<TemplateMap> templates;
};
// parse a BSER value from a variety of sources.
// The complete BSER data must be present to succeed.
folly::dynamic parseBser(folly::StringPiece);
folly::dynamic parseBser(folly::ByteRange);
folly::dynamic parseBser(const folly::IOBuf*);
// When reading incrementally, it is useful to know how much data to
// read to fully decode a BSER pdu.
// Throws std::out_of_range if more data needs to be read to decode
// the header, or throws a runtime_error if the header is invalid
size_t decodePduLength(const folly::IOBuf*);
folly::fbstring toBser(folly::dynamic const&, const serialization_opts&);
std::unique_ptr<folly::IOBuf> toBserIOBuf(folly::dynamic const&,
const serialization_opts&);
}
}
/* vim:ts=2:sw=2:et:
*/
/*
* Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Bser.h"
#include <folly/io/Cursor.h>
using namespace folly;
using folly::io::QueueAppender;
using folly::bser::serialization_opts;
namespace folly {
namespace bser {
const uint8_t kMagic[2] = {0, 1};
static void bserEncode(dynamic const& dyn,
QueueAppender& appender,
const serialization_opts& opts);
serialization_opts::serialization_opts()
: sort_keys(false), growth_increment(8192) {}
static const dynamic* getTemplate(const serialization_opts& opts,
dynamic const& dynArray) {
if (!opts.templates.hasValue()) {
return nullptr;
}
const auto& templates = opts.templates.value();
const auto it = templates.find(&dynArray);
if (it == templates.end()) {
return nullptr;
}
return &it->second;
}
static void bserEncodeInt(int64_t ival, QueueAppender& appender) {
/* Return the smallest size int that can store the value */
auto size =
((ival == ((int8_t)ival)) ? 1 : (ival == ((int16_t)ival))
? 2
: (ival == ((int32_t)ival)) ? 4 : 8);
switch (size) {
case 1:
appender.write((int8_t)BserType::Int8);
appender.write(int8_t(ival));
return;
case 2:
appender.write((int8_t)BserType::Int16);
appender.write(int16_t(ival));
return;
case 4:
appender.write((int8_t)BserType::Int32);
appender.write(int32_t(ival));
return;
case 8:
appender.write((int8_t)BserType::Int64);
appender.write(ival);
return;
default:
throw std::runtime_error("impossible integer size");
}
}
static void bserEncodeString(folly::StringPiece str, QueueAppender& appender) {
appender.write((int8_t)BserType::String);
bserEncodeInt(str.size(), appender);
appender.push((uint8_t*)str.data(), str.size());
}
static void bserEncodeArraySimple(dynamic const& dyn,
QueueAppender& appender,
const serialization_opts& opts) {
appender.write((int8_t)BserType::Array);
bserEncodeInt(dyn.size(), appender);
for (const auto& ele : dyn) {
bserEncode(ele, appender, opts);
}
}
static void bserEncodeArray(dynamic const& dyn,
QueueAppender& appender,
const serialization_opts& opts) {
auto templ = getTemplate(opts, dyn);
if (UNLIKELY(templ != nullptr)) {
appender.write((int8_t)BserType::Template);
// Emit the list of property names
bserEncodeArraySimple(*templ, appender, opts);
// The number of objects in the array
bserEncodeInt(dyn.size(), appender);
// For each object in the array
for (const auto& ele : dyn) {
// For each key in the template
for (const auto& name : *templ) {
if (auto found = ele.get_ptr(name)) {
if (found->isNull()) {
// Prefer to Skip rather than encode a null value for
// compatibility with the other bser implementations
appender.write((int8_t)BserType::Skip);
} else {
bserEncode(*found, appender, opts);
}
} else {
appender.write((int8_t)BserType::Skip);
}
}
}
return;
}
bserEncodeArraySimple(dyn, appender, opts);
}
static void bserEncodeObject(dynamic const& dyn,
QueueAppender& appender,
const serialization_opts& opts) {
appender.write((int8_t)BserType::Object);
bserEncodeInt(dyn.size(), appender);
if (opts.sort_keys) {
std::vector<std::pair<dynamic, dynamic>> sorted(dyn.items().begin(),
dyn.items().end());
std::sort(sorted.begin(), sorted.end());
for (const auto& item : sorted) {
bserEncode(item.first, appender, opts);
bserEncode(item.second, appender, opts);
}
} else {
for (const auto& item : dyn.items()) {
bserEncode(item.first, appender, opts);
bserEncode(item.second, appender, opts);
}
}
}
static void bserEncode(dynamic const& dyn,
QueueAppender& appender,
const serialization_opts& opts) {
switch (dyn.type()) {
case dynamic::Type::NULLT:
appender.write((int8_t)BserType::Null);
return;
case dynamic::Type::BOOL:
appender.write(
(int8_t)(dyn.getBool() ? BserType::True : BserType::False));
return;
case dynamic::Type::DOUBLE: {
double dval = dyn.getDouble();
appender.write((int8_t)BserType::Real);
appender.write(dval);
return;
}
case dynamic::Type::INT64:
bserEncodeInt(dyn.getInt(), appender);
return;
case dynamic::Type::OBJECT:
bserEncodeObject(dyn, appender, opts);
return;
case dynamic::Type::ARRAY:
bserEncodeArray(dyn, appender, opts);
return;
case dynamic::Type::STRING:
bserEncodeString(dyn.getString(), appender);
return;
}
}
std::unique_ptr<folly::IOBuf> toBserIOBuf(folly::dynamic const& dyn,
const serialization_opts& opts) {
IOBufQueue q(IOBufQueue::cacheChainLength());
uint8_t hdrbuf[sizeof(kMagic) + 1 + sizeof(int64_t)];
// Reserve some headroom for the overall PDU size; we'll fill this in
// after we've serialized the data and know the length
auto firstbuf = IOBuf::create(opts.growth_increment);
firstbuf->advance(sizeof(hdrbuf));
q.append(std::move(firstbuf));
// encode the value
QueueAppender appender(&q, opts.growth_increment);
bserEncode(dyn, appender, opts);
// compute the length
auto len = q.chainLength();
if (len > std::numeric_limits<int64_t>::max()) {
throw std::range_error(folly::to<std::string>(
"serialized data size ", len, " is too large to represent as BSER"));
}
// This is a bit verbose, but it computes a header that is appropriate
// to the size of the serialized data
memcpy(hdrbuf, kMagic, sizeof(kMagic));
size_t hdrlen = sizeof(kMagic) + 1;
auto magicptr = hdrbuf + sizeof(kMagic);
auto lenptr = hdrbuf + hdrlen;
if (len > std::numeric_limits<int32_t>::max()) {
*magicptr = (int8_t)BserType::Int64;
*(int64_t*)lenptr = (int64_t)len;
hdrlen += sizeof(int64_t);
} else if (len > std::numeric_limits<int16_t>::max()) {
*magicptr = (int8_t)BserType::Int32;
*(int32_t*)lenptr = (int32_t)len;
hdrlen += sizeof(int32_t);
} else if (len > std::numeric_limits<int8_t>::max()) {
*magicptr = (int8_t)BserType::Int16;
*(int16_t*)lenptr = (int16_t)len;
hdrlen += sizeof(int16_t);
} else {
*magicptr = (int8_t)BserType::Int8;
*(int8_t*)lenptr = (int8_t)len;
hdrlen += sizeof(int8_t);
}
// and place the data in the headroom
q.prepend(hdrbuf, hdrlen);
return q.move();
}
fbstring toBser(dynamic const& dyn, const serialization_opts& opts) {
auto buf = toBserIOBuf(dyn, opts);
return buf->moveToFbString();
}
}
}
/* vim:ts=2:sw=2:et:
*/
/*
* Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Bser.h"
#include <folly/io/Cursor.h>
#include <folly/String.h>
using namespace folly;
using folly::io::Cursor;
namespace folly {
namespace bser {
static dynamic parseBser(Cursor& curs);
template <typename... ARGS>
static FOLLY_NORETURN void throwDecodeError(Cursor& curs, ARGS&&... args) {
throw BserDecodeError(folly::to<std::string>(std::forward<ARGS>(args)...,
" with ",
curs.length(),
" bytes remaining in cursor"));
}
static int64_t decodeInt(Cursor& curs) {
auto enc = (BserType)curs.read<int8_t>();
switch (enc) {
case BserType::Int8:
return curs.read<int8_t>();
case BserType::Int16:
return curs.read<int16_t>();
case BserType::Int32:
return curs.read<int32_t>();
case BserType::Int64:
return curs.read<int64_t>();
default:
throwDecodeError(
curs, "invalid integer encoding detected (", (int8_t)enc, ")");
}
}
static fbstring decodeString(Cursor& curs) {
auto len = decodeInt(curs);
folly::fbstring str;
if (len < 0) {
throw std::range_error("string length must not be negative");
}
str.reserve(len);
size_t available = curs.length();
while (available < (size_t)len) {
if (available == 0) {
// Saw this case when we decodeHeader was returning the incorrect length
// and we were splitting off too few bytes from the IOBufQueue
throwDecodeError(curs,
"no data available while decoding a string, header was "
"not decoded properly");
}
str.append(reinterpret_cast<const char*>(curs.data()), available);
curs.skipAtMost(available);
len -= available;
available = curs.length();
}
str.append(reinterpret_cast<const char*>(curs.data()), len);
curs.skipAtMost(len);
return str;
}
static dynamic decodeArray(Cursor& curs) {
dynamic arr{};
auto size = decodeInt(curs);
while (size-- > 0) {
arr.push_back(parseBser(curs));
}
return arr;
}
static dynamic decodeObject(Cursor& curs) {
dynamic obj = dynamic::object;
auto size = decodeInt(curs);
while (size-- > 0) {
if ((BserType)curs.read<int8_t>() != BserType::String) {
throwDecodeError(curs, "expected String");
}
auto key = decodeString(curs);
obj[key] = parseBser(curs);
}
return obj;
}
static dynamic decodeTemplate(Cursor& curs) {
std::vector<dynamic> arr;
// List of property names
if ((BserType)curs.read<int8_t>() != BserType::Array) {
throw std::runtime_error("Expected array encoding for property names");
}
auto names = decodeArray(curs);
auto size = decodeInt(curs);
arr.reserve(size);
while (size-- > 0) {
dynamic obj = dynamic::object;
for (auto& name : names) {
auto pair = curs.peek();
if ((BserType)pair.first[0] == BserType::Skip) {
obj[name.getString()] = nullptr;
curs.skipAtMost(1);
continue;
}
obj[name.getString()] = parseBser(curs);
}
arr.emplace_back(std::move(obj));
}
return dynamic(std::move(arr));
}
static dynamic parseBser(Cursor& curs) {
switch ((BserType)curs.read<int8_t>()) {
case BserType::Int8:
return curs.read<int8_t>();
case BserType::Int16:
return curs.read<int16_t>();
case BserType::Int32:
return curs.read<int32_t>();
case BserType::Int64:
return curs.read<int64_t>();
case BserType::Real: {
double dval;
curs.pull((void*)&dval, sizeof(dval));
return dval;
}
case BserType::Null:
return nullptr;
case BserType::True:
return (bool)true;
case BserType::False:
return (bool)false;
case BserType::String:
return decodeString(curs);
case BserType::Array:
return decodeArray(curs);
case BserType::Object:
return decodeObject(curs);
case BserType::Template:
return decodeTemplate(curs);
case BserType::Skip:
throw std::runtime_error(
"Skip not valid at this location in the bser stream");
default:
throw std::runtime_error("invalid bser encoding");
}
}
static size_t decodeHeader(Cursor& curs) {
char header[sizeof(kMagic)];
curs.pull(header, sizeof(header));
if (memcmp(header, kMagic, sizeof(kMagic))) {
throw std::runtime_error("invalid BSER magic header");
}
auto enc = (BserType)curs.peek().first[0];
size_t int_size;
switch (enc) {
case BserType::Int8:
int_size = 1;
break;
case BserType::Int16:
int_size = 2;
break;
case BserType::Int32:
int_size = 4;
break;
case BserType::Int64:
int_size = 8;
break;
default:
int_size = 0;
}
return int_size + 3 /* magic + int type */ + decodeInt(curs);
}
size_t decodePduLength(const folly::IOBuf* buf) {
Cursor curs(buf);
return decodeHeader(curs);
}
folly::dynamic parseBser(const IOBuf* buf) {
Cursor curs(buf);
decodeHeader(curs);
return parseBser(curs);
}
folly::dynamic parseBser(ByteRange str) {
auto buf = IOBuf::wrapBuffer(str.data(), str.size());
return parseBser(&*buf);
}
folly::dynamic parseBser(StringPiece str) {
return parseBser(ByteRange((uint8_t*)str.data(), str.size()));
}
}
}
/* vim:ts=2:sw=2:et:
*/
/*
* Copyright 2016 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/experimental/bser/Bser.h>
#include <folly/String.h>
#include <gtest/gtest.h>
using folly::dynamic;
static const dynamic roundtrips[] = {
1,
std::numeric_limits<int8_t>::max(),
std::numeric_limits<int16_t>::max(),
std::numeric_limits<int32_t>::max(),
std::numeric_limits<int64_t>::max(),
std::numeric_limits<int8_t>::min(),
std::numeric_limits<int16_t>::min(),
std::numeric_limits<int32_t>::min(),
std::numeric_limits<int64_t>::min(),
bool(true),
bool(false),
nullptr,
1.5,
"hello",
{1, 2, 3},
dynamic::object("key", "value")("otherkey", "otherval"),
};
// Here's a blob from the watchman test suite
const uint8_t template_blob[] =
"\x00\x01\x03\x28"
"\x0b\x00\x03\x02\x02\x03\x04\x6e\x61\x6d\x65\x02"
"\x03\x03\x61\x67\x65\x03\x03\x02\x03\x04\x66\x72"
"\x65\x64\x03\x14\x02\x03\x04\x70\x65\x74\x65\x03"
"\x1e\x0c\x03\x19";
// and here's what it represents
static const dynamic template_dynamic = {
dynamic::object("name", "fred")("age", 20),
dynamic::object("name", "pete")("age", 30),
dynamic::object("name", nullptr)("age", 25),
};
TEST(Bser, RoundTrip) {
dynamic decoded(nullptr);
folly::fbstring str;
for (const auto& dyn : roundtrips) {
try {
str = folly::bser::toBser(dyn, folly::bser::serialization_opts());
decoded = folly::bser::parseBser(str);
EXPECT_EQ(decoded, dyn);
} catch (const std::exception& err) {
LOG(ERROR) << err.what() << "\nInput: " << dyn.typeName() << ": " << dyn
<< " decoded back as " << decoded.typeName() << ": " << decoded
<< "\n" << folly::hexDump(str.data(), str.size());
throw;
}
}
}
TEST(Bser, Template) {
dynamic decoded(nullptr);
folly::fbstring str;
// Decode the template value provided from elsewhere
decoded = folly::bser::parseBser(
folly::ByteRange(template_blob, sizeof(template_blob) - 1));
EXPECT_EQ(decoded, template_dynamic)
<< "Didn't load template value."
"\nInput: " << template_dynamic.typeName() << ": " << template_dynamic
<< " decoded back as " << decoded.typeName() << ": " << decoded << "\n"
<< folly::hexDump(template_blob, sizeof(template_blob) - 1);
// Now check that we can generate this same data representation
folly::bser::serialization_opts opts;
folly::bser::serialization_opts::TemplateMap templates = {
std::make_pair(&decoded, folly::dynamic{"name", "age"})};
opts.templates = templates;
str = folly::bser::toBser(decoded, opts);
EXPECT_EQ(folly::ByteRange((const uint8_t*)str.data(), str.size()),
folly::ByteRange(template_blob, sizeof(template_blob) - 1))
<< "Expected:\n"
<< folly::hexDump(template_blob, sizeof(template_blob) - 1) << "\nGot:\n"
<< folly::hexDump(str.data(), str.size());
}
TEST(Bser, PduLength) {
EXPECT_THROW([] {
// Try to decode PDU for a short buffer that doesn't even have the
// complete length available
auto buf = folly::IOBuf::wrapBuffer(template_blob, 3);
auto len = folly::bser::decodePduLength(&*buf);
LOG(ERROR) << "managed to return a length, but only had 3 bytes";
}(), std::out_of_range);
auto buf = folly::IOBuf::wrapBuffer(template_blob, sizeof(template_blob));
auto len = folly::bser::decodePduLength(&*buf);
EXPECT_EQ(len, 44) << "PduLength should be 44, got " << len;
}
/* vim:ts=2:sw=2:et:
*/
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment