Commit 0558d398 authored by Tudor Bosman's avatar Tudor Bosman Committed by Sara Golemon

Varint in folly

Test Plan: test added

Reviewed By: alandau@fb.com

FB internal diff: D928835
parent c55ca702
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_VARINT_H_
#define FOLLY_VARINT_H_
#include "folly/Range.h"
namespace folly {
/**
* Variable-length integer encoding, using a little-endian, base-128
* representation.
*
* The MSb is set on all bytes except the last.
*
* Details:
* https://developers.google.com/protocol-buffers/docs/encoding#varints
*
* If you want to encode multiple values, GroupVarint (in GroupVarint.h)
* is faster and likely smaller.
*/
/**
* Maximum length (in bytes) of the varint encoding of a 32-bit value.
*/
constexpr size_t kMaxVarintLength32 = 5;
/**
* Maximum length (in bytes) of the varint encoding of a 64-bit value.
*/
constexpr size_t kMaxVarintLength64 = 10;
/**
* Encode a value in the given buffer, returning the number of bytes used
* for encoding.
* buf must have enough space to represent the value (at least
* kMaxVarintLength64 bytes to encode arbitrary 64-bit values)
*/
size_t encodeVarint(uint64_t val, uint8_t* buf);
/**
* Decode a value from a given buffer, advances data past the returned value.
*/
uint64_t decodeVarint(ByteRange& data);
/**
* ZigZag encoding that maps signed integers with a small absolute value
* to unsigned integers with a small (positive) values. Without this,
* encoding negative values using Varint would use up 9 or 10 bytes.
*
* if x >= 0, encodeZigZag(x) == 2*x
* if x < 0, encodeZigZag(x) == -2*x + 1
*/
inline uint64_t encodeZigZag(int64_t val) {
// Bit-twiddling magic stolen from the Google protocol buffer document;
// val >> 63 is an arithmetic shift because val is signed
return static_cast<uint64_t>((val << 1) ^ (val >> 63));
}
inline int64_t decodeZigZag(uint64_t val) {
return static_cast<int64_t>((val >> 1) ^ -(val & 1));
}
// Implementation below
inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
uint8_t* p = buf;
while (val >= 128) {
*p++ = 0x80 | (val & 0x7f);
val >>= 7;
}
*p++ = val;
return p - buf;
}
inline uint64_t decodeVarint(ByteRange& data) {
const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
const int8_t* p = begin;
uint64_t val = 0;
if (LIKELY(end - begin >= kMaxVarintLength64)) { // fast path
int64_t b;
do {
b = *p++; val = (b & 0x7f) ; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 7; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 14; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 21; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 28; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 35; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 42; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 49; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 56; if (b >= 0) break;
b = *p++; val |= (b & 0x7f) << 63; if (b >= 0) break;
throw std::invalid_argument("Invalid varint value"); // too big
} while (false);
} else {
int shift = 0;
while (p != end && *p < 0) {
val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
shift += 7;
}
if (p == end) throw std::invalid_argument("Invalid varint value");
val |= static_cast<uint64_t>(*p++) << shift;
}
data.advance(p - begin);
return val;
}
} // namespaces
#endif /* FOLLY_VARINT_H_ */
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "folly/Varint.h"
#include <array>
#include <initializer_list>
#include <random>
#include <vector>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "folly/Benchmark.h"
#include "folly/Random.h"
DEFINE_int32(random_seed, folly::randomNumberSeed(), "random seed");
namespace folly { namespace test {
void testVarint(uint64_t val, std::initializer_list<uint8_t> bytes) {
size_t n = bytes.size();
ByteRange expected(&*bytes.begin(), n);
{
uint8_t buf[kMaxVarintLength64];
EXPECT_EQ(expected.size(), encodeVarint(val, buf));
EXPECT_TRUE(ByteRange(buf, expected.size()) == expected);
}
{
ByteRange r = expected;
uint64_t decoded = decodeVarint(r);
EXPECT_TRUE(r.empty());
EXPECT_EQ(val, decoded);
}
if (n < kMaxVarintLength64) {
// Try from a full buffer too, different code path
uint8_t buf[kMaxVarintLength64];
memcpy(buf, &*bytes.begin(), n);
uint8_t fills[] = {0, 0x7f, 0x80, 0xff};
for (uint8_t fill : fills) {
memset(buf + n, fill, kMaxVarintLength64 - n);
ByteRange r(buf, kMaxVarintLength64);
uint64_t decoded = decodeVarint(r);
EXPECT_EQ(val, decoded);
EXPECT_EQ(kMaxVarintLength64 - n, r.size());
}
}
}
TEST(Varint, Simple) {
testVarint(0, {0});
testVarint(1, {1});
testVarint(127, {127});
testVarint(128, {0x80, 0x01});
testVarint(300, {0xac, 0x02});
testVarint(16383, {0xff, 0x7f});
testVarint(16384, {0x80, 0x80, 0x01});
testVarint(static_cast<uint32_t>(-1),
{0xff, 0xff, 0xff, 0xff, 0x0f});
testVarint(static_cast<uint64_t>(-1),
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01});
}
TEST(ZigZag, Simple) {
EXPECT_EQ(0, encodeZigZag(0));
EXPECT_EQ(1, encodeZigZag(-1));
EXPECT_EQ(2, encodeZigZag(1));
EXPECT_EQ(3, encodeZigZag(-2));
EXPECT_EQ(4, encodeZigZag(2));
EXPECT_EQ(0, decodeZigZag(0));
EXPECT_EQ(-1, decodeZigZag(1));
EXPECT_EQ(1, decodeZigZag(2));
EXPECT_EQ(-2, decodeZigZag(3));
EXPECT_EQ(2, decodeZigZag(4));
}
namespace {
constexpr size_t kNumValues = 1000;
std::vector<uint64_t> gValues;
std::vector<uint64_t> gDecodedValues;
std::vector<uint8_t> gEncoded;
void generateRandomValues() {
LOG(INFO) << "Random seed is " << FLAGS_random_seed;
std::mt19937 rng(FLAGS_random_seed);
// Approximation of power law
std::uniform_int_distribution<int> numBytes(1, 8);
std::uniform_int_distribution<int> byte(0, 255);
gValues.resize(kNumValues);
gDecodedValues.resize(kNumValues);
gEncoded.resize(kNumValues * kMaxVarintLength64);
for (size_t i = 0; i < kNumValues; ++i) {
int n = numBytes(rng);
uint64_t val = 0;
for (size_t j = 0; j < n; ++j) {
val = (val << 8) + byte(rng);
}
gValues[i] = val;
}
}
// Benchmark results (Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz, Linux x86_64)
//
// I0814 19:13:14.466256 7504 VarintTest.cpp:146] Random seed is -1216518886
// ============================================================================
// folly/test/VarintTest.cpp relative time/iter iters/s
// ============================================================================
// VarintEncoding 6.69us 149.37K
// VarintDecoding 6.85us 145.90K
// ============================================================================
//
// Disabling the "fast path" code in decodeVarint hurts performance:
//
// I0814 19:15:13.871467 9550 VarintTest.cpp:156] Random seed is -1216518886
// ============================================================================
// folly/test/VarintTest.cpp relative time/iter iters/s
// ============================================================================
// VarintEncoding 6.75us 148.26K
// VarintDecoding 12.60us 79.37K
// ============================================================================
BENCHMARK(VarintEncoding, iters) {
uint8_t* start = &(*gEncoded.begin());
uint8_t* p = start;
bool empty = (iters == 0);
while (iters--) {
p = start;
for (auto& v : gValues) {
p += encodeVarint(v, p);
}
}
gEncoded.erase(gEncoded.begin() + (p - start), gEncoded.end());
}
BENCHMARK(VarintDecoding, iters) {
while (iters--) {
size_t i = 0;
ByteRange range(&(*gEncoded.begin()), &(*gEncoded.end()));
while (!range.empty()) {
gDecodedValues[i++] = decodeVarint(range);
}
}
}
} // namespace
}} // namespaces
int main(int argc, char *argv[]) {
testing::InitGoogleTest(&argc, argv);
google::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
int ret = RUN_ALL_TESTS();
if (ret == 0) {
folly::test::generateRandomValues();
folly::runBenchmarksOnFlag();
}
return ret;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment