Commit 308f7da1, authored by Vignesh Gowda and committed by Sara Golemon

Implement a BitVector-based list

Summary: Implemented a BitVector Encoder and BitVector Reader with the same interface as EliasFanoCoding.h

Reviewed By: @ot, @philippv

Differential Revision: D2198935
parent cd927a9e
......@@ -74,6 +74,7 @@ nobase_follyinclude_HEADERS = \
EvictingCacheMap.h \
experimental/AutoTimer.h \
experimental/Bits.h \
experimental/BitVectorCoding.h \
experimental/ExecutionObserver.h \
experimental/EliasFanoCoding.h \
experimental/EventCount.h \
......
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
#define FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
#include <cstdlib>
#include <limits>
#include <type_traits>
#include <folly/Bits.h>
#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/experimental/Bits.h>
#include <folly/experimental/Instructions.h>
#include <folly/experimental/Select64.h>
#include <glog/logging.h>
#ifndef __GNUC__
#error BitVectorCoding.h requires GCC
#endif
#if !FOLLY_X64
#error BitVectorCoding.h requires x86_64
#endif
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#error BitVectorCoding.h requires little endianness
#endif
namespace folly { namespace compression {
/**
 * Non-owning descriptor of an encoded bit-vector list.
 *
 * `Pointer` is the byte-pointer type used to address the encoded storage
 * (see the const / mutable aliases declared right after this struct).
 * Copies are shallow: they refer to the same underlying bytes.
 */
template <class Pointer>
struct BitVectorCompressedListBase {
  BitVectorCompressedListBase() = default;

  // Converting copy from a descriptor using a different pointer type.
  // Deliberately implicit so a descriptor can be passed where one with
  // another pointer type is expected.
  template <class OtherPointer>
  BitVectorCompressedListBase(
      const BitVectorCompressedListBase<OtherPointer>& other)
      : size(other.size),
        upperBound(other.upperBound),
        data(other.data),
        bits(reinterpret_cast<Pointer>(other.bits)),
        skipPointers(reinterpret_cast<Pointer>(other.skipPointers)),
        forwardPointers(reinterpret_cast<Pointer>(other.forwardPointers)) {}

  // Hands the start of `data` to ::free(). The descriptor itself is left
  // untouched, so it (and any copy of it) must not be used afterwards.
  // NOTE(review): assumes the storage came from malloc() — confirm for
  // lists that were not produced by the encoder's own allocation path.
  void free() {
    auto* storage = const_cast<unsigned char*>(data.data());
    ::free(storage);
  }

  size_t getUpperBound() const { return upperBound; }

  // Number of elements stored in the list.
  size_t size = 0;
  // Largest value the list was laid out for.
  size_t upperBound = 0;

  // Whole encoded buffer.
  folly::Range<Pointer> data;

  // Start of each section of the encoded buffer.
  Pointer bits = nullptr;
  Pointer skipPointers = nullptr;
  Pointer forwardPointers = nullptr;
};
// Read-only descriptor of an encoded list.
using BitVectorCompressedList = BitVectorCompressedListBase<const uint8_t*>;
// Writable descriptor of an encoded list.
using MutableBitVectorCompressedList = BitVectorCompressedListBase<uint8_t*>;
template <class Value,
class SkipValue,
size_t kSkipQuantum = 0,
size_t kForwardQuantum = 0>
struct BitVectorEncoder {
static_assert(std::is_integral<Value>::value &&
std::is_unsigned<Value>::value,
"Value should be unsigned integral");
typedef BitVectorCompressedList CompressedList;
typedef Value ValueType;
typedef SkipValue SkipValueType;
struct Layout;
static constexpr size_t skipQuantum = kSkipQuantum;
static constexpr size_t forwardQuantum = kForwardQuantum;
template <class RandomAccessIterator>
static BitVectorCompressedList encode(RandomAccessIterator begin,
RandomAccessIterator end) {
if (begin == end) {
return BitVectorCompressedList();
}
BitVectorEncoder encoder(end - begin, *(end - 1));
for (; begin != end; ++begin) {
encoder.add(*begin);
}
return encoder.finish();
}
explicit BitVectorEncoder(const MutableBitVectorCompressedList& result)
: bits_(result.bits),
skipPointers_(result.skipPointers),
forwardPointers_(result.forwardPointers),
result_(result) {
memset(result.data.data(), 0, result.data.size());
}
BitVectorEncoder(size_t size, ValueType upperBound)
: BitVectorEncoder(
Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
void add(ValueType value) {
CHECK_GE(value, lastValue_);
auto block = bits_ + (value / 64) * sizeof(uint64_t);
size_t inner = value % 64;
folly::Bits<folly::Unaligned<uint64_t>>::set(
reinterpret_cast<folly::Unaligned<uint64_t>*>(block), inner);
if (skipQuantum != 0) {
size_t nextSkipPointerSize = value / (skipQuantum ?: 1);
while (skipPointersSize_ < nextSkipPointerSize) {
auto pos = skipPointersSize_++;
folly::storeUnaligned<SkipValueType>(
skipPointers_ + pos * sizeof(SkipValueType), size_);
}
}
if (forwardQuantum != 0) {
if ( size_ != 0 && (size_ % (forwardQuantum ?: 1) == 0)) {
const auto pos = size_ / (forwardQuantum ?: 1) - 1;
folly::storeUnaligned<SkipValueType>(
forwardPointers_ + pos * sizeof(SkipValueType), value);
}
}
lastValue_ = value;
++size_;
}
const BitVectorCompressedList& finish() const {
CHECK_EQ(size_, result_.size);
// TODO(ott): Relax this assumption.
CHECK_EQ(result_.getUpperBound(), lastValue_);
return result_;
}
private:
uint8_t* const bits_ = nullptr;
uint8_t* const skipPointers_ = nullptr;
uint8_t* const forwardPointers_ = nullptr;
ValueType lastValue_ = 0;
size_t size_ = 0;
size_t skipPointersSize_ = 0;
BitVectorCompressedList result_;
};
/**
 * Byte layout of an encoded list: the bit vector, followed by the skip
 * pointers, followed by the forward pointers.
 */
template <class Value,
          class SkipValue,
          size_t kSkipQuantum,
          size_t kForwardQuantum>
struct BitVectorEncoder<Value, SkipValue, kSkipQuantum, kForwardQuantum>::
    Layout {
  /**
   * Computes section sizes for a list of `size` elements whose largest value
   * is `upperBound`. Aborts if `size` does not fit in SkipValueType, because
   * skip pointers store element counts in that type.
   */
  static Layout fromUpperBoundAndSize(size_t upperBound, size_t size) {
    Layout layout;
    layout.size = size;
    layout.upperBound = upperBound;

    // One bit per possible value in [0, upperBound].
    size_t bitVectorSizeInBytes = (upperBound / 8) + 1;
    layout.bits = bitVectorSizeInBytes;

    // `?: 1` (GNU extension) keeps the divisor non-zero in instantiations
    // where the branch is dead.
    if (skipQuantum != 0) {
      size_t numSkipPointers = upperBound / (skipQuantum ?: 1);
      layout.skipPointers = numSkipPointers * sizeof(SkipValueType);
    }
    if (forwardQuantum != 0) {
      size_t numForwardPointers = size / (forwardQuantum ?: 1);
      layout.forwardPointers = numForwardPointers * sizeof(SkipValueType);
    }

    CHECK_LT(size, std::numeric_limits<SkipValueType>::max());

    return layout;
  }

  // Total encoded size in bytes (excluding any allocation padding).
  size_t bytes() const { return bits + skipPointers + forwardPointers; }

  /**
   * Builds a list descriptor over the first bytes() bytes of `buf`.
   * `buf` is advanced past the consumed bytes as a side effect.
   */
  template <typename Range>
  BitVectorCompressedListBase<typename Range::iterator> openList(
      Range& buf) const {
    BitVectorCompressedListBase<typename Range::iterator> result;
    result.size = size;
    result.upperBound = upperBound;
    result.data = buf.subpiece(0, bytes());

    // Returns the current start of `buf`, then consumes `n` bytes from it.
    auto advance = [&](size_t n) {
      auto begin = buf.data();
      buf.advance(n);
      return begin;
    };

    result.bits = advance(bits);
    result.skipPointers = advance(skipPointers);
    result.forwardPointers = advance(forwardPointers);
    CHECK_EQ(buf.data() - result.data.data(), bytes());

    return result;
  }

  /**
   * Allocates storage with malloc() and returns a descriptor over it;
   * release it with the descriptor's free().
   *
   * For an empty list (size == 0) no memory is allocated and a
   * default-constructed (empty) descriptor is returned, matching what
   * encode() returns for an empty range. Aborts if allocation fails.
   */
  MutableBitVectorCompressedList allocList() const {
    if (size == 0) {
      // bytes() is never zero (the bit vector always has at least one byte),
      // so opening a list over a null buffer would describe memory that does
      // not exist.
      return MutableBitVectorCompressedList();
    }
    // The encoder and reader access the buffer in unaligned 64-bit words, so
    // up to 7 bytes past the last encoded byte may be touched. They are
    // allocated here but not included in the descriptor's size.
    const size_t allocSize = bytes() + 7;
    uint8_t* buf = static_cast<uint8_t*>(malloc(allocSize));
    CHECK(buf != nullptr) << "Failed to allocate " << allocSize << " bytes";
    folly::MutableByteRange bufRange(buf, bytes());
    return openList(bufRange);
  }

  size_t size = 0;
  size_t upperBound = 0;

  // Sizes in bytes.
  size_t bits = 0;
  size_t skipPointers = 0;
  size_t forwardPointers = 0;
};
// Cursor over a list produced by `Encoder`.
//
// Template parameters:
//   Encoder      - encoder type; supplies ValueType, SkipValueType and the
//                  skip / forward quantums used when the list was written.
//   Instructions - provides the ctz / blsr / popcount primitives used below
//                  (and is forwarded to select64).
//   kUnchecked   - when true, the end-of-list and upper-bound checks are
//                  compiled out; the caller must then never move past the
//                  last element.
//
// The reader does not own the list storage; it only keeps pointers into it.
template <class Encoder,
class Instructions = instructions::Default,
bool kUnchecked = false>
class BitVectorReader {
public:
typedef Encoder EncoderType;
typedef typename Encoder::ValueType ValueType;
typedef typename Encoder::SkipValueType SkipValueType;
// Binds the reader to `list` and places it before the first element.
// The upper bound is recorded as 0 for empty lists and in unchecked mode.
explicit BitVectorReader(const BitVectorCompressedList& list)
: size_(list.size),
bits_(list.bits),
skipPointers_(list.skipPointers),
forwardPointers_(list.forwardPointers) {
reset();
if (kUnchecked || UNLIKELY(list.size == 0)) {
upperBound_ = 0;
return;
}
upperBound_ = list.getUpperBound();
}
// Rewinds to "before the first element": the first 64-bit word is loaded
// (0 if there is no bit vector), position_ becomes size_t(-1) so that
// position() + 1 == 0, and value_ is reset to 0.
void reset() {
block_ = (bits_ != nullptr) ? folly::loadUnaligned<uint64_t>(bits_) : 0;
outer_ = 0;
inner_ = -1;
position_ = -1;
value_ = 0;
}
// Advances to the next element. Returns false (and enters the "done" state,
// see setDone()) when there is no next element; true otherwise.
bool next() {
if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
return setDone();
}
// Skip over words with no remaining set bits.
while (block_ == 0) {
outer_ += sizeof(uint64_t);
block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
}
++position_;
// The lowest set bit is the next element; clear it so it is not returned
// again.
inner_ = Instructions::ctz(block_);
block_ = Instructions::blsr(block_);
return setValue();
}
// Advances by n elements (n must be > 0). Returns false and enters the
// "done" state if that would move past the last element.
bool skip(size_t n) {
CHECK_GT(n, 0);
if (!kUnchecked && position() + n >= size_) {
return setDone();
}
// Small skip optimization.
if (LIKELY(n < kLinearScanThreshold)) {
for (size_t i = 0; i < n; ++i) {
next();
}
return true;
}
position_ += n;
// Use forward pointer.
if (Encoder::forwardQuantum > 0 && n > Encoder::forwardQuantum) {
// Workaround to avoid 'division by zero' compile-time error.
constexpr size_t q = Encoder::forwardQuantum ?: 1;
const size_t steps = position_ / q;
// Forward pointer (steps - 1) holds the value of element steps * q.
const size_t dest = folly::loadUnaligned<SkipValueType>(
forwardPointers_ + (steps - 1) * sizeof(SkipValueType));
reposition(dest);
// Elements still to count, starting with the one at bit `dest`.
n = position_ + 1 - steps * q;
// Correct inner_ will be set at the end.
}
size_t cnt;
// Find necessary block.
while ((cnt = Instructions::popcount(block_)) < n) {
n -= cnt;
outer_ += sizeof(uint64_t);
block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
}
// Skip to the n-th one in the block.
DCHECK_GT(n, 0);
inner_ = select64<Instructions>(block_, n - 1);
// Clear bit inner_ and everything below it; shifting in two steps avoids a
// shift by 64 when inner_ == 63.
block_ &= (uint64_t(-1) << inner_) << 1;
return setValue();
}
// Advances to the first element whose value is >= v. v must not be smaller
// than the current value. Returns true immediately, without moving, when
// v <= value(); returns false and enters the "done" state when v is above
// the upper bound (checked mode only).
// NOTE(review): right after reset() value_ is 0 and position_ is
// size_t(-1), so skipTo(0) returns true without decoding any element —
// confirm callers expect this.
bool skipTo(ValueType v) {
DCHECK_GE(v, value_);
if (v <= value_) {
return true;
} else if (!kUnchecked && v > upperBound_) {
return setDone();
}
// Small skip optimization.
if (v - value_ < kLinearScanThreshold) {
do {
next();
} while (value() < v);
return true;
}
if (Encoder::skipQuantum > 0 && v - value_ > Encoder::skipQuantum) {
size_t q = v / Encoder::skipQuantum;
// Skip pointer (q - 1) holds the number of elements below
// q * skipQuantum; subtracting 1 gives the index of the last of them.
position_ = folly::loadUnaligned<SkipValueType>(
skipPointers_ + (q - 1) * sizeof(SkipValueType)) - 1;
reposition(q * Encoder::skipQuantum);
}
// Find the value.
// Byte offset of the 64-bit word that contains bit v.
size_t outer = v / 64 * 8;
while (outer_ < outer) {
position_ += Instructions::popcount(block_);
outer_ += sizeof(uint64_t);
block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
}
DCHECK_EQ(outer_, outer);
// mask keeps bit (v % 64) and everything above it.
uint64_t mask = ~((uint64_t(1) << (v % 64)) - 1);
// Count the remaining elements below v in this word, plus one for the
// element about to be returned.
position_ += Instructions::popcount(block_ & ~mask) + 1;
block_ &= mask;
while (block_ == 0) {
outer_ += sizeof(uint64_t);
block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
}
inner_ = Instructions::ctz(block_);
block_ = Instructions::blsr(block_);
setValue();
return true;
}
size_t size() const { return size_; }
// Index of the current element; size_t(-1) right after reset() and size()
// once the reader is done.
size_t position() const { return position_; }
// Value of the current element; 0 right after reset() and the maximum
// ValueType once the reader is done.
ValueType value() const { return value_; }
// Rewinds, then skips n elements from the start (so the reader ends up at
// position n - 1). jump(0) only rewinds and returns true.
bool jump(size_t n) {
reset();
if (n > 0) {
return skip(n);
} else {
return true;
}
}
// Rewinds, then behaves like skipTo(v).
bool jumpTo(ValueType v) {
reset();
return skipTo(v);
}
// Enters the "done" state: value() becomes the maximum ValueType and
// position() becomes size(). Always returns false.
bool setDone() {
value_ = std::numeric_limits<ValueType>::max();
position_ = size_;
return false;
}
private:
// Recomputes value_ from the current word (byte offset outer_) and the bit
// index inside it (inner_). Always returns true.
bool setValue() {
value_ = static_cast<ValueType>(8 * outer_ + inner_);
return true;
}
// Moves to the word containing bit `dest` and hides all bits below `dest`
// in that word. position_ is not touched; callers adjust it themselves.
void reposition(size_t dest) {
outer_ = dest / 64 * 8;
// We maintain the invariant that outer_ is divisible by 8.
block_ = folly::loadUnaligned<uint64_t>(bits_ + outer_);
block_ &= ~((uint64_t(1) << (dest % 64)) - 1);
}
// Skips shorter than this are performed with repeated next() calls.
constexpr static size_t kLinearScanThreshold = 4;
// Byte offset of the current 64-bit word inside the bit vector.
size_t outer_;
// Bit index of the current element inside the current word.
size_t inner_;
// Index of the current element.
size_t position_;
// Current word with the already-consumed bits cleared.
uint64_t block_;
ValueType value_ = 0;
size_t size_;
ValueType upperBound_;
const uint8_t* const bits_;
const uint8_t* const skipPointers_;
const uint8_t* const forwardPointers_;
};
}} // namespaces
#endif // FOLLY_EXPERIMENTAL_BIT_VECTOR_CODING_H
......@@ -27,15 +27,15 @@
#include <cstdlib>
#include <limits>
#include <type_traits>
#include <glog/logging.h>
#include <folly/Bits.h>
#include <folly/CpuId.h>
#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/Range.h>
#include <folly/experimental/Instructions.h>
#include <folly/experimental/Select64.h>
#include <glog/logging.h>
#ifndef __GNUC__
#error EliasFanoCoding.h requires GCC
#endif
......
......@@ -17,6 +17,8 @@
#ifndef FOLLY_EXPERIMENTAL_INSTRUCTIONS_H
#define FOLLY_EXPERIMENTAL_INSTRUCTIONS_H
#include <folly/CpuId.h>
namespace folly { namespace compression { namespace instructions {
// NOTE: It's recommended to compile EF coding with -msse4.2, starting
......
/*
* Copyright 2015 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <numeric>
#include <random>
#include <vector>
#include <folly/Benchmark.h>
#include <folly/experimental/BitVectorCoding.h>
#include <folly/experimental/Select64.h>
#include <folly/experimental/test/CodingTestUtils.h>
using namespace folly::compression;
#ifndef BV_TEST_ARCH
#define BV_TEST_ARCH Default
#endif // BV_TEST_ARCH
/**
 * Fixture for the BitVector encoder / reader tests.
 *
 * Every reader is instantiated with instructions::BV_TEST_ARCH so that
 * building with a different BV_TEST_ARCH exercises that instruction set in
 * all tests, not only in the empty-list test.
 */
class BitVectorCodingTest : public ::testing::Test {
 public:
  // Runs the shared empty-list checks.
  void doTestEmpty() {
    typedef BitVectorEncoder<uint32_t, size_t> Encoder;
    typedef BitVectorReader<Encoder, instructions::BV_TEST_ARCH> Reader;
    testEmpty<Reader, Encoder>();
  }

  // Runs the shared checks on a random list and on a sequential list, for
  // the given skip / forward quantum configuration (0 disables a table).
  template <size_t kSkipQuantum, size_t kForwardQuantum>
  void doTestAll() {
    typedef BitVectorEncoder<uint32_t, uint32_t, kSkipQuantum, kForwardQuantum>
        Encoder;
    typedef BitVectorReader<Encoder, instructions::BV_TEST_ARCH> Reader;
    testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
    testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
  }
};
// Empty list.
TEST_F(BitVectorCodingTest, Empty) {
doTestEmpty();
}
// Neither skip pointers nor forward pointers.
TEST_F(BitVectorCodingTest, Simple) {
doTestAll<0, 0>();
}
// Skip pointers only (skip quantum 128).
TEST_F(BitVectorCodingTest, SkipPointers) {
doTestAll<128, 0>();
}
// Forward pointers only (forward quantum 128).
TEST_F(BitVectorCodingTest, ForwardPointers) {
doTestAll<0, 128>();
}
// Both tables enabled, quantum 128 each.
TEST_F(BitVectorCodingTest, SkipForwardPointers) {
doTestAll<128, 128>();
}
namespace bm {
constexpr size_t k1M = 1000000;
typedef BitVectorEncoder<uint32_t, uint32_t, 128, 128> Encoder;
typedef BitVectorReader<Encoder> Reader;
std::vector<uint32_t> data;
std::vector<size_t> order;
std::vector<uint32_t> encodeSmallData;
std::vector<uint32_t> encodeLargeData;
typename Encoder::CompressedList list;
void init() {
std::mt19937 gen;
data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
list = Encoder::encode(data.begin(), data.end());
order.resize(data.size());
std::iota(order.begin(), order.end(), size_t());
std::shuffle(order.begin(), order.end(), gen);
encodeSmallData = generateRandomList(10, 100 * 1000, gen);
encodeLargeData = generateRandomList(1000 * 1000, 100 * 1000 * 1000, gen);
}
void free() { list.free(); }
} // namespace bm
// Sequential iteration over the shared list.
BENCHMARK(Next, iters) { bmNext<bm::Reader>(bm::list, bm::data, iters); }
// Benchmarks skip() on the shared list (forward quantum 128).
// NOTE(review): judging by the parameter names below (e.g. 4_pm_1 -> 2,
// 1024_pm_256 -> 10), logAvgSkip is presumably log2 of the average skip
// length — confirm against bmSkip in CodingTestUtils.h.
size_t Skip_ForwardQ128(size_t iters, size_t logAvgSkip) {
bmSkip<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
return iters;
}
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1, 0)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 2, 1)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 4_pm_1, 2)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 16_pm_4, 4)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 64_pm_16, 6)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 256_pm_64, 8)
BENCHMARK_NAMED_PARAM_MULTI(Skip_ForwardQ128, 1024_pm_256, 10)
// Benchmarks jump() using the shuffled index order.
BENCHMARK(Jump_ForwardQ128, iters) {
bmJump<bm::Reader>(bm::list, bm::data, bm::order, iters);
}
BENCHMARK_DRAW_LINE();
// Benchmarks skipTo() on the shared list (skip quantum 128).
// NOTE(review): logAvgSkip is presumably log2 of the average skip length,
// as suggested by the parameter names below — confirm against bmSkipTo in
// CodingTestUtils.h.
size_t SkipTo_SkipQ128(size_t iters, size_t logAvgSkip) {
bmSkipTo<bm::Reader>(bm::list, bm::data, logAvgSkip, iters);
return iters;
}
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1, 0)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 2, 1)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 4_pm_1, 2)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 16_pm_4, 4)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 64_pm_16, 6)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 256_pm_64, 8)
BENCHMARK_NAMED_PARAM_MULTI(SkipTo_SkipQ128, 1024_pm_256, 10)
// Benchmarks jumpTo() using the shuffled index order.
BENCHMARK(JumpTo_SkipQ128, iters) {
bmJumpTo<bm::Reader>(bm::list, bm::data, bm::order, iters);
}
BENCHMARK_DRAW_LINE();
// Encodes the small input prepared by bm::init() and releases the result.
BENCHMARK(Encode_10) {
auto list = bm::Encoder::encode(bm::encodeSmallData.begin(),
bm::encodeSmallData.end());
list.free();
}
// Encodes the large input prepared by bm::init() and releases the result.
BENCHMARK(Encode) {
auto list = bm::Encoder::encode(bm::encodeLargeData.begin(),
bm::encodeLargeData.end());
list.free();
}
#if 0
Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz (turbo off),
using instructions::Default and GCC 4.8 with --bm_min_usec 100000.
============================================================================
folly/experimental/test/BitVectorCodingTest.cpp relative time/iter iters/s
============================================================================
Next 9.59ns 104.25M
Skip_ForwardQ128(1) 11.56ns 86.53M
Skip_ForwardQ128(2) 23.30ns 42.93M
Skip_ForwardQ128(4_pm_1) 52.99ns 18.87M
Skip_ForwardQ128(16_pm_4) 200.85ns 4.98M
Skip_ForwardQ128(64_pm_16) 733.20ns 1.36M
Skip_ForwardQ128(256_pm_64) 748.35ns 1.34M
Skip_ForwardQ128(1024_pm_256) 742.77ns 1.35M
Jump_ForwardQ128 752.98ns 1.33M
----------------------------------------------------------------------------
SkipTo_SkipQ128(1) 23.47ns 42.62M
SkipTo_SkipQ128(2) 24.48ns 40.85M
SkipTo_SkipQ128(4_pm_1) 22.16ns 45.13M
SkipTo_SkipQ128(16_pm_4) 28.43ns 35.17M
SkipTo_SkipQ128(64_pm_16) 45.51ns 21.97M
SkipTo_SkipQ128(256_pm_64) 44.03ns 22.71M
SkipTo_SkipQ128(1024_pm_256) 45.84ns 21.81M
JumpTo_SkipQ128 15.33ns 65.25M
----------------------------------------------------------------------------
Encode_10 1.60us 624.33K
Encode 16.98ms 58.89
============================================================================
#endif
// Runs the unit tests; when they all pass and --benchmark is set, also runs
// the benchmarks. The exit code is always the unit-test result.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  const auto testStatus = RUN_ALL_TESTS();
  const bool wantBenchmarks = (testStatus == 0) && FLAGS_benchmark;
  if (!wantBenchmarks) {
    return testStatus;
  }

  bm::init();
  folly::runBenchmarks();
  bm::free();
  return testStatus;
}
......@@ -87,7 +87,6 @@ void init() {
std::mt19937 gen;
data = generateRandomList(100 * 1000, 10 * 1000 * 1000, gen);
//data = loadList("/home/philipp/pl_test_dump.txt");
list = Encoder::encode(data.begin(), data.end());
order.resize(data.size());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment