Commit 928aa5de, authored by Giuseppe Ottaviano, committed by Facebook GitHub Bot

Do not read the end of the list on EliasFanoReader construction

Summary:
Move size and upper bound accounting to `UpperBitsReader`, so we don't need to read the last value on construction, which can be expensive when opening a large number of small lists.

This also makes the maximum `ValueType` value representable, and lifts the requirement that the upper bound provided at construction time must be equal to the last element in the list, allowing multiple lists to share the encoding of the upper bound.

Based on initial work by swaroopnm.

Reviewed By: philippv

Differential Revision: D32130075

fbshipit-source-id: e98a053b46b10c435ac0d402ff94a56fcfe095ad
parent 446ac5fa
...@@ -158,7 +158,6 @@ struct EliasFanoEncoderV2 { ...@@ -158,7 +158,6 @@ struct EliasFanoEncoderV2 {
Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {} Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
void add(ValueType value) { void add(ValueType value) {
CHECK_LT(value, std::numeric_limits<ValueType>::max());
CHECK_GE(value, lastValue_); CHECK_GE(value, lastValue_);
const auto numLowerBits = result_.numLowerBits; const auto numLowerBits = result_.numLowerBits;
...@@ -259,8 +258,6 @@ struct EliasFanoEncoderV2< ...@@ -259,8 +258,6 @@ struct EliasFanoEncoderV2<
// *** Validity checks. // *** Validity checks.
// Shift by numLowerBits must be valid. // Shift by numLowerBits must be valid.
CHECK_LT(static_cast<int>(numLowerBits), 8 * sizeof(Value)); CHECK_LT(static_cast<int>(numLowerBits), 8 * sizeof(Value));
// max() - 1 is reserved.
CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
CHECK_LE( CHECK_LE(
upperBound >> numLowerBits, std::numeric_limits<SkipValueType>::max()); upperBound >> numLowerBits, std::numeric_limits<SkipValueType>::max());
...@@ -366,25 +363,32 @@ FOLLY_ALWAYS_INLINE T addT(T a, U b) { ...@@ -366,25 +363,32 @@ FOLLY_ALWAYS_INLINE T addT(T a, U b) {
return static_cast<T>(a + static_cast<T>(b)); return static_cast<T>(a + static_cast<T>(b));
} }
template <class Encoder, class Instructions, class SizeType> template <
class Encoder,
class Instructions,
class SizeType,
bool kUnchecked = false>
class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
SkipPointers<Encoder::skipQuantum> { SkipPointers<Encoder::skipQuantum> {
using SkipValueType = typename Encoder::SkipValueType; using SkipValueType = typename Encoder::SkipValueType;
public: public:
using ValueType = typename Encoder::ValueType; using ValueType = typename Encoder::ValueType;
static constexpr SizeType kBeforeFirstPos = -1;
explicit UpperBitsReader(const typename Encoder::CompressedList& list) explicit UpperBitsReader(const typename Encoder::CompressedList& list)
: ForwardPointers<Encoder::forwardQuantum>(list.forwardPointers), : ForwardPointers<Encoder::forwardQuantum>(list.forwardPointers),
SkipPointers<Encoder::skipQuantum>(list.skipPointers), SkipPointers<Encoder::skipQuantum>(list.skipPointers),
start_(list.upper) { start_(list.upper),
size_(list.size),
upperBound_(estimateUpperBound(list)) {
reset(); reset();
} }
void reset() { void reset() {
// Pretend the bitvector is prefixed by a block of zeroes. // Pretend the bitvector is prefixed by a block of zeroes.
block_ = 0; block_ = 0;
position_ = static_cast<SizeType>(-1); position_ = kBeforeFirstPos;
outer_ = static_cast<OuterType>(-sizeof(block_t)); outer_ = static_cast<OuterType>(-sizeof(block_t));
value_ = 0; value_ = 0;
} }
...@@ -393,7 +397,18 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -393,7 +397,18 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
FOLLY_ALWAYS_INLINE ValueType value() const { return value_; } FOLLY_ALWAYS_INLINE ValueType value() const { return value_; }
FOLLY_ALWAYS_INLINE ValueType previous() { FOLLY_ALWAYS_INLINE bool valid() const {
// Also checks that position() != kBeforeFirstPos.
return position() < size();
}
FOLLY_ALWAYS_INLINE SizeType size() const { return size_; }
FOLLY_ALWAYS_INLINE bool previous() {
if (!kUnchecked && UNLIKELY(position() == 0)) {
return false;
}
size_t inner; size_t inner;
block_t block; block_t block;
DCHECK_GE(outer_, 0); DCHECK_GE(outer_, 0);
...@@ -404,9 +419,13 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -404,9 +419,13 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
return setValue(inner); return setValue(inner);
} }
FOLLY_ALWAYS_INLINE ValueType next() { FOLLY_ALWAYS_INLINE bool next() {
if (!kUnchecked && UNLIKELY(addT(position(), 1) >= size())) {
return setDone();
}
// Skip to the first non-zero block. // Skip to the first non-zero block.
while (block_ == 0) { while (UNLIKELY(block_ == 0)) {
outer_ += sizeof(block_t); outer_ += sizeof(block_t);
block_ = loadUnaligned<block_t>(start_ + outer_); block_ = loadUnaligned<block_t>(start_ + outer_);
} }
...@@ -418,8 +437,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -418,8 +437,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
return setValue(inner); return setValue(inner);
} }
FOLLY_ALWAYS_INLINE ValueType skip(SizeType n) { FOLLY_ALWAYS_INLINE bool skip(SizeType n) {
DCHECK_GT(n, 0); DCHECK_GT(n, 0);
if (!kUnchecked && UNLIKELY(addT(position_, n) >= size())) {
return setDone();
}
position_ += n; // n 1-bits will be read. position_ += n; // n 1-bits will be read.
...@@ -453,8 +475,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -453,8 +475,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
// Skip to the first element that is >= v and located *after* the current // Skip to the first element that is >= v and located *after* the current
// one (so even if current value equals v, position will be increased by 1). // one (so even if current value equals v, position will be increased by 1).
FOLLY_ALWAYS_INLINE ValueType skipToNext(ValueType v) { FOLLY_ALWAYS_INLINE bool skipToNext(ValueType v) {
DCHECK_GE(v, value_); DCHECK_GE(v, value_);
if (!kUnchecked && UNLIKELY(v > upperBound_)) {
return setDone();
}
// Use skip pointer. // Use skip pointer.
if constexpr (Encoder::skipQuantum > 0) { if constexpr (Encoder::skipQuantum > 0) {
...@@ -466,6 +491,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -466,6 +491,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
const size_t dest = loadUnaligned<SkipValueType>( const size_t dest = loadUnaligned<SkipValueType>(
this->skipPointers_ + (steps - 1) * sizeof(SkipValueType)); this->skipPointers_ + (steps - 1) * sizeof(SkipValueType));
DCHECK_LE(dest, size());
if (!kUnchecked && UNLIKELY(dest == size())) {
return setDone();
}
reposition(dest + Encoder::skipQuantum * steps); reposition(dest + Encoder::skipQuantum * steps);
position_ = dest - 1; position_ = dest - 1;
...@@ -489,6 +519,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -489,6 +519,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
skip -= cnt; skip -= cnt;
position_ += kBitsPerBlock - cnt; position_ += kBitsPerBlock - cnt;
outer_ += sizeof(block_t); outer_ += sizeof(block_t);
DCHECK_LT(outer_, (static_cast<size_t>(upperBound_) + size() + 7) / 8);
block_ = loadUnaligned<block_t>(start_ + outer_); block_ = loadUnaligned<block_t>(start_ + outer_);
} }
...@@ -498,17 +529,24 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -498,17 +529,24 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
block_ &= block_t(-1) << inner; block_ &= block_t(-1) << inner;
} }
next(); DCHECK_LT(addT(position(), 1), addT(size(), 1));
return value_; return next();
} }
/** /**
* Prepare to skip to `value`. This is a constant-time operation that will * Try to prepare to skip to value. This is a constant-time operation that
* prefetch memory required for a `skipTo(value)` call. * will attempt to prefetch memory required for a subsequent skipTo(value)
* call if the value to skip to is within this list.
* *
* @return position of reader * Returns:
* {true, position of the reader} if the skip is valid,
* {false, size()} otherwise.
*/ */
FOLLY_ALWAYS_INLINE SizeType prepareSkipTo(ValueType v) const { FOLLY_ALWAYS_INLINE std::pair<bool, SizeType> prepareSkipTo(
ValueType v) const {
if (!kUnchecked && UNLIKELY(v > upperBound_)) {
return std::make_pair(false, size());
}
auto position = position_; auto position = position_;
if constexpr (Encoder::skipQuantum > 0) { if constexpr (Encoder::skipQuantum > 0) {
...@@ -518,6 +556,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -518,6 +556,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
const size_t dest = loadUnaligned<SkipValueType>( const size_t dest = loadUnaligned<SkipValueType>(
this->skipPointers_ + (steps - 1) * sizeof(SkipValueType)); this->skipPointers_ + (steps - 1) * sizeof(SkipValueType));
DCHECK_LE(dest, size());
if (!kUnchecked && UNLIKELY(dest == size())) {
return std::make_pair(false, size());
}
position = dest - 1; position = dest - 1;
outer = (dest + Encoder::skipQuantum * steps) / 8; outer = (dest + Encoder::skipQuantum * steps) / 8;
...@@ -534,7 +577,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -534,7 +577,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
} }
} }
return position; return std::make_pair(true, position);
} }
FOLLY_ALWAYS_INLINE ValueType previousValue() const { FOLLY_ALWAYS_INLINE ValueType previousValue() const {
...@@ -548,7 +591,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -548,7 +591,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
// Returns true if we're at the beginning of the list, or previousValue() != // Returns true if we're at the beginning of the list, or previousValue() !=
// value(). // value().
FOLLY_ALWAYS_INLINE bool isAtBeginningOfRun() const { FOLLY_ALWAYS_INLINE bool isAtBeginningOfRun() const {
DCHECK_NE(position(), static_cast<SizeType>(-1)); DCHECK_NE(position(), kBeforeFirstPos);
if (position_ == 0) { if (position_ == 0) {
return true; return true;
} }
...@@ -556,31 +599,45 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -556,31 +599,45 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
return (start_[bitPos / 8] & (1 << (bitPos % 8))) == 0; return (start_[bitPos / 8] & (1 << (bitPos % 8))) == 0;
} }
FOLLY_ALWAYS_INLINE void setDone(SizeType endPos) { position_ = endPos; }
private: private:
using block_t = uint64_t; using block_t = uint64_t;
// The size in bytes of the upper bits is limited by n + universe / 8, // The size in bytes of the upper bits is limited by n + universe / 8,
// so a type that can hold either sizes or values is sufficient. // so a type that can hold either sizes or values is sufficient.
using OuterType = typename std::common_type_t<ValueType, SizeType>; using OuterType = typename std::common_type_t<ValueType, SizeType>;
FOLLY_ALWAYS_INLINE ValueType setValue(size_t inner) { static ValueType estimateUpperBound(
const typename Encoder::CompressedList& list) {
size_t upperBound = 8 * list.upperSizeBytes - list.size;
// The bitvector is byte-aligned, so we may be overestimating the universe
// size. Make sure it fits in ValueType.
return static_cast<ValueType>(std::min<size_t>(
upperBound,
std::numeric_limits<ValueType>::max() >> list.numLowerBits));
}
FOLLY_ALWAYS_INLINE bool setValue(size_t inner) {
value_ = static_cast<ValueType>(8 * outer_ + inner - position_); value_ = static_cast<ValueType>(8 * outer_ + inner - position_);
return value_; return true;
}
FOLLY_ALWAYS_INLINE bool setDone() {
position_ = size_;
return false;
} }
// NOTE: dest is a position in the bit vector, use size_t as SizeType may // NOTE: dest is a position in the bit vector, use size_t as SizeType may
// not be sufficient here. // not be sufficient here.
FOLLY_ALWAYS_INLINE void reposition(size_t dest) { FOLLY_ALWAYS_INLINE void reposition(size_t dest) {
outer_ = dest / 8; outer_ = dest / 8;
DCHECK_LT(outer_, (static_cast<size_t>(upperBound_) + size() + 7) / 8);
block_ = loadUnaligned<block_t>(start_ + outer_); block_ = loadUnaligned<block_t>(start_ + outer_);
block_ &= ~((block_t(1) << (dest % 8)) - 1); block_ &= ~((block_t(1) << (dest % 8)) - 1);
} }
FOLLY_ALWAYS_INLINE void getPreviousInfo( FOLLY_ALWAYS_INLINE void getPreviousInfo(
block_t& block, size_t& inner, OuterType& outer) const { block_t& block, size_t& inner, OuterType& outer) const {
DCHECK_NE(position(), std::numeric_limits<SizeType>::max());
DCHECK_GT(position(), 0); DCHECK_GT(position(), 0);
DCHECK_LT(position(), size());
outer = outer_; outer = outer_;
block = loadUnaligned<block_t>(start_ + outer); block = loadUnaligned<block_t>(start_ + outer);
...@@ -595,6 +652,8 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>, ...@@ -595,6 +652,8 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
} }
const unsigned char* const start_; const unsigned char* const start_;
const SizeType size_; // Size of the list.
const ValueType upperBound_; // Upper bound of values in this list.
block_t block_; block_t block_;
SizeType position_; // Index of current value (= #reads - 1). SizeType position_; // Index of current value (= #reads - 1).
OuterType outer_; // Outer offset: number of consumed bytes in upper. OuterType outer_; // Outer offset: number of consumed bytes in upper.
...@@ -615,55 +674,35 @@ template < ...@@ -615,55 +674,35 @@ template <
bool kUnchecked = false, bool kUnchecked = false,
class SizeT = typename Encoder::SkipValueType> class SizeT = typename Encoder::SkipValueType>
class EliasFanoReader { class EliasFanoReader {
using UpperBitsReader =
detail::UpperBitsReader<Encoder, Instructions, SizeT, kUnchecked>;
public: public:
using EncoderType = Encoder; using EncoderType = Encoder;
using ValueType = typename Encoder::ValueType; using ValueType = typename Encoder::ValueType;
using SizeType = SizeT; using SizeType = SizeT;
explicit EliasFanoReader(const typename Encoder::CompressedList& list) explicit EliasFanoReader(const typename Encoder::CompressedList& list)
: upper_(list), : upper_(list), lower_(list.lower), numLowerBits_(list.numLowerBits) {
lower_(list.lower),
size_(list.size),
numLowerBits_(list.numLowerBits) {
DCHECK_LE(list.size, std::numeric_limits<SizeType>::max()); DCHECK_LE(list.size, std::numeric_limits<SizeType>::max());
DCHECK(Instructions::supported()); DCHECK(Instructions::supported());
// To avoid extra branching during skipTo() while reading
// upper sequence we need to know the last element.
// If kUnchecked == true, we do not check that skipTo() is called
// within the bounds, so we can avoid initializing lastValue_.
if (kUnchecked || UNLIKELY(list.size == 0)) {
lastValue_ = 0;
return;
}
ValueType lastUpperValue = ValueType(8 * list.upperSizeBytes - size_);
auto it = list.upper + list.upperSizeBytes - 1;
DCHECK_NE(*it, 0);
lastUpperValue -= 8 - findLastSet(*it);
lastValue_ = readLowerPart(size_ - 1) | (lastUpperValue << numLowerBits_);
} }
void reset() { void reset() { upper_.reset(); }
upper_.reset();
value_ = kInvalidValue;
}
bool previous() { bool previous() {
if (!kUnchecked && UNLIKELY(position() == 0)) { if (LIKELY(upper_.previous())) {
reset(); return setValue(readCurrentValue());
return false;
} }
upper_.previous(); reset();
value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_); return false;
return true;
} }
bool next() { bool next() {
if (!kUnchecked && UNLIKELY(detail::addT(position(), 1) >= size_)) { if (LIKELY(upper_.next())) {
return setDone(); return setValue(readCurrentValue());
} }
upper_.next(); return false;
value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
return true;
} }
/** /**
...@@ -675,36 +714,39 @@ class EliasFanoReader { ...@@ -675,36 +714,39 @@ class EliasFanoReader {
if (n == 0) { if (n == 0) {
return valid(); return valid();
} }
if (!upper_.skip(n)) {
if (kUnchecked || LIKELY(detail::addT(position(), n) < size_)) { return false;
upper_.skip(n);
value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
return true;
} }
return setValue(readCurrentValue());
return setDone();
} }
/** /**
* Skips to the first element >= value whose position is greater or equal to * Skips to the first element >= value whose position is greater or equal to
* the current position. Requires that value >= value() (or that the reader is * the current position.
* at position -1). Returns false if no such element exists. * Requires that value >= value() (or that the reader is positioned before the
* first element). Returns false if no such element exists.
*/ */
bool skipTo(ValueType value) { bool skipTo(ValueType value) {
if (value_ != kInvalidValue) { if (valid()) {
DCHECK_GE(value, value_); DCHECK_GE(value, value_);
if (UNLIKELY(value == value_)) {
return true;
}
} }
if (!kUnchecked && UNLIKELY(value > lastValue_)) { ValueType upperValue = value >> numLowerBits_;
return setDone();
} else if (UNLIKELY(value == value_)) { if (UNLIKELY(!upper_.skipToNext(upperValue))) {
return true; return false;
} }
ValueType upperValue = value >> numLowerBits_; do {
upper_.skipToNext(upperValue); if (auto cur = readCurrentValue(); LIKELY(cur >= value)) {
iterateTo(value); return setValue(cur);
return true; }
} while (LIKELY(upper_.next()));
return false;
} }
/** /**
...@@ -712,18 +754,20 @@ class EliasFanoReader { ...@@ -712,18 +754,20 @@ class EliasFanoReader {
* upper and lower bits. * upper and lower bits.
*/ */
void prepareSkipTo(ValueType value) const { void prepareSkipTo(ValueType value) const {
if (value_ != kInvalidValue) { if (valid()) {
DCHECK_GE(value, value_); DCHECK_GE(value, value_);
} if (UNLIKELY(value == value_)) {
return;
if ((!kUnchecked && value > lastValue_) || (value == value_)) { }
return;
} }
// Do minimal computation required to prefetch address used in // Do minimal computation required to prefetch address used in
// `readLowerPart()`. // `readLowerPart()`.
ValueType upperValue = (value >> numLowerBits_); ValueType upperValue = value >> numLowerBits_;
const auto upperPosition = upper_.prepareSkipTo(upperValue); const auto [valid, upperPosition] = upper_.prepareSkipTo(upperValue);
if (!valid) {
return;
}
const auto addr = lower_ + (upperPosition * numLowerBits_ / 8); const auto addr = lower_ + (upperPosition * numLowerBits_ / 8);
__builtin_prefetch(addr); __builtin_prefetch(addr);
__builtin_prefetch(addr + kCacheLineSize); __builtin_prefetch(addr + kCacheLineSize);
...@@ -752,7 +796,7 @@ class EliasFanoReader { ...@@ -752,7 +796,7 @@ class EliasFanoReader {
* assumeDistinct = true can enable some optimizations. * assumeDistinct = true can enable some optimizations.
*/ */
bool jumpTo(ValueType value, bool assumeDistinct = false) { bool jumpTo(ValueType value, bool assumeDistinct = false) {
if (value == value_) { if (valid() && value == value_) {
if (assumeDistinct == true) { if (assumeDistinct == true) {
return true; return true;
} }
...@@ -769,18 +813,13 @@ class EliasFanoReader { ...@@ -769,18 +813,13 @@ class EliasFanoReader {
// We need to reset if we're not in the initial state and the jump is // We need to reset if we're not in the initial state and the jump is
// backwards. // backwards.
if (position() != static_cast<SizeType>(-1) && if (position() != UpperBitsReader::kBeforeFirstPos &&
value < value_) { // If position() == size() value_ is kInvalidValue. (position() == size() || value < value_)) {
reset(); reset();
} }
return skipTo(value); return skipTo(value);
} }
ValueType lastValue() const {
CHECK(!kUnchecked);
return lastValue_;
}
ValueType previousValue() const { ValueType previousValue() const {
DCHECK_GT(position(), 0); DCHECK_GT(position(), 0);
DCHECK_LT(position(), size()); DCHECK_LT(position(), size());
...@@ -788,11 +827,9 @@ class EliasFanoReader { ...@@ -788,11 +827,9 @@ class EliasFanoReader {
(upper_.previousValue() << numLowerBits_); (upper_.previousValue() << numLowerBits_);
} }
SizeType size() const { return size_; } SizeType size() const { return upper_.size(); }
bool valid() const { bool valid() const { return upper_.valid(); }
return position() < size(); // Also checks that position() != -1.
}
SizeType position() const { return upper_.position(); } SizeType position() const { return upper_.position(); }
...@@ -802,17 +839,14 @@ class EliasFanoReader { ...@@ -802,17 +839,14 @@ class EliasFanoReader {
} }
private: private:
// Must hold kInvalidValue + 1 == 0. FOLLY_ALWAYS_INLINE bool setValue(ValueType value) {
constexpr static ValueType kInvalidValue = -1; DCHECK(valid());
value_ = value;
FOLLY_ALWAYS_INLINE bool setDone() { return true;
value_ = kInvalidValue;
upper_.setDone(size_);
return false;
} }
FOLLY_ALWAYS_INLINE ValueType readLowerPart(SizeType i) const { FOLLY_ALWAYS_INLINE ValueType readLowerPart(SizeType i) const {
DCHECK_LT(i, size_); DCHECK_LT(i, size());
const size_t pos = i * numLowerBits_; const size_t pos = i * numLowerBits_;
const unsigned char* ptr = lower_ + (pos / 8); const unsigned char* ptr = lower_ + (pos / 8);
const uint64_t ptrv = loadUnaligned<uint64_t>(ptr); const uint64_t ptrv = loadUnaligned<uint64_t>(ptr);
...@@ -822,22 +856,15 @@ class EliasFanoReader { ...@@ -822,22 +856,15 @@ class EliasFanoReader {
return Instructions::bzhi(ptrv >> (pos % 8), numLowerBits_); return Instructions::bzhi(ptrv >> (pos % 8), numLowerBits_);
} }
FOLLY_ALWAYS_INLINE void iterateTo(ValueType value) { FOLLY_ALWAYS_INLINE ValueType readCurrentValue() {
while (true) { return readLowerPart(position()) | (upper_.value() << numLowerBits_);
value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
if (LIKELY(value_ >= value)) {
break;
}
upper_.next();
}
} }
detail::UpperBitsReader<Encoder, Instructions, SizeType> upper_; // Ordering of fields is counter-intutive but it optimizes the layout.
const uint8_t* lower_; UpperBitsReader upper_;
SizeType size_; const uint8_t* const lower_;
ValueType value_ = kInvalidValue; ValueType value_;
ValueType lastValue_; const uint8_t numLowerBits_;
uint8_t numLowerBits_;
}; };
} // namespace compression } // namespace compression
......
...@@ -249,22 +249,30 @@ void testSkipTo(const std::vector<uint64_t>& data, const List& list) { ...@@ -249,22 +249,30 @@ void testSkipTo(const std::vector<uint64_t>& data, const List& list) {
EXPECT_EQ(reader.value(), data[0]); EXPECT_EQ(reader.value(), data[0]);
EXPECT_EQ(reader.position(), 0); EXPECT_EQ(reader.position(), 0);
} }
{
// Skip past the last element. // Skip past the last element, when possible. Make sure to probe values far
Reader reader(list); // from the last element, as the reader implementation may keep an internal
EXPECT_FALSE(reader.skipTo(data.back() + 1)); // upper bound larger than that, and we need to make sure we exercise skipping
EXPECT_FALSE(reader.valid()); // both before and after that.
EXPECT_EQ(reader.position(), reader.size()); using ValueType = typename Reader::ValueType;
EXPECT_FALSE(reader.next()); std::vector<ValueType> valuesPastTheEnd;
const auto lastValue = data.back();
const auto kMaxValue = std::numeric_limits<ValueType>::max();
// Keep doubling the distance from the last value until we overflow.
for (ValueType value = lastValue + 1; value > lastValue;
value += value - lastValue) {
valuesPastTheEnd.push_back(value);
} }
{ if (kMaxValue != lastValue) {
// Skip to maximum integer. valuesPastTheEnd.push_back(kMaxValue);
}
for (auto value : valuesPastTheEnd) {
Reader reader(list); Reader reader(list);
using ValueType = typename Reader::ValueType; EXPECT_FALSE(reader.skipTo(value)) << value << " " << lastValue;
EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max())); EXPECT_FALSE(reader.valid()) << value << " " << lastValue;
EXPECT_FALSE(reader.valid()); EXPECT_EQ(reader.position(), reader.size()) << value << " " << lastValue;
EXPECT_EQ(reader.position(), reader.size()); EXPECT_FALSE(reader.next()) << value << " " << lastValue;
EXPECT_FALSE(reader.next());
} }
} }
...@@ -284,9 +292,9 @@ void testJump(const std::vector<uint64_t>& data, const List& list) { ...@@ -284,9 +292,9 @@ void testJump(const std::vector<uint64_t>& data, const List& list) {
for (auto i : is) { for (auto i : is) {
// Also test idempotency. // Also test idempotency.
for (size_t round = 0; round < 2; ++round) { for (size_t round = 0; round < 2; ++round) {
EXPECT_TRUE(reader.jump(i)); EXPECT_TRUE(reader.jump(i)) << i << " " << data.size();
EXPECT_EQ(reader.value(), data[i]); EXPECT_EQ(reader.value(), data[i]) << i << " " << data.size();
EXPECT_EQ(reader.position(), i); EXPECT_EQ(reader.position(), i) << i << " " << data.size();
} }
maybeTestPreviousValue(data, reader, i); maybeTestPreviousValue(data, reader, i);
maybeTestPrevious(data, reader, i); maybeTestPrevious(data, reader, i);
...@@ -332,9 +340,11 @@ void testJumpTo(const std::vector<uint64_t>& data, const List& list) { ...@@ -332,9 +340,11 @@ void testJumpTo(const std::vector<uint64_t>& data, const List& list) {
EXPECT_EQ(reader.position(), std::distance(data.begin(), it)); EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
} }
EXPECT_FALSE(reader.jumpTo(data.back() + 1)); if (data.back() != std::numeric_limits<ValueType>::max()) {
EXPECT_FALSE(reader.valid()); EXPECT_FALSE(reader.jumpTo(data.back() + 1));
EXPECT_EQ(reader.position(), reader.size()); EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
}
} }
template <class Reader, class Encoder> template <class Reader, class Encoder>
...@@ -360,11 +370,20 @@ void testEmpty() { ...@@ -360,11 +370,20 @@ void testEmpty() {
} }
} }
// `upperBoundExtension` is required to inject additional 0-blocks
// at the end of the list. This allows us to test lists with a large gap between
// last element and universe upper bound, to exercise bounds-checking when
// skipping past the last element
template <class Reader, class Encoder> template <class Reader, class Encoder>
void testAll(const std::vector<uint64_t>& data) { void testAll(
const std::vector<uint64_t>& data, uint64_t upperBoundExtension = 0) {
SCOPED_TRACE(__PRETTY_FUNCTION__); SCOPED_TRACE(__PRETTY_FUNCTION__);
auto list = Encoder::encode(data.begin(), data.end()); Encoder encoder(data.size(), data.back() + upperBoundExtension);
for (const auto value : data) {
encoder.add(value);
}
auto list = encoder.finish();
testNext<Reader>(data, list); testNext<Reader>(data, list);
testSkip<Reader>(data, list); testSkip<Reader>(data, list);
testSkipTo<Reader>(data, list); testSkipTo<Reader>(data, list);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <vector> #include <vector>
#include <folly/Benchmark.h> #include <folly/Benchmark.h>
#include <folly/Random.h>
#include <folly/experimental/EliasFanoCoding.h> #include <folly/experimental/EliasFanoCoding.h>
#include <folly/experimental/Select64.h> #include <folly/experimental/Select64.h>
#include <folly/experimental/test/CodingTestUtils.h> #include <folly/experimental/test/CodingTestUtils.h>
...@@ -106,12 +107,23 @@ class EliasFanoCodingTest : public ::testing::Test { ...@@ -106,12 +107,23 @@ class EliasFanoCodingTest : public ::testing::Test {
using Reader = EliasFanoReader<Encoder, instructions::Default, false>; using Reader = EliasFanoReader<Encoder, instructions::Default, false>;
testAll<Reader, Encoder>({0}); testAll<Reader, Encoder>({0});
testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000)); testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
// Test a list with size multiple of kForwardQuantum and universe multiple
// of kSkipQuantum, to exercise corner cases in the construction of forward
// and skip lists.
testAll<Reader, Encoder>(generateRandomList(
std::max<size_t>(8 * kForwardQuantum, 1024),
std::max<size_t>(16 * kSkipQuantum, 2048)));
testAll<Reader, Encoder>(generateRandomList( testAll<Reader, Encoder>(generateRandomList(
100 * 1000, 10 * 1000 * 1000, /* withDuplicates */ true)); 100 * 1000, 10 * 1000 * 1000, /* withDuplicates */ true));
testAll<Reader, Encoder>(generateSeqList(1, 100000, 100)); testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
// max() cannot be read, as it is assumed an invalid value. testAll<Reader, Encoder>({0, 1, std::numeric_limits<uint32_t>::max()});
// TODO(ott): It should be possible to lift this constraint. // Test data with additional trailing 0s in the upperBits by extending
testAll<Reader, Encoder>({0, 1, std::numeric_limits<uint32_t>::max() - 1}); // the upper bound.
constexpr uint64_t minUpperBoundExtension = 2;
constexpr uint64_t maxUpperBoundExtension = 1024;
testAll<Reader, Encoder>(
generateRandomList(100 * 1000, 10 * 1000 * 1000),
folly::Random::rand32(minUpperBoundExtension, maxUpperBoundExtension));
} }
template <size_t kSkipQuantum, size_t kForwardQuantum, typename ValueType> template <size_t kSkipQuantum, size_t kForwardQuantum, typename ValueType>
...@@ -135,12 +147,12 @@ class EliasFanoCodingTest : public ::testing::Test { ...@@ -135,12 +147,12 @@ class EliasFanoCodingTest : public ::testing::Test {
using Reader = EliasFanoReader<Encoder, instructions::Default, false>; using Reader = EliasFanoReader<Encoder, instructions::Default, false>;
constexpr size_t kMaxU16 = std::numeric_limits<uint16_t>::max(); constexpr size_t kMaxU16 = std::numeric_limits<uint16_t>::max();
// kMaxU16 is reserved for both value and size. // Max SizeType value is reserved.
testAll<Reader, Encoder>(generateSeqList(1, kMaxU16 - 1)); testAll<Reader, Encoder>(generateSeqList(1, kMaxU16 - 1));
// Test various sizes close to the limit. // Test various sizes close to the limit.
for (size_t i = 1; i <= 16; ++i) { for (size_t i = 1; i <= 16; ++i) {
testAll<Reader, Encoder>(generateRandomList( testAll<Reader, Encoder>(
kMaxU16 - i, kMaxU16 - 1, /* withDuplicates */ true)); generateRandomList(kMaxU16 - i, kMaxU16, /* withDuplicates */ true));
} }
} }
...@@ -155,27 +167,43 @@ TEST_F(EliasFanoCodingTest, Empty) { ...@@ -155,27 +167,43 @@ TEST_F(EliasFanoCodingTest, Empty) {
doTestEmpty(); doTestEmpty();
} }
TEST_F(EliasFanoCodingTest, Simple) { TEST_F(EliasFanoCodingTest, Simple32Bit) {
doTestAll<0, 0, uint32_t>(); doTestAll<0, 0, uint32_t>();
}
TEST_F(EliasFanoCodingTest, Simple64Bit) {
doTestAll<0, 0, uint64_t>(); doTestAll<0, 0, uint64_t>();
}
TEST_F(EliasFanoCodingTest, SimpleDense) {
doTestDenseAll<0, 0>(); doTestDenseAll<0, 0>();
} }
TEST_F(EliasFanoCodingTest, SkipPointers) { TEST_F(EliasFanoCodingTest, SkipPointers32Bit) {
doTestAll<128, 0, uint32_t>(); doTestAll<128, 0, uint32_t>();
}
TEST_F(EliasFanoCodingTest, SkipPointers64Bit) {
doTestAll<128, 0, uint64_t>(); doTestAll<128, 0, uint64_t>();
}
TEST_F(EliasFanoCodingTest, SkipPointersDense) {
doTestDenseAll<128, 0>(); doTestDenseAll<128, 0>();
} }
TEST_F(EliasFanoCodingTest, ForwardPointers) { TEST_F(EliasFanoCodingTest, ForwardPointers32Bit) {
doTestAll<0, 128, uint32_t>(); doTestAll<0, 128, uint32_t>();
}
TEST_F(EliasFanoCodingTest, ForwardPointers64Bit) {
doTestAll<0, 128, uint64_t>(); doTestAll<0, 128, uint64_t>();
}
TEST_F(EliasFanoCodingTest, ForwardPointersDense) {
doTestDenseAll<0, 128>(); doTestDenseAll<0, 128>();
} }
TEST_F(EliasFanoCodingTest, SkipForwardPointers) { TEST_F(EliasFanoCodingTest, SkipForwardPointers32Bit) {
doTestAll<128, 128, uint32_t>(); doTestAll<128, 128, uint32_t>();
}
TEST_F(EliasFanoCodingTest, SkipForwardPointers64Bit) {
doTestAll<128, 128, uint64_t>(); doTestAll<128, 128, uint64_t>();
}
TEST_F(EliasFanoCodingTest, SkipForwardPointersDense) {
doTestDenseAll<128, 128>(); doTestDenseAll<128, 128>();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment