Commit 2baf3f69 authored by Giuseppe Ottaviano's avatar Giuseppe Ottaviano Committed by facebook-github-bot-4

Make EliasFanoReader and BitVectorReader API more consistent

Summary: This diff introduces a few breaking API changes to both EliasFanoReader and BitVectorReader in order to fix some inconsistencies:

- As initalized, `EliasFanoReader` and `BitVectorReader` held a value of `0`, thus if `0` is present in the list `skipTo(0)` would not update the position to `0`, as it happens with any other `skipTo(<first element>)`. This fixes `jumpTo` accordingly.
- `jump(i + 1)` would go at position `i`. Now `reader.jump(i)`'s postcondition is `reader.position() == i`.
- It is now illegal to retrieve `value()` from a reader in an out-of-bounds position (before-the-begin or past-the-end). Validity of a reader can be checked with `valid()`.

Reviewed By: @philippv

Differential Revision: D2420023
parent fc0470a9
......@@ -121,7 +121,11 @@ struct BitVectorEncoder {
Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
void add(ValueType value) {
CHECK_GE(value, lastValue_);
CHECK_LT(value, std::numeric_limits<ValueType>::max());
// Also works when lastValue_ == -1.
CHECK_GT(value + 1, lastValue_ + 1)
<< "BitVectorCoding only supports stricly monotone lists";
auto block = bits_ + (value / 64) * sizeof(uint64_t);
size_t inner = value % 64;
folly::Bits<folly::Unaligned<uint64_t>>::set(
......@@ -160,7 +164,7 @@ struct BitVectorEncoder {
uint8_t* const skipPointers_ = nullptr;
uint8_t* const forwardPointers_ = nullptr;
ValueType lastValue_ = 0;
ValueType lastValue_ = -1;
size_t size_ = 0;
size_t skipPointersSize_ = 0;
......@@ -265,7 +269,7 @@ class BitVectorReader {
outer_ = 0;
inner_ = -1;
position_ = -1;
value_ = 0;
value_ = kInvalidValue;
}
bool next() {
......@@ -332,11 +336,13 @@ class BitVectorReader {
}
bool skipTo(ValueType v) {
DCHECK_GE(v, value_);
if (v <= value_) {
return true;
} else if (!kUnchecked && v > upperBound_) {
// Also works when value_ == kInvalidValue.
if (v != kInvalidValue) { DCHECK_GE(v + 1, value_ + 1); }
if (!kUnchecked && v > upperBound_) {
return setDone();
} else if (v == value_) {
return true;
}
// Small skip optimization.
......@@ -384,16 +390,19 @@ class BitVectorReader {
size_t size() const { return size_; }
bool valid() const {
return position() < size(); // Also checks that position() != -1.
}
size_t position() const { return position_; }
ValueType value() const { return value_; }
ValueType value() const {
DCHECK(valid());
return value_;
}
bool jump(size_t n) {
reset();
if (n > 0) {
return skip(n);
} else {
return true;
}
return skip(n + 1);
}
bool jumpTo(ValueType v) {
......@@ -402,12 +411,15 @@ class BitVectorReader {
}
bool setDone() {
value_ = std::numeric_limits<ValueType>::max();
value_ = kInvalidValue;
position_ = size_;
return false;
}
private:
constexpr static ValueType kInvalidValue =
std::numeric_limits<ValueType>::max(); // Must hold kInvalidValue + 1 == 0.
bool setValue() {
value_ = static_cast<ValueType>(8 * outer_ + inner_);
return true;
......@@ -426,7 +438,7 @@ class BitVectorReader {
size_t inner_;
size_t position_;
uint64_t block_;
ValueType value_ = 0;
ValueType value_;
size_t size_;
ValueType upperBound_;
......
......@@ -149,6 +149,7 @@ struct EliasFanoEncoderV2 {
Layout::fromUpperBoundAndSize(upperBound, size).allocList()) { }
void add(ValueType value) {
CHECK_LT(value, std::numeric_limits<ValueType>::max());
CHECK_GE(value, lastValue_);
const auto numLowerBits = result_.numLowerBits;
......@@ -551,7 +552,7 @@ class EliasFanoReader {
void reset() {
upper_.reset();
value_ = 0;
value_ = kInvalidValue;
}
bool next() {
......@@ -582,11 +583,13 @@ class EliasFanoReader {
}
bool skipTo(ValueType value) {
DCHECK_GE(value, value_);
if (value <= value_) {
return true;
} else if (!kUnchecked && value > lastValue_) {
// Also works when value_ == kInvalidValue.
if (value != kInvalidValue) { DCHECK_GE(value + 1, value_ + 1); }
if (!kUnchecked && value > lastValue_) {
return setDone();
} else if (value == value_) {
return true;
}
size_t upperValue = (value >> numLowerBits_);
......@@ -606,21 +609,15 @@ class EliasFanoReader {
}
bool jump(size_t n) {
if (LIKELY(n - 1 < size_)) { // n > 0 && n <= size_
value_ = readLowerPart(n - 1) | (upper_.jump(n) << numLowerBits_);
return true;
} else if (n == 0) {
reset();
if (LIKELY(n < size_)) { // Also checks that n != -1.
value_ = readLowerPart(n) | (upper_.jump(n + 1) << numLowerBits_);
return true;
}
return setDone();
}
bool jumpTo(ValueType value) {
if (value <= 0) {
reset();
return true;
} else if (!kUnchecked && value > lastValue_) {
if (!kUnchecked && value > lastValue_) {
return setDone();
}
......@@ -638,12 +635,22 @@ class EliasFanoReader {
size_t size() const { return size_; }
bool valid() const {
return position() < size(); // Also checks that position() != -1.
}
size_t position() const { return upper_.position(); }
ValueType value() const { return value_; }
ValueType value() const {
DCHECK(valid());
return value_;
}
private:
constexpr static ValueType kInvalidValue =
std::numeric_limits<ValueType>::max(); // Must hold kInvalidValue + 1 == 0.
bool setDone() {
value_ = std::numeric_limits<ValueType>::max();
value_ = kInvalidValue;
upper_.setDone(size_);
return false;
}
......@@ -671,7 +678,7 @@ class EliasFanoReader {
const uint8_t* lower_;
detail::UpperBitsReader<Encoder, Instructions> upper_;
const ValueType lowerMask_;
ValueType value_ = 0;
ValueType value_ = kInvalidValue;
ValueType lastValue_;
uint8_t numLowerBits_;
};
......
......@@ -91,15 +91,17 @@ auto maybeTestPreviousValue(const Vector& data, Reader& reader, Index i)
template <class Reader, class List>
void testNext(const std::vector<uint32_t>& data, const List& list) {
Reader reader(list);
EXPECT_EQ(reader.value(), 0);
EXPECT_FALSE(reader.valid());
for (size_t i = 0; i < data.size(); ++i) {
EXPECT_TRUE(reader.next());
EXPECT_TRUE(reader.valid());
EXPECT_EQ(reader.value(), data[i]);
EXPECT_EQ(reader.position(), i);
maybeTestPreviousValue(data, reader, i);
}
EXPECT_FALSE(reader.next());
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
}
......@@ -108,15 +110,16 @@ void testSkip(const std::vector<uint32_t>& data, const List& list,
size_t skipStep) {
CHECK_GT(skipStep, 0);
Reader reader(list);
EXPECT_EQ(reader.value(), 0);
for (size_t i = skipStep - 1; i < data.size(); i += skipStep) {
EXPECT_TRUE(reader.skip(skipStep));
EXPECT_TRUE(reader.valid());
EXPECT_EQ(reader.value(), data[i]);
EXPECT_EQ(reader.position(), i);
maybeTestPreviousValue(data, reader, i);
}
EXPECT_FALSE(reader.skip(skipStep));
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next());
}
......@@ -135,9 +138,7 @@ template <class Reader, class List>
void testSkipTo(const std::vector<uint32_t>& data, const List& list,
size_t skipToStep) {
CHECK_GT(skipToStep, 0);
Reader reader(list);
EXPECT_EQ(reader.value(), 0);
const uint32_t delta = std::max<uint32_t>(1, data.back() / skipToStep);
uint32_t value = delta;
......@@ -149,11 +150,13 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list,
break;
}
EXPECT_TRUE(reader.skipTo(value));
EXPECT_TRUE(reader.valid());
EXPECT_EQ(reader.value(), *it);
EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
value = reader.value() + delta;
maybeTestPreviousValue(data, reader, std::distance(data.begin(), it));
}
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next());
}
......@@ -168,9 +171,26 @@ void testSkipTo(const std::vector<uint32_t>& data, const List& list) {
}
testSkipTo<Reader, List>(data, list, std::numeric_limits<size_t>::max());
{
// Skip to the first element.
Reader reader(list);
EXPECT_TRUE(reader.skipTo(data[0]));
EXPECT_EQ(reader.value(), data[0]);
EXPECT_EQ(reader.position(), 0);
}
{
// Skip past the last element.
Reader reader(list);
EXPECT_FALSE(reader.skipTo(data.back() + 1));
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next());
}
{
// Skip to maximum integer.
Reader reader(list);
using ValueType = typename Reader::ValueType;
EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max()));
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next());
}
......@@ -189,16 +209,14 @@ void testJump(const std::vector<uint32_t>& data, const List& list) {
}
Reader reader(list);
EXPECT_TRUE(reader.jump(0));
EXPECT_EQ(reader.value(), 0);
for (auto i : is) {
EXPECT_TRUE(reader.jump(i + 1));
EXPECT_TRUE(reader.jump(i));
EXPECT_EQ(reader.value(), data[i]);
EXPECT_EQ(reader.position(), i);
maybeTestPreviousValue(data, reader, i);
}
EXPECT_FALSE(reader.jump(data.size() + 1));
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_FALSE(reader.jump(data.size()));
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
}
......@@ -221,21 +239,15 @@ void testJumpTo(const std::vector<uint32_t>& data, const List& list) {
}
EXPECT_TRUE(reader.jumpTo(0));
EXPECT_EQ(reader.value(), 0);
EXPECT_EQ(reader.position(), -1);
if (data.front() > 0) {
EXPECT_TRUE(reader.jumpTo(1));
EXPECT_EQ(reader.value(), data.front());
EXPECT_EQ(reader.value(), data[0]);
EXPECT_EQ(reader.position(), 0);
}
EXPECT_TRUE(reader.jumpTo(data.back()));
EXPECT_EQ(reader.value(), data.back());
EXPECT_EQ(reader.position(), reader.size() - 1);
EXPECT_FALSE(reader.jumpTo(data.back() + 1));
EXPECT_EQ(reader.value(), std::numeric_limits<uint32_t>::max());
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
}
......@@ -252,7 +264,7 @@ void testEmpty() {
Reader reader(list);
EXPECT_FALSE(reader.skip(1));
EXPECT_FALSE(reader.skip(10));
EXPECT_FALSE(reader.jump(1));
EXPECT_FALSE(reader.jump(0));
EXPECT_FALSE(reader.jump(10));
}
{
......
......@@ -43,6 +43,7 @@ class EliasFanoCodingTest : public ::testing::Test {
typedef EliasFanoEncoderV2<
uint32_t, uint32_t, kSkipQuantum, kForwardQuantum> Encoder;
typedef EliasFanoReader<Encoder, instructions::EF_TEST_ARCH> Reader;
testAll<Reader, Encoder>({0});
testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment