Commit 446ac5fa authored by Giuseppe Ottaviano's avatar Giuseppe Ottaviano Committed by Facebook GitHub Bot

Improve test coverage of EliasFanoCoding

Summary:
Add tests where the list size is close or at the maximum supported size, by fixing `EliasFanoReader` to support `uint16_t` as value and size type.

Types smaller than `int` require special support because any arithmetic operation promotes them to `int`, so operations that rely on unsigned overflow break. Work around this by introducing a helper for addition that preserves the type.

Reviewed By: philippv, luciang

Differential Revision: D32130076

fbshipit-source-id: 763a8e4cc2b023405c173316d2415ca8c3ec7148
parent 75752063
......@@ -177,6 +177,7 @@ struct EliasFanoEncoderV2 {
if constexpr (forwardQuantum != 0) {
if ((size_ + 1) % forwardQuantum == 0) {
DCHECK_LE(upperBits, std::numeric_limits<SkipValueType>::max());
const auto k = size_ / forwardQuantum;
// Store the number of preceding 0-bits.
forwardPointers_[k] = upperBits;
......@@ -199,6 +200,7 @@ struct EliasFanoEncoderV2 {
private:
void fillSkipPointersUpTo(ValueType fillBoundary) {
if constexpr (skipQuantum != 0) {
DCHECK_LE(size_, std::numeric_limits<SkipValueType>::max());
// The first skip pointer is omitted (it would always be 0), so the
// calculation is shifted by 1.
while ((skipPointersSize_ + 1) * skipQuantum <= fillBoundary) {
......@@ -257,6 +259,7 @@ struct EliasFanoEncoderV2<
// *** Validity checks.
// Shift by numLowerBits must be valid.
CHECK_LT(static_cast<int>(numLowerBits), 8 * sizeof(Value));
// max() - 1 is reserved.
CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
CHECK_LE(
upperBound >> numLowerBits, std::numeric_limits<SkipValueType>::max());
......@@ -354,6 +357,15 @@ struct EliasFanoEncoderV2<
namespace detail {
// Add a and b in the domain of T. This guarantees that if T is a sub-int type,
// we cast away the promotion to int, so that unsigned overflow and underflow
// work as expected.
template <class T, class U>
FOLLY_ALWAYS_INLINE T addT(T a, U b) {
static_assert(std::is_unsigned_v<T>);
return static_cast<T>(a + static_cast<T>(b));
}
template <class Encoder, class Instructions, class SizeType>
class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
SkipPointers<Encoder::skipQuantum> {
......@@ -384,7 +396,8 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
FOLLY_ALWAYS_INLINE ValueType previous() {
size_t inner;
block_t block;
getPreviousInfo(block, inner, outer_);
DCHECK_GE(outer_, 0);
getPreviousInfo(block, inner, outer_); // Updates outer_.
block_ = loadUnaligned<block_t>(start_ + outer_);
block_ ^= block;
--position_;
......@@ -445,7 +458,10 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
// Use skip pointer.
if constexpr (Encoder::skipQuantum > 0) {
if (UNLIKELY(v >= value_ + Encoder::skipQuantum)) {
// NOTE: The addition can overflow here, but that means value_ is within
// skipQuantum_ distance from the maximum representable value, and thus
// the last value, so the comparison is still correct.
if (UNLIKELY(v >= addT(value_, Encoder::skipQuantum))) {
const size_t steps = v / Encoder::skipQuantum;
const size_t dest = loadUnaligned<SkipValueType>(
this->skipPointers_ + (steps - 1) * sizeof(SkipValueType));
......@@ -496,7 +512,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
auto position = position_;
if constexpr (Encoder::skipQuantum > 0) {
if (v >= value_ + Encoder::skipQuantum) {
if (v >= addT(value_, Encoder::skipQuantum)) {
auto outer = outer_;
const size_t steps = v / Encoder::skipQuantum;
const size_t dest = loadUnaligned<SkipValueType>(
......@@ -597,17 +613,19 @@ template <
class Encoder,
class Instructions = instructions::Default,
bool kUnchecked = false,
class SizeType = typename Encoder::SkipValueType>
class SizeT = typename Encoder::SkipValueType>
class EliasFanoReader {
public:
using EncoderType = Encoder;
using ValueType = typename Encoder::ValueType;
using SizeType = SizeT;
explicit EliasFanoReader(const typename Encoder::CompressedList& list)
: upper_(list),
lower_(list.lower),
size_(list.size),
numLowerBits_(list.numLowerBits) {
DCHECK_LE(list.size, std::numeric_limits<SizeType>::max());
DCHECK(Instructions::supported());
// To avoid extra branching during skipTo() while reading
// upper sequence we need to know the last element.
......@@ -635,34 +653,32 @@ class EliasFanoReader {
return false;
}
upper_.previous();
value_ =
readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_);
value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
return true;
}
bool next() {
if (!kUnchecked && UNLIKELY(position() + 1 >= size_)) {
if (!kUnchecked && UNLIKELY(detail::addT(position(), 1) >= size_)) {
return setDone();
}
upper_.next();
value_ =
readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_);
value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
return true;
}
/**
* Advances by n elements. n = 0 is allowed and has no effect. Returns false
* if the end of the list is reached.
* if the end of the list is reached. position() + n must be representable by
* SizeType.
*/
bool skip(SizeType n) {
if (n == 0) {
return valid();
}
if (kUnchecked || LIKELY(position() + n < size_)) {
if (kUnchecked || LIKELY(detail::addT(position(), n) < size_)) {
upper_.skip(n);
value_ =
readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_);
value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
return true;
}
......@@ -675,8 +691,8 @@ class EliasFanoReader {
* at position -1). Returns false if no such element exists.
*/
bool skipTo(ValueType value) {
if (value != kInvalidValue) {
DCHECK_GE(value + 1, value_ + 1);
if (value_ != kInvalidValue) {
DCHECK_GE(value, value_);
}
if (!kUnchecked && UNLIKELY(value > lastValue_)) {
......@@ -696,9 +712,8 @@ class EliasFanoReader {
* upper and lower bits.
*/
void prepareSkipTo(ValueType value) const {
// Also works when value_ == kInvalidValue.
if (value != kInvalidValue) {
DCHECK_GE(value + 1, value_ + 1);
if (value_ != kInvalidValue) {
DCHECK_GE(value, value_);
}
if ((!kUnchecked && value > lastValue_) || (value == value_)) {
......@@ -719,11 +734,12 @@ class EliasFanoReader {
* false if n >= size().
*/
bool jump(SizeType n) {
if (n + 1 < upper_.position() + 1) { // Also works if position() == -1.
// Also works if position() == -1.
if (detail::addT(n, 1) < detail::addT(position(), 1)) {
reset();
n += 1; // Initial position is -1.
} else {
n -= upper_.position();
n -= position();
}
return skip(n);
}
......@@ -745,7 +761,7 @@ class EliasFanoReader {
// iterating backwards to its first element.
auto valueLower = Instructions::bzhi(value_, numLowerBits_);
while (!upper_.isAtBeginningOfRun() &&
readLowerPart(upper_.position() - 1) == valueLower) {
readLowerPart(position() - 1) == valueLower) {
upper_.previous();
}
return true;
......@@ -768,7 +784,7 @@ class EliasFanoReader {
ValueType previousValue() const {
DCHECK_GT(position(), 0);
DCHECK_LT(position(), size());
return readLowerPart(upper_.position() - 1) |
return readLowerPart(position() - 1) |
(upper_.previousValue() << numLowerBits_);
}
......@@ -808,8 +824,7 @@ class EliasFanoReader {
FOLLY_ALWAYS_INLINE void iterateTo(ValueType value) {
while (true) {
value_ =
readLowerPart(upper_.position()) | (upper_.value() << numLowerBits_);
value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
if (LIKELY(value_ >= value)) {
break;
}
......
......@@ -130,15 +130,15 @@ void testNext(const std::vector<uint64_t>& data, const List& list) {
EXPECT_FALSE(reader.valid());
for (size_t i = 0; i < data.size(); ++i) {
EXPECT_TRUE(reader.next());
EXPECT_TRUE(reader.valid());
EXPECT_EQ(reader.value(), data[i]);
EXPECT_EQ(reader.position(), i);
EXPECT_TRUE(reader.next()) << i << " " << data.size();
EXPECT_TRUE(reader.valid()) << i << " " << data.size();
EXPECT_EQ(reader.value(), data[i]) << i << " " << data.size();
EXPECT_EQ(reader.position(), i) << i << " " << data.size();
maybeTestPreviousValue(data, reader, i);
maybeTestPrevious(data, reader, i);
}
EXPECT_FALSE(reader.next());
EXPECT_FALSE(reader.valid());
EXPECT_FALSE(reader.next()) << data.size();
EXPECT_FALSE(reader.valid()) << data.size();
EXPECT_EQ(reader.position(), reader.size());
}
......@@ -149,6 +149,11 @@ void testSkip(
Reader reader(list);
for (size_t i = skipStep - 1; i < data.size(); i += skipStep) {
// Destination must be representable.
if (i + skipStep > std::numeric_limits<typename Reader::SizeType>::max()) {
return;
}
// Also test that skip(0) stays in place.
for (auto step : {skipStep, size_t(0)}) {
EXPECT_TRUE(reader.skip(step));
......@@ -357,6 +362,8 @@ void testEmpty() {
template <class Reader, class Encoder>
void testAll(const std::vector<uint64_t>& data) {
SCOPED_TRACE(__PRETTY_FUNCTION__);
auto list = Encoder::encode(data.begin(), data.end());
testNext<Reader>(data, list);
testSkip<Reader>(data, list);
......
......@@ -85,8 +85,8 @@ TEST(EliasFanoCoding, defaultNumLowerBits) {
class EliasFanoCodingTest : public ::testing::Test {
public:
void doTestEmpty() {
typedef EliasFanoEncoderV2<uint32_t, size_t> Encoder;
typedef EliasFanoReader<Encoder> Reader;
using Encoder = EliasFanoEncoderV2<uint32_t, size_t>;
using Reader = EliasFanoReader<Encoder>;
testEmpty<Reader, Encoder>();
}
......@@ -97,13 +97,12 @@ class EliasFanoCodingTest : public ::testing::Test {
typename SkipValueType,
bool kUpperFirst>
void doTest() {
typedef EliasFanoEncoderV2<
using Encoder = EliasFanoEncoderV2<
ValueType,
SkipValueType,
kSkipQuantum,
kForwardQuantum,
kUpperFirst>
Encoder;
kUpperFirst>;
using Reader = EliasFanoReader<Encoder, instructions::Default, false>;
testAll<Reader, Encoder>({0});
testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
......@@ -117,18 +116,39 @@ class EliasFanoCodingTest : public ::testing::Test {
template <size_t kSkipQuantum, size_t kForwardQuantum, typename ValueType>
void doTestAll() {
// Note: SkipValue of uint8/16 aren't well-supported. For now we only test
// ValueType for uint32/64.
static_assert(
std::is_same<ValueType, uint32_t>::value ||
std::is_same<ValueType, uint64_t>::value);
// TODO(ott): improve test coverage for SkipValue of uint8/16 once such
// types are well supported.
doTest<kSkipQuantum, kForwardQuantum, ValueType, uint32_t, true>();
doTest<kSkipQuantum, kForwardQuantum, ValueType, uint32_t, false>();
doTest<kSkipQuantum, kForwardQuantum, ValueType, uint64_t, true>();
doTest<kSkipQuantum, kForwardQuantum, ValueType, uint64_t, false>();
}
// Test lists where values and sizes are close to the numeric limits of the
// corresponding types, by using 16-bit types for everything.
template <size_t kSkipQuantum, size_t kForwardQuantum, bool kUpperFirst>
void doTestDense() {
using Encoder = EliasFanoEncoderV2<
uint16_t,
uint16_t,
kSkipQuantum,
kForwardQuantum,
kUpperFirst>;
using Reader = EliasFanoReader<Encoder, instructions::Default, false>;
constexpr size_t kMaxU16 = std::numeric_limits<uint16_t>::max();
// kMaxU16 is reserved for both value and size.
testAll<Reader, Encoder>(generateSeqList(1, kMaxU16 - 1));
// Test various sizes close to the limit.
for (size_t i = 1; i <= 16; ++i) {
testAll<Reader, Encoder>(generateRandomList(
kMaxU16 - i, kMaxU16 - 1, /* withDuplicates */ true));
}
}
template <size_t kSkipQuantum, size_t kForwardQuantum>
void doTestDenseAll() {
doTestDense<kSkipQuantum, kForwardQuantum, true>();
doTestDense<kSkipQuantum, kForwardQuantum, false>();
}
};
TEST_F(EliasFanoCodingTest, Empty) {
......@@ -138,21 +158,25 @@ TEST_F(EliasFanoCodingTest, Empty) {
TEST_F(EliasFanoCodingTest, Simple) {
doTestAll<0, 0, uint32_t>();
doTestAll<0, 0, uint64_t>();
doTestDenseAll<0, 0>();
}
TEST_F(EliasFanoCodingTest, SkipPointers) {
doTestAll<128, 0, uint32_t>();
doTestAll<128, 0, uint64_t>();
doTestDenseAll<128, 0>();
}
TEST_F(EliasFanoCodingTest, ForwardPointers) {
doTestAll<0, 128, uint32_t>();
doTestAll<0, 128, uint64_t>();
doTestDenseAll<0, 128>();
}
TEST_F(EliasFanoCodingTest, SkipForwardPointers) {
doTestAll<128, 128, uint32_t>();
doTestAll<128, 128, uint64_t>();
doTestDenseAll<128, 128>();
}
TEST_F(EliasFanoCodingTest, BugLargeGapInUpperBits) { // t16274876
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment