Commit c9c55642, authored by Swaroop Manjunath, committed by Facebook GitHub Bot

Eliminate need for reading upper bound at construction.

Summary:
# Note: This is a resubmission after reverting this change.
The failing `DCHECK` has been fixed with an explicit cast to `size_t` to accommodate long lists.

# Summary

This diff introduces the following changes to the EliasFanoReader implementation.

- `EliasFanoReader` no longer requires knowledge of the last value in the list at construction time. This removes the need to access the last byte of the encoded list.
- Operations in `UpperBitsReader` are now responsible for ensuring validity.
- Removes the constraint that the last set bit in `upperBits` must be in the last byte of the encoded list.

The diff also extends the unit tests for Elias-Fano coding: the upper bound passed to the encoder at construction is extended by an arbitrary amount, to verify that additional 0-blocks at the end of the list do not affect the behavior of the reader.

Reviewed By: ot, luciang

Differential Revision: D22211304

fbshipit-source-id: 0dbe904c9fd8cd9568a480355e5e6a4525922966
parent 01da43c2
This diff is collapsed.
...@@ -35,6 +35,11 @@ ...@@ -35,6 +35,11 @@
namespace folly { namespace folly {
namespace compression { namespace compression {
// Generic fallback: for list types without a dedicated overload, no universe
// upper bound is reported and an empty Optional is returned. The list argument
// is intentionally unused.
template <typename ValueType, class List>
folly::Optional<std::size_t> getUniverseUpperBound(const List& /* list */) {
  return folly::Optional<std::size_t>{};
}
template <class URNG> template <class URNG>
std::vector<uint64_t> generateRandomList( std::vector<uint64_t> generateRandomList(
size_t n, size_t n,
...@@ -268,6 +273,16 @@ void testSkipTo(const std::vector<uint64_t>& data, const List& list) { ...@@ -268,6 +273,16 @@ void testSkipTo(const std::vector<uint64_t>& data, const List& list) {
EXPECT_EQ(reader.position(), reader.size()); EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next()); EXPECT_FALSE(reader.next());
} }
// Skip past the last element and before the upperBound.
using ValueType = typename Reader::ValueType;
if (const auto upperBound = getUniverseUpperBound<ValueType>(list);
upperBound && *upperBound != data.back()) {
Reader reader(list);
EXPECT_FALSE(reader.skipTo(*upperBound));
EXPECT_FALSE(reader.valid());
EXPECT_EQ(reader.position(), reader.size());
EXPECT_FALSE(reader.next());
}
} }
template <class Reader, class List> template <class Reader, class List>
...@@ -362,9 +377,19 @@ void testEmpty() { ...@@ -362,9 +377,19 @@ void testEmpty() {
} }
} }
// `upperBoundExtension` is added to the last element to form the upper bound
// passed to the encoder, which injects additional 0-blocks at the end of the
// list. This lets us test lists with a large gap between the last element and
// the universe upper bound, exercising bounds-checking when skipping past the
// last element.
template <class Reader, class Encoder> template <class Reader, class Encoder>
void testAll(const std::vector<uint64_t>& data) { void testAll(
auto list = Encoder::encode(data.begin(), data.end()); const std::vector<uint64_t>& data,
uint64_t upperBoundExtension = 0) {
Encoder encoder(data.size(), data.back() + upperBoundExtension);
for (const auto value : data) {
encoder.add(value);
}
auto list = encoder.finish();
testNext<Reader>(data, list); testNext<Reader>(data, list);
testSkip<Reader>(data, list); testSkip<Reader>(data, list);
testSkipTo<Reader>(data, list); testSkipTo<Reader>(data, list);
......
...@@ -21,11 +21,32 @@ ...@@ -21,11 +21,32 @@
#include <vector> #include <vector>
#include <folly/Benchmark.h> #include <folly/Benchmark.h>
#include <folly/Optional.h>
#include <folly/Random.h>
#include <folly/experimental/EliasFanoCoding.h> #include <folly/experimental/EliasFanoCoding.h>
#include <folly/experimental/Select64.h> #include <folly/experimental/Select64.h>
#include <folly/experimental/test/CodingTestUtils.h> #include <folly/experimental/test/CodingTestUtils.h>
#include <folly/init/Init.h> #include <folly/init/Init.h>
namespace folly {
namespace compression {
// Overload that lets CodingTestUtils retrieve the universe upper bound of an
// Elias-Fano encoded list for the test cases that need it.
//
// The bound is reconstructed from the list's layout fields:
//   - the upper part is `8 * upperSizeBytes - size` (bits in the upper region
//     minus the element count), clamped to the largest value that still fits
//     in ValueType after being shifted left by `numLowerBits`;
//   - the lower part has all `numLowerBits` bits set.
template <typename ValueType, typename T>
folly::Optional<std::size_t> getUniverseUpperBound(
    const EliasFanoCompressedListBase<T>& list) {
  constexpr ValueType maxUpperValue = std::numeric_limits<ValueType>::max();
  const ValueType maxUpperBits = maxUpperValue >> list.numLowerBits;
  const ValueType upperBitsUniverse = std::min(
      static_cast<ValueType>(8 * list.upperSizeBytes - list.size),
      maxUpperBits);
  // Build the mask in ValueType rather than int: a plain `1` is an int, so
  // shifting it by 31 or more bits (possible for 64-bit value types) would be
  // undefined behavior or yield a truncated mask.
  const ValueType lowerBitsMask =
      static_cast<ValueType>((ValueType{1} << list.numLowerBits) - 1);
  return (upperBitsUniverse << list.numLowerBits) | lowerBitsMask;
}
} // namespace compression
} // namespace folly
using namespace folly::compression; using namespace folly::compression;
namespace { namespace {
...@@ -114,6 +135,13 @@ class EliasFanoCodingTest : public ::testing::Test { ...@@ -114,6 +135,13 @@ class EliasFanoCodingTest : public ::testing::Test {
// max() cannot be read, as it is assumed an invalid value. // max() cannot be read, as it is assumed an invalid value.
// TODO(ott): It should be possible to lift this constraint. // TODO(ott): It should be possible to lift this constraint.
testAll<Reader, Encoder>({0, 1, std::numeric_limits<uint32_t>::max() - 1}); testAll<Reader, Encoder>({0, 1, std::numeric_limits<uint32_t>::max() - 1});
// Test data with additional trailing 0s in the upperBits by extending
// the upper bound.
constexpr uint64_t minUpperBoundExtension = 2;
constexpr uint64_t maxUpperBoundExtension = 1024;
testAll<Reader, Encoder>(
generateRandomList(100 * 1000, 10 * 1000 * 1000),
folly::Random::rand32(minUpperBoundExtension, maxUpperBoundExtension));
} }
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment