Do not read the end of the list on EliasFanoReader construction

Summary: Move size and upper bound accounting to `UpperBitsReader`, so we don't need to read the last value on construction, which can be expensive when opening a large number of small lists. This also makes the maximum `ValueType` representable, and lifts the requirement that the provided upper bound at construction time must be equal to the last element in the list, allowing multiple lists to share the encoding of the upper bound. Based on initial work by swaroopnm. Reviewed By: philippv Differential Revision: D32130075 fbshipit-source-id: e98a053b46b10c435ac0d402ff94a56fcfe095ad

Do not read the end of the list on EliasFanoReader construction
Summary: Move size and upper bound accounting to `UpperBitsReader`, so we don't need to read the last value on construction, which can be expensive when opening a large number of small lists. This also makes the maximum `ValueType` representable, and lifts the requirement that the provided upper bound at construction time must be equal to the last element in the list, allowing multiple lists to share the encoding of the upper bound. Based on initial work by swaroopnm. Reviewed By: philippv Differential Revision: D32130075 fbshipit-source-id: e98a053b46b10c435ac0d402ff94a56fcfe095ad
928aa5de · Giuseppe Ottaviano · Facebook GitHub Bot · 446ac5fa · 928aa5de · 928aa5de
Commit 928aa5de authored Nov 04, 2021 by Giuseppe Ottaviano Committed by Facebook GitHub Bot Nov 04, 2021
3 changed files
--- a/folly/experimental/EliasFanoCoding.h
+++ b/folly/experimental/EliasFanoCoding.h
@@ -158,7 +158,6 @@ struct EliasFanoEncoderV2 {
            Layout::fromUpperBoundAndSize(upperBound, size).allocList()) {}
  void add(ValueType value) {
-    CHECK_LT(value, std::numeric_limits<ValueType>::max());
    CHECK_GE(value, lastValue_);
    const auto numLowerBits = result_.numLowerBits;
@@ -259,8 +258,6 @@ struct EliasFanoEncoderV2<
    // *** Validity checks.
    // Shift by numLowerBits must be valid.
    CHECK_LT(static_cast<int>(numLowerBits), 8 * sizeof(Value));
-    // max() - 1 is reserved.
-    CHECK_LT(size, std::numeric_limits<SkipValueType>::max());
    CHECK_LE(
        upperBound >> numLowerBits, std::numeric_limits<SkipValueType>::max());
@@ -366,25 +363,32 @@ FOLLY_ALWAYS_INLINE T addT(T a, U b) {
  return static_cast<T>(a + static_cast<T>(b));
 }
-template <class Encoder, class Instructions, class SizeType>
+template <
+    class Encoder,
+    class Instructions,
+    class SizeType,
+    bool kUnchecked = false>
 class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
                        SkipPointers<Encoder::skipQuantum> {
  using SkipValueType = typename Encoder::SkipValueType;
 public:
  using ValueType = typename Encoder::ValueType;
+  static constexpr SizeType kBeforeFirstPos = -1;
  explicit UpperBitsReader(const typename Encoder::CompressedList& list)
      : ForwardPointers<Encoder::forwardQuantum>(list.forwardPointers),
        SkipPointers<Encoder::skipQuantum>(list.skipPointers),
-        start_(list.upper) {
+        start_(list.upper),
+        size_(list.size),
+        upperBound_(estimateUpperBound(list)) {
    reset();
  }
  void reset() {
    // Pretend the bitvector is prefixed by a block of zeroes.
    block_ = 0;
-    position_ = static_cast<SizeType>(-1);
+    position_ = kBeforeFirstPos;
    outer_ = static_cast<OuterType>(-sizeof(block_t));
    value_ = 0;
  }
@@ -393,7 +397,18 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
  FOLLY_ALWAYS_INLINE ValueType value() const { return value_; }
-  FOLLY_ALWAYS_INLINE ValueType previous() {
+  FOLLY_ALWAYS_INLINE bool valid() const {
+    // Also checks that position() != kBeforeFirstPos.
+    return position() < size();
+  }
+  FOLLY_ALWAYS_INLINE SizeType size() const { return size_; }
+  FOLLY_ALWAYS_INLINE bool previous() {
+    if (!kUnchecked && UNLIKELY(position() == 0)) {
+      return false;
+    }
    size_t inner;
    block_t block;
    DCHECK_GE(outer_, 0);
@@ -404,9 +419,13 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
    return setValue(inner);
  }
-  FOLLY_ALWAYS_INLINE ValueType next() {
+  FOLLY_ALWAYS_INLINE bool next() {
+    if (!kUnchecked && UNLIKELY(addT(position(), 1) >= size())) {
+      return setDone();
+    }
    // Skip to the first non-zero block.
-    while (block_ == 0) {
+    while (UNLIKELY(block_ == 0)) {
      outer_ += sizeof(block_t);
      block_ = loadUnaligned<block_t>(start_ + outer_);
    }
@@ -418,8 +437,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
    return setValue(inner);
  }
-  FOLLY_ALWAYS_INLINE ValueType skip(SizeType n) {
+  FOLLY_ALWAYS_INLINE bool skip(SizeType n) {
    DCHECK_GT(n, 0);
+    if (!kUnchecked && UNLIKELY(addT(position_, n) >= size())) {
+      return setDone();
+    }
    position_ += n; // n 1-bits will be read.
@@ -453,8 +475,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
  // Skip to the first element that is >= v and located *after* the current
  // one (so even if current value equals v, position will be increased by 1).
-  FOLLY_ALWAYS_INLINE ValueType skipToNext(ValueType v) {
+  FOLLY_ALWAYS_INLINE bool skipToNext(ValueType v) {
    DCHECK_GE(v, value_);
+    if (!kUnchecked && UNLIKELY(v > upperBound_)) {
+      return setDone();
+    }
    // Use skip pointer.
    if constexpr (Encoder::skipQuantum > 0) {
@@ -466,6 +491,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
        const size_t dest = loadUnaligned<SkipValueType>(
            this->skipPointers_ + (steps - 1) * sizeof(SkipValueType));
+        DCHECK_LE(dest, size());
+        if (!kUnchecked && UNLIKELY(dest == size())) {
+          return setDone();
+        }
        reposition(dest + Encoder::skipQuantum * steps);
        position_ = dest - 1;
@@ -489,6 +519,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
      skip -= cnt;
      position_ += kBitsPerBlock - cnt;
      outer_ += sizeof(block_t);
+      DCHECK_LT(outer_, (static_cast<size_t>(upperBound_) + size() + 7) / 8);
      block_ = loadUnaligned<block_t>(start_ + outer_);
    }
@@ -498,17 +529,24 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
      block_ &= block_t(-1) << inner;
    }
-    next();
+    DCHECK_LT(addT(position(), 1), addT(size(), 1));
-    return value_;
+    return next();
  }
  /**
-   * Prepare to skip to `value`. This is a constant-time operation that will
+   * Try to prepare to skip to value. This is a constant-time operation that
-   * prefetch memory required for a `skipTo(value)` call.
+   * will attempt to prefetch memory required for a subsequent skipTo(value)
+   * call if the value to skip to is within this list.
   *
-   * @return position of reader
+   * Returns:
+   *   {true, position of the reader} if the skip is valid,
+   *   {false, size()} otherwise.
   */
-  FOLLY_ALWAYS_INLINE SizeType prepareSkipTo(ValueType v) const {
+  FOLLY_ALWAYS_INLINE std::pair<bool, SizeType> prepareSkipTo(
+      ValueType v) const {
+    if (!kUnchecked && UNLIKELY(v > upperBound_)) {
+      return std::make_pair(false, size());
+    }
    auto position = position_;
    if constexpr (Encoder::skipQuantum > 0) {
@@ -518,6 +556,11 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
        const size_t dest = loadUnaligned<SkipValueType>(
            this->skipPointers_ + (steps - 1) * sizeof(SkipValueType));
+        DCHECK_LE(dest, size());
+        if (!kUnchecked && UNLIKELY(dest == size())) {
+          return std::make_pair(false, size());
+        }
        position = dest - 1;
        outer = (dest + Encoder::skipQuantum * steps) / 8;
@@ -534,7 +577,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
      }
    }
-    return position;
+    return std::make_pair(true, position);
  }
  FOLLY_ALWAYS_INLINE ValueType previousValue() const {
@@ -548,7 +591,7 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
  // Returns true if we're at the beginning of the list, or previousValue() !=
  // value().
  FOLLY_ALWAYS_INLINE bool isAtBeginningOfRun() const {
-    DCHECK_NE(position(), static_cast<SizeType>(-1));
+    DCHECK_NE(position(), kBeforeFirstPos);
    if (position_ == 0) {
      return true;
    }
@@ -556,31 +599,45 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
    return (start_[bitPos / 8] & (1 << (bitPos % 8))) == 0;
  }
-  FOLLY_ALWAYS_INLINE void setDone(SizeType endPos) { position_ = endPos; }
 private:
  using block_t = uint64_t;
  // The size in bytes of the upper bits is limited by n + universe / 8,
  // so a type that can hold either sizes or values is sufficient.
  using OuterType = typename std::common_type_t<ValueType, SizeType>;
-  FOLLY_ALWAYS_INLINE ValueType setValue(size_t inner) {
+  static ValueType estimateUpperBound(
+      const typename Encoder::CompressedList& list) {
+    size_t upperBound = 8 * list.upperSizeBytes - list.size;
+    // The bitvector is byte-aligned, so we may be overestimating the universe
+    // size. Make sure it fits in ValueType.
+    return static_cast<ValueType>(std::min<size_t>(
+        upperBound,
+        std::numeric_limits<ValueType>::max() >> list.numLowerBits));
+  }
+  FOLLY_ALWAYS_INLINE bool setValue(size_t inner) {
    value_ = static_cast<ValueType>(8 * outer_ + inner - position_);
-    return value_;
+    return true;
+  }
+  FOLLY_ALWAYS_INLINE bool setDone() {
+    position_ = size_;
+    return false;
  }
  // NOTE: dest is a position in the bit vector, use size_t as SizeType may
  // not be sufficient here.
  FOLLY_ALWAYS_INLINE void reposition(size_t dest) {
    outer_ = dest / 8;
+    DCHECK_LT(outer_, (static_cast<size_t>(upperBound_) + size() + 7) / 8);
    block_ = loadUnaligned<block_t>(start_ + outer_);
    block_ &= ~((block_t(1) << (dest % 8)) - 1);
  }
  FOLLY_ALWAYS_INLINE void getPreviousInfo(
      block_t& block, size_t& inner, OuterType& outer) const {
-    DCHECK_NE(position(), std::numeric_limits<SizeType>::max());
    DCHECK_GT(position(), 0);
+    DCHECK_LT(position(), size());
    outer = outer_;
    block = loadUnaligned<block_t>(start_ + outer);
@@ -595,6 +652,8 @@ class UpperBitsReader : ForwardPointers<Encoder::forwardQuantum>,
  }
  const unsigned char* const start_;
+  const SizeType size_; // Size of the list.
+  const ValueType upperBound_; // Upper bound of values in this list.
  block_t block_;
  SizeType position_; // Index of current value (= #reads - 1).
  OuterType outer_; // Outer offset: number of consumed bytes in upper.
@@ -615,55 +674,35 @@ template <
    bool kUnchecked = false,
    class SizeT = typename Encoder::SkipValueType>
 class EliasFanoReader {
+  using UpperBitsReader =
+      detail::UpperBitsReader<Encoder, Instructions, SizeT, kUnchecked>;
 public:
  using EncoderType = Encoder;
  using ValueType = typename Encoder::ValueType;
  using SizeType = SizeT;
  explicit EliasFanoReader(const typename Encoder::CompressedList& list)
-      : upper_(list),
+      : upper_(list), lower_(list.lower), numLowerBits_(list.numLowerBits) {
-        lower_(list.lower),
-        size_(list.size),
-        numLowerBits_(list.numLowerBits) {
    DCHECK_LE(list.size, std::numeric_limits<SizeType>::max());
    DCHECK(Instructions::supported());
-    // To avoid extra branching during skipTo() while reading
-    // upper sequence we need to know the last element.
-    // If kUnchecked == true, we do not check that skipTo() is called
-    // within the bounds, so we can avoid initializing lastValue_.
-    if (kUnchecked || UNLIKELY(list.size == 0)) {
-      lastValue_ = 0;
-      return;
-    }
-    ValueType lastUpperValue = ValueType(8 * list.upperSizeBytes - size_);
-    auto it = list.upper + list.upperSizeBytes - 1;
-    DCHECK_NE(*it, 0);
-    lastUpperValue -= 8 - findLastSet(*it);
-    lastValue_ = readLowerPart(size_ - 1) | (lastUpperValue << numLowerBits_);
  }
-  void reset() {
+  void reset() { upper_.reset(); }
-    upper_.reset();
-    value_ = kInvalidValue;
-  }
  bool previous() {
-    if (!kUnchecked && UNLIKELY(position() == 0)) {
+    if (LIKELY(upper_.previous())) {
-      reset();
+      return setValue(readCurrentValue());
-      return false;
    }
-    upper_.previous();
+    reset();
-    value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
+    return false;
-    return true;
  }
  bool next() {
-    if (!kUnchecked && UNLIKELY(detail::addT(position(), 1) >= size_)) {
+    if (LIKELY(upper_.next())) {
-      return setDone();
+      return setValue(readCurrentValue());
    }
-    upper_.next();
+    return false;
-    value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
-    return true;
  }
  /**
@@ -675,36 +714,39 @@ class EliasFanoReader {
    if (n == 0) {
      return valid();
    }
+    if (!upper_.skip(n)) {
-    if (kUnchecked || LIKELY(detail::addT(position(), n) < size_)) {
+      return false;
-      upper_.skip(n);
-      value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
-      return true;
    }
+    return setValue(readCurrentValue());
-    return setDone();
  }
  /**
   * Skips to the first element >= value whose position is greater or equal to
-   * the current position. Requires that value >= value() (or that the reader is
+   * the current position.
-   * at position -1). Returns false if no such element exists.
+   * Requires that value >= value() (or that the reader is positioned before the
+   * first element). Returns false if no such element exists.
   */
  bool skipTo(ValueType value) {
-    if (value_ != kInvalidValue) {
+    if (valid()) {
      DCHECK_GE(value, value_);
+      if (UNLIKELY(value == value_)) {
+        return true;
+      }
    }
-    if (!kUnchecked && UNLIKELY(value > lastValue_)) {
+    ValueType upperValue = value >> numLowerBits_;
-      return setDone();
-    } else if (UNLIKELY(value == value_)) {
+    if (UNLIKELY(!upper_.skipToNext(upperValue))) {
-      return true;
+      return false;
    }
-    ValueType upperValue = value >> numLowerBits_;
+    do {
-    upper_.skipToNext(upperValue);
+      if (auto cur = readCurrentValue(); LIKELY(cur >= value)) {
-    iterateTo(value);
+        return setValue(cur);
-    return true;
+      }
+    } while (LIKELY(upper_.next()));
+    return false;
  }
  /**
@@ -712,18 +754,20 @@ class EliasFanoReader {
   * upper and lower bits.
   */
  void prepareSkipTo(ValueType value) const {
-    if (value_ != kInvalidValue) {
+    if (valid()) {
      DCHECK_GE(value, value_);
-    }
+      if (UNLIKELY(value == value_)) {
+        return;
-    if ((!kUnchecked && value > lastValue_) || (value == value_)) {
+      }
-      return;
    }
    // Do minimal computation required to prefetch address used in
    // `readLowerPart()`.
-    ValueType upperValue = (value >> numLowerBits_);
+    ValueType upperValue = value >> numLowerBits_;
-    const auto upperPosition = upper_.prepareSkipTo(upperValue);
+    const auto [valid, upperPosition] = upper_.prepareSkipTo(upperValue);
+    if (!valid) {
+      return;
+    }
    const auto addr = lower_ + (upperPosition * numLowerBits_ / 8);
    __builtin_prefetch(addr);
    __builtin_prefetch(addr + kCacheLineSize);
@@ -752,7 +796,7 @@ class EliasFanoReader {
   * assumeDistinct = true can enable some optimizations.
   */
  bool jumpTo(ValueType value, bool assumeDistinct = false) {
-    if (value == value_) {
+    if (valid() && value == value_) {
      if (assumeDistinct == true) {
        return true;
      }
@@ -769,18 +813,13 @@ class EliasFanoReader {
    // We need to reset if we're not in the initial state and the jump is
    // backwards.
-    if (position() != static_cast<SizeType>(-1) &&
+    if (position() != UpperBitsReader::kBeforeFirstPos &&
-        value < value_) { // If position() == size() value_ is kInvalidValue.
+        (position() == size() || value < value_)) {
      reset();
    }
    return skipTo(value);
  }
-  ValueType lastValue() const {
-    CHECK(!kUnchecked);
-    return lastValue_;
-  }
  ValueType previousValue() const {
    DCHECK_GT(position(), 0);
    DCHECK_LT(position(), size());
@@ -788,11 +827,9 @@ class EliasFanoReader {
        (upper_.previousValue() << numLowerBits_);
  }
-  SizeType size() const { return size_; }
+  SizeType size() const { return upper_.size(); }
-  bool valid() const {
+  bool valid() const { return upper_.valid(); }
-    return position() < size(); // Also checks that position() != -1.
-  }
  SizeType position() const { return upper_.position(); }
@@ -802,17 +839,14 @@ class EliasFanoReader {
  }
 private:
-  // Must hold kInvalidValue + 1 == 0.
+  FOLLY_ALWAYS_INLINE bool setValue(ValueType value) {
-  constexpr static ValueType kInvalidValue = -1;
+    DCHECK(valid());
+    value_ = value;
-  FOLLY_ALWAYS_INLINE bool setDone() {
+    return true;
-    value_ = kInvalidValue;
-    upper_.setDone(size_);
-    return false;
  }
  FOLLY_ALWAYS_INLINE ValueType readLowerPart(SizeType i) const {
-    DCHECK_LT(i, size_);
+    DCHECK_LT(i, size());
    const size_t pos = i * numLowerBits_;
    const unsigned char* ptr = lower_ + (pos / 8);
    const uint64_t ptrv = loadUnaligned<uint64_t>(ptr);
@@ -822,22 +856,15 @@ class EliasFanoReader {
    return Instructions::bzhi(ptrv >> (pos % 8), numLowerBits_);
  }
-  FOLLY_ALWAYS_INLINE void iterateTo(ValueType value) {
+  FOLLY_ALWAYS_INLINE ValueType readCurrentValue() {
-    while (true) {
+    return readLowerPart(position()) | (upper_.value() << numLowerBits_);
-      value_ = readLowerPart(position()) | (upper_.value() << numLowerBits_);
-      if (LIKELY(value_ >= value)) {
-        break;
-      }
-      upper_.next();
-    }
  }
-  detail::UpperBitsReader<Encoder, Instructions, SizeType> upper_;
+  // Ordering of fields is counter-intuitive but it optimizes the layout.
-  const uint8_t* lower_;
+  UpperBitsReader upper_;
-  SizeType size_;
+  const uint8_t* const lower_;
-  ValueType value_ = kInvalidValue;
+  ValueType value_;
-  ValueType lastValue_;
+  const uint8_t numLowerBits_;
-  uint8_t numLowerBits_;
 };
 } // namespace compression

--- a/folly/experimental/test/CodingTestUtils.h
+++ b/folly/experimental/test/CodingTestUtils.h
@@ -249,22 +249,30 @@ void testSkipTo(const std::vector<uint64_t>& data, const List& list) {
    EXPECT_EQ(reader.value(), data[0]);
    EXPECT_EQ(reader.position(), 0);
  }
-  {
-    // Skip past the last element.
+  // Skip past the last element, when possible. Make sure to probe values far
-    Reader reader(list);
+  // from the last element, as the reader implementation may keep an internal
-    EXPECT_FALSE(reader.skipTo(data.back() + 1));
+  // upper bound larger than that, and we need to make sure we exercise skipping
-    EXPECT_FALSE(reader.valid());
+  // both before and after that.
-    EXPECT_EQ(reader.position(), reader.size());
+  using ValueType = typename Reader::ValueType;
-    EXPECT_FALSE(reader.next());
+  std::vector<ValueType> valuesPastTheEnd;
+  const auto lastValue = data.back();
+  const auto kMaxValue = std::numeric_limits<ValueType>::max();
+  // Keep doubling the distance from the last value until we overflow.
+  for (ValueType value = lastValue + 1; value > lastValue;
+       value += value - lastValue) {
+    valuesPastTheEnd.push_back(value);
  }
-  {
+  if (kMaxValue != lastValue) {
-    // Skip to maximum integer.
+    valuesPastTheEnd.push_back(kMaxValue);
+  }
+  for (auto value : valuesPastTheEnd) {
    Reader reader(list);
-    using ValueType = typename Reader::ValueType;
+    EXPECT_FALSE(reader.skipTo(value)) << value << " " << lastValue;
-    EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max()));
+    EXPECT_FALSE(reader.valid()) << value << " " << lastValue;
-    EXPECT_FALSE(reader.valid());
+    EXPECT_EQ(reader.position(), reader.size()) << value << " " << lastValue;
-    EXPECT_EQ(reader.position(), reader.size());
+    EXPECT_FALSE(reader.next()) << value << " " << lastValue;
-    EXPECT_FALSE(reader.next());
  }
 }
@@ -284,9 +292,9 @@ void testJump(const std::vector<uint64_t>& data, const List& list) {
  for (auto i : is) {
    // Also test idempotency.
    for (size_t round = 0; round < 2; ++round) {
-      EXPECT_TRUE(reader.jump(i));
+      EXPECT_TRUE(reader.jump(i)) << i << " " << data.size();
-      EXPECT_EQ(reader.value(), data[i]);
+      EXPECT_EQ(reader.value(), data[i]) << i << " " << data.size();
-      EXPECT_EQ(reader.position(), i);
+      EXPECT_EQ(reader.position(), i) << i << " " << data.size();
    }
    maybeTestPreviousValue(data, reader, i);
    maybeTestPrevious(data, reader, i);
@@ -332,9 +340,11 @@ void testJumpTo(const std::vector<uint64_t>& data, const List& list) {
    EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
  }
-  EXPECT_FALSE(reader.jumpTo(data.back() + 1));
+  if (data.back() != std::numeric_limits<ValueType>::max()) {
-  EXPECT_FALSE(reader.valid());
+    EXPECT_FALSE(reader.jumpTo(data.back() + 1));
-  EXPECT_EQ(reader.position(), reader.size());
+    EXPECT_FALSE(reader.valid());
+    EXPECT_EQ(reader.position(), reader.size());
+  }
 }
 template <class Reader, class Encoder>
@@ -360,11 +370,20 @@ void testEmpty() {
  }
 }
+// `upperBoundExtension` is required to inject additional 0-blocks
+// at the end of the list. This allows us to test lists with a large gap between
+// last element and universe upper bound, to exercise bounds-checking when
+// skipping past the last element.
 template <class Reader, class Encoder>
-void testAll(const std::vector<uint64_t>& data) {
+void testAll(
+    const std::vector<uint64_t>& data, uint64_t upperBoundExtension = 0) {
  SCOPED_TRACE(__PRETTY_FUNCTION__);
-  auto list = Encoder::encode(data.begin(), data.end());
+  Encoder encoder(data.size(), data.back() + upperBoundExtension);
+  for (const auto value : data) {
+    encoder.add(value);
+  }
+  auto list = encoder.finish();
  testNext<Reader>(data, list);
  testSkip<Reader>(data, list);
  testSkipTo<Reader>(data, list);

--- a/folly/experimental/test/EliasFanoCodingTest.cpp
+++ b/folly/experimental/test/EliasFanoCodingTest.cpp
@@ -21,6 +21,7 @@
 #include <vector>
 #include <folly/Benchmark.h>
+#include <folly/Random.h>
 #include <folly/experimental/EliasFanoCoding.h>
 #include <folly/experimental/Select64.h>
 #include <folly/experimental/test/CodingTestUtils.h>
@@ -106,12 +107,23 @@ class EliasFanoCodingTest : public ::testing::Test {
    using Reader = EliasFanoReader<Encoder, instructions::Default, false>;
    testAll<Reader, Encoder>({0});
    testAll<Reader, Encoder>(generateRandomList(100 * 1000, 10 * 1000 * 1000));
+    // Test a list with size multiple of kForwardQuantum and universe multiple
+    // of kSkipQuantum, to exercise corner cases in the construction of forward
+    // and skip lists.
+    testAll<Reader, Encoder>(generateRandomList(
+        std::max<size_t>(8 * kForwardQuantum, 1024),
+        std::max<size_t>(16 * kSkipQuantum, 2048)));
    testAll<Reader, Encoder>(generateRandomList(
        100 * 1000, 10 * 1000 * 1000, /* withDuplicates */ true));
    testAll<Reader, Encoder>(generateSeqList(1, 100000, 100));
-    // max() cannot be read, as it is assumed an invalid value.
+    testAll<Reader, Encoder>({0, 1, std::numeric_limits<uint32_t>::max()});
-    // TODO(ott): It should be possible to lift this constraint.
+    // Test data with additional trailing 0s in the upperBits by extending
-    testAll<Reader, Encoder>({0, 1, std::numeric_limits<uint32_t>::max() - 1});
+    // the upper bound.
+    constexpr uint64_t minUpperBoundExtension = 2;
+    constexpr uint64_t maxUpperBoundExtension = 1024;
+    testAll<Reader, Encoder>(
+        generateRandomList(100 * 1000, 10 * 1000 * 1000),
+        folly::Random::rand32(minUpperBoundExtension, maxUpperBoundExtension));
  }
  template <size_t kSkipQuantum, size_t kForwardQuantum, typename ValueType>
@@ -135,12 +147,12 @@ class EliasFanoCodingTest : public ::testing::Test {
    using Reader = EliasFanoReader<Encoder, instructions::Default, false>;
    constexpr size_t kMaxU16 = std::numeric_limits<uint16_t>::max();
-    // kMaxU16 is reserved for both value and size.
+    // Max SizeType value is reserved.
    testAll<Reader, Encoder>(generateSeqList(1, kMaxU16 - 1));
    // Test various sizes close to the limit.
    for (size_t i = 1; i <= 16; ++i) {
-      testAll<Reader, Encoder>(generateRandomList(
+      testAll<Reader, Encoder>(
-          kMaxU16 - i, kMaxU16 - 1, /* withDuplicates */ true));
+          generateRandomList(kMaxU16 - i, kMaxU16, /* withDuplicates */ true));
    }
  }
@@ -155,27 +167,43 @@ TEST_F(EliasFanoCodingTest, Empty) {
  doTestEmpty();
 }
-TEST_F(EliasFanoCodingTest, Simple) {
+TEST_F(EliasFanoCodingTest, Simple32Bit) {
  doTestAll<0, 0, uint32_t>();
+}
+TEST_F(EliasFanoCodingTest, Simple64Bit) {
  doTestAll<0, 0, uint64_t>();
+}
+TEST_F(EliasFanoCodingTest, SimpleDense) {
  doTestDenseAll<0, 0>();
 }
-TEST_F(EliasFanoCodingTest, SkipPointers) {
+TEST_F(EliasFanoCodingTest, SkipPointers32Bit) {
  doTestAll<128, 0, uint32_t>();
+}
+TEST_F(EliasFanoCodingTest, SkipPointers64Bit) {
  doTestAll<128, 0, uint64_t>();
+}
+TEST_F(EliasFanoCodingTest, SkipPointersDense) {
  doTestDenseAll<128, 0>();
 }
-TEST_F(EliasFanoCodingTest, ForwardPointers) {
+TEST_F(EliasFanoCodingTest, ForwardPointers32Bit) {
  doTestAll<0, 128, uint32_t>();
+}
+TEST_F(EliasFanoCodingTest, ForwardPointers64Bit) {
  doTestAll<0, 128, uint64_t>();
+}
+TEST_F(EliasFanoCodingTest, ForwardPointersDense) {
  doTestDenseAll<0, 128>();
 }
-TEST_F(EliasFanoCodingTest, SkipForwardPointers) {
+TEST_F(EliasFanoCodingTest, SkipForwardPointers32Bit) {
  doTestAll<128, 128, uint32_t>();
+}
+TEST_F(EliasFanoCodingTest, SkipForwardPointers64Bit) {
  doTestAll<128, 128, uint64_t>();
+}
+TEST_F(EliasFanoCodingTest, SkipForwardPointersDense) {
  doTestDenseAll<128, 128>();
 }