Commit b797de38 authored by Matt Ma's avatar Matt Ma Committed by Facebook Github Bot

Move QuotientMultiSet to folly/experimental/

Summary:
QuotientMultiSet is a space-efficient static data structure to store a non-decreasing sequence of b-bit integers.

If the integers are uniformly distributed, a lookup takes O(1) time and, with high probability, performs a single random memory access.

Reviewed By: ot

Differential Revision: D17506766

fbshipit-source-id: b3e7a22dd193672fadb07d4cccb8b01bedae7cf9
parent ccd541a4
......@@ -578,6 +578,7 @@ if (BUILD_TESTS)
TEST lock_free_ring_buffer_test SOURCES LockFreeRingBufferTest.cpp
#TEST nested_command_line_app_test SOURCES NestedCommandLineAppTest.cpp
#TEST program_options_test SOURCES ProgramOptionsTest.cpp
TEST quotient_multiset_test SOURCES QuotientMultiSetTest.cpp
# Depends on liburcu
#TEST read_mostly_shared_ptr_test SOURCES ReadMostlySharedPtrTest.cpp
#TEST ref_count_test SOURCES RefCountTest.cpp
......
This diff is collapsed.
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/experimental/QuotientMultiSet.h>
#include <math.h>
#include <new>
#include <folly/Math.h>
#if FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
namespace folly {
/**
 * Sizes the quotient/remainder layout for a set of keyBits-bit keys.
 *
 * @param keyBits          Width of the keys in bits; determines maxKey_.
 * @param expectedElements Number of keys the caller expects to insert; values
 *                         below 1 are treated as 1.
 * @param loadFactor       Ratio of expected elements to slots; the slot count
 *                         is ceil(expectedElements / loadFactor).
 */
QuotientMultiSetBuilder::QuotientMultiSetBuilder(
    size_t keyBits,
    size_t expectedElements,
    double loadFactor)
    : keyBits_(keyBits), maxKey_(qms_detail::maxValue(keyBits_)) {
  // Never size the structure for fewer than one element.
  if (expectedElements < 1) {
    expectedElements = 1;
  }
  const uint64_t slotCount = ceil(expectedElements / loadFactor);

  // Pick the divisor so that the key space maps 1:1 onto
  // <quotient, remainder> pairs.
  divisor_ = divCeil(maxKey_, slotCount);
  remainderBits_ = findLastSet(divisor_ - 1);

  // Remainders are limited to 56 bits. For a very sparse set, clamp to that
  // maximum; this can waste up to 3 extra blocks (because of 8-bit
  // quotients) but remains correct.
  if (remainderBits_ > 56) {
    remainderBits_ = 56;
    divisor_ = uint64_t(1) << remainderBits_;
  }

  // Values derived from the final remainder width and divisor.
  blockSize_ = Block::blockSize(remainderBits_);
  fraction_ = qms_detail::getInverse(divisor_);
}
// Destructor defined out of line with the compiler-generated behavior; no
// custom teardown is performed here.
QuotientMultiSetBuilder::~QuotientMultiSetBuilder() = default;
/**
 * Appends new blocks until a block with index `limitIndex` exists.
 *
 * @param limitIndex Index of the last block that must exist afterwards.
 * @return true if at least one block was created by this call.
 */
bool QuotientMultiSetBuilder::maybeAllocateBlocks(size_t limitIndex) {
  // The loop below runs at least once exactly when this holds on entry.
  const bool allocatesAny = numBlocks_ <= limitIndex;
  while (numBlocks_ <= limitIndex) {
    // Each new block is tagged with its own index.
    blocks_.emplace_back(Block::make(remainderBits_), numBlocks_);
    ++numBlocks_;
  }
  return allocatesAny;
}
// Appends `key` to the set being built.
//
// `key` must not exceed maxKey_ and keys must arrive in nondecreasing order;
// both conditions are enforced with FOLLY_SAFE_CHECK.
//
// Returns true if at least one new block was allocated while handling this
// key, false otherwise.
bool QuotientMultiSetBuilder::insert(uint64_t key) {
FOLLY_SAFE_CHECK(key <= maxKey_, "Invalid key");
FOLLY_SAFE_CHECK(
key >= prevKey_, "Keys need to be inserted in nondecreasing order");
// Split the key into its quotient and remainder.
const auto qr = qms_detail::getQuotientAndRemainder(key, divisor_, fraction_);
const auto& quotient = qr.first;
const auto& remainder = qr.second;
// Block and in-block position of the occupied bit that belongs to the
// quotient.
const size_t blockIndex = quotient / kBlockSize;
const size_t offsetInBlock = quotient % kBlockSize;
bool newBlockAllocated = false;
// Allocate block for the given key if necessary. Both the quotient's block
// and the block holding the next free slot (nextSlot_) must exist.
newBlockAllocated |= maybeAllocateBlocks(
std::max<uint64_t>(blockIndex, nextSlot_ / kBlockSize));
// `block` is the block that contains the next slot to be written.
auto block = getBlock(nextSlot_ / kBlockSize).block.get();
// Start a new run when the quotient differs from the previous key's.
if (prevOccupiedQuotient_ != quotient) {
closePreviousRun();
// If the quotient's block lies past the block of the next free slot, skip
// ahead so the new run begins at the first slot of the quotient's block.
if (blockIndex > nextSlot_ / kBlockSize) {
nextSlot_ = (blockIndex * kBlockSize);
newBlockAllocated |= maybeAllocateBlocks(blockIndex);
block = getBlock(blockIndex).block.get();
}
// Update previous run info.
prevRunStart_ = nextSlot_;
prevOccupiedQuotient_ = quotient;
}
// Store the remainder in the next free slot.
block->setRemainder(nextSlot_ % kBlockSize, remainderBits_, remainder);
// Set occupied bit for the given key. The block is looked up again by
// blockIndex because the remainder may have been written to a later block
// than the one holding the quotient's occupied bit.
block = getBlock(blockIndex).block.get();
block->setOccupied(offsetInBlock);
// Advance the write position and bookkeeping counters.
nextSlot_++;
prevKey_ = key;
numKeys_++;
return newBlockAllocated;
}
/**
 * Stores `payload` in the most recently allocated block.
 *
 * At least one block must already exist (checked with DCHECK).
 */
void QuotientMultiSetBuilder::setBlockPayload(uint64_t payload) {
  DCHECK(!blocks_.empty());
  auto& newestEntry = blocks_.back();
  newestEntry.block->payload = payload;
}
/**
 * Terminates the run that ends at the slot just before nextSlot_ and marks
 * the blocks that can no longer change as ready.
 *
 * Does nothing when no slot has been written yet.
 */
void QuotientMultiSetBuilder::closePreviousRun() {
  if (FOLLY_UNLIKELY(nextSlot_ == 0)) {
    return;
  }

  // The last written slot is the end of the previous run.
  const auto lastSlot = nextSlot_ - 1;
  auto runEndBlock = getBlock(lastSlot / kBlockSize).block.get();
  runEndBlock->setRunend(lastSlot % kBlockSize);
  ++numRuns_;

  // If the block owning the run's quotient has exactly one occupied bit so
  // far, this run is the first one in that block: record where it ends as
  // the block's offset.
  auto quotientBlock =
      getBlock(prevOccupiedQuotient_ / kBlockSize).block.get();
  if (isPowTwo(quotientBlock->occupieds)) {
    quotientBlock->offset = lastSlot;
  }

  // Every block whose index is below that of quotient
  // (prevOccupiedQuotient_ + 1) becomes ready. Scanning starts after the
  // blocks already counted as ready and stops at the first block that does
  // not qualify.
  const size_t readyLimit = (prevOccupiedQuotient_ + 1) / kBlockSize;
  size_t pos = readyBlocks_;
  while (pos < blocks_.size() && blocks_[pos].index < readyLimit) {
    blocks_[pos].ready = true;
    ++readyBlocks_;
    ++pos;
  }
}
/**
 * Transfers the leading ready blocks from blocks_ into `buff`.
 *
 * Stops at the first block that is not ready. Ownership of each block's
 * memory passes to an IOBuf of blockSize_ bytes appended to the queue.
 */
void QuotientMultiSetBuilder::moveReadyBlocks(IOBufQueue& buff) {
  while (!blocks_.empty() && blocks_.front().ready) {
    auto& head = blocks_.front();
    buff.append(IOBuf::takeOwnership(head.block.release(), blockSize_));
    blocks_.pop_front();
  }
}
// Appends the ready blocks at the front of blocks_ to `buff` (via
// moveReadyBlocks) and then resets the ready-block counter to zero.
void QuotientMultiSetBuilder::flush(IOBufQueue& buff) {
moveReadyBlocks(buff);
readyBlocks_ = 0;
}
/**
 * Finishes the set: terminates the last run, emits every remaining block and
 * appends the Metadata trailer to `buff`.
 *
 * @param buff Queue that receives the remaining blocks followed by the
 *             trailer.
 * @throws std::bad_alloc if the trailer cannot be allocated.
 */
void QuotientMultiSetBuilder::close(IOBufQueue& buff) {
  closePreviousRun();

  // Mark all blocks as ready. Walk backwards and stop at the first block that
  // is already marked.
  for (auto iter = blocks_.rbegin(); iter != blocks_.rend(); ++iter) {
    if (iter->ready) {
      break;
    }
    iter->ready = true;
  }
  moveReadyBlocks(buff);

  // Add metadata trailer. This also allows getRemainder() to access a whole
  // 64-bit word at any position without bounds-checking.
  static_assert(sizeof(Metadata) > 7, "getRemainder() is not safe");
  auto metadata = static_cast<Metadata*>(calloc(1, sizeof(Metadata)));
  if (metadata == nullptr) {
    // Without this check the field writes below would dereference null.
    throw std::bad_alloc();
  }
  // Hand the allocation to an IOBuf right away so that it is released if
  // anything below throws.
  auto trailer = IOBuf::takeOwnership(metadata, sizeof(Metadata));

  metadata->numBlocks = numBlocks_;
  metadata->numKeys = numKeys_;
  metadata->divisor = divisor_;
  metadata->keyBits = keyBits_;
  metadata->remainderBits = remainderBits_;
  VLOG(2) << "Metadata: " << metadata->debugString();

  buff.append(std::move(trailer));
}
} // namespace folly
#endif // FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/experimental/QuotientMultiSet.h>
#include <random>
#include <folly/Format.h>
#include <folly/Random.h>
#include <folly/container/Enumerate.h>
#include <folly/io/IOBufQueue.h>
#include <folly/portability/GTest.h>
#if FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
namespace {
// Test fixture that builds a QuotientMultiSet from a list of keys and
// cross-checks lookups and iteration against the sorted input.
class QuotientMultiSetTest : public ::testing::Test {
protected:
static constexpr uint64_t kBlockSize = folly::QuotientMultiSet<>::kBlockSize;
// Reseeds the fixture's generator before every test.
void SetUp() override {
rng.seed(folly::randomNumberSeed());
}
// Builds a set from `keys` and validates it.
//
// `keys` is sorted in place and must be non-empty (keys.back() is read).
// `keyBits` and `loadFactor` are forwarded to the builder.
//
// While building, each time insert() reports a newly allocated block the
// block payload is set to the index of the key being inserted; the checks
// below rely on "payload + slot offset within the block" reproducing a key's
// index in the sorted input.
void buildAndValidate(
std::vector<uint64_t>& keys,
uint64_t keyBits,
double loadFactor) {
// Elements must be added in ascending order.
std::sort(keys.begin(), keys.end());
folly::QuotientMultiSetBuilder builder(keyBits, keys.size(), loadFactor);
folly::IOBufQueue buff;
for (const auto& iter : folly::enumerate(keys)) {
if (builder.insert(*iter)) {
// Set payload to relative position of the first key in block.
builder.setBlockPayload(iter.index);
}
// Flush as soon as any block is ready.
if (builder.numReadyBlocks() >= 1) {
builder.flush(buff);
}
}
builder.close(buff);
// Read the serialized set back from one contiguous buffer.
auto spBuf = buff.move();
folly::StringPiece data(spBuf->coalesce());
folly::QuotientMultiSet reader(data);
size_t index = 0;
// One iterator is reused for all skipTo() checks in the loop below.
folly::QuotientMultiSet<>::Iterator iter(&reader);
// Each pass handles one distinct key: `start` is its first index in the
// sorted input and `index` ends up one past its last occurrence.
while (index < keys.size()) {
uint64_t start = index;
uint64_t& key = keys[start];
auto debugInfo = [&] {
return folly::sformat("Index: {} Key: {}", index, key);
};
while (index < keys.size() && keys[index] == key) {
index++;
}
// equalRange() must cover exactly the occurrences [start, index - 1] once
// slot positions are translated through the block payloads.
auto range = reader.equalRange(key);
size_t pos = reader.getBlockPayload(range.begin / kBlockSize);
EXPECT_EQ(start, pos + range.begin % kBlockSize) << debugInfo();
pos = reader.getBlockPayload((range.end - 1) / kBlockSize);
EXPECT_EQ(index - 1, pos + (range.end - 1) % kBlockSize) << debugInfo();
// skipTo() on a present key must land on its first occurrence.
iter.skipTo(key);
EXPECT_EQ(key, iter.key()) << debugInfo();
EXPECT_EQ(range.begin, iter.pos()) << debugInfo();
EXPECT_EQ(
start,
reader.getBlockPayload(iter.pos() / kBlockSize) +
iter.pos() % kBlockSize)
<< debugInfo();
// Unless this is the last element, next() must yield the following element
// of the sorted input.
if (start < keys.size() - 1) {
EXPECT_TRUE(iter.next());
EXPECT_EQ(keys[start + 1], iter.key());
}
// Verify that looking up a key absent from `keys` fails. The probe is drawn
// from the gap between the previous distinct key and `key`, when that gap
// is non-empty.
uint64_t prevKey = (start == 0 ? 0 : keys[start - 1]);
if (prevKey + 1 < key) {
uint64_t missedKey = folly::Random::rand64(prevKey + 1, key, rng);
EXPECT_FALSE(reader.equalRange(missedKey)) << key;
// skipTo() on the absent key must land on the first occurrence of `key`.
iter.skipTo(missedKey);
EXPECT_EQ(key, iter.key()) << debugInfo();
EXPECT_EQ(range.begin, iter.pos()) << debugInfo();
EXPECT_EQ(
start,
reader.getBlockPayload(iter.pos() / kBlockSize) +
iter.pos() % kBlockSize)
<< debugInfo();
if (start < keys.size() - 1) {
EXPECT_TRUE(iter.next());
EXPECT_EQ(keys[start + 1], iter.key());
}
}
}
// A key above the largest inserted key must not be found, and skipping to
// it must exhaust the iterator.
const auto maxKey = folly::qms_detail::maxValue(keyBits);
if (keys.back() < maxKey) {
uint64_t key = folly::Random::rand64(keys.back(), maxKey, rng) + 1;
EXPECT_FALSE(reader.equalRange(key)) << keys.back() << " " << key;
EXPECT_FALSE(iter.skipTo(key));
EXPECT_TRUE(iter.done());
}
// A fresh iterator must enumerate every key, duplicates included, in sorted
// order and then report that it is done.
folly::QuotientMultiSet<>::Iterator nextIter(&reader);
for (const auto key : keys) {
EXPECT_TRUE(nextIter.next());
EXPECT_EQ(key, nextIter.key());
}
EXPECT_FALSE(nextIter.next());
EXPECT_TRUE(nextIter.done());
}
// Random source shared by the tests; reseeded in SetUp().
std::mt19937 rng;
};
} // namespace
// Small hand-picked key set, listed out of order; buildAndValidate() sorts it
// before inserting.
TEST_F(QuotientMultiSetTest, Simple) {
  std::vector<uint64_t> unsortedKeys{
      100, 1000, 1 << 14, 10 << 14, 0, 10, 100 << 14, 1000 << 14};
  buildAndValidate(unsortedKeys, 32, 0.95);
}
// A set closed without any insertions must report every probed key as absent.
TEST_F(QuotientMultiSetTest, Empty) {
  folly::QuotientMultiSetBuilder builder(32, 1024);
  folly::IOBufQueue queue;
  builder.close(queue);

  auto serialized = queue.move();
  folly::StringPiece bytes(serialized->coalesce());
  folly::QuotientMultiSet reader(bytes);

  // Probe 1024 random 32-bit keys.
  size_t probesLeft = 1024;
  while (probesLeft > 0) {
    const uint64_t probe = folly::Random::rand32(rng);
    EXPECT_FALSE(reader.equalRange(probe));
    --probesLeft;
  }
}
// 67 copies of key 0 stored with a key width of zero bits.
TEST_F(QuotientMultiSetTest, ZeroKeyBits) {
  std::vector<uint64_t> zeroKeys;
  zeroKeys.assign(size_t(67), uint64_t(0));
  buildAndValidate(zeroKeys, 0, 0.95);
}
// Random keys masked to the key width, across several key widths, set sizes
// and load factors.
TEST_F(QuotientMultiSetTest, Uniform) {
  constexpr auto kLoadFactor =
      folly::QuotientMultiSetBuilder::kDefaultMaxLoadFactor;
  constexpr uint64_t kAvgSize = 1 << 16;

  // Draws a size from the range bounded by avgSize / 2 and avgSize * 3 / 2.
  auto randSize = [&](uint64_t avgSize) {
    const uint64_t low = avgSize / 2;
    const uint64_t high = avgSize * 3 / 2;
    return folly::Random::rand64(low, high, rng);
  };

  // <key bits, number of keys, load factor>
  std::vector<std::tuple<int, uint64_t, double>> testCases = {
      {1, randSize(1 << 9), kLoadFactor},
      {8, randSize(1 << 10), kLoadFactor},
      {9, randSize(1 << 11), kLoadFactor},
      {12, randSize(kAvgSize), kLoadFactor},
      {32, randSize(kAvgSize), kLoadFactor},
      {48, randSize(kAvgSize), kLoadFactor},
      {64, randSize(kAvgSize), kLoadFactor},
      {32, randSize(kAvgSize), 1}, // Full
      {12, 3800, kLoadFactor}, // Almost full
      {64, randSize(16), kLoadFactor}, // Sparse, long keys.
  };

  for (const auto& [keyBits, size, loadFactor] : testCases) {
    SCOPED_TRACE(folly::sformat(
        "Key bits: {} Size: {} Load factor: {}", keyBits, size, loadFactor));
    std::vector<uint64_t> keys;
    while (keys.size() < size) {
      // Mask a random 64-bit value down to the maximum value for keyBits.
      const uint64_t raw = folly::Random::rand64(rng);
      keys.emplace_back(raw & folly::qms_detail::maxValue(keyBits));
    }
    buildAndValidate(keys, keyBits, loadFactor);
  }
}
// One random key from each of 2^16 consecutive buckets of width 2^16, stored
// at load factor 1.0 with 32-bit keys.
TEST_F(QuotientMultiSetTest, UniformDistributionFullLoadFactor) {
  const uint64_t numElements = 1 << 16;
  std::vector<uint64_t> keys;
  keys.reserve(numElements);
  for (uint64_t idx = 0; idx < numElements; idx++) {
    // Use the 64-bit generator: the exclusive upper bound of the last bucket
    // is 2^32, which would be truncated to 0 if passed through the uint32_t
    // parameters of rand32().
    const uint64_t key =
        folly::Random::rand64(idx << 16, (idx + 1) << 16, rng);
    keys.emplace_back(key);
  }
  buildAndValidate(keys, 32, 1.0);
}
// Every key in [0, 2^12) inserted three times with a 12-bit key width, so
// there are three times as many elements as distinct keys.
TEST_F(QuotientMultiSetTest, Overflow) {
  constexpr uint64_t kDistinctKeys = 1 << 12;
  constexpr int kCopies = 3;
  std::vector<uint64_t> keys;
  for (uint64_t value = 0; value < kDistinctKeys; ++value) {
    for (int copy = 0; copy < kCopies; ++copy) {
      keys.emplace_back(value);
    }
  }
  buildAndValidate(keys, 12, 0.95);
}
// Sequences of consecutive keys with random start and random length
// (0 to 9 keys each).
TEST_F(QuotientMultiSetTest, RandomLengthRuns) {
  constexpr uint64_t kKeyBits = 32;
  // Exclusive upper bound on the length of one sequence.
  constexpr uint32_t kMaxLength = 10;
  const uint64_t numElements = 1 << 16;
  // Largest start for which even the longest sequence stays within the key
  // space; insert() rejects keys above the maximum for kKeyBits, so an
  // unbounded 32-bit start could make the test fail spuriously.
  const uint64_t maxStart =
      folly::qms_detail::maxValue(kKeyBits) - (kMaxLength - 1);
  std::vector<uint64_t> keys;
  for (uint64_t idx = 0; idx < (numElements >> 4); idx++) {
    // rand64(min, max, rng) excludes max, hence the + 1.
    uint64_t key = folly::Random::rand64(0, maxStart + 1, rng);
    uint64_t length = folly::Random::rand32(0, kMaxLength, rng);
    for (uint64_t k = 0; k < length; k++) {
      keys.emplace_back(key + k);
    }
  }
  buildAndValidate(keys, kKeyBits, 0.95);
}
// Clusters of closely spaced keys, each cluster holding more keys than fit in
// one 64-slot block.
TEST_F(QuotientMultiSetTest, RunAcrossBlocks) {
  constexpr uint64_t kKeyBits = 32;
  // Each cluster has 136 keys. The k-th key lies 0 + 1 + ... + k above the
  // cluster's base, so the gaps between neighbours grow by one each step.
  constexpr uint64_t kClusterSize = 136;
  // Distance from the base to the last key of a cluster.
  constexpr uint64_t kMaxOffset = kClusterSize * (kClusterSize - 1) / 2;
  const uint64_t numElements = 1 << 10;
  // Largest base for which the whole cluster stays within the key space;
  // insert() rejects keys above the maximum for kKeyBits, so an unbounded
  // 32-bit base could make the test fail spuriously.
  const uint64_t maxBase = folly::qms_detail::maxValue(kKeyBits) - kMaxOffset;
  std::vector<uint64_t> keys;
  for (uint64_t idx = 0; idx < (numElements >> 4); idx++) {
    // rand64(min, max, rng) excludes max, hence the + 1.
    uint64_t key = folly::Random::rand64(0, maxBase + 1, rng);
    for (uint64_t k = 0; k < kClusterSize; k++) {
      key += k;
      keys.emplace_back(key);
    }
  }
  buildAndValidate(keys, kKeyBits, 0.95);
}
// One random key from each of 2^12 consecutive buckets of width 2^8, so all
// keys fall below 2^20 while the key width is 32 bits.
TEST_F(QuotientMultiSetTest, PackAtHeadSlots) {
  constexpr uint64_t kNumBuckets = 1 << 12;
  std::vector<uint64_t> keys;
  uint64_t bucket = 0;
  while (bucket < kNumBuckets) {
    const uint64_t low = bucket << 8;
    const uint64_t high = (bucket + 1) << 8;
    keys.emplace_back(folly::Random::rand32(low, high, rng));
    ++bucket;
  }
  buildAndValidate(keys, 32, 0.95);
}
// 2^12 consecutive keys starting at 2^30, stored with 32-bit keys.
TEST_F(QuotientMultiSetTest, PackAtTailSlots) {
  constexpr uint64_t kCount = 1 << 12;
  const uint64_t firstKey = (1 << 30);
  std::vector<uint64_t> keys;
  for (uint64_t delta = 0; delta != kCount; ++delta) {
    keys.emplace_back(firstKey + delta);
  }
  buildAndValidate(keys, 32, 0.95);
}
// Two dense groups of 2^11 consecutive keys each: one starting at 0 and one
// starting at 2^30, with nothing in between.
TEST_F(QuotientMultiSetTest, KeysOnlyInHeadAndTail) {
  constexpr uint64_t kGroupSize = 1 << 11;
  const uint64_t secondGroupStart = (1 << 30);
  std::vector<uint64_t> keys;
  // First group: 0, 1, ..., kGroupSize - 1.
  for (uint64_t low = 0; low != kGroupSize; ++low) {
    keys.emplace_back(low);
  }
  // Second group: the same count of keys shifted up to 2^30.
  for (uint64_t delta = 0; delta != kGroupSize; ++delta) {
    keys.emplace_back(secondGroupStart + delta);
  }
  buildAndValidate(keys, 32, 0.95);
}
// Three runs of repeated keys laid out so that one run ends immediately
// before the run of the next occupied slot begins.
TEST_F(QuotientMultiSetTest, RunendRightBeforeFirstOccupiedRunend) {
  std::vector<uint64_t> keys;
  // Key 60 ranges over slots [0, 67] with occupied slot 20.
  keys.insert(keys.end(), size_t(68), uint64_t(60));
  // Key 200 ranges over slots [68, 68] with occupied slot 66.
  keys.insert(keys.end(), size_t(1), uint64_t(200));
  // Key 250 ranges over slots [69, 88] with occupied slot 83.
  keys.insert(keys.end(), size_t(20), uint64_t(250));
  buildAndValidate(keys, 8, 0.95);
}
#endif // FOLLY_QUOTIENT_MULTI_SET_SUPPORTED
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment