Commit 66d50da2 authored by Zino Benaissa's avatar Zino Benaissa Committed by Facebook GitHub Bot

Make folly::small_vector friendly to compiler optimizations

Summary:
This includes several enhancements to folly::small_vector:

Eliminate overhead accessing vector elements by making heap_ points to element 0 of the vector.  To get start address only dispatch is required as shown below:

     value_type* data () {
       // Dispatch between heap_ or internalStorage,
       return isExtern() ? heap_ : &internalStorage_;
     }

Reorganize the implementation by recording if capacity is stored internally or on the heap in "size". As a result the maximum vector size is half smaller. This is the only visible behavior change of this PR. Assuming size is 64 bits then
  - bit63 determines if vector elements are stored in internalStorage_ or externally in the heap
  - bit62 determines if vector capacity is stored internally or on the heap
  - bit61..bit0 contain the size of the vector.

Reduce the overhead of updating size buy adding incrementSize functions.

Tune emplace_back function and vector initialization.

Reviewed By: yfeldblum

Differential Revision: D24413105

fbshipit-source-id: b447704832af9c734be51aa6d23b8cb4be69a8bf
parent f1c0c3c1
......@@ -206,11 +206,9 @@ struct IntegralSizePolicyBase {
IntegralSizePolicyBase() : size_(0) {}
protected:
static constexpr std::size_t policyMaxSize() {
return SizeType(~kExternMask);
}
static constexpr std::size_t policyMaxSize() { return SizeType(~kClearMask); }
std::size_t doSize() const { return size_ & ~kExternMask; }
std::size_t doSize() const { return size_ & ~kClearMask; }
std::size_t isExtern() const { return kExternMask & size_; }
......@@ -222,10 +220,28 @@ struct IntegralSizePolicyBase {
}
}
std::size_t isHeapifiedCapacity() const { return kCapacityMask & size_; }
void setHeapifiedCapacity(bool b) {
if (b) {
size_ |= kCapacityMask;
} else {
size_ &= ~kCapacityMask;
}
}
void setSize(std::size_t sz) {
assert(sz <= policyMaxSize());
size_ = (kExternMask & size_) | SizeType(sz);
size_ = (kClearMask & size_) | SizeType(sz);
}
void incrementSize(std::size_t n) {
// We can safely increment size without overflowing into mask bits because
// we always check new size is less than maxPolicySize (see
// makeSizeInternal). To be sure, added assertion to verify it.
assert(doSize() + n <= policyMaxSize());
size_ += SizeType(n);
}
std::size_t getInternalSize() { return size_; }
void swapSizePolicy(IntegralSizePolicyBase& o) { std::swap(size_, o.size_); }
......@@ -233,9 +249,16 @@ struct IntegralSizePolicyBase {
static bool constexpr kShouldUseHeap = ShouldUseHeap;
private:
// We reserve two most significant bits of size_.
static SizeType constexpr kExternMask =
kShouldUseHeap ? SizeType(1) << (sizeof(SizeType) * 8 - 1) : 0;
static SizeType constexpr kCapacityMask =
kShouldUseHeap ? SizeType(1) << (sizeof(SizeType) * 8 - 2) : 0;
static SizeType constexpr kClearMask =
kShouldUseHeap ? SizeType(3) << (sizeof(SizeType) * 8 - 2) : 0;
SizeType size_;
};
......@@ -406,18 +429,10 @@ struct small_vector_base {
type;
};
template <class T>
T* pointerFlagSet(T* p) {
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(p) | 1);
}
template <class T>
bool pointerFlagGet(T* p) {
return reinterpret_cast<uintptr_t>(p) & 1;
}
template <class T>
T* pointerFlagClear(T* p) {
return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(p) & ~uintptr_t(1));
inline void* unshiftPointer(void* p, size_t sizeBytes) {
return static_cast<char*>(p) - sizeBytes;
}
inline void* shiftPointer(void* p, size_t sizeBytes) {
return static_cast<char*>(p) + sizeBytes;
}
......@@ -474,12 +489,8 @@ class small_vector : public detail::small_vector_base<
auto n = o.size();
makeSize(n);
{
auto rollback = makeGuard([&] {
if (this->isExtern()) {
u.freeHeap();
}
});
std::uninitialized_copy(o.begin(), o.end(), begin());
auto rollback = makeGuard([&] { freeHeap(); });
std::uninitialized_copy(o.begin(), o.begin() + n, begin());
rollback.dismiss();
}
this->setSize(n);
......@@ -490,11 +501,12 @@ class small_vector : public detail::small_vector_base<
if (o.isExtern()) {
swap(o);
} else {
auto n = o.size();
std::uninitialized_copy(
std::make_move_iterator(o.begin()),
std::make_move_iterator(o.end()),
begin());
this->setSize(o.size());
this->setSize(n);
}
}
......@@ -522,9 +534,7 @@ class small_vector : public detail::small_vector_base<
for (auto& t : *this) {
(&t)->~value_type();
}
if (this->isExtern()) {
u.freeHeap();
}
freeHeap();
}
small_vector& operator=(small_vector const& o) {
......@@ -677,10 +687,11 @@ class small_vector : public detail::small_vector_base<
downsize(sz);
return;
}
auto extra = sz - size();
makeSize(sz);
detail::populateMemForward(
begin() + size(), sz - size(), [&](void* p) { new (p) value_type(); });
this->setSize(sz);
begin() + size(), extra, [&](void* p) { new (p) value_type(); });
this->incrementSize(extra);
}
void resize(size_type sz, value_type const& v) {
......@@ -688,10 +699,11 @@ class small_vector : public detail::small_vector_base<
erase(begin() + sz, end());
return;
}
auto extra = sz - size();
makeSize(sz);
detail::populateMemForward(
begin() + size(), sz - size(), [&](void* p) { new (p) value_type(v); });
this->setSize(sz);
begin() + size(), extra, [&](void* p) { new (p) value_type(v); });
this->incrementSize(extra);
}
value_type* data() noexcept {
......@@ -737,7 +749,7 @@ class small_vector : public detail::small_vector_base<
using AllocationSizeOrUnreachable =
conditional_t<kMustTrackHeapifiedCapacity, Unreachable, AllocationSize>;
if (this->isExtern()) {
if (u.hasCapacity()) {
if (hasCapacity()) {
return u.getCapacity();
}
return AllocationSizeOrUnreachable{}(u.pdata_.heap_) / sizeof(value_type);
......@@ -756,19 +768,31 @@ class small_vector : public detail::small_vector_base<
template <class... Args>
reference emplace_back(Args&&... args) {
if (capacity() == size()) {
auto isize_ = this->getInternalSize();
if (isize_ < MaxInline) {
new (u.buffer() + isize_) value_type(std::forward<Args>(args)...);
this->incrementSize(1);
return *(u.buffer() + isize_);
}
if (!BaseType::kShouldUseHeap) {
throw_exception<std::length_error>("max_size exceeded in small_vector");
}
auto size_ = size();
auto capacity_ = capacity();
if (capacity_ == size_) {
// Any of args may be references into the vector.
// When we are reallocating, we have to be careful to construct the new
// element before modifying the data in the old buffer.
makeSize(
size() + 1,
size_ + 1,
[&](void* p) { new (p) value_type(std::forward<Args>(args)...); },
size());
size_);
} else {
new (end()) value_type(std::forward<Args>(args)...);
// We know the vector is stored in the heap.
new (u.heap() + size_) value_type(std::forward<Args>(args)...);
}
this->setSize(size() + 1);
return back();
this->incrementSize(1);
return *(u.heap() + size_);
}
void push_back(value_type&& t) { emplace_back(std::move(t)); }
......@@ -785,27 +809,26 @@ class small_vector : public detail::small_vector_base<
iterator insert(const_iterator constp, value_type&& t) {
iterator p = unconst(constp);
if (p == end()) {
push_back(std::move(t));
return end() - 1;
}
auto offset = p - begin();
if (capacity() == size()) {
auto size_ = size();
if (capacity() == size_) {
makeSize(
size() + 1,
size_ + 1,
[&t](void* ptr) { new (ptr) value_type(std::move(t)); },
offset);
this->setSize(this->size() + 1);
this->incrementSize(1);
} else {
detail::moveObjectsRightAndCreate(
data() + offset,
data() + size(),
data() + size() + 1,
data() + size_,
data() + size_ + 1,
[&]() mutable -> value_type&& { return std::move(t); });
this->setSize(size() + 1);
this->incrementSize(1);
}
return begin() + offset;
}
......@@ -818,13 +841,14 @@ class small_vector : public detail::small_vector_base<
iterator insert(const_iterator pos, size_type n, value_type const& val) {
auto offset = pos - begin();
makeSize(size() + n);
auto size_ = size();
makeSize(size_ + n);
detail::moveObjectsRightAndCreate(
data() + offset,
data() + size(),
data() + size() + n,
data() + size_,
data() + size_ + n,
[&]() mutable -> value_type const& { return val; });
this->setSize(size() + n);
this->incrementSize(n);
return begin() + offset;
}
......@@ -953,15 +977,16 @@ class small_vector : public detail::small_vector_base<
auto const distance = std::distance(first, last);
auto const offset = pos - begin();
auto size_ = size();
assert(distance >= 0);
assert(offset >= 0);
makeSize(size() + distance);
makeSize(size_ + distance);
detail::moveObjectsRightAndCreate(
data() + offset,
data() + size(),
data() + size() + distance,
data() + size_,
data() + size_ + distance,
[&, in = last]() mutable -> it_ref { return *--in; });
this->setSize(size() + distance);
this->incrementSize(distance);
return begin() + offset;
}
......@@ -986,18 +1011,19 @@ class small_vector : public detail::small_vector_base<
}
return;
}
auto distance = std::distance(first, last);
size_type distance = std::distance(first, last);
if (distance <= MaxInline) {
this->incrementSize(distance);
detail::populateMemForward(
u.buffer(), distance, [&](void* p) { new (p) value_type(*first++); });
return;
}
makeSize(distance);
this->setSize(distance);
this->incrementSize(distance);
{
auto rollback = makeGuard([&] {
if (this->isExtern()) {
u.freeHeap();
}
});
auto rollback = makeGuard([&] { freeHeap(); });
detail::populateMemForward(
data(), distance, [&](void* p) { new (p) value_type(*first++); });
u.heap(), distance, [&](void* p) { new (p) value_type(*first++); });
rollback.dismiss();
}
}
......@@ -1005,13 +1031,10 @@ class small_vector : public detail::small_vector_base<
template <typename InitFunc>
void doConstruct(size_type n, InitFunc&& func) {
makeSize(n);
this->setSize(n);
assert(size() == 0);
this->incrementSize(n);
{
auto rollback = makeGuard([&] {
if (this->isExtern()) {
u.freeHeap();
}
});
auto rollback = makeGuard([&] { freeHeap(); });
detail::populateMemForward(data(), n, std::forward<InitFunc>(func));
rollback.dismiss();
}
......@@ -1031,6 +1054,9 @@ class small_vector : public detail::small_vector_base<
}
void makeSize(size_type newSize) {
if (newSize <= capacity()) {
return;
}
makeSizeInternal(newSize, false, [](void*) { assume_unreachable(); }, 0);
}
......@@ -1060,11 +1086,6 @@ class small_vector : public detail::small_vector_base<
if (newSize > max_size()) {
throw_exception<std::length_error>("max_size exceeded in small_vector");
}
if (newSize <= capacity()) {
assert(!insert);
return;
}
assert(this->kShouldUseHeap);
// This branch isn't needed for correctness, but allows the optimizer to
// skip generating code for the rest of this function in NoHeap
......@@ -1094,9 +1115,6 @@ class small_vector : public detail::small_vector_base<
const size_t sizeBytes =
newCapacity * sizeof(value_type) + allocationExtraBytes;
void* newh = checkedMalloc(sizeBytes);
// We expect newh to be at least 2-aligned, because we want to
// use its least significant bit as a flag.
assert(!detail::pointerFlagGet(newh));
value_type* newp = static_cast<value_type*>(
heapifyCapacity ? detail::shiftPointer(newh, kHeapifyCapacitySize)
......@@ -1119,15 +1137,11 @@ class small_vector : public detail::small_vector_base<
for (auto& val : *this) {
val.~value_type();
}
freeHeap();
if (this->isExtern()) {
u.freeHeap();
}
if (heapifyCapacity) {
u.pdata_.heap_ = detail::pointerFlagSet(newh);
} else {
u.pdata_.heap_ = newh;
}
// Store shifted pointer if capacity is heapified
u.pdata_.heap_ = newp;
this->setHeapifiedCapacity(heapifyCapacity);
this->setExtern(true);
this->setCapacity(newCapacity);
}
......@@ -1138,7 +1152,7 @@ class small_vector : public detail::small_vector_base<
*/
void setCapacity(size_type newCapacity) {
assert(this->isExtern());
if (u.hasCapacity()) {
if (hasCapacity()) {
assert(newCapacity < std::numeric_limits<InternalSizeType>::max());
u.setCapacity(newCapacity);
}
......@@ -1146,7 +1160,7 @@ class small_vector : public detail::small_vector_base<
private:
struct HeapPtrWithCapacity {
void* heap_;
value_type* heap_;
InternalSizeType capacity_;
InternalSizeType getCapacity() const { return capacity_; }
......@@ -1155,21 +1169,18 @@ class small_vector : public detail::small_vector_base<
} FOLLY_SV_PACK_ATTR;
struct HeapPtr {
// Lower order bit of heap_ is used as flag to indicate whether capacity is
// stored at the front of the heap allocation.
void* heap_;
// heap[-kHeapifyCapacitySize] contains capacity
value_type* heap_;
InternalSizeType getCapacity() const {
assert(detail::pointerFlagGet(heap_));
return *static_cast<InternalSizeType*>(detail::pointerFlagClear(heap_));
return *static_cast<InternalSizeType*>(
detail::unshiftPointer(heap_, kHeapifyCapacitySize));
}
void setCapacity(InternalSizeType c) {
*static_cast<InternalSizeType*>(detail::pointerFlagClear(heap_)) = c;
}
size_t allocationExtraBytes() const {
assert(detail::pointerFlagGet(heap_));
return kHeapifyCapacitySize;
*static_cast<InternalSizeType*>(
detail::unshiftPointer(heap_, kHeapifyCapacitySize)) = c;
}
size_t allocationExtraBytes() const { return kHeapifyCapacitySize; }
} FOLLY_SV_PACK_ATTR;
typedef aligned_storage_for_t<value_type[MaxInline]> InlineStorageDataType;
......@@ -1177,7 +1188,7 @@ class small_vector : public detail::small_vector_base<
typedef typename std::conditional<
sizeof(value_type) * MaxInline != 0,
InlineStorageDataType,
void*>::type InlineStorageType;
value_type*>::type InlineStorageType;
static bool constexpr kHasInlineCapacity =
sizeof(HeapPtrWithCapacity) < sizeof(InlineStorageType);
......@@ -1214,6 +1225,23 @@ class small_vector : public detail::small_vector_base<
conditional<kHasInlineCapacity, HeapPtrWithCapacity, HeapPtr>::type
PointerType;
bool hasCapacity() const {
return kAlwaysHasCapacity || !kHeapifyCapacityThreshold ||
this->isHeapifiedCapacity();
}
void freeHeap() {
if (this->isExtern()) {
if (hasCapacity()) {
auto extraBytes = u.pdata_.allocationExtraBytes();
auto vp = detail::unshiftPointer(u.pdata_.heap_, extraBytes);
sizedFree(vp, u.getCapacity() * sizeof(value_type) + extraBytes);
} else {
free(u.pdata_.heap_);
}
}
}
union Data {
explicit Data() { pdata_.heap_ = nullptr; }
......@@ -1227,35 +1255,12 @@ class small_vector : public detail::small_vector_base<
value_type const* buffer() const noexcept {
return const_cast<Data*>(this)->buffer();
}
value_type* heap() noexcept {
if (kHasInlineCapacity || !detail::pointerFlagGet(pdata_.heap_)) {
return static_cast<value_type*>(pdata_.heap_);
} else {
return static_cast<value_type*>(detail::shiftPointer(
detail::pointerFlagClear(pdata_.heap_), kHeapifyCapacitySize));
}
}
value_type const* heap() const noexcept {
return const_cast<Data*>(this)->heap();
}
value_type* heap() noexcept { return pdata_.heap_; }
value_type const* heap() const noexcept { return pdata_.heap_; }
bool hasCapacity() const {
return kAlwaysHasCapacity || detail::pointerFlagGet(pdata_.heap_);
}
InternalSizeType getCapacity() const { return pdata_.getCapacity(); }
void setCapacity(InternalSizeType c) { pdata_.setCapacity(c); }
void freeHeap() {
auto vp = detail::pointerFlagClear(pdata_.heap_);
if (hasCapacity()) {
sizedFree(
vp,
pdata_.getCapacity() * sizeof(value_type) +
pdata_.allocationExtraBytes());
} else {
free(vp);
}
}
} u;
};
FOLLY_SV_PACK_POP
......
......@@ -618,7 +618,7 @@ TEST(small_vector, NoHeap) {
// Check max_size works right with various policy combinations.
folly::small_vector<std::string, 32, uint32_t> v4;
EXPECT_EQ(v4.max_size(), (1ul << 31) - 1);
EXPECT_EQ(v4.max_size(), (1ul << 30) - 1);
/*
* Test that even when we ask for a small number inlined it'll still
......@@ -641,9 +641,9 @@ TEST(small_vector, NoHeap) {
TEST(small_vector, MaxSize) {
folly::small_vector<int, 2, uint8_t> vec;
EXPECT_EQ(vec.max_size(), 127);
EXPECT_EQ(vec.max_size(), 63);
folly::small_vector<int, 2, uint16_t> vec2;
EXPECT_EQ(vec2.max_size(), (1 << 15) - 1);
EXPECT_EQ(vec2.max_size(), (1 << 14) - 1);
}
TEST(small_vector, AllHeap) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment