Commit ed2ea649 authored by Nathan Bronson's avatar Nathan Bronson Committed by Facebook Github Bot

probabilistic F14 ref and iter stability testing under ASAN

Summary:
This diff adds spurious rehash at places where the caller should
not be assuming reference (unless guaranteed) or iterator stability,
allowing ASAN to detect these problems.  Under ASAN it also junk-fills
deleted value_type instances that can't be immediately deallocated.

Reviewed By: yfeldblum

Differential Revision: D9826273

fbshipit-source-id: 2d543473b17ffd7debb52082a3e72842121ece1a
parent 546113b4
......@@ -237,6 +237,12 @@ previous paragraph). google::dense_hash_map works around this tradeoff
by providing both clear() and clear_no_resize(); we could do something
similar.
As stated above, F14NodeMap and F14NodeSet are the only F14 variants
that provides reference stability. When running under ASAN the other
storage policies will probabilistically perform extra rehashes, which
makes it likely that reference stability problems will be found by the
address sanitizer.
F14NodeMap does not currently support the C++17 node API, but it could
be trivially added.
......
......@@ -21,6 +21,7 @@
#include <utility>
#include <folly/Memory.h>
#include <folly/Portability.h>
#include <folly/Unit.h>
#include <folly/container/detail/F14Table.h>
#include <folly/hash/Hash.h>
......@@ -379,6 +380,12 @@ struct BasePolicy
// out that DenseMaskIter with an empty body can be elided.
FOLLY_SAFE_DCHECK(false, "should be disabled");
}
void afterDestroyWithoutDeallocate(Value* addr, std::size_t n) {
if (kIsSanitizeAddress) {
memset(static_cast<void*>(addr), 0x66, sizeof(Value) * n);
}
}
};
// BaseIter is a convenience for concrete set and map implementations
......@@ -615,7 +622,9 @@ class ValueContainerPolicy : public BasePolicy<
void destroyItem(Item& item) {
Alloc& a = this->alloc();
AllocTraits::destroy(a, std::addressof(item));
auto ptr = std::addressof(item);
AllocTraits::destroy(a, ptr);
this->afterDestroyWithoutDeallocate(ptr, 1);
}
template <typename V>
......@@ -1189,6 +1198,7 @@ class VectorContainerPolicy : public BasePolicy<
complainUnlessNothrowMove<Key>();
complainUnlessNothrowMove<lift_unit_t<MappedTypeOrVoid>>();
auto origSrc = src;
if (valueIsTriviallyCopyable()) {
std::memcpy(static_cast<void*>(dst), src, n * sizeof(Value));
} else {
......@@ -1203,6 +1213,7 @@ class VectorContainerPolicy : public BasePolicy<
}
}
}
this->afterDestroyWithoutDeallocate(origSrc, n);
}
template <typename P, typename V>
......
......@@ -20,7 +20,7 @@ namespace folly {
namespace f14 {
namespace detail {
// If you get a link failure that leads you here, your build is varying
// If you get a link failure that leads you here, your build has varying
// compiler flags across compilation units in a way that would break F14.
// SIMD (SSE2 or NEON) needs to be either on everywhere or off everywhere
// that uses F14. If SIMD is on then hardware CRC needs to be enabled
......@@ -31,6 +31,9 @@ void F14LinkCheck<getF14IntrinsicsMode()>::check() noexcept {}
EmptyTagVectorType kEmptyTagVector = {};
#endif
FOLLY_F14_TLS_IF_ASAN std::size_t asanPendingSafeInserts = 0;
FOLLY_F14_TLS_IF_ASAN std::size_t asanRehashState = 0;
} // namespace detail
} // namespace f14
} // namespace folly
......@@ -47,6 +47,12 @@
#include <folly/container/detail/F14Defaults.h>
#include <folly/container/detail/F14IntrinsicsAvailability.h>
#if FOLLY_SANITIZE_ADDRESS
#define FOLLY_F14_TLS_IF_ASAN FOLLY_TLS
#else
#define FOLLY_F14_TLS_IF_ASAN
#endif
#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
......@@ -315,6 +321,9 @@ using EmptyTagVectorType = std::aligned_storage_t<
extern EmptyTagVectorType kEmptyTagVector;
extern FOLLY_F14_TLS_IF_ASAN std::size_t asanPendingSafeInserts;
extern FOLLY_F14_TLS_IF_ASAN std::size_t asanRehashState;
template <unsigned BitCount>
struct FullMask {
static constexpr MaskType value =
......@@ -1819,14 +1828,12 @@ class F14Table : public Policy {
}
}
FOLLY_NOINLINE void rehashImpl(
FOLLY_NOINLINE void reserveImpl(
std::size_t capacity,
std::size_t origChunkCount,
std::size_t origMaxSizeWithoutRehash) {
FOLLY_SAFE_DCHECK(capacity >= size(), "");
auto origChunks = chunks_;
// compute new size
std::size_t const kInitialCapacity = 2;
std::size_t const kHalfChunkCapacity =
......@@ -1838,8 +1845,7 @@ class F14Table : public Policy {
newMaxSizeWithoutRehash =
(capacity < kInitialCapacity) ? kInitialCapacity : kHalfChunkCapacity;
} else {
newChunkCount = nextPowTwo(
(capacity + Chunk::kDesiredCapacity - 1) / Chunk::kDesiredCapacity);
newChunkCount = nextPowTwo((capacity - 1) / Chunk::kDesiredCapacity + 1);
newMaxSizeWithoutRehash = newChunkCount * Chunk::kDesiredCapacity;
constexpr std::size_t kMaxChunksWithoutCapacityOverflow =
......@@ -1851,10 +1857,21 @@ class F14Table : public Policy {
}
}
// is rehash needed?
if (origMaxSizeWithoutRehash == newMaxSizeWithoutRehash) {
return;
if (origMaxSizeWithoutRehash != newMaxSizeWithoutRehash) {
rehashImpl(
origChunkCount,
origMaxSizeWithoutRehash,
newChunkCount,
newMaxSizeWithoutRehash);
}
}
void rehashImpl(
std::size_t origChunkCount,
std::size_t origMaxSizeWithoutRehash,
std::size_t newChunkCount,
std::size_t newMaxSizeWithoutRehash) {
auto origChunks = chunks_;
BytePtr rawAllocation;
auto undoState = this->beforeRehash(
......@@ -1984,18 +2001,64 @@ class F14Table : public Policy {
success = true;
}
void asanOnReserve(std::size_t capacity) {
if (kIsSanitizeAddress && capacity > size()) {
asanPendingSafeInserts += capacity - size();
}
}
bool asanShouldAddExtraRehash() {
if (!kIsSanitizeAddress) {
return false;
} else if (asanPendingSafeInserts > 0) {
--asanPendingSafeInserts;
return false;
} else if (size() <= 1) {
return size() > 0;
} else {
constexpr std::size_t kBigPrime = 4294967291U;
auto s = (asanRehashState += kBigPrime);
return (s % size()) == 0;
}
}
void asanExtraRehash() {
auto cc = chunkMask_ + 1;
auto bc = bucket_count();
rehashImpl(cc, bc, cc, bc);
}
void asanOnInsert() {
// When running under ASAN, we add a spurious rehash with 1/size()
// probability before every insert. This means that finding reference
// stability problems for F14Value and F14Vector is much more likely.
// The most common pattern that causes this is
//
// auto& ref = map[k1]; map[k2] = foo(ref);
//
// One way to fix this is to call map.reserve(N) before such a
// sequence, where N is the number of keys that might be inserted
// within the section that retains references.
if (asanShouldAddExtraRehash()) {
asanExtraRehash();
}
}
public:
// user has no control over max_load_factor
void rehash(std::size_t capacity) {
rehashImpl(
std::max<std::size_t>(capacity, size()),
chunkMask_ + 1,
bucket_count());
reserve(capacity);
}
void reserve(std::size_t capacity) {
rehash(capacity);
// We want to support the pattern
// map.reserve(2); auto& r1 = map[k1]; auto& r2 = map[k2];
asanOnReserve(capacity);
reserveImpl(
std::max<std::size_t>(capacity, size()),
chunkMask_ + 1,
bucket_count());
}
// Returns true iff a rehash was performed
......@@ -2003,7 +2066,7 @@ class F14Table : public Policy {
auto capacity = size() + incoming;
auto bc = bucket_count();
if (capacity - 1 >= bc) {
rehashImpl(capacity, chunkMask_ + 1, bc);
reserveImpl(capacity, chunkMask_ + 1, bc);
}
}
......@@ -2021,6 +2084,8 @@ class F14Table : public Policy {
}
}
asanOnInsert();
reserveForInsert();
std::size_t index = hp.first;
......@@ -2162,7 +2227,18 @@ class F14Table : public Policy {
}
void clear() noexcept {
clearImpl<false>();
if (kIsSanitizeAddress) {
// force recycling of heap memory
auto bc = bucket_count();
reset();
try {
reserveImpl(bc, 0, 0);
} catch (std::bad_alloc const&) {
// ASAN mode only, keep going
}
} else {
clearImpl<false>();
}
}
// Like clear(), but always frees all dynamic storage allocated
......
/*
* Copyright 2018-present Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/Portability.h>
#include <folly/container/F14Set.h>
#include <folly/portability/GTest.h>
using namespace folly;
template <typename S>
void useIteratorAfterInsertSmall() {
S set;
auto iter = set.insert(1).first;
set.insert(2);
EXPECT_EQ(*iter, 1);
}
template <typename S>
void useIteratorAfterInsertLarge() {
S set;
set.insert(10);
for (int i = 0; i < 1000; ++i) {
auto iter = set.find(10);
set.insert(i);
if (i > 500) {
EXPECT_EQ(*iter, 10);
}
}
}
template <typename S>
void useReferenceAfterInsertSmall() {
S set;
auto& ref = *set.insert(1).first;
set.insert(2);
EXPECT_EQ(ref, 1);
}
template <typename S>
void useReferenceAfterInsertLarge() {
S set;
set.insert(10);
for (int i = 0; i < 1000; ++i) {
auto& ref = *set.find(10);
set.insert(i);
if (i > 500) {
EXPECT_EQ(ref, 10);
}
}
}
template <typename F>
void repeat(F const& func) {
for (int i = 0; i < 32; ++i) {
func();
}
}
template <typename F>
void expectAsanFailure(F const& func) {
if (kIsSanitizeAddress) {
EXPECT_EXIT(
repeat(func), testing::ExitedWithCode(1), ".*heap-use-after-free.*");
}
}
TEST(F14AsanSupportTest, F14ValueIterInsertSmall) {
expectAsanFailure(useIteratorAfterInsertSmall<F14ValueSet<int>>);
}
TEST(F14AsanSupportTest, F14NodeIterInsertSmall) {
expectAsanFailure(useIteratorAfterInsertSmall<F14NodeSet<int>>);
}
TEST(F14AsanSupportTest, F14VectorIterInsertSmall) {
expectAsanFailure(useIteratorAfterInsertSmall<F14VectorSet<int>>);
}
TEST(F14AsanSupportTest, F14FastIterInsertSmall) {
expectAsanFailure(useIteratorAfterInsertSmall<F14FastSet<int>>);
}
TEST(F14AsanSupportTest, F14ValueIterInsertLarge) {
expectAsanFailure(useIteratorAfterInsertLarge<F14ValueSet<int>>);
}
TEST(F14AsanSupportTest, F14NodeIterInsertLarge) {
expectAsanFailure(useIteratorAfterInsertLarge<F14NodeSet<int>>);
}
TEST(F14AsanSupportTest, F14VectorIterInsertLarge) {
expectAsanFailure(useIteratorAfterInsertLarge<F14VectorSet<int>>);
}
TEST(F14AsanSupportTest, F14FastIterInsertLarge) {
expectAsanFailure(useIteratorAfterInsertLarge<F14FastSet<int>>);
}
TEST(F14AsanSupportTest, F14ValueRefInsertSmall) {
expectAsanFailure(useReferenceAfterInsertSmall<F14ValueSet<int>>);
}
TEST(F14AsanSupportTest, F14VectorRefInsertSmall) {
expectAsanFailure(useReferenceAfterInsertSmall<F14VectorSet<int>>);
}
TEST(F14AsanSupportTest, F14FastRefInsertSmall) {
expectAsanFailure(useReferenceAfterInsertSmall<F14FastSet<int>>);
}
TEST(F14AsanSupportTest, F14ValueRefInsertLarge) {
expectAsanFailure(useReferenceAfterInsertLarge<F14ValueSet<int>>);
}
TEST(F14AsanSupportTest, F14VectorRefInsertLarge) {
expectAsanFailure(useReferenceAfterInsertLarge<F14VectorSet<int>>);
}
TEST(F14AsanSupportTest, F14FastRefInsertLarge) {
expectAsanFailure(useReferenceAfterInsertLarge<F14FastSet<int>>);
}
TEST(F14AsanSupportTest, F14VectorErase) {
F14VectorSet<int> set;
set.insert(1);
set.insert(2);
set.insert(3);
auto& v = *set.begin();
EXPECT_EQ(v, 3);
set.erase(2);
if (kIsSanitizeAddress) {
EXPECT_NE(v, 3);
}
}
......@@ -728,7 +728,6 @@ void runInsertAndEmplace() {
typename S::value_type k1{0};
typename S::value_type k2{0};
uint64_t k3 = 0;
uint64_t k4 = 10;
S s;
resetTracking();
EXPECT_TRUE(s.emplace(k1).second);
......@@ -745,8 +744,9 @@ void runInsertAndEmplace() {
// copy convert expected for failing emplace with wrong type
EXPECT_EQ(Tracked<0>::counts.dist(Counts{0, 0, 1, 0}), 0);
s.clear();
resetTracking();
EXPECT_TRUE(s.emplace(k4).second);
EXPECT_TRUE(s.emplace(k3).second);
// copy convert + move expected for successful emplace with wrong type
EXPECT_EQ(Tracked<0>::counts.dist(Counts{0, 1, 1, 0}), 0);
}
......@@ -754,7 +754,6 @@ void runInsertAndEmplace() {
typename S::value_type k1{0};
typename S::value_type k2{0};
uint64_t k3 = 0;
uint64_t k4 = 10;
S s;
resetTracking();
EXPECT_TRUE(s.emplace(std::move(k1)).second);
......@@ -771,8 +770,9 @@ void runInsertAndEmplace() {
// move convert expected for failing emplace with wrong type
EXPECT_EQ(Tracked<0>::counts.dist(Counts{0, 0, 0, 1}), 0);
s.clear();
resetTracking();
EXPECT_TRUE(s.emplace(std::move(k4)).second);
EXPECT_TRUE(s.emplace(std::move(k3)).second);
// move convert + move expected for successful emplace with wrong type
EXPECT_EQ(Tracked<0>::counts.dist(Counts{0, 1, 0, 1}), 0);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment