Commit b436f54d authored by Nathan Bronson's avatar Nathan Bronson Committed by Facebook Github Bot

force link failure when compilation flags don't match

Summary:
F14 varies its inline code according to the flags passed to the
compiler. This can cause problems if libraries override compilation flags
on their own, since it won't be safe to exchange F14 instances between
compilation domains.  This diff introduces a template specialization
that varies according to the intrinsics mode and that has only a single
definition (in F14Table.cpp).  The member is called on a couple of cold
paths (exception handling for copy construction and rehash).  This makes
it very likely that a compilation unit that uses F14 and is compiled
with different compiler flags than F14Table.cpp will get a linker error.

Reviewed By: yfeldblum

Differential Revision: D9200315

fbshipit-source-id: 9cbca18eef0ddd6efcf6d9d6057eda2400f8653c
parent 54a91c4f
......@@ -18,8 +18,6 @@
#include <folly/Portability.h>
#ifndef FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
// F14 has been implemented for SSE2 and NEON (so far)
#if FOLLY_SSE >= 2 || FOLLY_NEON
#define FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE 1
......@@ -30,4 +28,28 @@
"falling back to std::unordered_map / set"
#endif
#if FOLLY_SSE_PREREQ(4, 2) || __ARM_FEATURE_CRC32
#define FOLLY_F14_CRC_INTRINSIC_AVAILABLE 1
#else
#define FOLLY_F14_CRC_INTRINSIC_AVAILABLE 0
#endif
namespace folly {
namespace f14 {
namespace detail {
enum class F14IntrinsicsMode { None, Simd, SimdAndCrc };
static constexpr F14IntrinsicsMode getF14IntrinsicsMode() {
#if !FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
return F14IntrinsicsMode::None;
#elif !FOLLY_F14_CRC_INTRINSIC_AVAILABLE
return F14IntrinsicsMode::Simd;
#else
return F14IntrinsicsMode::SimdAndCrc;
#endif
}
} // namespace detail
} // namespace f14
} // namespace folly
......@@ -16,20 +16,21 @@
#include <folly/container/detail/F14Table.h>
///////////////////////////////////
#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
///////////////////////////////////
namespace folly {
namespace f14 {
namespace detail {
// If you get a link failure that leads you here, your build is varying
// compiler flags across compilation units in a way that would break F14.
// SIMD (SSE2 or NEON) needs to be either on everywhere or off everywhere
// that uses F14. If SIMD is on then hardware CRC needs to be enabled
// everywhere or disabled everywhere.
void F14LinkCheck<getF14IntrinsicsMode()>::check() noexcept {}
#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
EmptyTagVectorType kEmptyTagVector = {};
#endif
} // namespace detail
} // namespace f14
} // namespace folly
///////////////////////////////////
#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
///////////////////////////////////
......@@ -48,23 +48,30 @@
#include <folly/container/detail/F14IntrinsicsAvailability.h>
#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
#if FOLLY_NEON
#ifdef __ARM_FEATURE_CRC32
#include <arm_acle.h> // __crc32cd
#else
#include <nmmintrin.h> // _mm_crc32_u64
#endif
#else
#ifdef _WIN32
#include <intrin.h> // _mul128 in fallback bit mixer
#endif
#endif
#if FOLLY_NEON
#include <arm_neon.h> // uint8x16t intrinsics
#else // SSE2
#include <immintrin.h> // __m128i intrinsics
#include <nmmintrin.h> // _mm_crc32_u64
#include <xmmintrin.h> // _mm_prefetch
#endif
#endif
#ifdef _WIN32
#include <intrin.h> // for _mul128
#endif
namespace folly {
struct F14TableStats {
char const* policy;
std::size_t size{0};
......@@ -99,6 +106,24 @@ struct F14TableStats {
namespace f14 {
namespace detail {
template <F14IntrinsicsMode>
struct F14LinkCheck {};
template <>
struct F14LinkCheck<getF14IntrinsicsMode()> {
// The purpose of this method is to trigger a link failure if
// compilation flags vary across compilation units. The definition
// is in F14Table.cpp, so only one of F14LinkCheck<None>::check,
// F14LinkCheck<Simd>::check, or F14LinkCheck<SimdAndCrc>::check will
// be available at link time.
//
// To cause a link failure the function must be invoked in code that
// is not optimized away, so we call it on a couple of cold paths
// (exception handling paths in copy construction and rehash). LTO may
// remove it entirely, but that's fine.
static void check() noexcept;
};
#if defined(_LIBCPP_VERSION)
template <typename K, typename V, typename H>
......@@ -1230,16 +1255,18 @@ class F14Table : public Policy {
static_assert(sizeof(std::size_t) == sizeof(uint64_t), "");
std::size_t tag;
if (!isAvalanchingHasher()) {
#if FOLLY_SSE_PREREQ(4, 2)
#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
#if FOLLY_SSE
// SSE4.2 CRC
std::size_t c = _mm_crc32_u64(0, hash);
tag = (c >> 24) | 0x80;
hash += c;
#elif __ARM_FEATURE_CRC32
#else
// CRC is optional on armv8 (-march=armv8-a+crc), standard on armv8.1
std::size_t c = __crc32cd(0, hash);
tag = (c >> 24) | 0x80;
hash += c;
#endif
#else
// The mixer below is not fully avalanching for all 64 bits of
// output, but looks quite good for bits 18..63 and puts plenty
......@@ -1279,15 +1306,17 @@ class F14Table : public Policy {
static_assert(sizeof(std::size_t) == sizeof(uint32_t), "");
uint8_t tag;
if (!isAvalanchingHasher()) {
#if FOLLY_SSE_PREREQ(4, 2)
#if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
#if FOLLY_SSE
// SSE4.2 CRC
auto c = _mm_crc32_u32(0, hash);
tag = static_cast<uint8_t>(~(c >> 25));
hash += c;
#elif __ARM_FEATURE_CRC32
#else
auto c = __crc32cw(0, hash);
tag = static_cast<uint8_t>(~(c >> 25));
hash += c;
#endif
#else
// finalizer for 32-bit murmur2
hash ^= hash >> 13;
......@@ -1784,6 +1813,7 @@ class F14Table : public Policy {
}
} catch (...) {
reset();
F14LinkCheck<getF14IntrinsicsMode()>::check();
throw;
}
}
......@@ -1859,6 +1889,7 @@ class F14Table : public Policy {
FOLLY_SAFE_DCHECK(
origChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
chunkMask_ = static_cast<InternalSizeType>(origChunkCount - 1);
F14LinkCheck<getF14IntrinsicsMode()>::check();
}
this->afterRehash(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment