Commit 1a1ce79e, authored by Stephen Chen, committed by Facebook Github Bot

Gate the SSE4.2 implementation of checksum to x86 only

Summary:
folly/detail/ChecksumDetail.h is included by Checksum.h, and it
unconditionally includes <immintrin.h>. This does not compile on
aarch64 platforms. Add #ifdefs around the x86-only include and declarations.

Reviewed By: yfeldblum

Differential Revision: D5048636

fbshipit-source-id: d2074eb1b01487b02d95f3e2eebe683237d918fe
parent 2750533d
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include <algorithm> #include <algorithm>
#include <stdexcept> #include <stdexcept>
#if FOLLY_X64 && (__SSE4_2__ || defined(__clang__) || __GNUC_PREREQ(4, 9)) #if FOLLY_SSE_PREREQ(4, 2)
#include <nmmintrin.h> #include <nmmintrin.h>
#endif #endif
...@@ -31,7 +31,7 @@ namespace detail { ...@@ -31,7 +31,7 @@ namespace detail {
uint32_t uint32_t
crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum); crc32c_sw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum);
#if FOLLY_X64 && (__SSE4_2__ || defined(__clang__) || __GNUC_PREREQ(4, 9)) #if FOLLY_SSE_PREREQ(4, 2)
// Fast SIMD implementation of CRC-32C for x86 with SSE 4.2 // Fast SIMD implementation of CRC-32C for x86 with SSE 4.2
FOLLY_TARGET_ATTRIBUTE("sse4.2") FOLLY_TARGET_ATTRIBUTE("sse4.2")
...@@ -111,6 +111,11 @@ uint32_t crc32c_hw(const uint8_t *data, size_t nbytes, ...@@ -111,6 +111,11 @@ uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
throw std::runtime_error("crc32_hw is not implemented on this platform"); throw std::runtime_error("crc32_hw is not implemented on this platform");
} }
uint32_t crc32_hw(const uint8_t *data, size_t nbytes,
uint32_t startingChecksum) {
throw std::runtime_error("crc32_hw is not implemented on this platform");
}
bool crc32c_hw_supported() { bool crc32c_hw_supported() {
return false; return false;
} }
......
...@@ -115,6 +115,8 @@ ...@@ -115,6 +115,8 @@
namespace folly { namespace folly {
namespace detail { namespace detail {
#if FOLLY_SSE_PREREQ(4, 2)
uint32_t uint32_t
crc32_hw_aligned(uint32_t remainder, const __m128i* p, size_t vec_count) { crc32_hw_aligned(uint32_t remainder, const __m128i* p, size_t vec_count) {
/* Constants precomputed by gen_crc32_multipliers.c. Do not edit! */ /* Constants precomputed by gen_crc32_multipliers.c. Do not edit! */
...@@ -269,5 +271,7 @@ _128_bits_at_a_time: ...@@ -269,5 +271,7 @@ _128_bits_at_a_time:
x0 = _mm_clmulepi64_si128(_mm_and_si128(x0, mask32), barrett_reduction_constants, 0x10); x0 = _mm_clmulepi64_si128(_mm_and_si128(x0, mask32), barrett_reduction_constants, 0x10);
return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(x0, x1), 4)); return _mm_cvtsi128_si32(_mm_srli_si128(_mm_xor_si128(x0, x1), 4));
} }
#endif
} }
} // namespace } // namespace
...@@ -16,7 +16,12 @@ ...@@ -16,7 +16,12 @@
#pragma once #pragma once
#include <folly/Portability.h>
#if FOLLY_SSE_PREREQ(4, 2)
#include <immintrin.h> #include <immintrin.h>
#endif
#include <stdint.h> #include <stdint.h>
#include <cstddef> #include <cstddef>
...@@ -68,8 +73,10 @@ uint32_t crc32c_sw(const uint8_t* data, size_t nbytes, ...@@ -68,8 +73,10 @@ uint32_t crc32c_sw(const uint8_t* data, size_t nbytes,
uint32_t uint32_t
crc32_hw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum = ~0U); crc32_hw(const uint8_t* data, size_t nbytes, uint32_t startingChecksum = ~0U);
#if FOLLY_SSE_PREREQ(4, 2)
uint32_t uint32_t
crc32_hw_aligned(uint32_t remainder, const __m128i* p, size_t vec_count); crc32_hw_aligned(uint32_t remainder, const __m128i* p, size_t vec_count);
#endif
/** /**
* Check whether a hardware-accelerated CRC-32 implementation is * Check whether a hardware-accelerated CRC-32 implementation is
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment