Commit 5269697c authored by Laurent Stacul, committed by Facebook GitHub Bot

Fix wrong SSE version (#1330)

Summary:
For some reason, I only allow my compiler (gcc 10.0.1) to use x86 SSE, SSE2 and SSE3 instructions. When I compile, I get the following error:

```
In file included from /opt/1A/toolchain/x86_64-v20.0.12/lib/gcc/x86_64-1a-linux-gnu/10.0.1/include/smmintrin.h:32,
                 from /opt/1A/toolchain/x86_64-v20.0.12/lib/gcc/x86_64-1a-linux-gnu/10.0.1/include/nmmintrin.h:31,
                 from /home/docker/opensource/folly/folly/GroupVarint.h:43,
                 from /home/docker/opensource/folly/folly/test/GroupVarintTest.cpp:17:
/home/docker/opensource/folly/folly/GroupVarint.h: In function 'folly::GroupVarint<unsigned int>::decode(char const*, unsigned int*, unsigned int*, unsigned int*, unsigned int*)':
/opt/1A/toolchain/x86_64-v20.0.12/lib/gcc/x86_64-1a-linux-gnu/10.0.1/include/tmmintrin.h:136:1: error: inlining failed in call to 'always_inline' '_mm_shuffle_epi8(long long __vector(2), long long __vector(2))': target specific option mismatch
  136 | _mm_shuffle_epi8 (__m128i __X, __m128i __Y)
      | ^~~~~~~~~~~~~~~~
```

After some searching, it seems the required instructions are defined in SSE4.1, as we can see in `nmmintrin.h`:
```
#ifndef _NMMINTRIN_H_INCLUDED
#define _NMMINTRIN_H_INCLUDED

/* We just include SSE4.1 header file.  */
#include <smmintrin.h>

#endif /* _NMMINTRIN_H_INCLUDED */
```
This PR fixes the code accordingly.
Pull Request resolved: https://github.com/facebook/folly/pull/1330

Reviewed By: Orvid

Differential Revision: D20323400

Pulled By: yfeldblum

fbshipit-source-id: 9913cc5eb378d180403094589b852d45e70978bc
parent 88d9bf7b
......@@ -138,7 +138,7 @@ struct group_varint_table_sse_mask_make_item
}
};
#if FOLLY_SSE >= 3
#if FOLLY_SSE >= 4
alignas(16) FOLLY_STORAGE_CONSTEXPR
decltype(groupVarintSSEMasks) groupVarintSSEMasks =
make_array_with<256>(group_varint_table_sse_mask_make_item{});
......
......@@ -39,7 +39,7 @@
#if FOLLY_HAVE_GROUP_VARINT
#if FOLLY_SSE >= 3
#if FOLLY_SSE >= 4
#include <nmmintrin.h>
namespace folly {
namespace detail {
......@@ -199,7 +199,7 @@ class GroupVarint<uint32_t> : public detail::GroupVarintBase<uint32_t> {
return decode_simple(p, dest, dest + 1, dest + 2, dest + 3);
}
#if FOLLY_SSE >= 3
#if FOLLY_SSE >= 4
/**
* Just like the non-SSSE3 decode below, but with the additional constraint
* that we must be able to read at least 17 bytes from the input pointer, p.
......@@ -242,7 +242,7 @@ class GroupVarint<uint32_t> : public detail::GroupVarintBase<uint32_t> {
return p + detail::groupVarintLengths[key];
}
#else /* !__SSSE3__ */
#else // FOLLY_SSE >= 4
static const char* decode(
const char* p, uint32_t* a, uint32_t* b, uint32_t* c, uint32_t* d) {
return decode_simple(p, a, b, c, d);
......@@ -251,7 +251,7 @@ class GroupVarint<uint32_t> : public detail::GroupVarintBase<uint32_t> {
static const char* decode(const char* p, uint32_t* dest) {
return decode_simple(p, dest);
}
#endif /* __SSSE3__ */
#endif // FOLLY_SSE >= 4
private:
static uint8_t key(uint32_t x) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment