Commit 5ed20b59, authored by Elizabeth Smith and committed by Tudor Bosman

MSVC intrinsics for bits and cpuid

Summary: Use MSVC intrinsics for cpuid, popcount, byteswap, and bit-scan functionality

Test Plan: fbconfig -r folly && fbmake runtests

Reviewed By: delong.j@fb.com

FB internal diff: D1413254
parent 1f1ba906
...@@ -20,9 +20,8 @@ ...@@ -20,9 +20,8 @@
#include <folly/Portability.h> #include <folly/Portability.h>
// None of this is necessary if we're compiling for a target that supports // None of this is necessary if we're compiling for a target that supports
// popcnt // popcnt, which includes MSVC
#ifndef __POPCNT__ #if !defined(__POPCNT__) && !defined(_MSC_VER)
namespace { namespace {
int popcount_builtin(unsigned int x) { int popcount_builtin(unsigned int x) {
......
...@@ -57,14 +57,10 @@ ...@@ -57,14 +57,10 @@
#include <folly/Portability.h> #include <folly/Portability.h>
#ifndef __GNUC__ #if !defined(__clang__) && !defined(_MSC_VER)
#error GCC required
#endif
#ifndef __clang__
#define FOLLY_INTRINSIC_CONSTEXPR constexpr #define FOLLY_INTRINSIC_CONSTEXPR constexpr
#else #else
// Unlike GCC, in Clang (as of 3.2) intrinsics aren't constexpr. // GCC is the only compiler with intrinsics constexpr.
#define FOLLY_INTRINSIC_CONSTEXPR const #define FOLLY_INTRINSIC_CONSTEXPR const
#endif #endif
...@@ -78,6 +74,14 @@ ...@@ -78,6 +74,14 @@
# include <byteswap.h> # include <byteswap.h>
#endif #endif
#ifdef _MSC_VER
# include <intrin.h>
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanForward64)
# pragma intrinsic(_BitScanReverse)
# pragma intrinsic(_BitScanReverse64)
#endif
#include <cassert> #include <cassert>
#include <cinttypes> #include <cinttypes>
#include <iterator> #include <iterator>
...@@ -98,7 +102,12 @@ typename std::enable_if< ...@@ -98,7 +102,12 @@ typename std::enable_if<
sizeof(T) <= sizeof(unsigned int)), sizeof(T) <= sizeof(unsigned int)),
unsigned int>::type unsigned int>::type
findFirstSet(T x) { findFirstSet(T x) {
#ifdef _MSC_VER
// _BitScanForward reports the 0-based position of the lowest set bit and
// returns zero when no bit is set. findFirstSet must match __builtin_ffs in
// the non-MSVC branch, which is 1-based with 0 reserved for x == 0, so the
// index is shifted by one; otherwise x == 1 would be indistinguishable from
// x == 0.
unsigned long index;
return _BitScanForward(&index, x) ? index + 1 : 0;
#else
return __builtin_ffs(x); return __builtin_ffs(x);
#endif
} }
template <class T> template <class T>
...@@ -110,7 +119,12 @@ typename std::enable_if< ...@@ -110,7 +119,12 @@ typename std::enable_if<
sizeof(T) <= sizeof(unsigned long)), sizeof(T) <= sizeof(unsigned long)),
unsigned int>::type unsigned int>::type
findFirstSet(T x) { findFirstSet(T x) {
#ifdef _MSC_VER
// _BitScanForward reports the 0-based position of the lowest set bit and
// returns zero when no bit is set. The non-MSVC branch uses __builtin_ffsl,
// which is 1-based with 0 reserved for x == 0, so add one to the index to
// keep both branches in agreement.
unsigned long index;
return _BitScanForward(&index, x) ? index + 1 : 0;
#else
return __builtin_ffsl(x); return __builtin_ffsl(x);
#endif
} }
template <class T> template <class T>
...@@ -122,7 +136,12 @@ typename std::enable_if< ...@@ -122,7 +136,12 @@ typename std::enable_if<
sizeof(T) <= sizeof(unsigned long long)), sizeof(T) <= sizeof(unsigned long long)),
unsigned int>::type unsigned int>::type
findFirstSet(T x) { findFirstSet(T x) {
#ifdef _MSC_VER
// _BitScanForward64 reports the 0-based position of the lowest set bit of a
// 64-bit value and returns zero when no bit is set. The non-MSVC branch uses
// __builtin_ffsll, which is 1-based with 0 reserved for x == 0, so add one to
// the index to keep both branches in agreement.
unsigned long index;
return _BitScanForward64(&index, x) ? index + 1 : 0;
#else
return __builtin_ffsll(x); return __builtin_ffsll(x);
#endif
} }
template <class T> template <class T>
...@@ -147,7 +166,18 @@ typename std::enable_if< ...@@ -147,7 +166,18 @@ typename std::enable_if<
sizeof(T) <= sizeof(unsigned int)), sizeof(T) <= sizeof(unsigned int)),
unsigned int>::type unsigned int>::type
findLastSet(T x) { findLastSet(T x) {
#ifdef _MSC_VER
unsigned long index;
int clz;
if (_BitScanReverse(&index, x)) {
clz = static_cast<int>(31 - index);
} else {
clz = 32;
}
return x ? 8 * sizeof(unsigned int) - clz : 0;
#else
return x ? 8 * sizeof(unsigned int) - __builtin_clz(x) : 0; return x ? 8 * sizeof(unsigned int) - __builtin_clz(x) : 0;
#endif
} }
template <class T> template <class T>
...@@ -159,7 +189,18 @@ typename std::enable_if< ...@@ -159,7 +189,18 @@ typename std::enable_if<
sizeof(T) <= sizeof(unsigned long)), sizeof(T) <= sizeof(unsigned long)),
unsigned int>::type unsigned int>::type
findLastSet(T x) { findLastSet(T x) {
#ifdef _MSC_VER
// MSVC replacement for the __builtin_clzl expression in the #else branch:
// compute the leading-zero count from the position of the highest set bit
// found by _BitScanReverse, then turn it into a 1-based bit position.
// NOTE(review): the constants 31/32 and the use of sizeof(unsigned int)
// (rather than sizeof(unsigned long)) assume unsigned long is 32 bits wide,
// as it is under MSVC's LLP64 model -- confirm before reusing elsewhere.
unsigned long index;
int clz;
if (_BitScanReverse(&index, x)) {
clz = static_cast<int>(31 - index);
} else {
// No bit set: every one of the 32 bits is a leading zero.
clz = 32;
}
// x == 0 yields 0; otherwise the result is index + 1.
return x ? 8 * sizeof(unsigned int) - clz : 0;
#else
return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0; return x ? 8 * sizeof(unsigned long) - __builtin_clzl(x) : 0;
#endif
} }
template <class T> template <class T>
...@@ -171,7 +212,18 @@ typename std::enable_if< ...@@ -171,7 +212,18 @@ typename std::enable_if<
sizeof(T) <= sizeof(unsigned long long)), sizeof(T) <= sizeof(unsigned long long)),
unsigned int>::type unsigned int>::type
findLastSet(T x) { findLastSet(T x) {
#ifdef _MSC_VER
// MSVC replacement for __builtin_clzll. The 64-bit scan (_BitScanReverse64)
// is required here: _BitScanReverse takes a 32-bit mask, so it would silently
// drop the upper half of x and report the wrong position (or "no bit set")
// for any value at or above 2^32.
unsigned long index;
unsigned long long clz;
if (_BitScanReverse64(&index, x)) {
  // Leading zeros of a 64-bit value = 63 - position of the highest set bit.
  clz = static_cast<unsigned long long>(63 - index);
} else {
  // No bit set: all 64 bits are leading zeros.
  clz = 64;
}
// x == 0 yields 0; otherwise the result is index + 1.
return x ? 8 * sizeof(unsigned long long) - clz : 0;
#else
return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0; return x ? 8 * sizeof(unsigned long long) - __builtin_clzll(x) : 0;
#endif
} }
template <class T> template <class T>
...@@ -237,6 +289,8 @@ struct EndianIntBase { ...@@ -237,6 +289,8 @@ struct EndianIntBase {
static T swap(T x); static T swap(T x);
}; };
#ifndef _MSC_VER
/** /**
* If we have the bswap_16 macro from byteswap.h, use it; otherwise, provide our * If we have the bswap_16 macro from byteswap.h, use it; otherwise, provide our
* own definition. * own definition.
...@@ -254,6 +308,8 @@ our_bswap16(Int16 x) { ...@@ -254,6 +308,8 @@ our_bswap16(Int16 x) {
} }
#endif #endif
#endif
#define FB_GEN(t, fn) \ #define FB_GEN(t, fn) \
template<> inline t EndianIntBase<t>::swap(t x) { return fn(x); } template<> inline t EndianIntBase<t>::swap(t x) { return fn(x); }
...@@ -262,12 +318,21 @@ template<> inline t EndianIntBase<t>::swap(t x) { return fn(x); } ...@@ -262,12 +318,21 @@ template<> inline t EndianIntBase<t>::swap(t x) { return fn(x); }
// __builtin_bswap16 for some reason, so we have to provide our own. // __builtin_bswap16 for some reason, so we have to provide our own.
FB_GEN( int8_t,) FB_GEN( int8_t,)
FB_GEN(uint8_t,) FB_GEN(uint8_t,)
#ifdef _MSC_VER
// Under MSVC, byte swapping goes through the _byteswap_* intrinsics instead
// of the __builtin_bswap* / our_bswap16 helpers used in the #else branch.
// Each width uses the same unsigned intrinsic for both its signed and its
// unsigned specialisation of EndianIntBase<T>::swap.
FB_GEN( int64_t, _byteswap_uint64)
FB_GEN(uint64_t, _byteswap_uint64)
FB_GEN( int32_t, _byteswap_ulong)
FB_GEN(uint32_t, _byteswap_ulong)
FB_GEN( int16_t, _byteswap_ushort)
FB_GEN(uint16_t, _byteswap_ushort)
#else
FB_GEN( int64_t, __builtin_bswap64) FB_GEN( int64_t, __builtin_bswap64)
FB_GEN(uint64_t, __builtin_bswap64) FB_GEN(uint64_t, __builtin_bswap64)
FB_GEN( int32_t, __builtin_bswap32) FB_GEN( int32_t, __builtin_bswap32)
FB_GEN(uint32_t, __builtin_bswap32) FB_GEN(uint32_t, __builtin_bswap32)
FB_GEN( int16_t, our_bswap16) FB_GEN( int16_t, our_bswap16)
FB_GEN(uint16_t, our_bswap16) FB_GEN(uint16_t, our_bswap16)
#endif
#undef FB_GEN #undef FB_GEN
......
...@@ -30,7 +30,14 @@ namespace folly { ...@@ -30,7 +30,14 @@ namespace folly {
class CpuId { class CpuId {
public: public:
CpuId() { CpuId() {
#if FOLLY_X64 || defined(__i386__) #ifdef _MSC_VER
// Query CPUID function 1 through the MSVC intrinsic. Elements 2 and 3 of the
// output array are stored in c_ and d_, the same members the inline-asm
// branch fills from the "=c" and "=d" outputs.
int info[4];
__cpuid(info, 1);
c_ = info[2];
d_ = info[3];
#elif FOLLY_X64 || defined(__i386__)
__asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx"); __asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx");
#else #else
// On non-Intel, none of these features exist; at least not in the same form // On non-Intel, none of these features exist; at least not in the same form
......
...@@ -24,7 +24,14 @@ namespace detail { ...@@ -24,7 +24,14 @@ namespace detail {
// __builtin_popcount directly, as it's presumably inlined. // __builtin_popcount directly, as it's presumably inlined.
// If not, use runtime detection using __attribute__((ifunc)) // If not, use runtime detection using __attribute__((ifunc))
// (see Bits.cpp) // (see Bits.cpp)
#ifdef __POPCNT__ #ifdef _MSC_VER
// Number of set bits in x, computed with the MSVC __popcnt intrinsic.
// NOTE(review): unlike the ifunc-based path described above, this branch does
// no runtime check that the CPU supports POPCNT -- confirm that is acceptable
// for all targeted hardware.
inline int popcount(unsigned int x) {
  const unsigned int setBits = __popcnt(x);
  return static_cast<int>(setBits);
}
inline int popcountll(unsigned long long x) {
return __popcnt64(x);
}
#elif defined(__POPCNT__)
inline int popcount(unsigned int x) { inline int popcount(unsigned int x) {
return __builtin_popcount(x); return __builtin_popcount(x);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment