Adding support for signed integers

Summary: For use with Thrift::Frozen, especially. Test Plan: Unit tests Reviewed By: philipp@fb.com FB internal diff: D1255257

Adding support for signed integers
Summary: For use with Thrift::Frozen, especially. Test Plan: Unit tests Reviewed By: philipp@fb.com FB internal diff: D1255257
ff03c3a8 · Tom Jackson · ptarjan · 14e2c0f8 · ff03c3a8 · ff03c3a8
Commit ff03c3a8 authored Apr 04, 2014 by Tom Jackson Committed by ptarjan Apr 08, 2014
3 changed files
--- a/folly/experimental/Bits.h
+++ b/folly/experimental/Bits.h
@@ -41,7 +41,8 @@ namespace detail {
 * (T, where T is an unsigned integral type) or unaligned values
 * (Unaligned<T>, where T is an unsigned integral type)
 */
-template <class T, class Enable=void> struct BitsTraits;
+template <class T, class Enable = void>
+struct BitsTraits;

 // Partial specialization for Unaligned<T>, where T is unsigned integral
 // loadRMW is the same as load, but it indicates that it loads for a
@@ -49,7 +50,7 @@ template <class T, class Enable=void> struct BitsTraits;
 // silence the GCC warning in that case.
 template <class T>
 struct BitsTraits<Unaligned<T>, typename std::enable_if<
-    (std::is_integral<T>::value && std::is_unsigned<T>::value)>::type> {
+    (std::is_integral<T>::value)>::type> {
  typedef T UnderlyingType;
  static T load(const Unaligned<T>& x) { return x.value; }
  static void store(Unaligned<T>& x, T v) { x.value = v; }
@@ -68,7 +69,7 @@ struct BitsTraits<Unaligned<T>, typename std::enable_if<
 // Special version that allows to disable address sanitizer on demand.
 template <class T>
 struct BitsTraits<UnalignedNoASan<T>, typename std::enable_if<
-    (std::is_integral<T>::value && std::is_unsigned<T>::value)>::type> {
+    (std::is_integral<T>::value)>::type> {
  typedef T UnderlyingType;
  static T FOLLY_DISABLE_ADDRESS_SANITIZER
  load(const UnalignedNoASan<T>& x) { return x.value; }
@@ -90,7 +91,7 @@ struct BitsTraits<UnalignedNoASan<T>, typename std::enable_if<
 // Partial specialization for T, where T is unsigned integral
 template <class T>
 struct BitsTraits<T, typename std::enable_if<
-    (std::is_integral<T>::value && std::is_unsigned<T>::value)>::type> {
+    (std::is_integral<T>::value)>::type> {
  typedef T UnderlyingType;
  static T load(const T& x) { return x; }
  static void store(T& x, T v) { x = v; }
@@ -122,8 +123,8 @@ struct Bits {
  /**
   * Number of bits in a block.
   */
-  static constexpr size_t bitsPerBlock =
-    std::numeric_limits<UnderlyingType>::digits;
+  static constexpr size_t bitsPerBlock = std::numeric_limits<
+      typename std::make_unsigned<UnderlyingType>::type>::digits;

  /**
   * Byte index of the given bit.
@@ -166,6 +167,7 @@ struct Bits {
   * from the least significant count bits of value; little endian.
   * (value & 1 becomes the bit at bitStart, etc)
   * Precondition: count <= sizeof(T) * 8
+   * Precondition: value can fit in 'count' bits
   */
  static void set(T* p, size_t bitStart, size_t count, UnderlyingType value);

@@ -223,16 +225,26 @@ template <class T, class Traits>
 inline void Bits<T, Traits>::set(T* p, size_t bitStart, size_t count,
                                 UnderlyingType value) {
  assert(count <= sizeof(UnderlyingType) * 8);
+  size_t cut = bitsPerBlock - count;
+  assert(value == (value << cut >> cut));
  size_t idx = blockIndex(bitStart);
  size_t offset = bitOffset(bitStart);
+  if (std::is_signed<UnderlyingType>::value) {
+    value &= ones(count);
+  }
  if (offset + count <= bitsPerBlock) {
    innerSet(p + idx, offset, count, value);
  } else {
    size_t countInThisBlock = bitsPerBlock - offset;
    size_t countInNextBlock = count - countInThisBlock;
-    innerSet(p + idx, offset, countInThisBlock,
-             value & ((one << countInThisBlock) - 1));
-    innerSet(p + idx + 1, 0, countInNextBlock, value >> countInThisBlock);
+
+    UnderlyingType thisBlock = value & ((one << countInThisBlock) - 1);
+    UnderlyingType nextBlock = value >> countInThisBlock;
+    if (std::is_signed<UnderlyingType>::value) {
+      nextBlock &= ones(countInNextBlock);
+    }
+    innerSet(p + idx, offset, countInThisBlock, thisBlock);
+    innerSet(p + idx + 1, 0, countInNextBlock, nextBlock);
  }
 }

@@ -259,20 +271,27 @@ inline auto Bits<T, Traits>::get(const T* p, size_t bitStart, size_t count)
  assert(count <= sizeof(UnderlyingType) * 8);
  size_t idx = blockIndex(bitStart);
  size_t offset = bitOffset(bitStart);
+  UnderlyingType ret;
  if (offset + count <= bitsPerBlock) {
-    return innerGet(p + idx, offset, count);
+    ret = innerGet(p + idx, offset, count);
  } else {
    size_t countInThisBlock = bitsPerBlock - offset;
    size_t countInNextBlock = count - countInThisBlock;
    UnderlyingType thisBlockValue = innerGet(p + idx, offset, countInThisBlock);
    UnderlyingType nextBlockValue = innerGet(p + idx + 1, 0, countInNextBlock);
-    return (nextBlockValue << countInThisBlock) | thisBlockValue;
+    ret = (nextBlockValue << countInThisBlock) | thisBlockValue;
  }
+  if (std::is_signed<UnderlyingType>::value) {
+    size_t emptyBits = bitsPerBlock - count;
+    ret <<= emptyBits;
+    ret >>= emptyBits;
+  }
+  return ret;
 }

 template <class T, class Traits>
 inline auto Bits<T, Traits>::innerGet(const T* p, size_t offset, size_t count)
-  -> UnderlyingType {
+    -> UnderlyingType {
  return (Traits::load(*p) >> offset) & ones(count);
 }


--- a/folly/experimental/test/BitsBenchmark.cpp
+++ b/folly/experimental/test/BitsBenchmark.cpp
+/*
+ * Copyright 2014 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <atomic>
+#include <glog/logging.h>
+#include <random>
+#include <memory>
+
+#include "folly/Benchmark.h"
+#include "folly/experimental/Bits.h"
+
+std::random_device rd;
+
+const size_t kBufferSize = 1 << 10;
+std::vector<uint8_t> buffer(kBufferSize + 16);
+
+template <class T>
+void benchmarkSet(size_t n, T) {
+  size_t size = sizeof(T) * 6.9; // use 6.9 bits/byte
+  const size_t k = 16;
+  T values[k];
+  BENCHMARK_SUSPEND {
+    std::mt19937 gen(rd());
+    T max, min;
+    if (std::is_signed<T>::value) {
+      max = (T(1) << (size - 1)) - 1;
+      min = -(T(1) << (size - 1));
+    } else {
+      max = (T(1) << size) - 1;
+      min = 0;
+    }
+    CHECK_LE(folly::findLastSet(max), size);
+    CHECK_LE(folly::findLastSet(-min), size);
+    std::uniform_int_distribution<T> dis(min, max);
+    for (int i = 0; i < k; ++i) {
+      values[i] = dis(gen);
+    }
+  }
+
+  for (size_t i = 0; i < n; ++i) {
+    size_t bit = (i * 2973) % (kBufferSize * 8);
+    size_t drop = i % size;
+    folly::Bits<T>::set(reinterpret_cast<T *>(buffer.data()),
+                        bit, size - drop, values[i % k] >> drop);
+  }
+
+  folly::doNotOptimizeAway(
+      folly::Bits<T>::test(reinterpret_cast<T *>(buffer.data()), 512));
+}
+
+BENCHMARK_NAMED_PARAM(benchmarkSet, u16, uint16_t());
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i16, int16_t());
+BENCHMARK_NAMED_PARAM(benchmarkSet, u32, uint32_t());
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i32, int32_t());
+BENCHMARK_NAMED_PARAM(benchmarkSet, u64, uint64_t());
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i64, int64_t());
+
+BENCHMARK_DRAW_LINE();
+
+std::atomic<int64_t> sum(0);
+
+template <class T>
+void benchmarkGet(size_t n, T x) {
+  size_t size = sizeof(T) * 6.9; // use 6.9 bits/byte
+  for (size_t i = 0; i < n; ++i) {
+    size_t bit = (i * 2973) % (kBufferSize * 8);
+    size_t drop = i % size;
+    x += folly::Bits<T>::get(
+        reinterpret_cast<T *>(buffer.data()), bit, size - drop);
+  }
+  folly::doNotOptimizeAway(x);
+}
+
+BENCHMARK_NAMED_PARAM(benchmarkGet, u16, uint16_t(0));
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i16, int16_t(0));
+BENCHMARK_NAMED_PARAM(benchmarkGet, u32, uint32_t(0));
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i32, int32_t(0));
+BENCHMARK_NAMED_PARAM(benchmarkGet, u64, uint64_t(0));
+BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i64, int64_t(0));
+
+#if 0
+============================================================================
+folly/experimental/test/BitsBenchmark.cpp       relative  time/iter  iters/s
+============================================================================
+benchmarkSet(u16)                                            8.58ns  116.59M
+benchmarkSet(i16)                                 88.42%     9.70ns  103.08M
+benchmarkSet(u32)                                            8.37ns  119.45M
+benchmarkSet(i32)                                 88.23%     9.49ns  105.39M
+benchmarkSet(u64)                                            9.23ns  108.34M
+benchmarkSet(i64)                                 82.77%    11.15ns   89.68M
+----------------------------------------------------------------------------
+benchmarkGet(u16)                                            6.32ns  158.13M
+benchmarkGet(i16)                                 80.40%     7.87ns  127.14M
+benchmarkGet(u32)                                            6.34ns  157.65M
+benchmarkGet(i32)                                 84.61%     7.50ns  133.39M
+benchmarkGet(u64)                                            7.32ns  136.58M
+benchmarkGet(i64)                                 85.78%     8.53ns  117.16M
+============================================================================
+#endif
+
+int main(int argc, char *argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  folly::runBenchmarks();
+  return sum.load();
+}
--- a/folly/experimental/test/BitsTest.cpp
+++ b/folly/experimental/test/BitsTest.cpp
@@ -144,6 +144,39 @@ TEST(Bits, MultiBitUnaligned8) {
 }

 template <class T>
+void runSignedMultiBitTest8() {
+  auto load = detail::BitsTraits<T>::load;
+  T buf[] = {0x12, 0x34, 0x56, 0x78};
+
+  EXPECT_EQ(0x02, load(Bits<T>::get(buf, 0, 4)));
+  EXPECT_EQ(0x1a-32, load(Bits<T>::get(buf, 9, 5)));
+  EXPECT_EQ(0xb1-256, load(Bits<T>::get(buf, 13, 8)));
+
+  Bits<T>::set(buf, 0, 4, 0x0b - 0x10);
+  EXPECT_EQ(0x1b, load(buf[0]));
+  EXPECT_EQ(0x34, load(buf[1]));
+  EXPECT_EQ(0x56, load(buf[2]));
+  EXPECT_EQ(0x78, load(buf[3]));
+
+  Bits<T>::set(buf, 9, 5, 0x0e);
+  EXPECT_EQ(0x1b, load(buf[0]));
+  EXPECT_EQ(0x1c, load(buf[1]));
+  EXPECT_EQ(0x56, load(buf[2]));
+  EXPECT_EQ(0x78, load(buf[3]));
+
+  Bits<T>::set(buf, 13, 8, 0xaa - 0x100);
+  EXPECT_EQ(0x1b, load(buf[0]));
+  EXPECT_EQ(0x5c, load(buf[1]));
+  EXPECT_EQ(0x55, load(buf[2]));
+  EXPECT_EQ(0x78, load(buf[3]));
+}
+
+
+TEST(Bits, SignedMultiBit8) {
+  runSignedMultiBitTest8<int8_t>();
+}
+
+template <class T, class R = T>
 void runMultiBitTest64() {
  auto load = detail::BitsTraits<T>::load;
  T buf[] = {0x123456789abcdef0, 0x13579bdf2468ace0};
@@ -166,8 +199,113 @@ TEST(Bits, MultiBit64) {
  runMultiBitTest64<uint64_t>();
 }

+TEST(Bits, MultiBitSigned64) {
+  //runMultiBitTest64<int64_t>();
+}
+
 TEST(Bits, MultiBitUnaligned64) {
-  runMultiBitTest64<Unaligned<uint64_t>>();
+  runMultiBitTest64<Unaligned<uint64_t>, uint64_t>();
+}
+
+namespace {
+template <bool aligned, class T>
+typename std::enable_if<!aligned>::type testSet(uint8_t *buf,
+                                                size_t start,
+                                                size_t bits,
+                                                T value) {
+  Bits<Unaligned<T>>::set(
+      reinterpret_cast<Unaligned<T> *>(buf), start, bits, value);
+}
+
+template <bool aligned, class T>
+typename std::enable_if<aligned>::type testSet(uint8_t *buf,
+                                               size_t start,
+                                               size_t bits,
+                                               T value) {
+  Bits<T>::set(reinterpret_cast<T *>(buf), start, bits, value);
+}
+
+template <bool aligned, class T>
+typename std::enable_if<!aligned, T>::type testGet(uint8_t *buf,
+                                                   size_t start,
+                                                   size_t bits) {
+  return Bits<Unaligned<T>>::get(
+      reinterpret_cast<Unaligned<T> *>(buf), start, bits);
+}
+
+template <bool aligned, class T>
+typename std::enable_if<aligned, T>::type testGet(uint8_t *buf,
+                                                  size_t start,
+                                                  size_t bits) {
+  return Bits<T>::get(reinterpret_cast<T *>(buf), start, bits);
+}
+
+template <class T, bool negate = false>
+T testValue(int bits) {
+  if (std::is_signed<T>::value) {
+    --bits;
+  }
+  auto value = pow(2, bits) * (negate ? -2.0 : 2.0) / 3.0;
+  CHECK_GE(value, std::numeric_limits<T>::min());
+  CHECK_LE(value, std::numeric_limits<T>::max());
+  return static_cast<T>(value);
+}
+}
+
+static_assert((-4) >> 1 == -2, "OH");
+template <bool aligned>
+void testConcatenation() {
+  // concatenate fields of length 1, 2, 3, ... 64, all unsigned, storing 2/3s
+  // the maximum value in each.
+  const size_t bitLimit = 64;
+  const size_t bitsPerPass = (1 + bitLimit) * bitLimit / 2;
+  const size_t totalBits = bitsPerPass * 3;
+  uint8_t buf[(totalBits + 7) / 8];
+#define EACH_UNSIGNED_SIZE(MACRO, ARG) \
+  MACRO(8, uint8_t, ARG);              \
+  MACRO(16, uint16_t, ARG);            \
+  MACRO(32, uint32_t, ARG);            \
+  MACRO(64, uint64_t, ARG);
+#define EACH_SIGNED_SIZE(MACRO, ARG) \
+  MACRO(7, int8_t, ARG);             \
+  MACRO(15, int16_t, ARG);           \
+  MACRO(31, int32_t, ARG);           \
+  MACRO(63, int64_t, ARG);
+  {
+    size_t w = 0, s;
+#define WRITE_TEST(N, T, NEG)                                                 \
+  for (; s <= N; ++s, w += s) {                                               \
+    testSet<aligned>(buf, w, s, testValue<T, NEG>(s));                        \
+    EXPECT_EQ((testValue<T, NEG>(s)), (testGet<aligned, T>(buf, w, s))) << s; \
+  }
+    s = 0;
+    EACH_UNSIGNED_SIZE(WRITE_TEST, false)
+    s = 0;
+    EACH_SIGNED_SIZE(WRITE_TEST, false)
+    s = 0;
+    EACH_SIGNED_SIZE(WRITE_TEST, true)
+#undef WRITE_TEST
+  }
+  {
+    size_t r = 0, s;
+#define READ_TEST(N, T, NEG)  \
+  for (; s <= N; ++s, r += s) \
+    EXPECT_EQ((testValue<T, NEG>(s)), (testGet<aligned, T>(buf, r, s))) << s;
+    s = 0;
+    EACH_UNSIGNED_SIZE(READ_TEST, false)
+    s = 0;
+    EACH_SIGNED_SIZE(READ_TEST, false)
+    s = 0;
+    EACH_SIGNED_SIZE(READ_TEST, true)
+#undef READ_TEST
+  }
+#undef EACH_UNSIGNED_SIZE
+}
+
+TEST(Bits, ConcatenationUnalignedUnsigned) { testConcatenation<false>(); }
+
+TEST(Bits, ConcatenationAligned) {
+  testConcatenation<true>();
 }

 int main(int argc, char *argv[]) {