Commit 19e5f7ed authored by Brian Pane, committed by Peter Griess

Add new CRC-32C checksum functions to folly

Summary:
* Added a new crc32c() function, with a portable implementation
and an optimized version for x86 with SSE4.2

Test Plan: New unit test included

Reviewed By: tudorb@fb.com

FB internal diff: D1111515
parent b4d29567
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "folly/Checksum.h"
#include <algorithm>
#include <cstring>
#include <stdexcept>
#include <boost/crc.hpp>
#include "folly/CpuId.h"
namespace folly {
namespace detail {
#if defined(__x86_64__) && defined (__GNUC__)
// Hardware-accelerated CRC-32C built on the CRC32 instruction that x86
// processors provide as part of SSE 4.2.
//
// data:             start of the bytes to checksum
// nbytes:           number of bytes to process
// startingChecksum: running checksum to continue from
//
// Returns the updated running checksum. No final inversion is applied, so
// the result can be handed back in as startingChecksum to continue over a
// following chunk; for nbytes == 0 the starting value is returned as-is.
//
// The target attribute enables the SSE 4.2 builtins for this function only,
// so the rest of the file does not have to be compiled with -msse4.2.
// Callers are expected to check crc32c_hw_supported() before calling.
__attribute__((__target__("sse4.2")))
uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
    uint32_t startingChecksum) {
  const size_t kWordSize = sizeof(uint64_t);
  uint32_t sum = startingChecksum;
  size_t offset = 0;

  // Consume single bytes until data + offset reaches an 8-byte boundary (or
  // the input is exhausted) so that the word loop below reads aligned memory.
  const size_t misalignment = (size_t)((uintptr_t)data & (kWordSize - 1));
  if (misalignment != 0) {
    const size_t limit = std::min(nbytes, kWordSize - misalignment);
    while (offset < limit) {
      sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]);
      offset++;
    }
  }

  // Process 8 bytes at a time until fewer than 8 bytes are left.
  // offset never exceeds nbytes, so the subtraction cannot wrap around.
  while (nbytes - offset >= kWordSize) {
    // Load through memcpy rather than a uint64_t* cast: the buffer is only
    // known to hold bytes, and the cast would break the aliasing rules.
    uint64_t word;
    std::memcpy(&word, data + offset, kWordSize);
    sum = (uint32_t)__builtin_ia32_crc32di(sum, word);
    offset += kWordSize;
  }

  // Process any bytes remaining after the last full 8-byte block.
  while (offset < nbytes) {
    sum = (uint32_t)__builtin_ia32_crc32qi(sum, data[offset]);
    offset++;
  }
  return sum;
}
bool crc32c_hw_supported() {
static folly::CpuId id;
return id.sse42();
}
#else
// Stand-in for the hardware-accelerated CRC-32C on platforms where no such
// implementation is compiled in.
//
// Always throws std::runtime_error. Callers should check
// crc32c_hw_supported() first and use the software implementation instead.
uint32_t crc32c_hw(const uint8_t *data, size_t nbytes,
    uint32_t startingChecksum) {
  // The arguments are deliberately ignored; silence unused-parameter warnings.
  (void)data;
  (void)nbytes;
  (void)startingChecksum;
  throw std::runtime_error("crc32c_hw is not implemented on this platform");
}
// No hardware-accelerated CRC-32C is compiled in for this platform, so
// support is unconditionally reported as absent.
bool crc32c_hw_supported() {
  const bool hardwarePathAvailable = false;
  return hardwarePathAvailable;
}
#endif
uint32_t crc32c_sw(const uint8_t *data, size_t nbytes,
uint32_t startingChecksum) {
// Reverse the bits in the starting checksum so they'll be in the
// right internal format for Boost's CRC engine.
// O(1)-time, branchless bit reversal algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html
startingChecksum = ((startingChecksum >> 1) & 0x55555555) |
((startingChecksum & 0x55555555) << 1);
startingChecksum = ((startingChecksum >> 2) & 0x33333333) |
((startingChecksum & 0x33333333) << 2);
startingChecksum = ((startingChecksum >> 4) & 0x0f0f0f0f) |
((startingChecksum & 0x0f0f0f0f) << 4);
startingChecksum = ((startingChecksum >> 8) & 0x00ff00ff) |
((startingChecksum & 0x00ff00ff) << 8);
startingChecksum = (startingChecksum >> 16) |
(startingChecksum << 16);
static const uint32_t CRC32C_POLYNOMIAL = 0x1EDC6F41;
boost::crc_optimal<32, CRC32C_POLYNOMIAL, ~0U, 0, true, true> sum(
startingChecksum);
sum.process_bytes(data, nbytes);
return sum.checksum();
}
} // folly::detail
// Public CRC-32C entry point: forwards to the hardware implementation when
// detail::crc32c_hw_supported() says it is usable, and to the software
// implementation otherwise.
uint32_t crc32c(const uint8_t *data, size_t nbytes,
    uint32_t startingChecksum) {
  if (!detail::crc32c_hw_supported()) {
    return detail::crc32c_sw(data, nbytes, startingChecksum);
  }
  return detail::crc32c_hw(data, nbytes, startingChecksum);
}
} // folly
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_CHECKSUM_H_
#define FOLLY_CHECKSUM_H_
#include <stdint.h>
#include <cstddef>
/*
* Checksum functions
*/
namespace folly {
/**
* Compute the CRC-32C checksum of a buffer, using a hardware-accelerated
* implementation if available or a portable software implementation as
* a default.
*
* @param data             Start of the bytes to checksum.
* @param nbytes           Number of bytes to process.
* @param startingChecksum Running checksum to continue from; defaults to ~0U
*                         for a fresh computation.
* @return The updated checksum. It can be passed back in as
*         startingChecksum to checksum a buffer in several pieces.
*
* @note CRC-32C is different from CRC-32; CRC-32C starts with a different
* polynomial and thus yields different results for the same input
* than a traditional CRC-32.
*/
uint32_t crc32c(const uint8_t* data, size_t nbytes,
uint32_t startingChecksum = ~0U);
} // folly
#endif /* FOLLY_CHECKSUM_H_ */
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef FOLLY_DETAIL_CHECKSUMDETAIL_H_
#define FOLLY_DETAIL_CHECKSUMDETAIL_H_
// Included so this header is self-contained: the declarations below use
// uint8_t, uint32_t and size_t.
#include <stdint.h>
#include <cstddef>
namespace folly { namespace detail {
/**
 * Compute a CRC-32C checksum of a buffer using a hardware-accelerated
 * implementation.
 *
 * @param data             Start of the bytes to checksum.
 * @param nbytes           Number of bytes to process.
 * @param startingChecksum Running checksum to continue from.
 * @return The updated checksum.
 * @throws std::runtime_error on platforms where no hardware implementation
 *         is compiled in.
 *
 * @note This function is exposed to support special cases where the
 *       calling code is absolutely certain it ought to invoke a hardware-
 *       accelerated CRC-32C implementation - unit tests, for example.  For
 *       all other scenarios, please call crc32c() and let it pick an
 *       implementation based on the capabilities of the underlying CPU.
 */
uint32_t crc32c_hw(const uint8_t* data, size_t nbytes,
    uint32_t startingChecksum = ~0U);
/**
 * Check whether a hardware-accelerated CRC-32C implementation is
 * supported on the current CPU.
 */
bool crc32c_hw_supported();
/**
 * Compute a CRC-32C checksum of a buffer using a portable,
 * software-only implementation.
 *
 * @param data             Start of the bytes to checksum.
 * @param nbytes           Number of bytes to process.
 * @param startingChecksum Running checksum to continue from.
 * @return The updated checksum.
 *
 * @note This function is exposed to support special cases where the
 *       calling code is absolutely certain it wants to use the software
 *       implementation instead of the hardware-accelerated code - unit
 *       tests, for example.  For all other scenarios, please call crc32c()
 *       and let it pick an implementation based on the capabilities of
 *       the underlying CPU.
 */
uint32_t crc32c_sw(const uint8_t* data, size_t nbytes,
    uint32_t startingChecksum = ~0U);
}} // folly::detail
#endif /* FOLLY_DETAIL_CHECKSUMDETAIL_H_ */
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "folly/Checksum.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include "folly/Benchmark.h"
#include "folly/Hash.h"
#include "folly/detail/ChecksumDetail.h"
namespace {
// Size in bytes of the shared test buffer: 64 * 1024 words of 64 bits each.
const unsigned int BUFFER_SIZE = 64 * 1024 * sizeof(uint64_t);
// Input data for every checksum test. Explicitly aligned for uint64_t
// because main() fills it through a uint64_t pointer and the expected-result
// table treats offset 8 as an 8-byte-aligned starting address.
alignas(sizeof(uint64_t)) uint8_t buffer[BUFFER_SIZE];
// One test case: a slice of the global buffer and the checksum it must
// produce.
struct ExpectedResult {
// Byte offset into buffer at which the input starts.
size_t offset;
// Number of bytes to checksum from that offset.
size_t length;
// Checksum expected when the computation starts from ~0U.
uint32_t crc32c;
};
// Table of { offset, length, expected checksum } cases, evaluated against
// the buffer contents generated in main(). Every implementation under test
// is run over the same table with a starting checksum of ~0U.
ExpectedResult expectedResults[] = {
// Zero-byte input: the starting value ~0U is expected back unchanged
{ 0, 0, ~0U },
// Small aligned inputs to test special cases in SIMD implementations
{ 8, 1, 1543413366 },
{ 8, 2, 523493126 },
{ 8, 3, 1560427360 },
{ 8, 4, 3422504776 },
{ 8, 5, 447841138 },
{ 8, 6, 3910050499 },
{ 8, 7, 3346241981 },
// Small unaligned inputs
{ 9, 1, 3855826643 },
{ 10, 2, 560880875 },
{ 11, 3, 1479707779 },
{ 12, 4, 2237687071 },
{ 13, 5, 4063855784 },
{ 14, 6, 2553454047 },
{ 15, 7, 1349220140 },
// Larger inputs to test leftover chunks at the end of aligned blocks
{ 8, 8, 627613930 },
{ 8, 9, 2105929409 },
{ 8, 10, 2447068514 },
{ 8, 11, 863807079 },
{ 8, 12, 292050879 },
{ 8, 13, 1411837737 },
{ 8, 14, 2614515001 },
{ 8, 15, 3579076296 },
{ 8, 16, 2897079161 },
{ 8, 17, 675168386 },
// Much larger inputs: the whole buffer, and half of it from an odd offset
{ 0, BUFFER_SIZE, 2961263300 },
{ 1, BUFFER_SIZE / 2, 1708529329 },
};
void testCRC32C(
std::function<uint32_t(const uint8_t*, size_t, uint32_t)> impl) {
for (auto expected : expectedResults) {
uint32_t result = impl(buffer + expected.offset, expected.length, ~0U);
EXPECT_EQ(result, expected.crc32c);
}
}
void testCRC32CContinuation(
std::function<uint32_t(const uint8_t*, size_t, uint32_t)> impl) {
for (auto expected : expectedResults) {
size_t partialLength = expected.length / 2;
uint32_t partialChecksum = impl(
buffer + expected.offset, partialLength, ~0U);
uint32_t result = impl(
buffer + expected.offset + partialLength,
expected.length - partialLength, partialChecksum);
EXPECT_EQ(result, expected.crc32c);
}
}
} // namespace
// Table-driven check of the software-only implementation.
TEST(Checksum, crc32c_software) {
  testCRC32C(&folly::detail::crc32c_sw);
}
// Two-step (continued) checksum check of the software-only implementation.
TEST(Checksum, crc32c_continuation_software) {
  testCRC32CContinuation(&folly::detail::crc32c_sw);
}
// Table-driven check of the hardware implementation; logs a warning and
// does nothing else when the CPU lacks support.
TEST(Checksum, crc32c_hardware) {
  if (!folly::detail::crc32c_hw_supported()) {
    LOG(WARNING) << "skipping hardware-accelerated CRC-32C tests" <<
      " (not supported on this CPU)";
    return;
  }
  testCRC32C(&folly::detail::crc32c_hw);
}
// Two-step (continued) checksum check of the hardware implementation; logs
// a warning and does nothing else when the CPU lacks support.
TEST(Checksum, crc32c_continuation_hardware) {
  if (!folly::detail::crc32c_hw_supported()) {
    LOG(WARNING) << "skipping hardware-accelerated CRC-32C tests" <<
      " (not supported on this CPU)";
    return;
  }
  testCRC32CContinuation(&folly::detail::crc32c_hw);
}
// Table-driven check of the public entry point, whichever implementation
// it selects on this machine.
TEST(Checksum, crc32c_autodetect) {
  testCRC32C(&folly::crc32c);
}
// Two-step (continued) checksum check of the public entry point.
TEST(Checksum, crc32c_continuation_autodetect) {
  testCRC32CContinuation(&folly::crc32c);
}
// Test driver: initializes gtest and gflags, fills the shared buffer with
// a reproducible pattern, runs the tests, and runs the benchmarks afterwards
// when the tests passed and the benchmark flag is set.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  google::ParseCommandLineFlags(&argc, &argv, true);

  // Populate the buffer with a deterministic chain of 64-bit words: the
  // first word is zero and every later word is fnv64_buf() of the 8 bytes
  // that precede it. The expected checksums depend on this exact content.
  uint64_t* const words = (uint64_t*)buffer;
  const size_t wordCount = BUFFER_SIZE / sizeof(uint64_t);
  words[0] = 0;
  for (size_t i = 1; i < wordCount; ++i) {
    const uint8_t* previousWord = buffer + (i - 1) * sizeof(uint64_t);
    words[i] = folly::hash::fnv64_buf(
      (const char*)previousWord, sizeof(uint64_t));
  }

  auto ret = RUN_ALL_TESTS();
  const bool testsPassed = !ret;
  if (testsPassed && FLAGS_benchmark) {
    folly::runBenchmarks();
  }
  return ret;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment