Commit 59bd43f8 authored by Henry Filgueiras's avatar Henry Filgueiras Committed by Sara Golemon

Improve IPAddress::toFullyQualified() CPU performance

Summary:
Currently IPAddress::toFullyQualified() is fairly slow for IPv6.

This change implements more lightweight in_addr/in6_addr-to-string functions.

I also added a benchmark for comparison with inet_ntop.

This makes IPAddressV6::toFullyQualified() significantly faster than inet_ntop, and makes IPAddressV4::str() ~20ns faster than the previous implementation (previously ~80ns).

Previous benchmark:
============================================================================
folly/test/IPAddressBenchmark.cpp               relative  time/iter  iters/s
============================================================================
ipv4_to_string_inet_ntop                                   238.91ns    4.19M
ipv4_to_fully_qualified                          289.96%    82.39ns   12.14M
----------------------------------------------------------------------------
ipv6_to_string_inet_ntop                                   780.72ns    1.28M
ipv6_to_fully_qualified                           51.11%     1.53us  654.59K
============================================================================

With this change:
============================================================================
folly/test/IPAddressBenchmark.cpp               relative  time/iter  iters/s
============================================================================
ipv4_to_string_inet_ntop                                   238.06ns    4.20M
ipv4_to_fully_qualified                          364.76%    65.26ns   15.32M
----------------------------------------------------------------------------
ipv6_to_string_inet_ntop                                   770.74ns    1.30M
ipv6_to_fully_qualified                          791.63%    97.36ns   10.27M
============================================================================

Test Plan:
fbconfig folly/test:network_address_test folly/test:network_address_benchmark
fbmake runtests_opt

Reviewed By: simpkins@fb.com

Subscribers: ps, bmatheny

FB internal diff: D1477925

Tasks: 4832974
parent 1fc548e1
......@@ -185,35 +185,8 @@ IPAddressV4 IPAddressV4::mask(size_t numBits) const {
}
// public
// Formats this address as dotted-decimal text (four decimal octets joined
// by '.') by delegating to the shared formatter in folly::detail.
string IPAddressV4::str() const {
  return detail::fastIpv4ToString(addr_.inAddr_);
}
// public
......
......@@ -319,14 +319,7 @@ string IPAddressV6::str() const {
// public
string IPAddressV6::toFullyQualified() const {
  // Delegates to the shared formatter, which emits eight zero-padded,
  // lowercase hex groups separated by ':'.
  return detail::fastIpv6ToString(addr_.in6Addr_);
}
// public
......
......@@ -183,4 +183,113 @@ struct Bytes : private boost::noncopyable {
~Bytes() = delete;
};
//
// Write a maximum amount of base-converted character digits, of a
// given base, from an unsigned integral type into a byte buffer of
// sufficient size.
//
// This function does not append null terminators.
//
// Output buffer size must be guaranteed by caller (indirectly
// controlled by DigitCount template parameter).
//
// Having these parameters at compile time allows compiler to
// precompute several of the values, use smaller instructions, and
// better optimize surrounding code.
//
// IntegralType:
// - Something like uint8_t, uint16_t, etc
//
// DigitCount is the maximum number of digits to be printed
// - This is tied to IntegralType and Base. For example:
// - uint8_t in base 10 will print at most 3 digits ("255")
// - uint16_t in base 16 will print at most 4 hex digits ("ffff")
//
// Base is the desired output base of the string
// - Base 10 will print [0-9], base 16 will print [0-9a-f]
//
// PrintAllDigits:
// - Whether or not leading zeros should be printed
//
template<class IntegralType,
         IntegralType DigitCount,
         IntegralType Base = 10,
         bool PrintAllDigits = false,
         class = typename std::enable_if<
           std::is_integral<IntegralType>::value &&
           std::is_unsigned<IntegralType>::value,
           bool>::type>
inline void writeIntegerString(
  IntegralType val,
  char** buffer) {
  char* out = *buffer;

  // Without padding, zero is the one value that would otherwise emit
  // nothing, so it gets a single '0' here.
  if (!PrintAllDigits && val == 0) {
    *out = '0';
    *buffer = out + 1;
    return;
  }

  // Place value of the most significant digit: Base^(DigitCount - 1).
  IntegralType place = 1;
  for (int remaining = DigitCount; remaining > 1; --remaining) {
    place *= Base;
  }

  // Walk from the most significant place down to the units place.
  // Leading zeros are skipped until the first digit has been emitted,
  // unless PrintAllDigits forces emission from the start.
  bool emitting = PrintAllDigits;
  for (; place != 0; place /= Base) {
    if (!emitting && val < place) {
      continue;
    }
    const IntegralType digit = val / place;
    const bool usesDecimalGlyph = (Base == 10) || (digit < 10);
    // Digits 10 and above map onto 'a', 'b', ... (lowercase).
    *out++ = static_cast<char>(digit + (usesDecimalGlyph ? '0' : ('a' - 10)));
    val %= place;
    emitting = true;
  }

  // Report the new write position back to the caller.
  *buffer = out;
}
inline std::string fastIpv4ToString(
  const in_addr& inAddr) {
  // View the 32-bit address as its four bytes, in the order they are stored.
  const uint8_t* octet = reinterpret_cast<const uint8_t*>(&inAddr.s_addr);

  // Sized for the longest possible rendering; the terminating NUL slot of
  // the literal is never written.
  char text[sizeof("255.255.255.255")];
  char* cursor = text;

  for (int idx = 0; idx < 4; ++idx) {
    if (idx > 0) {
      *cursor++ = '.';
    }
    // Decimal, no leading zeros, at most three digits per octet.
    writeIntegerString<uint8_t, 3>(octet[idx], &cursor);
  }

  return std::string(text, cursor - text);
}
inline std::string fastIpv6ToString(const in6_addr& in6Addr) {
const uint16_t* bytes = reinterpret_cast<const uint16_t*>(&in6Addr.s6_addr16);
char str[sizeof("2001:0db8:0000:0000:0000:ff00:0042:8329")];
char* buf = str;
for (int i = 0; i < 8; ++i) {
writeIntegerString<uint16_t,
4, // at most 4 hex digits per ushort
16, // base 16 (hex)
true>(htons(bytes[i]), &buf);
if(i != 7) {
*(buf++) = ':';
}
}
return std::string(str, buf-str);
}
}} // folly::detail
/*
* Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <folly/IPAddress.h>
#include <glog/logging.h>
#include <folly/Benchmark.h>
using namespace folly;
using std::string;
// Baseline: format an IPv4 address with the C library's inet_ntop().
BENCHMARK(ipv4_to_string_inet_ntop, iters) {
  folly::IPAddressV4 ipv4Addr("127.0.0.1");
  in_addr ip = ipv4Addr.toAddr();
  char outputString[INET_ADDRSTRLEN] = {0};
  while (iters--) {
    const char* val = inet_ntop(
      AF_INET,
      &ip,
      outputString,
      sizeof(outputString));
    // Consume the result so it is neither an unused variable nor a
    // candidate for elimination by the optimizer.
    folly::doNotOptimizeAway(val);
  }
}
// Measured relative to the preceding BENCHMARK: format an IPv4 address
// through IPAddressV4::toFullyQualified().
BENCHMARK_RELATIVE(ipv4_to_fully_qualified, iters) {
  // The address object is built once, outside the loop.
  IPAddressV4 loopback("127.0.0.1");
  for (; iters != 0; --iters) {
    // A fresh string is constructed on every iteration.
    string rendered = loopback.toFullyQualified();
  }
}
BENCHMARK_DRAW_LINE()
// Baseline: format an IPv6 address with the C library's inet_ntop().
BENCHMARK(ipv6_to_string_inet_ntop, iters) {
  IPAddressV6 ipv6Addr("F1E0:0ACE:FB94:7ADF:22E8:6DE6:9672:3725");
  in6_addr ip = ipv6Addr.toAddr();
  char outputString[INET6_ADDRSTRLEN] = {0};
  while (iters--) {
    const char* val = inet_ntop(
      AF_INET6,
      &ip,
      outputString,
      sizeof(outputString));
    // Consume the result so it is neither an unused variable nor a
    // candidate for elimination by the optimizer.
    folly::doNotOptimizeAway(val);
  }
}
// Measured relative to the preceding BENCHMARK: format an IPv6 address
// through IPAddressV6::toFullyQualified().
BENCHMARK_RELATIVE(ipv6_to_fully_qualified, iters) {
  IPAddressV6 address("F1E0:0ACE:FB94:7ADF:22E8:6DE6:9672:3725");
  // A single string is declared up front and assigned to on each iteration.
  string rendered;
  for (; iters != 0; --iters) {
    rendered = address.toFullyQualified();
  }
}
// Benchmark results on Intel Xeon CPU E5-2660 @ 2.20GHz
// ============================================================================
// folly/test/IPAddressBenchmark.cpp relative time/iter iters/s
// ============================================================================
// ipv4_to_string_inet_ntop 237.87ns 4.20M
// ipv4_to_fully_qualified 362.31% 65.65ns 15.23M
// ----------------------------------------------------------------------------
// ipv6_to_string_inet_ntop 768.60ns 1.30M
// ipv6_to_fully_qualified 821.81% 93.53ns 10.69M
// ============================================================================
// Entry point: parse command-line flags, then run the benchmarks.
int main(int argc, char** argv) {
  // NOTE(review): the trailing `true` is presumably gflags' remove_flags
  // option (strip recognised flags from argv) — confirm against gflags docs.
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  runBenchmarks();

  return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment