Commit 8470cb6e authored by Jason Evans's avatar Jason Evans Committed by Sara Golemon

Convert from jemalloc's obsolete *allocm() to *allocx().

Summary:
Convert from jemalloc's obsolete *allocm() to *allocx().

Strengthen goodMallocSize() to always return a jemalloc size class, so
that xallocx() success/failure detection is simple.

@override-unit-failures

Test Plan: Folly, unicorn, and HHVM tests.

Reviewed By: andrei.alexandrescu@fb.com

Subscribers: trunkagent, hphp-diffs@, ps, chaoyc, search-fbcode-diffs@, unicorn-diffs@, ptc, njormrod

FB internal diff: D1535841

Tasks: 4996808
parent accce27e
......@@ -98,16 +98,16 @@ private:
// constructors
Impl() : Allocator(), b_(nullptr), e_(nullptr), z_(nullptr) {}
Impl(const Allocator& a)
/* implicit */ Impl(const Allocator& a)
: Allocator(a), b_(nullptr), e_(nullptr), z_(nullptr) {}
Impl(Allocator&& a)
/* implicit */ Impl(Allocator&& a)
: Allocator(std::move(a)), b_(nullptr), e_(nullptr), z_(nullptr) {}
Impl(size_type n, const Allocator& a = Allocator())
/* implicit */ Impl(size_type n, const Allocator& a = Allocator())
: Allocator(a)
{ init(n); }
Impl(Impl&& other)
Impl(Impl&& other) noexcept
: Allocator(std::move(other)),
b_(other.b_), e_(other.e_), z_(other.z_)
{ other.b_ = other.e_ = other.z_ = nullptr; }
......@@ -716,7 +716,7 @@ public:
fbvector(const fbvector& other, const Allocator& a)
: fbvector(other.begin(), other.end(), a) {}
fbvector(fbvector&& other, const Allocator& a) : impl_(a) {
/* may throw */ fbvector(fbvector&& other, const Allocator& a) : impl_(a) {
if (impl_ == other.impl_) {
impl_.swapData(other.impl_);
} else {
......@@ -978,10 +978,11 @@ public:
if (newCap >= oldCap) return;
void* p = impl_.b_;
if ((rallocm && usingStdAllocator::value) &&
// xallocx() will shrink to precisely newCapacityBytes (which was generated
// by goodMallocSize()) if it successfully shrinks in place.
if ((usingJEMalloc() && usingStdAllocator::value) &&
newCapacityBytes >= folly::jemallocMinInPlaceExpandable &&
rallocm(&p, nullptr, newCapacityBytes, 0, ALLOCM_NO_MOVE)
== ALLOCM_SUCCESS) {
xallocx(p, newCapacityBytes, 0, 0) == newCapacityBytes) {
impl_.z_ += newCap - oldCap;
} else {
T* newB; // intentionally uninitialized
......@@ -1007,7 +1008,7 @@ public:
private:
bool reserve_in_place(size_type n) {
if (!usingStdAllocator::value || !rallocm) return false;
if (!usingStdAllocator::value || !usingJEMalloc()) return false;
// jemalloc can never grow in place blocks smaller than 4096 bytes.
if ((impl_.z_ - impl_.b_) * sizeof(T) <
......@@ -1015,8 +1016,7 @@ private:
auto const newCapacityBytes = folly::goodMallocSize(n * sizeof(T));
void* p = impl_.b_;
if (rallocm(&p, nullptr, newCapacityBytes, 0, ALLOCM_NO_MOVE)
== ALLOCM_SUCCESS) {
if (xallocx(p, newCapacityBytes, 0, 0) == newCapacityBytes) {
impl_.z_ = impl_.b_ + newCapacityBytes / sizeof(T);
return true;
}
......@@ -1515,17 +1515,17 @@ void fbvector<T, Allocator>::emplace_back_aux(Args&&... args) {
size_type byte_sz = folly::goodMallocSize(
computePushBackCapacity() * sizeof(T));
if (usingStdAllocator::value
&& rallocm
&& usingJEMalloc()
&& ((impl_.z_ - impl_.b_) * sizeof(T) >=
folly::jemallocMinInPlaceExpandable)) {
// Try to reserve in place.
// Ask rallocm to allocate in place at least size()+1 and at most sz space.
// rallocm will allocate as much as possible within that range, which
// Ask xallocx to allocate in place at least size()+1 and at most sz space.
// xallocx will allocate as much as possible within that range, which
// is the best possible outcome: if sz space is available, take it all,
// otherwise take as much as possible. If nothing is available, then fail.
// In this fashion, we never relocate if there is a possibility of
// expanding in place, and we never relocate by less than the desired
// amount unless we cannot expand further. Hence we will not relocate
// expanding in place, and we never reallocate by less than the desired
// amount unless we cannot expand further. Hence we will not reallocate
// sub-optimally twice in a row (modulo the blocking memory being freed).
size_type lower = folly::goodMallocSize(sizeof(T) + size() * sizeof(T));
size_type upper = byte_sz;
......@@ -1534,8 +1534,7 @@ void fbvector<T, Allocator>::emplace_back_aux(Args&&... args) {
void* p = impl_.b_;
size_t actual;
if (rallocm(&p, &actual, lower, extra, ALLOCM_NO_MOVE)
== ALLOCM_SUCCESS) {
if ((actual = xallocx(p, lower, extra, 0)) >= lower) {
impl_.z_ = impl_.b_ + actual / sizeof(T);
M_construct(impl_.e_, std::forward<Args>(args)...);
++impl_.e_;
......
......@@ -27,9 +27,11 @@ namespace folly {
// with --enable-stats.
bool usingJEMallocSlow() {
// Some platforms (*cough* OSX *cough*) require weak symbol checks to be
// in the form if (mallctl != NULL). Not if (mallctl) or if (!mallctl) (!!).
// http://goo.gl/xpmctm
if (allocm == nullptr || rallocm == nullptr || mallctl == nullptr) {
// in the form if (mallctl != nullptr). Not if (mallctl) or if (!mallctl)
// (!!). http://goo.gl/xpmctm
if (mallocx == nullptr || rallocx == nullptr || xallocx == nullptr
|| sallocx == nullptr || dallocx == nullptr || nallocx == nullptr
|| mallctl == nullptr) {
return false;
}
......
......@@ -20,6 +20,18 @@
#ifndef FOLLY_MALLOC_H_
#define FOLLY_MALLOC_H_
/**
* Define various MALLOCX_* macros normally provided by jemalloc. We define
* them so that we don't have to include jemalloc.h, in case the program is
* built without jemalloc support.
*/
#ifndef MALLOCX_LG_ALIGN
#define MALLOCX_LG_ALIGN(la) (la)
#endif
#ifndef MALLOCX_ZERO
#define MALLOCX_ZERO (static_cast<int>(0x40))
#endif
// If using fbstring from libstdc++, then just define stub code
// here to typedef the fbstring type into the folly namespace.
// This provides backwards compatibility for code that explicitly
......@@ -46,13 +58,21 @@ namespace folly {
#pragma GCC system_header
/**
* Declare rallocm(), allocm(), and mallctl() as weak symbols. These will be
* provided by jemalloc if we are using jemalloc, or will be NULL if we are
* using another malloc implementation.
* Declare *allocx() and mallctl() as weak symbols. These will be provided by
* jemalloc if we are using jemalloc, or will be NULL if we are using another
* malloc implementation.
*/
extern "C" int rallocm(void**, size_t*, size_t, size_t, int)
extern "C" void* mallocx(size_t, int)
__attribute__((__weak__));
extern "C" void* rallocx(void*, size_t, int)
__attribute__((__weak__));
extern "C" size_t xallocx(void*, size_t, size_t, int)
__attribute__((__weak__));
extern "C" int allocm(void**, size_t*, size_t, int)
extern "C" size_t sallocx(const void*, int)
__attribute__((__weak__));
extern "C" void dallocx(void*, int)
__attribute__((__weak__));
extern "C" size_t nallocx(size_t, int)
__attribute__((__weak__));
extern "C" int mallctl(const char*, void*, size_t*, void*, size_t)
__attribute__((__weak__));
......@@ -60,10 +80,12 @@ __attribute__((__weak__));
#include <bits/functexcept.h>
#define FOLLY_HAVE_MALLOC_H 1
#else
#include <folly/detail/Malloc.h>
#include <folly/detail/Malloc.h> /* nolint */
#include <folly/Portability.h>
#endif
#include <folly/ScopeGuard.h>
// for malloc_usable_size
// NOTE: FreeBSD 9 doesn't have malloc.h. Its definitions
// are found in stdlib.h.
......@@ -80,29 +102,6 @@ __attribute__((__weak__));
#include <new>
/**
* Define various ALLOCM_* macros normally provided by jemalloc. We define
* them so that we don't have to include jemalloc.h, in case the program is
* built without jemalloc support.
*/
#ifndef ALLOCM_SUCCESS
#define ALLOCM_SUCCESS 0
#define ALLOCM_ERR_OOM 1
#define ALLOCM_ERR_NOT_MOVED 2
#define ALLOCM_ZERO 64
#define ALLOCM_NO_MOVE 128
#define ALLOCM_LG_ALIGN(la) (la)
#if defined(JEMALLOC_MANGLE) && defined(JEMALLOC_EXPERIMENTAL)
#define rallocm je_rallocm
#define allocm je_allocm
#endif
#endif /* ALLOCM_SUCCESS */
#ifdef _LIBSTDCXX_FBSTRING
namespace std _GLIBCXX_VISIBILITY(default) {
_GLIBCXX_BEGIN_NAMESPACE_VERSION
......@@ -116,11 +115,11 @@ bool usingJEMallocSlow();
* Determine if we are using jemalloc or not.
*/
inline bool usingJEMalloc() {
// Checking for rallocm != NULL is not sufficient; we may be in a dlopen()ed
// module that depends on libjemalloc, so rallocm is resolved, but the main
// Checking for rallocx != NULL is not sufficient; we may be in a dlopen()ed
// module that depends on libjemalloc, so rallocx is resolved, but the main
// program might be using a different memory allocator. Look at the
// implementation of usingJEMallocSlow() for the (hacky) details.
static bool result = usingJEMallocSlow();
static const bool result = usingJEMallocSlow();
return result;
}
......@@ -128,33 +127,37 @@ inline bool usingJEMalloc() {
* For jemalloc's size classes, see
* http://www.canonware.com/download/jemalloc/jemalloc-latest/doc/jemalloc.html
*/
inline size_t goodMallocSize(size_t minSize) {
inline size_t goodMallocSize(size_t minSize) noexcept {
if (!usingJEMalloc()) {
// Not using jemalloc - no smarts
return minSize;
}
size_t goodSize;
SCOPE_EXIT { assert(nallocx(goodSize, 0) == goodSize); };
if (minSize <= 64) {
// Choose smallest allocation to be 64 bytes - no tripping over
// cache line boundaries, and small string optimization takes care
// of short strings anyway.
return 64;
return goodSize = 64;
}
if (minSize <= 512) {
// Round up to the next multiple of 64; we don't want to trip over
// cache line boundaries.
return (minSize + 63) & ~size_t(63);
return goodSize = (minSize + 63) & ~size_t(63);
}
if (minSize <= 3840) {
// Round up to the next multiple of 256
return (minSize + 255) & ~size_t(255);
if (minSize <= 3584) {
// Round up to the next multiple of 256. For some size classes jemalloc
// will additionally round up to the nearest multiple of 512, hence the
// nallocx() call.
return goodSize = nallocx((minSize + 255) & ~size_t(255), 0);
}
if (minSize <= 4072 * 1024) {
// Round up to the next multiple of 4KB
return (minSize + 4095) & ~size_t(4095);
return goodSize = (minSize + 4095) & ~size_t(4095);
}
// Holy Moly
// Round up to the next multiple of 4MB
return (minSize + 4194303) & ~size_t(4194303);
return goodSize = (minSize + 4194303) & ~size_t(4194303);
}
// We always request "good" sizes for allocation, so jemalloc can
......@@ -208,8 +211,14 @@ inline void* smartRealloc(void* p,
if (usingJEMalloc()) {
// using jemalloc's API. Don't forget that jemalloc can never grow
// in place blocks smaller than 4096 bytes.
//
// NB: newCapacity may not be precisely equal to a jemalloc size class,
// i.e. newCapacity is not guaranteed to be the result of a
// goodMallocSize() call, therefore xallocx() may return more than
// newCapacity bytes of space. Use >= rather than == to check whether
// xallocx() successfully expanded in place.
if (currentCapacity >= jemallocMinInPlaceExpandable &&
rallocm(&p, nullptr, newCapacity, 0, ALLOCM_NO_MOVE) == ALLOCM_SUCCESS) {
xallocx(p, newCapacity, 0, 0) >= newCapacity) {
// Managed to expand in place
return p;
}
......
......@@ -78,7 +78,7 @@ AC_CHECK_LIB([double-conversion],[ceil],[],[AC_MSG_ERROR(
AC_CHECK_LIB([event], [event_set], [], [AC_MSG_ERROR([Unable to find libevent])])
AC_CHECK_LIB([jemalloc], [rallocm])
AC_CHECK_LIB([jemalloc], [xallocx])
# Checks for typedefs, structures, and compiler characteristics.
AC_HEADER_STDBOOL
......@@ -269,7 +269,6 @@ AC_CHECK_FUNCS([getdelim \
pow \
strerror \
pthread_yield \
rallocm \
malloc_size \
malloc_usable_size \
memrchr \
......
......@@ -24,12 +24,20 @@
extern "C" {
#if FOLLY_HAVE_WEAK_SYMBOLS
int rallocm(void**, size_t*, size_t, size_t, int) __attribute__((__weak__));
int allocm(void**, size_t*, size_t, int) __attribute__((__weak__));
void* mallocx(size_t, int) __attribute__((__weak__));
void* rallocx(void*, size_t, int) __attribute__((__weak__));
size_t xallocx(void*, size_t, size_t, int) __attribute__((__weak__));
size_t sallocx(const void*, int) __attribute__((__weak__));
void dallocx(void*, int) __attribute__((__weak__));
size_t nallocx(size_t, int) __attribute__((__weak__));
int mallctl(const char*, void*, size_t*, void*, size_t) __attribute__((__weak__));
#else
extern int (*rallocm)(void**, size_t*, size_t, size_t, int);
extern int (*allocm)(void**, size_t*, size_t, int);
extern void* (*mallocx)(size_t, int);
extern void* (*rallocx)(void*, size_t, int);
extern size_t (*xallocx)(void*, size_t, size_t, int);
extern size_t (*sallocx)(const void*, int);
extern void (*dallocx)(void*, int);
extern size_t (*nallocx)(size_t, int);
extern int (*mallctl)(const char*, void*, size_t*, void*, size_t);
#endif
......
......@@ -19,8 +19,12 @@
extern "C" {
#if !FOLLY_HAVE_WEAK_SYMBOLS
int (*rallocm)(void**, size_t*, size_t, size_t, int) = nullptr;
int (*allocm)(void**, size_t*, size_t, int) = nullptr;
void* (*mallocx)(size_t, int) = nullptr;
void* (*rallocx)(void*, size_t, int) = nullptr;
size_t (*xallocx)(void*, size_t, size_t, int) = nullptr;
size_t (*sallocx)(const void*, int) = nullptr;
void (*dallocx)(void*, int) = nullptr;
size_t (*nallocx)(size_t, int) = nullptr;
int (*mallctl)(const char*, void*, size_t*, void*, size_t) = nullptr;
#endif
......
......@@ -336,40 +336,30 @@ struct StaticMeta {
// under the lock.
if (usingJEMalloc()) {
bool success = false;
size_t newByteSize = newCapacity * sizeof(ElementWrapper);
size_t realByteSize = 0;
size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0);
// Try to grow in place.
//
// Note that rallocm(ALLOCM_ZERO) will only zero newly allocated memory,
// Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory,
// even if a previous allocation allocated more than we requested.
// This is fine; we always use ALLOCM_ZERO with jemalloc and we
// This is fine; we always use MALLOCX_ZERO with jemalloc and we
// always expand our allocation to the real size.
if (prevCapacity * sizeof(ElementWrapper) >=
jemallocMinInPlaceExpandable) {
success = (rallocm(reinterpret_cast<void**>(&threadEntry->elements),
&realByteSize,
newByteSize,
0,
ALLOCM_NO_MOVE | ALLOCM_ZERO) == ALLOCM_SUCCESS);
success = (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO)
== newByteSize);
}
// In-place growth failed.
if (!success) {
// Note that, unlike calloc,allocm(... ALLOCM_ZERO) zeros all
// allocated bytes (*realByteSize) and not just the requested
// bytes (newByteSize)
success = (allocm(reinterpret_cast<void**>(&reallocated),
&realByteSize,
newByteSize,
ALLOCM_ZERO) == ALLOCM_SUCCESS);
success = ((reallocated = static_cast<ElementWrapper*>(
mallocx(newByteSize, MALLOCX_ZERO))) != nullptr);
}
if (success) {
// Expand to real size
assert(realByteSize / sizeof(ElementWrapper) >= newCapacity);
newCapacity = realByteSize / sizeof(ElementWrapper);
assert(newByteSize / sizeof(ElementWrapper) >= newCapacity);
newCapacity = newByteSize / sizeof(ElementWrapper);
} else {
throw std::bad_alloc();
}
......
......@@ -707,14 +707,14 @@ void IOBuf::reserveSlow(uint64_t minHeadroom, uint64_t minTailroom) {
uint64_t oldHeadroom = headroom();
// If we have a buffer allocated with malloc and we just need more tailroom,
// try to use realloc()/rallocm() to grow the buffer in place.
// try to use realloc()/xallocx() to grow the buffer in place.
SharedInfo* info = sharedInfo();
if (info && (info->freeFn == nullptr) && length_ != 0 &&
oldHeadroom >= minHeadroom) {
if (usingJEMalloc()) {
size_t headSlack = oldHeadroom - minHeadroom;
// We assume that tailroom is more useful and more important than
// headroom (not least because realloc / rallocm allow us to grow the
// headroom (not least because realloc / xallocx allow us to grow the
// buffer at the tail, but not at the head) So, if we have more headroom
// than we need, we consider that "wasted". We arbitrarily define "too
// much" headroom to be 25% of the capacity.
......@@ -722,23 +722,12 @@ void IOBuf::reserveSlow(uint64_t minHeadroom, uint64_t minTailroom) {
size_t allocatedCapacity = capacity() + sizeof(SharedInfo);
void* p = buf_;
if (allocatedCapacity >= jemallocMinInPlaceExpandable) {
// rallocm can write to its 2nd arg even if it returns
// ALLOCM_ERR_NOT_MOVED. So, we pass a temporary to its 2nd arg and
// update newAllocatedCapacity only on success.
size_t allocatedSize;
int r = rallocm(&p, &allocatedSize, newAllocatedCapacity,
0, ALLOCM_NO_MOVE);
if (r == ALLOCM_SUCCESS) {
if (xallocx(p, newAllocatedCapacity, 0, 0) == newAllocatedCapacity) {
newBuffer = static_cast<uint8_t*>(p);
newHeadroom = oldHeadroom;
newAllocatedCapacity = allocatedSize;
} else if (r == ALLOCM_ERR_OOM) {
// shouldn't happen as we don't actually allocate new memory
// (due to ALLOCM_NO_MOVE)
throw std::bad_alloc();
newAllocatedCapacity = newAllocatedCapacity;
}
// if ALLOCM_ERR_NOT_MOVED, do nothing, fall back to
// malloc/memcpy/free
// if xallocx failed, do nothing, fall back to malloc/memcpy/free
}
}
} else { // Not using jemalloc
......
......@@ -173,7 +173,7 @@ class TypedIOBuf {
void push(IT begin, IT end) {
uint32_t n = std::distance(begin, end);
if (usingJEMalloc()) {
// Rely on rallocm() and avoid exponential growth to limit
// Rely on xallocx() and avoid exponential growth to limit
// amount of memory wasted.
reserve(headroom(), n);
} else if (tailroom() < n) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment