Commit 60be5ec6 authored by Nathan Bronson's avatar Nathan Bronson Committed by Facebook Github Bot

extract locality info from /proc/cpuinfo instead of sysfs

Summary:
Cache locality information under /sys is dispersed across a
very large number of files. This is a problem for short-lived processes
due to direct overheads and lock contention in the kernel. This diff
switches to a heuristic strategy that infers the interference pattern from
/proc/cpuinfo instead of computing it exactly. This doesn't necessarily
produce exactly the correct cache hierarchy info, but it yields the
correct topological sort for machines that have only core-local and
socket-local cache locality.

Differential Revision: D16459331

fbshipit-source-id: a322c126d1a4775d015bfb81451dbc6ad6fcc0fd
parent 2af6f0c7
...@@ -32,11 +32,11 @@ namespace folly { ...@@ -32,11 +32,11 @@ namespace folly {
///////////// CacheLocality ///////////// CacheLocality
/// Returns the best real CacheLocality information available /// Returns the CacheLocality information best for this machine
static CacheLocality getSystemLocalityInfo() { static CacheLocality getSystemLocalityInfo() {
if (kIsLinux) { if (kIsLinux) {
try { try {
return CacheLocality::readFromSysfs(); return CacheLocality::readFromProcCpuinfo();
} catch (...) { } catch (...) {
// keep trying // keep trying
} }
...@@ -187,6 +187,90 @@ CacheLocality CacheLocality::readFromSysfs() { ...@@ -187,6 +187,90 @@ CacheLocality CacheLocality::readFromSysfs() {
}); });
} }
/// Cheap pre-filter for /proc/cpuinfo lines: accepts only lines that are
/// longer than four characters and begin with 'p' or 'c'. The keys the
/// parser looks for ("processor", "physical id", "core id") all pass this
/// test; other lines starting with those letters may pass too and are
/// weeded out later by the parser.
static bool procCpuinfoLineRelevant(std::string const& line) {
  if (line.size() <= 4) {
    return false;
  }
  char const first = line.front();
  return first == 'p' || first == 'c';
}
/// Builds a CacheLocality estimate from the contents of /proc/cpuinfo,
/// supplied as one string per line.
///
/// Only lines whose key starts with "physical id", "core id" or
/// "processor" are used; the value is whatever follows ": " and is handed
/// to parseLeadingNumber(). Every "processor" line yields one CPU record
/// tagged with the physical id and core id seen for that record.
///
/// Throws std::runtime_error if no "processor" line is found, or if a
/// processor number is not smaller than the number of CPUs parsed (for
/// example when some CPUs are offline and the numbering has gaps), since
/// the per-cpu index table could not be built in that case.
CacheLocality CacheLocality::readFromProcCpuinfoLines(
    std::vector<std::string> const& lines) {
  size_t physicalId = 0;
  size_t coreId = 0;
  std::vector<std::tuple<size_t, size_t, size_t>> cpus;
  for (auto iter = lines.rbegin(); iter != lines.rend(); ++iter) {
    auto& line = *iter;
    if (!procCpuinfoLineRelevant(line)) {
      continue;
    }

    auto sepIndex = line.find(':');
    if (sepIndex == std::string::npos || sepIndex + 2 > line.size()) {
      continue;
    }
    // skip the ':' and the single space that follows it
    auto arg = line.substr(sepIndex + 2);

    // "physical id" is socket, which is the most important locality
    // context. "core id" is a real core, so two "processor" entries with
    // the same physical id and core id are hyperthreads of each other.
    // "processor" is the top line of each record, so when we hit it in
    // the reverse order then we can emit a record.
    //
    // rfind(prefix, 0) only tests for a match at position 0, so a
    // non-matching line is rejected without scanning its whole length.
    if (line.rfind("physical id", 0) == 0) {
      physicalId = parseLeadingNumber(arg);
    } else if (line.rfind("core id", 0) == 0) {
      coreId = parseLeadingNumber(arg);
    } else if (line.rfind("processor", 0) == 0) {
      auto cpu = parseLeadingNumber(arg);
      cpus.emplace_back(physicalId, coreId, cpu);
    }
  }

  if (cpus.empty()) {
    throw std::runtime_error("no CPUs parsed from /proc/cpuinfo");
  }

  // Sorting by (physical id, core id, cpu) places hyperthreads of the same
  // core next to each other and cores of the same socket next to each other.
  std::sort(cpus.begin(), cpus.end());

  // Count how many leading entries share the first entry's socket and core;
  // that count is used as the number of cpus per core for the whole machine.
  size_t cpusPerCore = 1;
  while (cpusPerCore < cpus.size() &&
         std::get<0>(cpus[cpusPerCore]) == std::get<0>(cpus[0]) &&
         std::get<1>(cpus[cpusPerCore]) == std::get<1>(cpus[0])) {
    ++cpusPerCore;
  }

  // we can't tell the real cache hierarchy from /proc/cpuinfo, but it
  // works well enough to assume there are 3 levels, L1 and L2 per-core
  // and L3 per socket
  std::vector<size_t> numCachesByLevel;
  numCachesByLevel.push_back(cpus.size() / cpusPerCore);
  numCachesByLevel.push_back(cpus.size() / cpusPerCore);
  numCachesByLevel.push_back(std::get<0>(cpus.back()) + 1);

  // Map each processor number to its position in the locality-sorted order.
  std::vector<size_t> indexes(cpus.size());
  for (size_t i = 0; i < cpus.size(); ++i) {
    auto const cpu = std::get<2>(cpus[i]);
    if (cpu >= indexes.size()) {
      // Processor numbers with gaps would index past the end of the table;
      // fail so that the caller can fall back to another locality source.
      throw std::runtime_error(
          "offline CPUs not supported for /proc/cpuinfo cache locality source");
    }
    indexes[cpu] = i;
  }

  return CacheLocality{
      cpus.size(), std::move(numCachesByLevel), std::move(indexes)};
}
/// Reads /proc/cpuinfo, keeps the lines accepted by
/// procCpuinfoLineRelevant(), and passes them to
/// readFromProcCpuinfoLines().
///
/// Throws std::runtime_error if /proc/cpuinfo cannot be opened; any
/// exception thrown by readFromProcCpuinfoLines() propagates unchanged.
CacheLocality CacheLocality::readFromProcCpuinfo() {
  std::vector<std::string> lines;
  {
    std::ifstream xi("/proc/cpuinfo");
    if (xi.fail()) {
      throw std::runtime_error("unable to open /proc/cpuinfo");
    }
    // std::getline grows the string as needed. Reading into a fixed-size
    // char buffer would set failbit on an over-long line and silently stop
    // parsing, dropping every CPU record after it.
    std::string str;
    // The cap on retained lines bounds memory use on unexpected input.
    while (lines.size() < 20000 && std::getline(xi, str)) {
      if (procCpuinfoLineRelevant(str)) {
        lines.push_back(str);
      }
    }
  }
  return readFromProcCpuinfoLines(lines);
}
CacheLocality CacheLocality::uniform(size_t numCpus) { CacheLocality CacheLocality::uniform(size_t numCpus) {
CacheLocality rv; CacheLocality rv;
......
...@@ -111,6 +111,18 @@ struct CacheLocality { ...@@ -111,6 +111,18 @@ struct CacheLocality {
/// Throws an exception if no cache information can be loaded. /// Throws an exception if no cache information can be loaded.
static CacheLocality readFromSysfs(); static CacheLocality readFromSysfs();
/// Same as readFromProcCpuinfo(), except that the input is taken from
/// memory rather than the file system. `lines` holds the text of
/// /proc/cpuinfo, one entry per line; only the "processor",
/// "physical id" and "core id" lines are used. The result assumes
/// three cache levels: L1 and L2 per core, and L3 per socket. Throws
/// std::runtime_error if no CPUs can be parsed from the input.
static CacheLocality readFromProcCpuinfoLines(
std::vector<std::string> const& lines);
/// Returns an estimate of the CacheLocality information by reading
/// /proc/cpuinfo. This isn't as accurate as readFromSysfs(), but
/// is a lot faster because the info isn't scattered across
/// hundreds of files. The hierarchy is inferred heuristically from
/// each processor's socket and core ids rather than read exactly.
/// Throws std::runtime_error if /proc/cpuinfo cannot be opened or
/// if no CPUs can be parsed from it.
static CacheLocality readFromProcCpuinfo();
/// Returns a usable (but probably not reflective of reality) /// Returns a usable (but probably not reflective of reality)
/// CacheLocality structure with the specified number of cpus and a /// CacheLocality structure with the specified number of cpus and a
/// single cache level that associates one cpu per cache. /// single cache level that associates one cpu per cache.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment