Commit 60be5ec6, authored by Nathan Bronson, committed by Facebook Github Bot

extract locality info from /proc/cpuinfo instead of sysfs

Summary:
Cache locality information under /sys is dispersed across a
very large number of files. This is a problem for short-lived processes
due to direct overheads and lock contention in the kernel. This diff
switches to a heuristic strategy that infers the interference pattern from
/proc/cpuinfo instead of computing it exactly. This doesn't necessarily
produce exactly the correct cache hierarchy info, but it yields the
correct topological sort for machines that have only core-local and
socket-local cache locality.

Differential Revision: D16459331

fbshipit-source-id: a322c126d1a4775d015bfb81451dbc6ad6fcc0fd
parent 2af6f0c7
......@@ -32,11 +32,11 @@ namespace folly {
///////////// CacheLocality
/// Returns the best real CacheLocality information available
/// Returns the CacheLocality information best for this machine
static CacheLocality getSystemLocalityInfo() {
if (kIsLinux) {
try {
return CacheLocality::readFromSysfs();
return CacheLocality::readFromProcCpuinfo();
} catch (...) {
// keep trying
}
......@@ -187,6 +187,90 @@ CacheLocality CacheLocality::readFromSysfs() {
});
}
/// Cheap pre-filter for /proc/cpuinfo lines: accepts only lines that are
/// longer than four characters and begin with 'p' or 'c'. Every key the
/// parser cares about ("processor", "physical id", "core id") passes; most
/// of the other lines are discarded without further inspection.
static bool procCpuinfoLineRelevant(std::string const& line) {
  if (line.size() <= 4) {
    return false;
  }
  char const first = line.front();
  return first == 'p' || first == 'c';
}
/// Builds a CacheLocality estimate from the lines of a /proc/cpuinfo dump.
///
/// Only the "processor", "physical id" and "core id" keys are examined.
/// The lines are walked in reverse so that, by the time the "processor"
/// line that opens a record is reached, the record's physical id and core
/// id have already been seen. A record that lacks one of those keys
/// inherits the value most recently seen (initially 0).
///
/// Throws std::runtime_error if no "processor" entry is found, or if the
/// processor ids are not all smaller than the number of entries (e.g. a
/// gap left by an offline cpu), since the locality index table is indexed
/// directly by processor id.
CacheLocality CacheLocality::readFromProcCpuinfoLines(
    std::vector<std::string> const& lines) {
  size_t physicalId = 0;
  size_t coreId = 0;
  // (physical id, core id, processor): the default tuple ordering then
  // places hyperthreads of a core next to each other, and cores of the
  // same socket next to each other.
  std::vector<std::tuple<size_t, size_t, size_t>> cpus;
  for (auto iter = lines.rbegin(); iter != lines.rend(); ++iter) {
    auto& line = *iter;
    if (!procCpuinfoLineRelevant(line)) {
      continue;
    }

    auto sepIndex = line.find(':');
    if (sepIndex == std::string::npos || sepIndex + 2 > line.size()) {
      continue;
    }
    // skip the ':' and the single space that follows it
    auto arg = line.substr(sepIndex + 2);

    // Anchored prefix test: rfind(prefix, 0) can only match at position 0,
    // so it never scans the rest of the line the way find(prefix) would.
    auto startsWith = [&line](char const* prefix) {
      return line.rfind(prefix, 0) == 0;
    };

    // "physical id" is socket, which is the most important locality
    // context. "core id" is a real core, so two "processor" entries with
    // the same physical id and core id are hyperthreads of each other.
    // "processor" is the top line of each record, so when we hit it in
    // the reverse order then we can emit a record.
    if (startsWith("physical id")) {
      physicalId = parseLeadingNumber(arg);
    } else if (startsWith("core id")) {
      coreId = parseLeadingNumber(arg);
    } else if (startsWith("processor")) {
      auto cpu = parseLeadingNumber(arg);
      cpus.emplace_back(physicalId, coreId, cpu);
    }
  }

  if (cpus.empty()) {
    throw std::runtime_error("no CPUs parsed from /proc/cpuinfo");
  }

  std::sort(cpus.begin(), cpus.end());

  // After sorting, the leading run of entries that share the first entry's
  // (physical id, core id) is taken as the number of cpus per core.
  size_t cpusPerCore = 1;
  while (cpusPerCore < cpus.size() &&
         std::get<0>(cpus[cpusPerCore]) == std::get<0>(cpus[0]) &&
         std::get<1>(cpus[cpusPerCore]) == std::get<1>(cpus[0])) {
    ++cpusPerCore;
  }

  // we can't tell the real cache hierarchy from /proc/cpuinfo, but it
  // works well enough to assume there are 3 levels, L1 and L2 per-core
  // and L3 per socket
  std::vector<size_t> numCachesByLevel;
  numCachesByLevel.push_back(cpus.size() / cpusPerCore);
  numCachesByLevel.push_back(cpus.size() / cpusPerCore);
  numCachesByLevel.push_back(std::get<0>(cpus.back()) + 1);

  // indexes[cpu] is the position of that cpu in the locality-sorted order.
  std::vector<size_t> indexes(cpus.size());
  for (size_t i = 0; i < cpus.size(); ++i) {
    auto cpu = std::get<2>(cpus[i]);
    if (cpu >= indexes.size()) {
      // A processor id beyond the number of parsed entries would write
      // past the end of the table; fail so the caller can fall back to
      // another locality source.
      throw std::runtime_error(
          "non-contiguous processor ids in /proc/cpuinfo are not supported");
    }
    indexes[cpu] = i;
  }

  return CacheLocality{
      cpus.size(), std::move(numCachesByLevel), std::move(indexes)};
}
/// Reads /proc/cpuinfo, keeps only the lines accepted by
/// procCpuinfoLineRelevant(), and hands them to
/// readFromProcCpuinfoLines().
///
/// Throws std::runtime_error if /proc/cpuinfo cannot be opened; any
/// exception raised by readFromProcCpuinfoLines() propagates as well.
CacheLocality CacheLocality::readFromProcCpuinfo() {
  std::vector<std::string> lines;
  {
    std::ifstream xi("/proc/cpuinfo");
    if (xi.fail()) {
      throw std::runtime_error("unable to open /proc/cpuinfo");
    }
    // std::getline into a std::string has no line-length limit. Reading
    // into a fixed-size char buffer would set failbit on an overlong line
    // (for example a very long "flags" entry) and silently stop the loop,
    // dropping every cpu record after it.
    std::string line;
    // The cap on retained lines bounds memory use on unexpected input.
    while (lines.size() < 20000 && std::getline(xi, line)) {
      if (procCpuinfoLineRelevant(line)) {
        // safe to reuse `line` afterwards: std::getline clears it first
        lines.emplace_back(std::move(line));
      }
    }
  }
  return readFromProcCpuinfoLines(lines);
}
CacheLocality CacheLocality::uniform(size_t numCpus) {
CacheLocality rv;
......
......@@ -111,6 +111,18 @@ struct CacheLocality {
/// Throws an exception if no cache information can be loaded.
static CacheLocality readFromSysfs();
/// Same as readFromProcCpuinfo(), except that the input is taken from
/// memory rather than the file system: each element of `lines` is one
/// line of /proc/cpuinfo content. Throws std::runtime_error if no
/// "processor" entry can be parsed from `lines`.
static CacheLocality readFromProcCpuinfoLines(
std::vector<std::string> const& lines);
/// Returns an estimate of the CacheLocality information by reading
/// /proc/cpuinfo. This isn't as accurate as readFromSysfs(), but
/// is a lot faster because the info isn't scattered across
/// hundreds of files. The estimate assumes three cache levels (two
/// per-core and one per-socket) rather than the real hierarchy.
/// Throws an exception if /proc/cpuinfo cannot be opened or if no
/// cpus can be parsed from it.
static CacheLocality readFromProcCpuinfo();
/// Returns a usable (but probably not reflective of reality)
/// CacheLocality structure with the specified number of cpus and a
/// single cache level that associates one cpu per cache.
......
......@@ -321,6 +321,684 @@ TEST(CacheLocality, FakeSysfs) {
EXPECT_EQ(expected.localityIndexByCpu, parsed.localityIndexByCpu);
}
static const std::vector<std::string> fakeProcCpuinfo = {
"processor : 0",
"vendor_id : GenuineIntel",
"cpu family : 6",
"model : 79",
"model name : Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz",
"stepping : 1",
"microcode : 0xb00001b",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"siblings : 28",
"core id : 0",
"cpu cores : 14",
"apicid : 0",
"initial apicid : 0",
"fpu : yes",
"fpu_exception : yes",
"cpuid level : 20",
"wp : yes",
"flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch epb cat_l3 cdp_l3 intel_ppin intel_pt tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm cqm rdt_a rdseed adx smap xsaveopt cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local dtherm ida arat pln pts",
"bugs :",
"bogomips : 4788.90",
"clflush size : 64",
"cache_alignment : 64",
"address sizes : 46 bits physical, 48 bits virtual",
"power management:",
"",
"processor : 1",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 1",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 2",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 2",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 3",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 3",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 4",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 4",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 5",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 5",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 6",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 6",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 7",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 8",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 8",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 9",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 9",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 10",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 10",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 11",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 11",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 12",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 12",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 13",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 13",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 14",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 14",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 0",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 15",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 1",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 16",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 2",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 17",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 3",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 18",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 4",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 19",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 5",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 20",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 6",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 21",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 8",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 22",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 9",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 23",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 10",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 24",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 11",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 25",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 12",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 26",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 13",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 27",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 14",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 28",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 0",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 29",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 1",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 30",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 2",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 31",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 3",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 32",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 4",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 33",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 5",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 34",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 6",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 35",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 8",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 36",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 9",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 37",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 10",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 38",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 11",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 39",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 12",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 40",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 13",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 41",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 0",
"core id : 14",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 42",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 0",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 43",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 1",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 44",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 2",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 45",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 3",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 46",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 4",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 47",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 5",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 48",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 6",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 49",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 8",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 50",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 9",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 51",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 10",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 52",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 11",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 53",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 12",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 54",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 13",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
"processor : 55",
"cpu family : 6",
"cpu MHz : 2401.000",
"cache size : 35840 KB",
"physical id : 1",
"core id : 14",
"cpu cores : 14",
"cpuid level : 20",
"clflush size : 64",
"cache_alignment : 64",
"power management:",
};
/// This is the expected CacheLocality structure for fakeProcCpuinfo:
/// 56 cpus, 28 caches at each of the first two levels (one per distinct
/// (physical id, core id) pair, each pair being shared by two processors)
/// and 2 caches at the third level (one per physical id). In the fixture,
/// processors k and k + 28 (for k in 0..27) share a physical id and core
/// id, so they receive the adjacent locality indexes 2k and 2k + 1.
static const CacheLocality fakeProcCpuinfoLocality = {
56,
{28, 28, 2},
{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36,
38, 40, 42, 44, 46, 48, 50, 52, 54, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19,
21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55}};
// Parses the in-memory /proc/cpuinfo fixture and checks every field of the
// result against the hand-computed fakeProcCpuinfoLocality.
TEST(CacheLocality, ProcCpu) {
  const auto actual = CacheLocality::readFromProcCpuinfoLines(fakeProcCpuinfo);

  EXPECT_EQ(fakeProcCpuinfoLocality.numCpus, actual.numCpus);
  EXPECT_EQ(fakeProcCpuinfoLocality.numCachesByLevel, actual.numCachesByLevel);
  EXPECT_EQ(
      fakeProcCpuinfoLocality.localityIndexByCpu, actual.localityIndexByCpu);
}
// On Linux, reads the real machine's locality through both /proc/cpuinfo
// and sysfs, and expects both to report hardware_concurrency() cpus and the
// same per-cpu locality indexes. Does nothing on other platforms.
TEST(CacheLocality, LinuxActual) {
  if (kIsLinux) {
    const auto fromCpuinfo = CacheLocality::readFromProcCpuinfo();
    EXPECT_EQ(fromCpuinfo.numCpus, std::thread::hardware_concurrency());

    const auto fromSysfs = CacheLocality::readFromSysfs();
    EXPECT_EQ(fromSysfs.numCpus, std::thread::hardware_concurrency());

    EXPECT_EQ(fromCpuinfo.localityIndexByCpu, fromSysfs.localityIndexByCpu);
  }
}
// Dumps the CacheLocality chosen for this machine to the INFO log, one
// entry per line; makes no assertions.
TEST(CacheLocality, LogSystem) {
  const auto& locality = CacheLocality::system<>();

  LOG(INFO) << "numCpus= " << locality.numCpus;

  LOG(INFO) << "numCachesByLevel= ";
  std::size_t level = 0;
  for (const auto& cacheCount : locality.numCachesByLevel) {
    LOG(INFO) << " [" << level << "]= " << cacheCount;
    ++level;
  }

  LOG(INFO) << "localityIndexByCpu= ";
  std::size_t cpu = 0;
  for (const auto& localityIndex : locality.localityIndexByCpu) {
    LOG(INFO) << " [" << cpu << "]= " << localityIndex;
    ++cpu;
  }
}
#if FOLLY_HAVE_LINUX_VDSO
TEST(Getcpu, VdsoGetcpu) {
unsigned cpu;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment