Commit de2cc5ee authored by Peter Griess's avatar Peter Griess Committed by Owen Yamauchi

Add symbol name resolution and value retrieval

Summary:
- Add ElfFile::getSymbolByName(), which finds a Symbol object
corresponding to the symbol w/ the given name
- Add ElfFile::getSymbolValue(), which resolves the Symbol object to a
value in the mapped file, following pointers if necessary

Test Plan: - Unit tests

Reviewed By: simpkins@fb.com

FB internal diff: D740183
parent d7050ad5
......@@ -59,6 +59,34 @@ const char* ElfFile::iterateStrings(const ElfW(Shdr)& stringTable, Fn fn)
return ptr != end ? ptr : nullptr;
}
/**
 * Call fn on each entry of the symbol table described by `section`, in
 * file order, stopping at the first entry for which fn returns true.
 *
 * Returns a pointer to that entry, or nullptr if fn returned false for
 * every entry. Throws (via enforce) if the section's entry size is not
 * sizeof(ElfW(Sym)) or if the table does not fit inside the mapped file.
 */
template <class Fn>
const ElfW(Sym)* ElfFile::iterateSymbols(const ElfW(Shdr)& section, Fn fn)
  const {
  enforce(section.sh_entsize == sizeof(ElfW(Sym)),
          "invalid entry size in symbol table");

  const ElfW(Sym)* sym = &at<ElfW(Sym)>(section.sh_offset);

  // at() only validates the first entry; make sure the whole table lies
  // inside the mapping before walking it. Written without an addition so
  // that a corrupt sh_offset / sh_size pair cannot wrap around.
  enforce(section.sh_size <= length_ &&
          section.sh_offset <= length_ - section.sh_size,
          "symbol table extends past the end of the file");

  const ElfW(Sym)* end = sym + (section.sh_size / section.sh_entsize);
  for (; sym < end; ++sym) {
    if (fn(*sym)) {
      return sym;
    }
  }

  return nullptr;
}
/**
 * Like iterateSymbols(), but fn is only invoked for symbols whose type
 * (as extracted from st_info) equals `type`; all other symbols are
 * skipped.
 */
template <class Fn>
const ElfW(Sym)* ElfFile::iterateSymbolsWithType(const ElfW(Shdr)& section,
                                                 uint32_t type, Fn fn) const {
  auto typedVisitor = [&](const ElfW(Sym)& candidate) -> bool {
    // N.B. st_info has the same representation on 32- and 64-bit platforms
    if (ELF32_ST_TYPE(candidate.st_info) != type) {
      return false;
    }
    return static_cast<bool>(fn(candidate));
  };

  return iterateSymbols(section, typedVisitor);
}
} // namespace symbolizer
} // namespace folly
......
......@@ -234,42 +234,76 @@ const ElfW(Shdr)* ElfFile::getSectionByName(const char* name) const {
/**
 * Find the defined STT_OBJECT or STT_FUNC symbol whose
 * [st_value, st_value + st_size) range contains `address`.
 *
 * Returns the <section, symbol> pair for the first match, or
 * <nullptr, nullptr> if no symbol matches.
 */
ElfFile::Symbol ElfFile::getDefinitionByAddress(uintptr_t address) const {
  Symbol foundSymbol {nullptr, nullptr};

  auto findSection = [&](const ElfW(Shdr)& section) {
    // TODO(tudorb): Handle STT_TLS, but then we'd have to understand
    // thread-local relocations. If all we're looking up is functions
    // (instruction pointers), it doesn't matter, though.
    auto findSymbols = [&](const ElfW(Sym)& sym) {
      if (sym.st_shndx == SHN_UNDEF) {
        return false;  // not a definition
      }
      // Subtraction form of "address < st_value + st_size", so that a
      // symbol near the top of the address space cannot wrap around.
      if (address >= sym.st_value &&
          address - sym.st_value < sym.st_size) {
        foundSymbol.first = &section;
        foundSymbol.second = &sym;
        return true;
      }

      return false;
    };

    return iterateSymbolsWithType(section, STT_OBJECT, findSymbols) ||
      iterateSymbolsWithType(section, STT_FUNC, findSymbols);
  };

  // Try the .dynsym section first if it exists, it's smaller.
  (iterateSectionsWithType(SHT_DYNSYM, findSection) ||
   iterateSectionsWithType(SHT_SYMTAB, findSection));

  return foundSymbol;
}
/**
 * Find the defined STT_OBJECT or STT_FUNC symbol whose name equals `name`.
 *
 * Returns the <section, symbol> pair for the first match, or
 * <nullptr, nullptr> if there is no such symbol or `name` is null.
 */
ElfFile::Symbol ElfFile::getSymbolByName(const char* name) const {
  Symbol foundSymbol{nullptr, nullptr};

  if (name == nullptr) {
    return foundSymbol;  // strcmp() below needs a valid string
  }

  auto findSection = [&](const ElfW(Shdr)& section) -> bool {
    // This section has no string table associated w/ its symbols; hence we
    // can't get names for them
    if (section.sh_link == SHN_UNDEF) {
      return false;
    }

    auto findSymbols = [&](const ElfW(Sym)& sym) -> bool {
      if (sym.st_shndx == SHN_UNDEF) {
        return false;  // not a definition
      }
      if (sym.st_name == 0) {
        return false;  // no name for this symbol
      }
      // sh_link of a symbol table section is the index of the string
      // table holding its symbol names.
      const char* sym_name = getString(
        *getSectionByIndex(section.sh_link), sym.st_name);
      if (strcmp(sym_name, name) == 0) {
        foundSymbol.first = &section;
        foundSymbol.second = &sym;
        return true;
      }

      return false;
    };

    return iterateSymbolsWithType(section, STT_OBJECT, findSymbols) ||
      iterateSymbolsWithType(section, STT_FUNC, findSymbols);
  };

  // Try the .dynsym section first if it exists, it's smaller.
  (iterateSectionsWithType(SHT_DYNSYM, findSection) ||
   iterateSectionsWithType(SHT_SYMTAB, findSection));

  return foundSymbol;
}
/**
 * Find a section whose [sh_addr, sh_addr + sh_size) range contains addr.
 *
 * Returns whatever iterateSections() yields for the predicate below;
 * presumably the first matching section, or nullptr if none matches
 * (callers in this file check the result for null).
 */
const ElfW(Shdr)* ElfFile::getSectionContainingAddress(ElfW(Addr) addr) const {
  return iterateSections([&](const ElfW(Shdr)& sh) -> bool {
    // Subtraction form of "addr < sh_addr + sh_size": the sum could wrap
    // around for a corrupt section header.
    return (addr >= sh.sh_addr) && ((addr - sh.sh_addr) < sh.sh_size);
  });
}
const char* ElfFile::getSymbolName(Symbol symbol) const {
if (!symbol.first || !symbol.second) {
return nullptr;
......
......@@ -33,6 +33,13 @@
namespace folly {
namespace symbolizer {
/**
 * Throw std::runtime_error unless `v` is true. The exception message is
 * built by passing `args` to folly::to<std::string>(), and only when the
 * check fails.
 */
template <class... Args>
inline void enforce(bool v, Args... args) {
  const bool failed = !v;
  if (UNLIKELY(failed)) {
    throw std::runtime_error(folly::to<std::string>(args...));
  }
}
/**
* ELF file parser.
*
......@@ -101,6 +108,18 @@ class ElfFile {
template <class Fn>
const ElfW(Shdr)* iterateSectionsWithType(uint32_t type, Fn fn) const;
/**
* Iterate over all symbols within a given section.
*
* Returns a pointer to the current ("found") symbol when fn returned true,
* or nullptr if fn returned false for all symbols.
*/
template <class Fn>
const ElfW(Sym)* iterateSymbols(const ElfW(Shdr)& section, Fn fn) const;
/**
 * Iterate over the symbols of the given type within a given section.
 *
 * fn is only called for symbols whose type (taken from st_info) equals
 * `type`. Returns a pointer to the first such symbol for which fn returned
 * true, or nullptr if there is none.
 */
template <class Fn>
const ElfW(Sym)* iterateSymbolsWithType(const ElfW(Shdr)& section,
uint32_t type, Fn fn) const;
/**
* Find symbol definition by address.
* Note that this is the file virtual address, so you need to undo
......@@ -109,11 +128,52 @@ class ElfFile {
typedef std::pair<const ElfW(Shdr)*, const ElfW(Sym)*> Symbol;
Symbol getDefinitionByAddress(uintptr_t address) const;
/**
* Find symbol definition by name.
*
* If a symbol with this name cannot be found, a <nullptr, nullptr> Symbol
* will be returned. This is O(N) in the number of symbols in the file.
*/
Symbol getSymbolByName(const char* name) const;
/**
 * Get the value of a symbol.
 *
 * Looks up the section named by the symbol's st_shndx and reads a T from
 * the mapped file at the location corresponding to the symbol's st_value.
 * Throws (via enforce) if `symbol` is null, if its section cannot be
 * resolved, or if the value does not lie within that section.
 */
template <class T>
const T& getSymbolValue(const ElfW(Sym)* symbol) const {
  // A failed getSymbolByName() yields a null symbol pointer; report that
  // as an error instead of dereferencing it.
  enforce(symbol != nullptr, "Symbol is null");
  const ElfW(Shdr)* section = getSectionByIndex(symbol->st_shndx);
  enforce(section, "Symbol's section index is invalid");

  return valueAt<T>(*section, symbol->st_value);
}
/**
* Get the value of the object stored at the given address.
*
* This is the function that you want to use in conjunction with
* getSymbolValue() to follow pointers. For example, to get the value of
* a char* symbol, you'd do something like this:
*
* auto sym = getSymbolByName("someGlobalValue");
* auto addr = getSymbolValue<ElfW(Addr)>(sym.second);
* const char* str = &getAddressValue<const char>(addr);
*/
template <class T>
const T& getAddressValue(const ElfW(Addr) addr) const {
const ElfW(Shdr)* section = getSectionContainingAddress(addr);
enforce(section, "Address does not refer to existing section");
return valueAt<T>(*section, addr);
}
/**
* Retrieve symbol name.
*/
const char* getSymbolName(Symbol symbol) const;
/** Find the section containing the given address */
const ElfW(Shdr)* getSectionContainingAddress(ElfW(Addr) addr) const;
private:
void init();
void destroy();
......@@ -124,10 +184,33 @@ class ElfFile {
template <class T>
const typename std::enable_if<std::is_pod<T>::value, T>::type&
at(off_t offset) const {
at(ElfW(Off) offset) const {
enforce(offset + sizeof(T) <= length_,
"Offset is not contained within our mmapped file");
return *reinterpret_cast<T*>(file_ + offset);
}
// Return a reference to the T stored at virtual address `addr`, which must
// lie within `section`. Throws (via enforce) if the file is not an
// executable or shared object, or if [addr, addr + sizeof(T)) is not
// fully inside the section's address range.
template <class T>
const T& valueAt(const ElfW(Shdr)& section, const ElfW(Addr) addr) const {
  // For executables and shared objects, st_value holds a virtual address
  // that refers to the memory owned by sections. Since we didn't map the
  // sections into the addresses that they're expecting (sh_addr), but
  // instead just mmapped the entire file directly, we need to translate
  // between addresses and offsets into the file.
  //
  // TODO: For other file types, st_value holds a file offset directly. Since
  //       I don't have a use-case for that right now, just assert that
  //       nobody wants this. We can always add it later.
  enforce(elfHeader().e_type == ET_EXEC || elfHeader().e_type == ET_DYN,
          "Only exectuables and shared objects are supported");

  // Same condition as addr + sizeof(T) <= sh_addr + sh_size, expressed
  // with subtractions so that neither sum can wrap around.
  enforce(addr >= section.sh_addr &&
          sizeof(T) <= section.sh_size &&
          (addr - section.sh_addr) <= (section.sh_size - sizeof(T)),
          "Address is not contained within the provided segment");

  return at<T>(section.sh_offset + (addr - section.sh_addr));
}
int fd_;
char* file_; // mmap() location
size_t length_; // mmap() length
......@@ -135,13 +218,6 @@ class ElfFile {
uintptr_t baseAddress_;
};
// Throws std::runtime_error, with a message built from args by
// folly::to<std::string>(), when v is false.
// NOTE(review): this looks like the pre-change position of enforce() in
// this diff; an identical definition also appears before the ElfFile class.
// Confirm that only one of the two remains in the header.
template <class... Args>
inline void enforce(bool v, Args... args) {
if (UNLIKELY(!v)) {
throw std::runtime_error(folly::to<std::string>(args...));
}
}
} // namespace symbolizer
} // namespace folly
......
/*
* Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include "folly/experimental/symbolizer/Elf.h"
using folly::symbolizer::ElfFile;
// Add some symbols for testing. Note that we have to be careful with type
// signatures here to prevent name mangling: the tests below look these
// variables up in the test binary by the exact names "kIntegerValue" and
// "kStringValue" and compare the values read from the ELF file against the
// in-process values.
uint64_t kIntegerValue = 1234567890UL;
const char* kStringValue = "coconuts";
class ElfTest : public ::testing::Test {
public:
// Path to the test binary itself; set by main()
static std::string binaryPath;
ElfTest() : elfFile_(binaryPath.c_str()) {
}
virtual ~ElfTest() {
}
protected:
ElfFile elfFile_;
};
std::string ElfTest::binaryPath;
// Looks up the global kIntegerValue by name in the test binary and checks
// that the value read from the ELF file matches the in-process value.
TEST_F(ElfTest, IntegerValue) {
  auto sym = elfFile_.getSymbolByName("kIntegerValue");
  // Fatal assertions: sym.second is dereferenced below, so the test must
  // stop here if the lookup failed instead of crashing on a null pointer.
  ASSERT_NE(nullptr, sym.first) <<
    "Failed to look up symbol kIntegerValue";
  ASSERT_NE(nullptr, sym.second) <<
    "Failed to look up symbol kIntegerValue";
  EXPECT_EQ(kIntegerValue, elfFile_.getSymbolValue<uint64_t>(sym.second));
}
// Looks up the global pointer kStringValue by name, reads the address it
// holds from the ELF file, then follows that address to the string data
// and compares it with the in-process string.
TEST_F(ElfTest, PointerValue) {
  auto sym = elfFile_.getSymbolByName("kStringValue");
  // Fatal assertions: sym.second is dereferenced below, so the test must
  // stop here if the lookup failed instead of crashing on a null pointer.
  ASSERT_NE(nullptr, sym.first) <<
    "Failed to look up symbol kStringValue";
  ASSERT_NE(nullptr, sym.second) <<
    "Failed to look up symbol kStringValue";
  ElfW(Addr) addr = elfFile_.getSymbolValue<ElfW(Addr)>(sym.second);
  const char *str = &elfFile_.getAddressValue<const char>(addr);
  EXPECT_STREQ(kStringValue, str);
}
// Test entry point: initializes gtest and flag parsing, records the path
// of this binary for the ElfTest fixture, then runs every test.
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  google::ParseCommandLineFlags(&argc, &argv, true);

  // The tests parse this very executable, so remember how it was invoked.
  ElfTest::binaryPath = argv[0];

  const int status = RUN_ALL_TESTS();
  return status;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment