Commit 94d0da77 authored by Kenny Yu's avatar Kenny Yu Committed by Facebook GitHub Bot

walk async stack traces correctly

Summary:
This correctly walks the async stack traces. Previously, we were not following the async stack root
from the last async stack frame.

High level stack walking strategy is as follows:
1) Start walking the normal stack up to the first normal stack frame holding the first async stack root
2) Walk the async stack frame chain from the provided async stack root
3) When we reach the end of the current async stack frame chain, check if the last async stack frame references an async stack root R1.
   If there is one, use R1 to find the normal stack frame at which the next normal stack walk should start.
4) If that async stack root R1 has a next async stack root R2, the normal stack frame walk should end at the normal stack frame
   holding the next async stack root R2. Otherwise the normal stack frame walk should continue all the way until it hits nullptr.
5) The next async stack walk should begin at the top async frame referenced by the next async stack root R2.
6) Repeat until we've reached the end of both the normal and async stack frame chains.

Reviewed By: andriigrynenko

Differential Revision: D28102128

fbshipit-source-id: c412a2a253720867c257d5cbaa9a6b22e96154f2
parent 29ba83e5
......@@ -241,10 +241,105 @@ def get_async_stack_addrs_from_initial_frame(
return addrs
def walk_normal_stack(
    normal_stack_frame_addr: gdb.Value,
    normal_stack_frame_stop_addr: gdb.Value,
) -> List[gdb.Value]:
    """
    Collects return addresses by following the normal stack frame chain.

    The walk begins at `normal_stack_frame_addr` and follows each frame's
    `stack_frame` link until the address is 0. If
    `normal_stack_frame_stop_addr` is non-zero, the walk ends early at the
    frame whose `stack_frame` link equals it; that frame's return address is
    not added to the result.
    """
    return_addrs: List[gdb.Value] = []
    cursor = normal_stack_frame_addr
    while int(cursor) != 0:
        frame = StackFrame.from_addr(cursor)
        stop_requested = int(normal_stack_frame_stop_addr) != 0
        if stop_requested and frame.stack_frame == normal_stack_frame_stop_addr:
            # The next frame up is the stop frame: the normal stack walk ends
            # here and the caller transitions to the async stack. The return
            # address pointing into the frame that registered the
            # AsyncStackRoot is deliberately left out of the trace.
            break
        return_addrs.append(frame.return_address)
        cursor = frame.stack_frame
    return return_addrs
@dataclass
class WalkAsyncStackResult:
    """Values produced by a single async stack walk."""

    # Addresses gathered during this async stack walk.
    addrs: List[gdb.Value]

    # Normal stack frame at which the following normal stack walk starts.
    normal_stack_frame_addr: gdb.Value
    # Stop address for that following normal stack walk.
    normal_stack_frame_stop_addr: gdb.Value

    # Async stack frame at which the next async stack walk starts, once the
    # following normal stack walk has finished.
    async_stack_frame_addr: gdb.Value
def walk_async_stack(async_stack_frame_addr: gdb.Value) -> WalkAsyncStackResult:
    """
    Follows a chain of async stack frames and works out where to go next.

    Every visited frame contributes its `instruction_pointer` to the result.
    When the frame with no parent is reached, its `stack_root` (if any) is
    used to fill in the normal stack frame to continue from, the stop address
    for that normal stack walk, and the async frame at which the next async
    walk begins. Any of these that cannot be determined is left as a null
    address.
    """
    collected: List[gdb.Value] = []
    next_normal_frame_addr = nullptr()
    next_normal_stop_addr = nullptr()
    next_async_frame_addr = nullptr()

    current_addr = async_stack_frame_addr
    while int(current_addr) != 0:
        frame = AsyncStackFrame.from_addr(current_addr)
        collected.append(frame.instruction_pointer)

        if int(frame.parent_frame) != 0:
            # Still inside the chain: move on to the parent async frame.
            current_addr = frame.parent_frame
            continue

        # This was the last async frame of the chain. Look at its
        # AsyncStackRoot to find the normal stack frame to resume from.
        root_addr = frame.stack_root
        if int(root_addr) == 0:
            # Detached async stack: nothing further to walk.
            break

        root = AsyncStackRoot.from_addr(root_addr)
        next_normal_frame_addr = root.stack_frame_ptr
        if int(next_normal_frame_addr) == 0:
            # The root has no associated normal stack frame, so it is
            # treated as top-level/detached and the walk goes no further.
            break

        # Resume from the parent of the frame recorded in the root.
        next_normal_frame_addr = StackFrame.from_addr(
            next_normal_frame_addr
        ).stack_frame

        # A higher-level AsyncStackRoot, when present, supplies both the frame
        # at which the next normal stack walk must stop and the async frame at
        # which the next async walk starts. Without one, the stop address
        # stays null and the normal stack is walked all the way to its top.
        outer_root_addr = root.next_root
        if int(outer_root_addr) != 0:
            outer_root = AsyncStackRoot.from_addr(outer_root_addr)
            next_normal_stop_addr = outer_root.stack_frame_ptr
            next_async_frame_addr = outer_root.top_frame

        # The last frame has been handled, so the chain is exhausted.
        break

    return WalkAsyncStackResult(
        addrs=collected,
        normal_stack_frame_addr=next_normal_frame_addr,
        normal_stack_frame_stop_addr=next_normal_stop_addr,
        async_stack_frame_addr=next_async_frame_addr,
    )
def get_async_stack_addrs() -> List[gdb.Value]:
"""
Gets the async stack trace, including normal stack frames with async
stack frames.
See C++ implementation in `getAsyncStackTraceSafe` in
folly/experimental/symbolizer/StackTrace.cpp
"""
async_stack_root_addr = get_async_stack_root_addr()
......@@ -253,33 +348,29 @@ def get_async_stack_addrs() -> List[gdb.Value]:
if int(async_stack_root_addr) == 0:
return []
# start the stack trace from the top
# Start the stack trace from the top
gdb.execute("f 0", from_tty=False, to_string=True)
# Start by walking the normal stack until we get to the frame right before
# the frame that holds the async root.
async_stack_root = AsyncStackRoot.from_addr(async_stack_root_addr)
normal_stack_frame_addr = gdb.parse_and_eval("$rbp")
normal_stack_frame_stop_addr = async_stack_root.stack_frame_ptr
addrs: List[gdb.Value] = []
addrs.append(gdb.parse_and_eval("$pc"))
normal_stack_frame_addr = gdb.parse_and_eval("$rbp")
while int(normal_stack_frame_addr) != 0 and int(async_stack_root_addr) != 0:
normal_stack_frame = StackFrame.from_addr(normal_stack_frame_addr)
async_stack_root = AsyncStackRoot.from_addr(async_stack_root_addr)
async_stack_frame_addr = async_stack_root.top_frame
# Walk the normal stack to find the caller of the frame that holds the
# AsyncStackRoot. If the caller holds the AsyncStackRoot, then the
# current frame is part of an async operation, and we should get the
# async stack trace before adding the current frame.
if int(normal_stack_frame.stack_frame) == int(async_stack_root.stack_frame_ptr):
addrs += get_async_stack_addrs_from_initial_frame(
async_stack_root.top_frame
while int(normal_stack_frame_addr) != 0 or int(async_stack_frame_addr) != 0:
addrs += walk_normal_stack(
normal_stack_frame_addr, normal_stack_frame_stop_addr
)
# There could be more related work at the next async stack root.
# Anything after the stack frame containing the last async stack root
# is potentially unrelated to the current async stack.
async_stack_root_addr = async_stack_root.next_root
if int(async_stack_root_addr) == 0:
break
addrs.append(normal_stack_frame.return_address)
normal_stack_frame_addr = normal_stack_frame.stack_frame
walk_async_stack_result = walk_async_stack(async_stack_frame_addr)
addrs += walk_async_stack_result.addrs
normal_stack_frame_addr = walk_async_stack_result.normal_stack_frame_addr
normal_stack_frame_stop_addr = (
walk_async_stack_result.normal_stack_frame_stop_addr
)
async_stack_frame_addr = walk_async_stack_result.async_stack_frame_addr
return addrs
......
......@@ -15,6 +15,7 @@
*/
#include <folly/experimental/symbolizer/StackTrace.h>
#include <folly/tracing/AsyncStack.h>
#include <memory>
......@@ -121,13 +122,6 @@ ssize_t getStackTraceInPlace(
}
#endif // FOLLY_HAVE_LIBUNWIND
// Helper struct for manually walking the stack using stack frame pointers
struct StackFrame {
StackFrame* parentFrame;
void* returnAddress;
};
} // namespace
ssize_t getStackTraceSafe(
......@@ -170,39 +164,138 @@ ssize_t getStackTraceHeap(
#endif
}
namespace {
// Layout used to walk the stack by hand via frame pointers: each frame
// starts with a link to its parent frame followed by the return address.
struct StackFrame {
  StackFrame* parentFrame;
  void* returnAddress;
};

// Writes the return addresses of consecutive normal stack frames into
// `addresses`, starting at `normalStackFrame` and following `parentFrame`
// links.
//
// The walk ends when `maxAddresses` entries have been written, when the chain
// reaches nullptr, or, if `normalStackFrameStop` is non-null, at the frame
// whose parent is `normalStackFrameStop`. That last frame's return address is
// not written.
//
// Returns the number of entries written.
size_t walkNormalStack(
    uintptr_t* addresses,
    size_t maxAddresses,
    StackFrame* normalStackFrame,
    StackFrame* normalStackFrameStop) {
  size_t written = 0;
  for (StackFrame* frame = normalStackFrame;
       frame != nullptr && written < maxAddresses;
       frame = frame->parentFrame) {
    const bool parentIsStopFrame = normalStackFrameStop != nullptr &&
        frame->parentFrame == normalStackFrameStop;
    if (parentIsStopFrame) {
      // End of this stretch of normal stack; the caller switches over to the
      // async stack. The return address that points into the frame which
      // registered the AsyncStackRoot is intentionally omitted: the
      // AsyncStackFrame's return address is used for this position instead
      // of the executor function that invoked the callback.
      break;
    }
    addresses[written] =
        reinterpret_cast<std::uintptr_t>(frame->returnAddress);
    ++written;
  }
  return written;
}
// Outcome of one async stack walk.
struct WalkAsyncStackResult {
  // Count of frames this walk added.
  size_t numFrames = 0;

  // Frame at which the following normal stack walk starts, and the frame at
  // which it stops (nullptr when there is no stop frame).
  StackFrame* normalStackFrame = nullptr;
  StackFrame* normalStackFrameStop = nullptr;

  // Frame at which the next async stack walk starts, once the following
  // normal stack walk is done.
  AsyncStackFrame* asyncStackFrame = nullptr;
};
// Writes the return addresses of a chain of async stack frames into
// `addresses` (at most `maxAddresses` of them), starting at
// `asyncStackFrame` and following parent links.
//
// If the last frame of the chain (the one without a parent) was written, its
// AsyncStackRoot is inspected to fill in where the next normal stack walk
// starts and stops and where the next async stack walk starts. Fields that
// cannot be determined are left as nullptr.
WalkAsyncStackResult walkAsyncStack(
    uintptr_t* addresses,
    size_t maxAddresses,
    AsyncStackFrame* asyncStackFrame) {
  WalkAsyncStackResult result;

  // Phase 1: record return addresses along the chain. `chainEnd` is set only
  // when the parentless frame itself was recorded, i.e. the walk was not cut
  // short by the output buffer filling up first.
  AsyncStackFrame* chainEnd = nullptr;
  auto* frame = asyncStackFrame;
  while (frame != nullptr && result.numFrames < maxAddresses) {
    addresses[result.numFrames] =
        reinterpret_cast<std::uintptr_t>(frame->getReturnAddress());
    ++result.numFrames;
    auto* parent = frame->getParentFrame();
    if (parent == nullptr) {
      chainEnd = frame;
    }
    frame = parent;
  }
  if (chainEnd == nullptr) {
    return result;
  }

  // Phase 2: the end of the async stack was reached. See whether it has an
  // AsyncStackRoot with an associated normal stack frame to continue from.
  const auto* root = chainEnd->getStackRoot();
  if (root == nullptr) {
    // Detached async stack: nothing more to walk.
    return result;
  }

  auto* rootHolder =
      reinterpret_cast<StackFrame*>(root->getStackFramePointer());
  if (rootHolder == nullptr) {
    // The root has no associated normal stack frame, so it is treated as a
    // top-level/detached stack and the walk goes no further.
    return result;
  }

  // Continue from the parent of the frame that holds this root.
  result.normalStackFrame = rootHolder->parentFrame;

  // A higher-level AsyncStackRoot, if present, gives the frame at which the
  // next normal stack walk must stop and the async frame where the next
  // async stack walk starts. Without one, normalStackFrameStop stays nullptr
  // and the normal stack is walked to its top.
  const auto* outerRoot = root->getNextRoot();
  if (outerRoot != nullptr) {
    result.normalStackFrameStop =
        reinterpret_cast<StackFrame*>(outerRoot->getStackFramePointer());
    result.asyncStackFrame = outerRoot->getTopFrame();
  }
  return result;
}
} // namespace
ssize_t getAsyncStackTraceSafe(uintptr_t* addresses, size_t maxAddresses) {
size_t numFrames = 0;
const auto* asyncStackRoot = tryGetCurrentAsyncStackRoot();
// If we have no async stack root, this should return no frames.
// If we do have a stack root, also include the current return address.
if (asyncStackRoot != nullptr && numFrames < maxAddresses) {
addresses[numFrames++] = (uintptr_t)FOLLY_ASYNC_STACK_RETURN_ADDRESS();
}
for (const auto* normalStackFrame =
(StackFrame*)FOLLY_ASYNC_STACK_FRAME_POINTER();
normalStackFrame != nullptr && asyncStackRoot != nullptr &&
numFrames < maxAddresses;
normalStackFrame = normalStackFrame->parentFrame) {
// Walk the normal stack to find the caller of the frame that holds the
// AsyncStackRoot. If the caller holds the AsyncStackRoot, then the
// current frame is part of an async operation, and we should get the
// async stack trace before adding the current frame.
if (normalStackFrame->parentFrame ==
asyncStackRoot->getStackFramePointer()) {
// Follow the async stack trace starting from the root
numFrames += getAsyncStackTraceFromInitialFrame(
asyncStackRoot->getTopFrame(),
&addresses[numFrames],
maxAddresses - numFrames);
// There could be more related work at the next async stack root.
// Anything after the stack frame containing the last async stack root
// is potentially unrelated to the current async stack.
asyncStackRoot = asyncStackRoot->getNextRoot();
if (asyncStackRoot == nullptr) {
break;
// No async operation in progress. Return empty stack
return numFrames;
}
// Start by walking the normal stack until we get to the frame right before
// the frame that holds the async root.
auto* normalStackFrame =
reinterpret_cast<StackFrame*>(FOLLY_ASYNC_STACK_FRAME_POINTER());
auto* normalStackFrameStop =
reinterpret_cast<StackFrame*>(asyncStackRoot->getStackFramePointer());
if (numFrames < maxAddresses) {
addresses[numFrames++] =
reinterpret_cast<std::uintptr_t>(FOLLY_ASYNC_STACK_RETURN_ADDRESS());
}
addresses[numFrames++] = (uintptr_t)normalStackFrame->returnAddress;
auto* asyncStackFrame = asyncStackRoot->getTopFrame();
while (numFrames < maxAddresses &&
(normalStackFrame != nullptr || asyncStackFrame != nullptr)) {
numFrames += walkNormalStack(
addresses + numFrames,
maxAddresses - numFrames,
normalStackFrame,
normalStackFrameStop);
auto walkAsyncStackResult = walkAsyncStack(
addresses + numFrames, maxAddresses - numFrames, asyncStackFrame);
numFrames += walkAsyncStackResult.numFrames;
normalStackFrame = walkAsyncStackResult.normalStackFrame;
normalStackFrameStop = walkAsyncStackResult.normalStackFrameStop;
asyncStackFrame = walkAsyncStackResult.asyncStackFrame;
}
return numFrames;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment