Commit 3fb0bf3b authored by Ming Zhao's avatar Ming Zhao Committed by Facebook Github Bot

Add terminateOrKill function to folly::Subprocess to use a combination of...

Add terminateOrKill function to folly::Subprocess to use a combination of SIGTERM/SIGKILL to terminate a process.

Summary:
Add terminateOrKill function to folly::Subprocess to use a
combination of SIGTERM/SIGKILL to terminate a process. The document
has clearly indicated this class is thread-unsafe, and it's strongly
suggested to call signal and wait in the same thread to avoid
unintended consequence, yet there is code that tried to do that in
mutliple threads and ends with potential kill(-1).

By providing a function that basically does `terminate();
waitpid_with_timeout(); kill(); waitpid()`, it should help eliminate
usage errors mentioned above.

Reviewed By: yfeldblum

Differential Revision: D17662349

fbshipit-source-id: 3a68ede5ca09270f804c88839a798ae31b615ae3
parent 50d64f75
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <algorithm> #include <algorithm>
#include <array> #include <array>
#include <system_error> #include <system_error>
#include <thread>
#include <boost/container/flat_set.hpp> #include <boost/container/flat_set.hpp>
#include <boost/range/adaptors.hpp> #include <boost/range/adaptors.hpp>
...@@ -671,6 +672,45 @@ void Subprocess::sendSignal(int signal) { ...@@ -671,6 +672,45 @@ void Subprocess::sendSignal(int signal) {
checkUnixError(r, "kill"); checkUnixError(r, "kill");
} }
ProcessReturnCode Subprocess::terminateOrKill(int sigtermTimeoutSeconds) {
returnCode_.enforce(ProcessReturnCode::RUNNING);
DCHECK_GT(pid_, 0) << "The subprocess has been waited already";
// 1. Send SIGTERM to kill the process
terminate();
// 2. check whether subprocess has terminated using non-blocking waitpid
for (int i = 0; i < sigtermTimeoutSeconds * 10; i++) {
int status;
pid_t found;
// warp waitpid in the while loop to deal with EINTR.
do {
found = ::waitpid(pid_, &status, WNOHANG);
} while (found == -1 && errno == EINTR);
PCHECK(found != -1) << "waitpid(" << pid_ << ", &status, WNOHANG)";
if (found == 0) {
// The subprocess is still running, sleep for 100ms
std::this_thread::sleep_for(std::chrono::milliseconds(100));
continue;
} else {
// Just on the safe side, make sure it's the actual pid we are waiting.
DCHECK_EQ(found, pid_);
returnCode_ = ProcessReturnCode::make(status);
// Change pid_ to -1 to detect programming error like calling
// this method multiple times.
pid_ = -1;
return returnCode_;
}
}
// 3. If we are at this point, we have waited enough time after
// sending SIGTERM, we have to use nuclear option SIGKILL to kill
// the subprocess.
LOG(INFO) << "Send SIGKILL to " << pid_;
kill();
// 4. SIGKILL should kill the process otherwise there must be
// something seriously wrong, just use blocking wait to wait for the
// subprocess to finish.
return wait();
}
pid_t Subprocess::pid() const { pid_t Subprocess::pid() const {
return pid_; return pid_;
} }
......
...@@ -613,6 +613,15 @@ class Subprocess { ...@@ -613,6 +613,15 @@ class Subprocess {
sendSignal(SIGKILL); sendSignal(SIGKILL);
} }
/**
* Send the SIGTERM to terminate the process, call `waitpid`
* non-blockingly for several times, up to `sigtermTimeoutSeconds`.
* If the process hasn't terminated after that, send SIGKILL to kill
* the process and call `waitpid` blockingly. Return the exit code of
* process.
*/
ProcessReturnCode terminateOrKill(int sigtermTimeoutSeconds);
//// ////
//// The methods below only affect the process's communication pipes, but //// The methods below only affect the process's communication pipes, but
//// not its return code or state (they do not poll() or wait()). //// not its return code or state (they do not poll() or wait()).
......
...@@ -174,6 +174,68 @@ TEST(SimpleSubprocessTest, ChangeChildDirectoryWithError) { ...@@ -174,6 +174,68 @@ TEST(SimpleSubprocessTest, ChangeChildDirectoryWithError) {
} }
} }
// This method verifies terminateOrKill shouldn't affect the exit
// status if the process has exitted already.
TEST(SimpleSubprocessTest, TerminateAfterProcessExit) {
Subprocess proc(
std::vector<std::string>{"/bin/bash", "-c", "echo hello; exit 1"},
Subprocess::Options().pipeStdout().pipeStderr());
const auto [stdout, stderr] = proc.communicate();
EXPECT_EQ("hello\n", stdout);
auto retCode = proc.terminateOrKill(1);
EXPECT_TRUE(retCode.exited());
EXPECT_EQ(1, retCode.exitStatus());
}
namespace {
// Wait for the given subprocess to write anything in stdout to ensure
// it has started.
bool waitForAnyOutput(Subprocess& proc) {
// We couldn't use communicate here because it blocks until the
// stdout/stderr is closed.
char buffer;
ssize_t len;
do {
len = ::read(proc.stdoutFd(), &buffer, 1);
} while (len == -1 and errno == EINTR);
LOG(INFO) << "Read " << buffer;
return len == 1;
}
} // namespace
// This method tests that if the subprocess handles SIGTERM faster
// enough, we don't have to use SIGKILL to kill it.
TEST(SimpleSubprocessTest, TerminateWithoutKill) {
// Start a bash process that would sleep for 60 seconds, and the
// default signal handler should exit itself upon receiving SIGTERM.
Subprocess proc(
std::vector<std::string>{
"/bin/bash", "-c", "echo TerminateWithoutKill; sleep 60"},
Subprocess::Options().pipeStdout().pipeStderr());
EXPECT_TRUE(waitForAnyOutput(proc));
auto retCode = proc.terminateOrKill(1);
EXPECT_TRUE(retCode.killed());
EXPECT_EQ(SIGTERM, retCode.killSignal());
}
// This method tests that if the subprocess ignores SIGTERM, we have
// to use SIGKILL to kill it when calling terminateOrKill.
TEST(SimpleSubprocessTest, KillAfterTerminate) {
Subprocess proc(
std::vector<std::string>{
"/bin/bash",
"-c",
// use trap to register handler that sleeps for 60 seconds
// upon receiving SIGTERM, so SIGKILL would be triggered to
// kill it.
"trap \"sleep 120\" SIGTERM; echo KillAfterTerminate; sleep 60"},
Subprocess::Options().pipeStdout().pipeStderr());
EXPECT_TRUE(waitForAnyOutput(proc));
auto retCode = proc.terminateOrKill(1);
EXPECT_TRUE(retCode.killed());
EXPECT_EQ(SIGKILL, retCode.killSignal());
}
namespace { namespace {
boost::container::flat_set<int> getOpenFds() { boost::container::flat_set<int> getOpenFds() {
auto pid = getpid(); auto pid = getpid();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment