Commit a14c8b75 authored by Jason Flinn's avatar Jason Flinn Committed by Facebook Github Bot

writeFileAtomic optionally syncs to provide durable atomicity

Summary:
The current implementation does not guarantee atomicity on power
failure or OS crash.  For instance, if a crash happens after writing
the directory to storage but before writing the file data to storage,
then a zero-byte file can result.

Provide an optional parameter to sync and provide atomicity in these
situations.

Adjust comments to reflect the actual guarantees provided.

Differential Revision: D19754974

fbshipit-source-id: 5ddb2f3a1b9e72b523ee13937f648e0913c94c2f
parent 93232f95
...@@ -167,7 +167,8 @@ int writeFileAtomicNoThrow( ...@@ -167,7 +167,8 @@ int writeFileAtomicNoThrow(
StringPiece filename, StringPiece filename,
iovec* iov, iovec* iov,
int count, int count,
mode_t permissions) { mode_t permissions,
SyncType syncType) {
// We write the data to a temporary file name first, then atomically rename // We write the data to a temporary file name first, then atomically rename
// it into place. This ensures that the file contents will always be valid, // it into place. This ensures that the file contents will always be valid,
// even if we crash or are killed partway through writing out data. // even if we crash or are killed partway through writing out data.
...@@ -213,6 +214,15 @@ int writeFileAtomicNoThrow( ...@@ -213,6 +214,15 @@ int writeFileAtomicNoThrow(
return errno; return errno;
} }
// To guarantee atomicity across power failues on POSIX file systems,
// the temporary file must be explicitly sync'ed before the rename.
if (syncType == SyncType::WITH_SYNC) {
rc = fsyncNoInt(tmpFD);
if (rc == -1) {
return errno;
}
}
// Close the file before renaming to make sure all data has // Close the file before renaming to make sure all data has
// been successfully written. // been successfully written.
rc = close(tmpFD); rc = close(tmpFD);
...@@ -233,26 +243,32 @@ void writeFileAtomic( ...@@ -233,26 +243,32 @@ void writeFileAtomic(
StringPiece filename, StringPiece filename,
iovec* iov, iovec* iov,
int count, int count,
mode_t permissions) { mode_t permissions,
auto rc = writeFileAtomicNoThrow(filename, iov, count, permissions); SyncType syncType) {
auto rc = writeFileAtomicNoThrow(filename, iov, count, permissions, syncType);
if (rc != 0) { if (rc != 0) {
auto msg = std::string(__func__) + "() failed to update " + filename.str(); auto msg = std::string(__func__) + "() failed to update " + filename.str();
throw std::system_error(rc, std::generic_category(), msg); throw std::system_error(rc, std::generic_category(), msg);
} }
} }
void writeFileAtomic(StringPiece filename, ByteRange data, mode_t permissions) { void writeFileAtomic(
StringPiece filename,
ByteRange data,
mode_t permissions,
SyncType syncType) {
iovec iov; iovec iov;
iov.iov_base = const_cast<unsigned char*>(data.data()); iov.iov_base = const_cast<unsigned char*>(data.data());
iov.iov_len = data.size(); iov.iov_len = data.size();
writeFileAtomic(filename, &iov, 1, permissions); writeFileAtomic(filename, &iov, 1, permissions, syncType);
} }
void writeFileAtomic( void writeFileAtomic(
StringPiece filename, StringPiece filename,
StringPiece data, StringPiece data,
mode_t permissions) { mode_t permissions,
writeFileAtomic(filename, ByteRange(data), permissions); SyncType syncType) {
writeFileAtomic(filename, ByteRange(data), permissions, syncType);
} }
} // namespace folly } // namespace folly
...@@ -84,7 +84,8 @@ ssize_t pwritevNoInt(int fd, const iovec* iov, int count, off_t offset); ...@@ -84,7 +84,8 @@ ssize_t pwritevNoInt(int fd, const iovec* iov, int count, off_t offset);
* is unspecified. * is unspecified.
*/ */
FOLLY_NODISCARD ssize_t readFull(int fd, void* buf, size_t count); FOLLY_NODISCARD ssize_t readFull(int fd, void* buf, size_t count);
FOLLY_NODISCARD ssize_t preadFull(int fd, void* buf, size_t count, off_t offset); FOLLY_NODISCARD ssize_t
preadFull(int fd, void* buf, size_t count, off_t offset);
FOLLY_NODISCARD ssize_t readvFull(int fd, iovec* iov, int count); FOLLY_NODISCARD ssize_t readvFull(int fd, iovec* iov, int count);
FOLLY_NODISCARD ssize_t preadvFull(int fd, iovec* iov, int count, off_t offset); FOLLY_NODISCARD ssize_t preadvFull(int fd, iovec* iov, int count, off_t offset);
...@@ -223,6 +224,12 @@ bool writeFile( ...@@ -223,6 +224,12 @@ bool writeFile(
return closeNoInt(fd) == 0 && ok; return closeNoInt(fd) == 0 && ok;
} }
/* For atomic writes, do we sync to guarantee ordering or not? */
enum class SyncType {
WITH_SYNC,
WITHOUT_SYNC,
};
/** /**
* Write file contents "atomically". * Write file contents "atomically".
* *
...@@ -233,20 +240,33 @@ bool writeFile( ...@@ -233,20 +240,33 @@ bool writeFile(
* *
* Note that on platforms that do not provide atomic filesystem rename * Note that on platforms that do not provide atomic filesystem rename
* functionality (e.g., Windows) this behavior may not be truly atomic. * functionality (e.g., Windows) this behavior may not be truly atomic.
*
* The default implementation does not sync the data to storage before
* the rename. Therefore, the write is *not* atomic in the event of a
* power failure or OS crash. To guarantee atomicity in these cases,
* specify syncType = WITH_SYNC, which will incur a performance cost
* of waiting for the data to be persisted to storage. Note that the
* return of the function does not guarantee the directory
* modifications have been written to disk; a further sync of the
* directory after the function returns is required to ensure the
* modification is durable.
*/ */
void writeFileAtomic( void writeFileAtomic(
StringPiece filename, StringPiece filename,
iovec* iov, iovec* iov,
int count, int count,
mode_t permissions = 0644); mode_t permissions = 0644,
SyncType syncType = SyncType::WITHOUT_SYNC);
void writeFileAtomic( void writeFileAtomic(
StringPiece filename, StringPiece filename,
ByteRange data, ByteRange data,
mode_t permissions = 0644); mode_t permissions = 0644,
SyncType syncType = SyncType::WITHOUT_SYNC);
void writeFileAtomic( void writeFileAtomic(
StringPiece filename, StringPiece filename,
StringPiece data, StringPiece data,
mode_t permissions = 0644); mode_t permissions = 0644,
SyncType syncType = SyncType::WITHOUT_SYNC);
/** /**
* A version of writeFileAtomic() that returns an errno value instead of * A version of writeFileAtomic() that returns an errno value instead of
...@@ -258,6 +278,7 @@ int writeFileAtomicNoThrow( ...@@ -258,6 +278,7 @@ int writeFileAtomicNoThrow(
StringPiece filename, StringPiece filename,
iovec* iov, iovec* iov,
int count, int count,
mode_t permissions = 0644); mode_t permissions = 0644,
SyncType syncType = SyncType::WITHOUT_SYNC);
} // namespace folly } // namespace folly
...@@ -403,6 +403,19 @@ TEST_F(WriteFileAtomic, writeNew) { ...@@ -403,6 +403,19 @@ TEST_F(WriteFileAtomic, writeNew) {
EXPECT_EQ(0644, getPerms(path)); EXPECT_EQ(0644, getPerms(path));
} }
TEST_F(WriteFileAtomic, withSync) {
// Call writeFileAtomic() to create a new file
auto path = tmpPath("foo");
auto contents = StringPiece{"contents\n"};
auto permissions = mode_t{0644};
writeFileAtomic(path, contents, permissions, SyncType::WITH_SYNC);
// The directory should contain exactly 1 file now, with the correct contents
EXPECT_EQ(set<string>{"foo"}, listTmpDir());
EXPECT_EQ(contents, readData(path));
EXPECT_EQ(0644, getPerms(path));
}
TEST_F(WriteFileAtomic, overwrite) { TEST_F(WriteFileAtomic, overwrite) {
// Call writeFileAtomic() to create a new file // Call writeFileAtomic() to create a new file
auto path = tmpPath("foo"); auto path = tmpPath("foo");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment