Commit c2ed0fac authored by Alexey Spiridonov, committed by Dave Watson

Part 1: Local time label <=> UTC timestamp conversion

Summary: See the block comment in date_time_utils.h -- the actual Cron code comes in later diffs.

Test Plan: unit tests

Reviewed By: agoder@fb.com

FB internal diff: D1181554
parent 3a9422d1
/*
* Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "folly/experimental/cron/date_time_utils.h"
#include <boost/date_time/c_local_time_adjustor.hpp>
#include <cerrno>
#include "folly/Format.h"
namespace folly { namespace cron {
using namespace boost::local_time;
using namespace boost::posix_time;
using namespace std;
// NB: The exceptions below are intended to confirm that the underlying
// libraries behave in a sane way. This makes them untestable. I got each
// of them to fire by temporarily changing their checks, so if they do fire,
// the printouts should be okay. It's fine to change them to CHECKs.
time_t getUTCOffset(time_t utc_time, time_zone_ptr tz) {
auto utc_pt = from_time_t(utc_time);
auto local_pt = utcPTimeToTimezoneLocalPTime(utc_pt, tz);
return (local_pt - utc_pt).total_seconds();
}
// Converts a UTC ptime into the local time label of `tz`. A null `tz` means
// "use the system timezone", which goes through boost's c_local_adjustor.
ptime utcPTimeToTimezoneLocalPTime(ptime utc_pt, time_zone_ptr tz) {
  if (!tz) {
    return boost::date_time::c_local_adjustor<ptime>::utc_to_local(utc_pt);
  }
  return local_date_time{utc_pt, tz}.local_time();
}
// Maps a local time label in the boost timezone `tz` to the UTC timestamp(s)
// it can denote. Both DST guesses are tried; a field of the result is only
// filled in when boost accepts the guess and agrees with it.
UTCTimestampsForLocalTime _boostTimezoneLocalPTimeToUTCTimestamps(
  ptime local_pt,
  time_zone_ptr tz
) {
  UTCTimestampsForLocalTime result;
  const auto label_date = local_pt.date();
  const auto label_time_of_day = local_pt.time_of_day();
  const ptime epoch = from_time_t(0);

  auto try_dst_guess = [&](bool guess_is_dst, time_t* slot) {
    try {
      local_date_time candidate(
        label_date, label_time_of_day, tz, guess_is_dst
      );
      // For a timezone without DST, local_date_time() silently ignores the
      // DST flag rather than throwing dst_not_valid. Reject a guess that
      // boost did not honor, or the same timestamp would end up in both
      // fields of the result (the analog of problem (b) in the localtime_r
      // code path).
      if (candidate.is_dst() != guess_is_dst) {
        return;
      }
      *slot = (candidate.utc_time() - epoch).total_seconds();
    } catch (const dst_not_valid&) {
      // Expected for the wrong guess -- the other value of is_dst is tried
      // by the caller.
    }
  };

  try {
    try_dst_guess(true, &result.dst_time);
    try_dst_guess(false, &result.non_dst_time);
  } catch (const time_label_invalid&) {
    // The label falls into the interval skipped by DST; result stays empty.
  }
  return result;
}
// Maps a local time label in the *system* timezone to the UTC timestamp(s)
// it can denote, using mktime() and localtime_r().
//
// Each tm_isdst guess (1, then 0) is fed to mktime(). The resulting timestamp
// is stored in the matching field of the result only if localtime_r() maps it
// back to exactly the same label with a tm_isdst compatible with the guess.
// Fields that are not stored keep their default-constructed value.
//
// Throws runtime_error if mktime() returns -1, if localtime_r() fails, or if
// the tm_isdst sanity checks at the end do not hold.
UTCTimestampsForLocalTime _systemTimezoneLocalPTimeToUTCTimestamps(
  ptime local_pt
) {
  UTCTimestampsForLocalTime res;
  struct tm tm = to_tm(local_pt);
  auto save_timestamp_if_valid = [tm, &local_pt](
    int is_dst, time_t *out
  ) -> bool {
    // Try to make a UTC timestamp based on our DST guess and local time.
    struct tm tmp_tm = tm;  // Make a copy since mktime changes the tm
    tmp_tm.tm_isdst = is_dst;
    // Clear errno before the call and read it right after: ISO C does not
    // require mktime() to set errno, so without this the message below could
    // report a stale value left behind by an unrelated earlier failure, or
    // one clobbered while the message is being formatted.
    errno = 0;
    time_t t = mktime(&tmp_tm);
    if (t == -1) {  // Not sure of the error cause or how to handle it.
      const int mktime_errno = errno;
      throw runtime_error(folly::format(
        "{}: mktime error {}", to_simple_string(local_pt), mktime_errno
      ).str());
    }
    // Convert the timestamp to a local time to see if the guess was right.
    struct tm new_tm;
    errno = 0;  // Same reasoning as for mktime() above.
    auto out_tm = localtime_r(&t, &new_tm);
    if (out_tm == nullptr) {  // Not sure if such errors can be handled.
      const int localtime_errno = errno;
      throw runtime_error(folly::format(
        "{}: localtime_r error {}", to_simple_string(local_pt), localtime_errno
      ).str());
    }
    // Does the original tm agree with the tm generated from the mktime()
    // UTC timestamp? (We'll check tm_isdst separately.)
    //
    // This test never passes when we have a local time label that is
    // skipped when a DST change moves the clock forward.
    //
    // A valid local time label always has one or two valid DST values.
    // When the timezone has no DST, that value is "false".
    //
    // This test always passes when:
    //  - The DST value is ambiguous (due to the local clock moving back).
    //  - We guessed the uniquely valid DST value.
    //
    // The test may or may not always pass (implementation-dependent) when
    // we did not guess a valid DST value.
    //  (a) If it does not pass, we are good, because we also try the other
    //      DST value, which will make the test pass, and then res will have
    //      a unique timestamp.
    //  (b) If it does pass, we're in more trouble, because it means that
    //      the implementation ignored our is_dst value. Then, the timestamp
    //      t is the same as for the other is_dst value. But, we don't want
    //      res to be labeled ambiguous, and we don't want to randomly pick
    //      a DST value to set to kNotATime, because clients may want to
    //      know the real DST value. The solution is the extra test below.
    if (
      tm.tm_sec == new_tm.tm_sec && tm.tm_min == new_tm.tm_min &&
      tm.tm_hour == new_tm.tm_hour && tm.tm_mday == new_tm.tm_mday &&
      tm.tm_mon == new_tm.tm_mon && tm.tm_year == new_tm.tm_year &&
      // To fix problem (b) above, we must assume that localtime_r returns
      // the correct tm_isdst (if not, it's a system bug anyhow). Then, we
      // can just check our DST guess against the truth. If our guess was
      // invalid, we shouldn't store the result, avoiding (b).
      !(  // tm_isdst can also be negative but we'll check that later
        (new_tm.tm_isdst == 0 && is_dst) || (new_tm.tm_isdst > 0 && !is_dst)
      )
    ) {
      *out = t;
    }
    return new_tm.tm_isdst < 0;  // Used for a sanity-check below.
  };
  bool neg_isdst1 = save_timestamp_if_valid(1, &res.dst_time);
  bool neg_isdst2 = save_timestamp_if_valid(0, &res.non_dst_time);
  // The only legitimate way for localtime_r() to give back a negative
  // tm_isdst is if the input local time label is ambiguous due to DST.
  if (neg_isdst1 || neg_isdst2) {
    if (neg_isdst1 ^ neg_isdst2) {  // Can't be ambiguous half the time
      throw runtime_error(folly::format(
        "{}: one tm_isdst negative but not both", to_simple_string(local_pt)
      ).str());
    }
    if (!res.isAmbiguous()) {
      throw runtime_error(folly::format(
        "{}: negative tm_isdst but time label is unambiguous",
        to_simple_string(local_pt)
      ).str());
    }
  }
  return res;
}
// Public entry point: converts a local time label into 0, 1 or 2 UTC
// timestamps. A non-null `tz` is handled by the boost-based helper, a null
// `tz` by the system-timezone helper.
UTCTimestampsForLocalTime timezoneLocalPTimeToUTCTimestamps(
  ptime local_pt,
  time_zone_ptr tz
) {
  UTCTimestampsForLocalTime timestamps = tz
    ? _boostTimezoneLocalPTimeToUTCTimestamps(local_pt, tz)
    : _systemTimezoneLocalPTimeToUTCTimestamps(local_pt);

  // Sanity check: an "ambiguous" label must map to two *different*
  // timestamps. Both helpers contain fixes meant to prevent this (see e.g.
  // problem (b) in the system-timezone helper).
  const bool both_set = timestamps.isAmbiguous();
  if (both_set && timestamps.dst_time == timestamps.non_dst_time) {
    throw runtime_error(folly::format(
      "{}: local time maps to {} regardless of tm_isdst",
      to_simple_string(local_pt), timestamps.dst_time
    ).str());
  }
  return timestamps;
}
}}
/*
* Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This is a small extension on top of boost::date_time, which handles
* conversions between "local time labels" (e.g. "2am, March 10, 2013") and
* POSIX UTC timestamps.
*
* Our library hides two sources of complexity:
*
* - Time zones. You can easily use the system time zone (pass a NULL
* pointer), or provide a boost::time_zone_ptr (usually created via a
* POSIX-like timezone format, or from the boost::date_time timezone DB).
*
* - The one-to-many relationship between time labels and UTC timestamps.
*
* UTC timestamps are effectively monotonic (aside from leap seconds,
* which are ignored in POSIX time, and are irrelevant for Cron).
*
* Local time labels, on the other hand, can move forward or backward due
* to daylight-savings changes. Thus, when the local clock rewinds due
* to DST, some local time labels become ambiguous (is it 1:30am before
* or after the DST rewind?). When the local time moves forward due to
* DST, some local time labels are skipped (in the US Pacific timezone,
* 2:30am never happened on March 10, 2013).
*
* As a consequence, timezoneLocalPTimeToUTCTimestamps() returns a struct
* UTCTimestampsForLocalTime that can represent 0, 1, or 2 UTC timestamps.
*
* The ambiguity could be avoided by adding an 'is_dst' flag to the local
* time label, but this is not useful for the purposes of Cron (and is
* handled adequately in existing libraries).
*
* Going from UTC to a local time label is easy and unambiguous, see
* utcPTimeToTimezoneLocalPTime().
*
* CAVEAT: We use boost::posix_time::ptime to represent both UTC timestamps,
* *and* local time labels. This is confusing -- it would be better if
* local time labels used a separate type. However, a ptime is very
* convenient for the purpose, since it supports all the usual time
* operations you might want to do. Patches are welcome.
*
* Our library thus accounts for the following deficiencies of
* boost::date_time:
*
* - boost::date_time has almost no support for the system timezone (the only
* related feature is the hacky "c_local_adjustor"). In contrast, our
* library interprets a time_zone_ptr value of NULL as referring to the
* system timezone, and then does the right thing.
*
* - boost::date_time has a rather annoying exception-based API for
* determining whether a local time label is ambiguous, nonexistent, or
* unique. Our struct is much more usable.
*/
#pragma once
#include <boost/date_time/local_time/local_time_types.hpp>
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <ctime>
#include <stdexcept>
#include <utility>
#include "folly/Format.h"
namespace folly { namespace cron {
/**
* How many seconds must be added to UTC in order to get the local time for
* the given time point?
*
* The local time is obtained via utcPTimeToTimezoneLocalPTime(), so if tz is
* a null pointer, the local (system) timezone is used.
*/
time_t getUTCOffset(time_t utc_time, boost::local_time::time_zone_ptr tz);
/**
* Convert a UTC ptime into a timezone-local ptime.
*
* If tz is a null pointer, use the local timezone.
*
* This is a lossy transformation, since the UTC offset of a timezone
* is not constant -- see timezoneLocalPTimeToUTCTimestamps()
* for a detailed explanation.
*/
boost::posix_time::ptime utcPTimeToTimezoneLocalPTime(
boost::posix_time::ptime utc_pt,
boost::local_time::time_zone_ptr tz
);
/**
* A local time label can correspond to 0, 1, or 2 UTC timestamps due to
* DST time shifts:
* - If the clock went back and your label lands in the repeated interval,
* you'll get both timestamps.
* - If the clock went forward, and your label landed in the skipped time,
* you get neither.
* - For all other labels you get exactly one timestamp.
* See also timezoneLocalPTimeToUTCTimestamps().
*/
struct UTCTimestampsForLocalTime {
  // Sentinel stored in a field that holds no timestamp.
  static const time_t kNotATime = -1;  // Might not be portable, but easy.

  // Starts out empty: neither field holds a timestamp.
  UTCTimestampsForLocalTime() : dst_time{kNotATime}, non_dst_time{kNotATime} {}

  // True when the label maps to no UTC timestamp at all.
  bool isNotATime() const {
    return !(dst_time != kNotATime || non_dst_time != kNotATime);
  }

  // True when the label maps to two UTC timestamps.
  bool isAmbiguous() const {
    return !(dst_time == kNotATime || non_dst_time == kNotATime);
  }

  /**
   * Returns the single UTC timestamp for this label.
   *
   * Throws std::runtime_error if the label is ambiguous (two timestamps) or
   * maps to no timestamp.
   */
  time_t getUnique() const {
    const bool has_dst = dst_time != kNotATime;
    const bool has_non_dst = non_dst_time != kNotATime;
    if (has_dst && has_non_dst) {
      throw std::runtime_error(folly::format(
        "Local time maps to both {} and {}", dst_time, non_dst_time
      ).str());
    }
    if (has_dst) {
      return dst_time;
    }
    if (has_non_dst) {
      return non_dst_time;
    }
    throw std::runtime_error("This local time was skipped due to DST");
  }

  /**
   * For ambiguous local time labels, return the pair of (lesser UTC
   * timestamp, greater UTC timestamp). Throws std::runtime_error if the
   * label is not ambiguous.
   *
   * NOTE: The ordering step may not be strictly necessary, since DST is
   * probably less than non-DST in all real timezones, but it's better to be
   * safe than sorry.
   *
   * More specifically, the POSIX timezone specification (IEEE Std 1003.1)
   * allows DST to be either ahead or behind of the regular timezone, so the
   * local timezone could shift either way. The docs for
   * boost::local_time::posix_time_zone (which is not even a POSIX-compliant
   * implementation, see README) are ambiguous, but can be read as intending
   * to forbid DST that sets the clock backwards.
   */
  std::pair<time_t, time_t> getBothInOrder() const {
    if (!isAmbiguous()) {
      throw std::runtime_error(folly::format(
        "{} and {} is not ambiguous", dst_time, non_dst_time
      ).str());
    }
    return dst_time < non_dst_time
      ? std::make_pair(dst_time, non_dst_time)
      : std::make_pair(non_dst_time, dst_time);
  }

  time_t dst_time;      // Timestamp under the DST interpretation, or kNotATime
  time_t non_dst_time;  // Timestamp under the non-DST one, or kNotATime
};
/**
* Convert a timezone-local ptime into UTC epoch timestamp(s).
*
* If tz is a null pointer, use the local timezone.
*
* WARNING 1: When DST sets back the clock, some local times become
* ambiguous -- you cannot tell if the timestamp lies before or after the
* DST change. For example, "November 3 01:30:00 2013" could be either PST
* or PDT, with a difference of one hour.
*
* WARNING 2: You can inadvertently make a local time that does not exist
* because a daylight savings change skips that time period.
*/
UTCTimestampsForLocalTime timezoneLocalPTimeToUTCTimestamps(
boost::posix_time::ptime local_pt,
boost::local_time::time_zone_ptr tz
);
}}
/*
* Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/date_time/local_time/local_time.hpp> // for posix_time_zone
#include <cstdlib> // for setenv()
#include "folly/experimental/cron/date_time_utils.h"
using namespace folly::cron;
using namespace boost::local_time;
using namespace boost::gregorian;
using namespace boost::posix_time;
using namespace std;
// Tells check_to_local_and_back() what to expect from converting a UTC
// timestamp to a local time label and back.
enum class Ambiguity {
// The label must map back to exactly one UTC timestamp.
Unique,
// The label must map back to two UTC timestamps.
Ambiguous,
// No expectation about ambiguity; only the round trip itself is checked.
Unknown,
};
// Expects that the local time label `local_pt` maps to no UTC timestamp in
// `tz`, i.e. that the conversion result reports isNotATime().
void check_not_a_local_time(ptime local_pt, time_zone_ptr tz) {
EXPECT_TRUE(
timezoneLocalPTimeToUTCTimestamps(local_pt, tz).isNotATime()
);
}
// Expects that the UTC timestamp `utc_t` converts to the local time label
// `expected_pt` in `tz`.
void check_local_ptime(time_t utc_t, time_zone_ptr tz, ptime expected_pt) {
EXPECT_EQ(
expected_pt, utcPTimeToTimezoneLocalPTime(from_time_t(utc_t), tz)
);
}
void check_to_local_and_back(time_t utc_t, time_zone_ptr tz, Ambiguity n) {
auto utc_ts = timezoneLocalPTimeToUTCTimestamps(
utcPTimeToTimezoneLocalPTime(from_time_t(utc_t), tz), tz
);
if (n == Ambiguity::Unique) {
EXPECT_FALSE(utc_ts.isAmbiguous());
EXPECT_FALSE(utc_ts.isNotATime());
} else if (n == Ambiguity::Ambiguous) {
EXPECT_TRUE(utc_ts.isAmbiguous());
}
EXPECT_FALSE(utc_ts.isNotATime()); // Cannot get here from a UTC timestamp
if (utc_ts.isAmbiguous()) {
EXPECT_PRED3(
[](time_t t1, time_t t2, time_t t3){ return t1 == t3 || t2 == t3; },
utc_ts.dst_time, utc_ts.non_dst_time, utc_t
);
} else {
EXPECT_EQ(utc_t, utc_ts.getUnique());
}
}
// 1980-02-29 12:00:00 UTC, i.e. 4AM PST on a leap day.
const int k1980_Feb29_4AM_PST = 320673600;
// UTC timestamps that every timezone check round-trips through a local time
// label and back.
const time_t kTestTimestamps[] = {
0, k1980_Feb29_4AM_PST, // Some edge cases
// Random values from [randrange(0, int(time())) for i in range(200)]
851763124, 261861130, 743855544, 30239098, 569168784, 850101954,
1113053877, 1364858223, 1082354444, 1294020427, 258495434, 1121030318,
192467213, 484525368, 579184768, 167376207, 689233030, 1351587900,
1214561991, 661713049, 381308132, 665152213, 94230657, 1349426746,
298195324, 1257615713, 682132890, 1018217831, 916554585, 995955072,
1117317370, 802646927, 608115326, 633743809, 109769810, 543272111,
609037871, 104418231, 264752638, 306399494, 1035358804, 766418015,
1128611920, 181391436, 839616511, 796842798, 653512454, 1010622273,
875647954, 708203495, 822980713, 991547420, 1265028641, 1347606382,
1002331337, 1164592802, 31466919, 1065361177, 1225097252, 631276316,
527190864, 492850662, 327182508, 869358924, 140894012, 1146198515,
501023608, 933017248, 324137101, 710311561, 556527520, 38622381,
203388537, 475269797, 724361468, 814834023, 208189749, 815722762,
45610280, 761977400, 933451311, 660014659, 494207495, 765580653,
1243453093, 234300455, 1345693003, 158935011, 1173706097, 315858792,
1184431509, 477296062, 276535773, 928860110, 103635291, 708434135,
51126476, 160505670, 153146671, 354980180, 890292051, 1155669986,
630375563, 349261331, 620264499, 477756621, 901672130, 618524356,
252709868, 1213920374, 233303580, 3012130, 969038324, 202252395,
1187016766, 669825568, 257426556, 214600753, 995259569, 360335117,
1199390931, 925221855, 616957946, 745607758, 1304023574, 383936310,
952313824, 251320075, 1018206981, 18254870, 949794553, 794223010,
22167074, 971353751, 836775665, 132713147, 1385328705, 564225254,
89489672, 970288768, 727638691, 1384138213, 295605253, 565194711,
268066246, 262980328, 878120933, 501014040, 950529654, 899180133,
452320225, 1232572199, 894784724, 24260103, 331355470, 593434097,
986752149, 590771435, 36704582, 1081058342, 231884390, 418573190,
580513906, 416611430, 778410883, 393299067, 891265387, 545143528,
242177530, 43413747, 774970054, 623606322, 1088170511, 925487121,
276552897, 904380544, 407117624, 877143874, 901504406, 1060658206,
378376447, 566370202, 903180278, 299280550, 1064440994, 742066503,
402041226, 1388625249, 1316863228, 749053705, 426181185, 1239538923,
221164890, 1049484190, 98669029, 414059052, 930992061, 34048214,
496162677, 206881990
};
// For a timezone without DST, every test timestamp must round-trip through
// a local time label to exactly one UTC timestamp.
void check_timezone_without_dst(time_zone_ptr tz) {
  for (const time_t& timestamp : kTestTimestamps) {
    check_to_local_and_back(timestamp, tz, Ambiguity::Unique);
  }
}
void check_timezone_with_dst(
time_zone_ptr tz, int amb_first, int amb_mid, int amb_last, int after_skip
) {
// DST-ambiguous values
for (time_t utc_t : {amb_first, amb_mid, amb_mid + 1, amb_last}) {
check_to_local_and_back(utc_t, tz, Ambiguity::Ambiguous);
}
// Timestamps bordering the DST transitions
for (time_t utc_t : {
amb_first - 1, amb_last + 1, // The ambiguous range is tight
after_skip, after_skip - 1 // The DST-skipped interval has no impact
}) {
check_to_local_and_back(utc_t, tz, Ambiguity::Unique);
}
// Lots of random timestamps
for (time_t utc_t: kTestTimestamps) {
check_to_local_and_back(utc_t, tz, Ambiguity::Unknown);
}
}
// These are 3 hours apart with the same DST rules in 2013, so it's easy to
// check UTC => local ptime conversions, and invalid local time labels.
//
// `tz` is the timezone under test (null selects the system timezone).
// `offset_from_pacific` is the number of seconds added to the US Pacific UTC
// timestamps below to get the timestamps of the same local time labels in
// `tz`: the callers pass 0 for Pacific and -10800 for Eastern.
void check_us_eastern_or_pacific(time_zone_ptr tz, int offset_from_pacific) {
// 2013: Nov 3 - 1AM PDT, 1:59:59AM PDT, 1:59:59AM PST; Mar 10 - 3AM PDT
time_t amb_start = 1383465600 + offset_from_pacific;
time_t amb_mid = 1383469199 + offset_from_pacific;
time_t amb_end = 1383472799 + offset_from_pacific;
time_t after_skip = 1362909600 + offset_from_pacific;
// The local time labels that the timestamps above are expected to map to.
// amb_mid and amb_end are one hour apart but share the label 1:59:59.
ptime amb_start_pt(date(2013, 11, 3), hours(1));
ptime amb_mid_end_pt(date(2013, 11, 3), time_duration(1, 59, 59));
ptime before_skip_pt(date(2013, 3, 10), time_duration(1, 59, 59));
ptime after_skip_pt(date(2013, 3, 10), hours(3));
// Test mapping to local ptimes and back to UTC timestamps.
check_timezone_with_dst(tz, amb_start, amb_mid, amb_end, after_skip);
check_local_ptime(amb_start, tz, amb_start_pt);
check_local_ptime(amb_mid, tz, amb_mid_end_pt);
check_local_ptime(amb_end, tz, amb_mid_end_pt);
check_local_ptime(after_skip - 1, tz, before_skip_pt);
check_local_ptime(
k1980_Feb29_4AM_PST + offset_from_pacific, tz,
ptime(date(1980, 2, 29), hours(4))
);
// Labels from 2:00:00 through 2:59:59 on Mar 10 were skipped by the DST
// change, so none of them may map to a UTC timestamp.
check_not_a_local_time(before_skip_pt + seconds(1), tz);
check_not_a_local_time(before_skip_pt + seconds(1800), tz);
check_not_a_local_time(before_skip_pt + seconds(3600), tz);
check_not_a_local_time(after_skip_pt - seconds(1), tz);
check_not_a_local_time(after_skip_pt - seconds(1800), tz);
check_not_a_local_time(after_skip_pt - seconds(3600), tz);
check_local_ptime(after_skip, tz, after_skip_pt);
// A light test for getUTCOffset(), since its constituents are well-tested.
// -25200 s is -7 hours (the DST offset for Pacific) and -28800 s is -8 hours
// (the standard offset); the switch happens between amb_mid and amb_mid + 1.
EXPECT_EQ(-25200 - offset_from_pacific, getUTCOffset(amb_start, tz));
EXPECT_EQ(-25200 - offset_from_pacific, getUTCOffset(amb_mid, tz));
EXPECT_EQ(-28800 - offset_from_pacific, getUTCOffset(amb_mid + 1, tz));
EXPECT_EQ(-28800 - offset_from_pacific, getUTCOffset(amb_end, tz));
EXPECT_EQ(
-28800 - offset_from_pacific, getUTCOffset(k1980_Feb29_4AM_PST, tz)
);
}
// Runs the same checks through both code paths: first the system timezone
// (null tz, selected via the TZ environment variable), then explicit
// boost posix_time_zone objects.
//
// NOTE(review): setenv() overwrites TZ for the whole process and nothing in
// this test restores the previous value, so TZ is left at the last value set
// below when the test ends.
TEST(TestDateTimeUtils, AllTheThings) {
// Exercise the local timezone code path: US Pacific & US Eastern
// A default-constructed time_zone_ptr is null, which selects the system
// timezone in the functions under test.
time_zone_ptr tz;
setenv("TZ", "PST+8PDT,M3.2.0,M11.1.0", 1);
tzset();
check_us_eastern_or_pacific(tz, 0);
setenv("TZ", "EST+5EDT,M3.2.0,M11.1.0", 1);
tzset();
check_us_eastern_or_pacific(tz, -10800);
// Local timezone code with DST-free timezones
for (auto& tz_name : {"MST+7", "GMT-14", "GMT+12", "GMT-4:30"}) {
setenv("TZ", tz_name, 1);
tzset();
check_timezone_without_dst(tz);
}
// Also US Pacific & US Eastern, but with the boost::date_time code.
// The signs differ from the setenv() calls above, since boost::local_time
// incorrectly implements the standard. Compare these:
// http://tools.ietf.org/html/draft-ietf-dhc-timezone-01
// http://www.boost.org/doc/libs/1_55_0/doc/html/date_time/local_time.html#date_time.local_time.posix_time_zone
tz.reset(new posix_time_zone{"PST-8PDT,M3.2.0,M11.1.0"});
check_us_eastern_or_pacific(tz, 0);
tz.reset(new posix_time_zone{"EST-5EDT,M3.2.0,M11.1.0"});
check_us_eastern_or_pacific(tz, -10800);
// DST-free timezones with the boost::date_time code (signs also flipped)
for (auto& tz_name : {"MST-7", "GMT+14", "GMT-12", "GMT+4:30"}) {
tz.reset(new posix_time_zone{tz_name});
check_timezone_without_dst(tz);
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment