Commit cd3fcbcf authored by Marc Celani's avatar Marc Celani Committed by Dave Watson

folly::merge() - std::merge() with stronger guarantees (probably same implementation in practice)

Summary:
std::merge() does not guarantee the ordering when equal elements belong in two ranges (comparator(it_a, it_b) == comparator(it_b, it_a) == 0). For maps, it is important that we can specify the ordering (see array_merge in PHP, where we guarantee which array's value will be present in the output if a key is present in both inputs).

Also removes folly::merge that is specific for sorted_vector_map since this will not be needed. NOTE: I expect this to break feed, will fix in a separate non-folly diff.

Test Plan: This implementation is directly ripped from cppreference.com, but unit tests added nonetheless. Specifically, one is added where the output is a std::map to demonstrate its usefulness.

Reviewed By: delong.j@fb.com

FB internal diff: D1223401

@override-unit-failures
parent c3633870
/*
* Copyright 2014 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* folly::merge() is an implementation of std::merge with one additional
* guarantee: if the input ranges overlap, the order that values *from the two
* different ranges* appear in the output is well defined (std::merge only
* guarantees relative ordering is maintained within a single input range).
* This semantic is very useful when the output container removes duplicates
* (such as std::map) to guarantee that elements from b override elements from
* a.
*
* ex. Let's say we have two vector<pair<int, int>> as input, and we are
* merging into a vector<pair<int, int>>. The comparator returns true if the
* first argument has a lesser 'first' value in the pair.
*
* a = {{1, 1}, {2, 2}, {3, 3}};
* b = {{1, 2}, {2, 3}};
*
* folly::merge<...>(a.begin(), a.end(), b.begin(), b.end(), outputIter) is
* guaranteed to produce {{1, 1}, {1, 2}, {2, 2}, {2, 3}, {3, 3}}. That is,
* if comp(*it_a, *it_b) == comp(*it_b, *it_a) == false, we first insert the element
* from a.
*/
#ifndef FOLLY_MERGE_H_
#define FOLLY_MERGE_H_
#include <algorithm>
namespace folly {
/**
 * Merges the sorted ranges [first1, last1) and [first2, last2) into the
 * output starting at d_first, ordering elements with comp.
 *
 * An element is taken from the second range only when comp reports it as
 * strictly less than the current element of the first range; on a tie the
 * element from the first range is written first.
 *
 * Returns the output iterator one past the last element written.
 */
template<class InputIt1, class InputIt2, class OutputIt, class Compare>
OutputIt merge(InputIt1 first1, InputIt1 last1,
               InputIt2 first2, InputIt2 last2,
               OutputIt d_first, Compare comp) {
  // Interleave while both ranges still have elements.
  while (first1 != last1 && first2 != last2) {
    if (!comp(*first2, *first1)) {
      // Tie or first range is smaller: the first range wins.
      *d_first = *first1;
      ++first1;
    } else {
      *d_first = *first2;
      ++first2;
    }
    ++d_first;
  }
  // At most one range is non-empty here; flush whatever remains of the
  // first range and then whatever remains of the second.
  return std::copy(first2, last2, std::copy(first1, last1, d_first));
}
/**
 * Merges the sorted ranges [first1, last1) and [first2, last2) into the
 * output starting at d_first, ordering elements with operator<.
 *
 * An element is taken from the second range only when it is strictly less
 * than the current element of the first range; on a tie the element from
 * the first range is written first.
 *
 * Returns the output iterator one past the last element written.
 */
template<class InputIt1, class InputIt2, class OutputIt>
OutputIt merge(InputIt1 first1, InputIt1 last1,
               InputIt2 first2, InputIt2 last2,
               OutputIt d_first) {
  // Interleave while both ranges still have elements.
  while (first1 != last1 && first2 != last2) {
    if (!(*first2 < *first1)) {
      // Tie or first range is smaller: the first range wins.
      *d_first = *first1;
      ++first1;
    } else {
      *d_first = *first2;
      ++first2;
    }
    ++d_first;
  }
  // At most one range is non-empty here; flush whatever remains of the
  // first range and then whatever remains of the second.
  return std::copy(first2, last2, std::copy(first1, last1, d_first));
}
}
#endif
......@@ -617,71 +617,6 @@ inline void swap(sorted_vector_map<K,V,C,A,G>& a,
return a.swap(b);
}
/*
 * Moves every element of b into a, exploiting the fact that both maps are
 * already sorted. When a key is present in both maps, the value from b is
 * the one kept. On return a holds the merged contents and b is empty.
 *
 * The merged map is built in a default-constructed temporary, so C and A
 * must be default constructible.
 *
 * std::merge is not used here because, for a key present in both a and b,
 * it is undefined which element would be inserted into the output map last
 * (and therefore which one would end up in the map).
 */
template<class K, class V, class C, class A, class G>
inline void merge(sorted_vector_map<K,V,C,A,G>& a,
                  sorted_vector_map<K,V,C,A,G>& b) {
  // Pass 1: count the distinct keys so the result can be reserved up front.
  auto total = a.size();
  auto pa = a.begin();
  auto pb = b.begin();
  while (pa != a.end() && pb != b.end()) {
    if (a.key_comp()(pa->first, pb->first)) {
      // Key only ahead in a; already counted via a.size().
      ++pa;
      continue;
    }
    if (a.key_comp()(pb->first, pa->first)) {
      // Key present only in b.
      ++total;
    } else {
      // Same key in both maps: counted once, consume both.
      ++pa;
    }
    ++pb;
  }
  if (pb != b.end()) {
    total += b.end() - pb;
  }

  // Pass 2: build the merged map in sorted order, hinting at end().
  sorted_vector_map<K,V,C,A,G> merged;
  merged.reserve(total);
  pa = a.begin();
  pb = b.begin();
  while (pa != a.end() && pb != b.end()) {
    if (a.key_comp()(pa->first, pb->first)) {
      merged.insert(merged.end(), std::move(*pa));
      ++pa;
      continue;
    }
    // Decide whether the keys match before *pb is moved from.
    const bool sameKey = !a.key_comp()(pb->first, pa->first);
    merged.insert(merged.end(), std::move(*pb));
    if (sameKey) {
      // b's value overrides a's; drop a's entry.
      ++pa;
    }
    ++pb;
  }
  for (; pa != a.end(); ++pa) {
    merged.insert(merged.end(), std::move(*pa));
  }
  for (; pb != b.end(); ++pb) {
    merged.insert(merged.end(), std::move(*pb));
  }

  a.swap(merged);
  b.clear();
}
//////////////////////////////////////////////////////////////////////
}
......
......@@ -14,56 +14,55 @@
* limitations under the License.
*/
#include "folly/Format.h"
#include "folly/Merge.h"
#include <gtest/gtest.h>
#include <map>
#include <vector>
#include <glog/logging.h>
TEST(MergeTest, NonOverlapping) {
std::vector<int> a = {0, 2, 4, 6};
std::vector<int> b = {1, 3, 5, 7};
std::vector<int> c;
#include "folly/sorted_vector_types.h"
#include "folly/Benchmark.h"
folly::merge(a.begin(), a.end(),
b.begin(), b.end(),
std::back_inserter(c));
EXPECT_EQ(8, c.size());
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(i, c[i]);
}
}
namespace {
TEST(MergeTest, OverlappingInSingleInputRange) {
std::vector<std::pair<int, int>> a = {{0, 0}, {0, 1}};
std::vector<std::pair<int, int>> b = {{2, 2}, {3, 3}};
std::map<int, int> c;
using folly::sorted_vector_map;
folly::merge(a.begin(), a.end(),
b.begin(), b.end(),
std::inserter(c, c.begin()));
EXPECT_EQ(3, c.size());
sorted_vector_map<int, int> a;
sorted_vector_map<int, int> b;
// First value is inserted, second is not
EXPECT_EQ(c[0], 0);
BENCHMARK(merge_by_setting, iters) {
while (iters--) {
// copy to match merge benchmark
auto a_cpy = a;
auto b_cpy = b;
for (const auto& kv : b_cpy) {
a_cpy[kv.first] = kv.second;
}
}
EXPECT_EQ(c[2], 2);
EXPECT_EQ(c[3], 3);
}
BENCHMARK_RELATIVE(merge, iters) {
while (iters--) {
auto a_cpy = a;
auto b_cpy = b;
merge(a_cpy, b_cpy);
}
}
}
TEST(MergeTest, OverlappingInDifferentInputRange) {
std::vector<std::pair<int, int>> a = {{0, 0}, {1, 1}};
std::vector<std::pair<int, int>> b = {{0, 2}, {3, 3}};
std::map<int, int> c;
// Benchmark results on my dev server (Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz)
//
// ============================================================================
// folly/test/SortedVectorBenchmark.cpp relative time/iter iters/s
// ============================================================================
// merge_by_setting 482.01us 2.07K
// merge 2809.19% 17.16us 58.28K
// ============================================================================
folly::merge(a.begin(), a.end(),
b.begin(), b.end(),
std::inserter(c, c.begin()));
EXPECT_EQ(3, c.size());
int main(int argc, char *argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
for (int i = 0; i < 1000; ++i) {
a[2 * i] = 2 * i;
b[2 * i + 1] = 2 * i + 1;
}
// Value from a is inserted, value from b is not.
EXPECT_EQ(c[0], 0);
folly::runBenchmarks();
return 0;
EXPECT_EQ(c[1], 1);
EXPECT_EQ(c[3], 3);
}
......@@ -301,29 +301,3 @@ TEST(SortedVectorTest, EmptyTest) {
EXPECT_TRUE(emptyMap.lower_bound(10) == emptyMap.end());
EXPECT_TRUE(emptyMap.find(10) == emptyMap.end());
}
// Exercises merge(a, b) on two sorted_vector_map<int, int> instances that
// share exactly one key (0). Expects b to be emptied, a to hold the union of
// the keys, and the shared key to carry the value that came from b.
TEST(SortedVectorTest, MergeTest) {
sorted_vector_map<int, int> a;
a[0] = 0;
a[1] = 1;
a[5] = 5;
a[10] = 10;
sorted_vector_map<int, int> b;
// Key 0 also exists in a, with a different value.
b[0] = 10;
b[3] = 13;
b[7] = 17;
b[11] = 111;
merge(a, b);
// All of b's elements are expected to have been moved out.
EXPECT_TRUE(b.empty());
// 4 keys from a + 4 keys from b - 1 shared key.
EXPECT_EQ(a.size(), 7);
// Shared key: the value from b is expected to win.
EXPECT_EQ(a[0], 10);
EXPECT_EQ(a[1], 1);
EXPECT_EQ(a[3], 13);
EXPECT_EQ(a[5], 5);
EXPECT_EQ(a[7], 17);
EXPECT_EQ(a[10], 10);
EXPECT_EQ(a[11], 111);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment