Commit df324b0a authored by Tom Jackson's avatar Tom Jackson Committed by Sara Golemon

GroupBy

Reviewed By: @ot

Differential Revision: D2188740
parent 17751e04
......@@ -37,6 +37,56 @@ struct ArgumentReference
T&& // int -> int&&
>::type> {};
template <class Key, class Value>
class Group : public GenImpl<Value&&, Group<Key, Value>> {
public:
static_assert(!std::is_reference<Key>::value &&
!std::is_reference<Value>::value,
"Key and Value must be decayed types");
typedef std::vector<Value> VectorType;
typedef Key KeyType;
typedef Value ValueType;
Group(Key key, VectorType values)
: key_(std::move(key)), values_(std::move(values)) {}
const Key& key() const { return key_; }
size_t size() const { return values_.size(); }
const VectorType& values() const { return values_; }
VectorType& values() { return values_; }
VectorType operator|(const detail::Collect<VectorType>&) const {
return values();
}
VectorType operator|(const detail::CollectTemplate<std::vector>&) const {
return values();
}
template <class Body>
void foreach(Body&& body) const {
for (auto& value : values_) {
body(std::move(value));
}
}
template <class Handler>
bool apply(Handler&& handler) const {
for (auto& value : values_) {
if (!handler(std::move(value))) {
return false;
}
}
return true;
}
private:
Key key_;
mutable VectorType values_;
};
namespace detail {
/*
......@@ -941,6 +991,79 @@ class Order : public Operator<Order<Selector, Comparer>> {
}
};
/**
* GroupBy - Group values by a given key selector, producing a sequence of
* groups.
*
* This type is usually used through the 'groupBy' helper function, like:
*
* auto bests
* = from(places)
* | groupBy([](const Place& p) {
* return p.city;
* })
* | [](Group<std::string, Place>&& g) {
* cout << g.key() << ": " << (g | first).description;
* };
*/
template <class Selector>
class GroupBy : public Operator<GroupBy<Selector>> {
Selector selector_;
public:
GroupBy() {}
explicit GroupBy(Selector selector) : selector_(std::move(selector)) {}
template <class Value,
class Source,
class ValueDecayed = typename std::decay<Value>::type,
class Key = typename std::result_of<Selector(Value)>::type,
class KeyDecayed = typename std::decay<Key>::type>
class Generator
: public GenImpl<
Group<KeyDecayed, ValueDecayed>&&,
Generator<Value, Source, ValueDecayed, Key, KeyDecayed>> {
static_assert(!Source::infinite, "Cannot group infinite source!");
Source source_;
Selector selector_;
public:
Generator(Source source, Selector selector)
: source_(std::move(source)), selector_(std::move(selector)) {}
typedef Group<KeyDecayed, ValueDecayed> GroupType;
template <class Handler>
bool apply(Handler&& handler) const {
std::unordered_map<KeyDecayed, typename GroupType::VectorType> groups;
source_ | [&](Value value) {
const Value& cv = value;
auto& group = groups[selector_(cv)];
group.push_back(std::forward<Value>(value));
};
for (auto& kg : groups) {
GroupType group(kg.first, std::move(kg.second));
if (!handler(std::move(group))) {
return false;
}
kg.second.clear();
}
return true;
}
};
template <class Source, class Value, class Gen = Generator<Value, Source>>
Gen compose(GenImpl<Value, Source>&& source) const {
return Gen(std::move(source.self()), selector_);
}
template <class Source, class Value, class Gen = Generator<Value, Source>>
Gen compose(const GenImpl<Value, Source>& source) const {
return Gen(source.self(), selector_);
}
};
/*
* TypeAssertion - For verifying the exact type of the value produced by a
* generator. Useful for testing and debugging, and acts as a no-op at runtime.
......@@ -1897,7 +2020,7 @@ class Indirect : public Operator<Indirect> {
explicit Generator(Source source) : source_(std::move(source)) {}
template <class Body>
void foreach (Body&& body) const {
void foreach(Body&& body) const {
source_.foreach([&](Value value) {
return body(&value);
});
......
......@@ -17,18 +17,19 @@
#ifndef FOLLY_GEN_BASE_H
#define FOLLY_GEN_BASE_H
#include <algorithm>
#include <functional>
#include <memory>
#include <random>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <algorithm>
#include <random>
#include <vector>
#include <unordered_set>
#include <folly/Range.h>
#include <folly/Optional.h>
#include <folly/Conv.h>
#include <folly/Optional.h>
#include <folly/Range.h>
#include <folly/gen/Core.h>
/**
......@@ -241,6 +242,9 @@ class To<StringPiece> {
}
};
template<class Key, class Value>
class Group;
namespace detail {
template<class Self>
......@@ -326,6 +330,9 @@ class Skip;
template<class Selector, class Comparer = Less>
class Order;
template<class Selector>
class GroupBy;
template<class Selector>
class Distinct;
......@@ -640,6 +647,12 @@ Order orderByDescending(Selector selector = Selector()) {
return Order(std::move(selector));
}
template<class Selector,
class GroupBy = detail::GroupBy<Selector>>
GroupBy groupBy(Selector selector = Identity()) {
return GroupBy(std::move(selector));
}
template<class Selector = Identity,
class Distinct = detail::Distinct<Selector>>
Distinct distinctBy(Selector selector = Selector()) {
......
......@@ -1106,6 +1106,28 @@ TEST(Gen, Just) {
}
}
TEST(Gen, GroupBy) {
vector<string> strs{"zero", "one", "two", "three", "four",
"five", "six", "seven", "eight", "nine"};
auto gb = from(strs) | groupBy([](const string& str) { return str.size(); });
EXPECT_EQ(10, gb | mapOp(count) | sum);
EXPECT_EQ(3, gb | count);
vector<string> mode{"zero", "four", "five", "nine"};
EXPECT_EQ(
mode,
gb | maxBy([](const Group<size_t, string>& g) { return g.size(); })
| as<vector>());
vector<string> largest{"three", "seven", "eight"};
EXPECT_EQ(
largest,
gb | maxBy([](const Group<size_t, string>& g) { return g.key(); })
| as<vector>());
}
int main(int argc, char *argv[]) {
testing::InitGoogleTest(&argc, argv);
gflags::ParseCommandLineFlags(&argc, &argv, true);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment