All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
icarusalg/icarusalg/Utilities/SimpleClustering.h
Go to the documentation of this file.
1 /**
2  * @file icarusalg/Utilities/SimpleClustering.h
3  * @brief Algorithms to cluster objects according to specified criteria.
4  * @author Gianluca Petrillo (petrillo@slac.stanford.edu)
5  * @date January 7, 2020
6  *
7  * This is a header-only library.
8  */
9 
10 #ifndef ICARUSALG_UTILITIES_SIMPLECLUSTERING_H
11 #define ICARUSALG_UTILITIES_SIMPLECLUSTERING_H
12 
13 // C/C++ standard libraries
14 #include <vector>
15 #include <algorithm> // std::transform(), std::sort()
16 #include <utility> // std::pair, std::move(), std::declval()
17 #include <type_traits> // std::decay_t
18 #include <cstddef> // std::size_t
19 
20 
21 // -----------------------------------------------------------------------------
22 namespace util {
23 
24  /**
25  * @brief Performs a simple clustering.
26  * @tparam Coll type of collection of objects to cluster
27  * @tparam KeyOp type of operation extracting the relevant key for clustering
28  * @tparam CmpOp type of operation determining if object belongs to a cluster
29  * @tparam RefOp type of operation extracting the object to store in cluster
30  * @tparam KeySortOp type of operation sorting the clustering keys
31  * @param coll collection of objects to cluster
32  * @param keyFunc operation extracting the relevant key for clustering
33  * @param sameGroup operation determining if an object belongs to a cluster
34  * @param objRef operation extracting the object to store in the cluster
35  * @param keySort operation sorting the clustering keys
36  * @return a STL vector of clusters of object "references"
37  *
38  * The algorithm clusters objects whose key is compatible.
39  * The key must be sortable (`keySort`). Each cluster is assigned a key value,
40  * and all the unclustered objects whose key (`keyFunc`) is compatible
41  * (`sameGroup`) with that cluster key are added to that cluster.
42  * The objects from `coll` are considered in order of key value, with the
43  * order being defined by `keySort`.
44  * Each cluster contains a derivative of the original object (`objRef`), which
45  * may be for example a pointer to the original object, a copy of it, or
46  * in fact anything `objRef` returns.
47  *
48  * The return value is in the form of a `std::vector` in which each element
49  * represents a cluster. Each of these clusters is a `std::vector` of
50  * the derivative objects.
51  *
52  *
53  * Requirements
54  * -------------
55  *
56  * * `Coll`: an iterable collection of objects of type `Object_t`;
57  * * `KeyOp`: a functor returning the key of an object, like
58  * `Key_t keyFunc(Object_t const&)`;
59  * * `KeySortOp`: a functor like `bool keySort(Key_t a, Key_t b)`
60  * returning strict ordering between key values `a` and `b`;
61  * * `CmpOp`: a functor like `bool keySort(Key_t a, Key_t b)` returning if
62  * an object with key `b` should belong to a cluster with key `a`;
63  * * `RefOp`: a functor returning the object to store in the cluster starting
64  * from an object in the original collection: `ObjRef_t objRef(Object_t)`;
65  * note that the result should probably *not* be a C++ reference since
66  * they don't go along well with containers.
67  *
68  */
69  template <
70  typename Coll,
71  typename KeyOp, typename CmpOp, typename RefOp, typename KeySortOp
72  >
73  auto clusterBy(
74  Coll const& objs,
75  KeyOp keyFunc, CmpOp sameGroup, RefOp objRef, KeySortOp keySort
76  );
77 
78  /// A version of `clusterBy()` storing a copy of each object as "reference".
79  template <typename Coll, typename KeyOp, typename CmpOp, typename KeySortOp>
80  auto clusterBy
81  (Coll const& objs, KeyOp keyFunc, CmpOp sameGroup, KeySortOp keySort);
82 
83  // ---------------------------------------------------------------------------
84 
85 } // namespace util
86 
87 
88 
89 // -----------------------------------------------------------------------------
90 // --- template implementation
91 // -----------------------------------------------------------------------------
92 namespace util::details {
93 
94  template <std::size_t I, typename KeySort>
95  struct TupleElementOp {
96  KeySort sorter;
97 
98  TupleElementOp(KeySort keySort): sorter(keySort) {}
99 
100  template <typename A, typename B>
101  auto operator() (A&& a, B&& b) const
102  { return sorter(std::forward<A>(a), std::forward<B>(b)); }
103 
104  }; // TupleElementOp<>
105 
106  template <std::size_t I, typename KeySort>
108  { return { keySort }; }
109 
110 } // namespace util::details
111 
112 
113 // -----------------------------------------------------------------------------
114 template <
115  typename Coll,
116  typename KeyOp, typename CmpOp, typename RefOp, typename KeySortOp
117  >
119  Coll const& objs,
120  KeyOp keyFunc, CmpOp sameGroup, RefOp objRef, KeySortOp keySort
121 ) {
122 
123  /*
124  * 1. create a list of object key (`keyFunc`) and object reference (`objRef`)
125  * 2. sort that list by key (`keySort`)
126  * 3. cluster object references with nearby keys (`sameGroup`)
127  *
128  * A new cluster is started every time for a new object `sameGroup` return
129  * `false` when applied on the key of the first object in the current cluster
130  * and the key of the new object.
131  *
132  */
133 
134  //
135  // some definitions
136  //
137  using Object_t = typename Coll::value_type;
138  using ObjectRef_t = std::decay_t<decltype(objRef(std::declval<Object_t>()))>;
139  using Cluster_t = std::vector<ObjectRef_t>;
140  using Clusters_t = std::vector<Cluster_t>;
141 
142  auto makeKeyAndObjRef = [keyFunc, objRef](auto&& obj)
143  { return std::make_pair(keyFunc(obj), objRef(obj)); };
144 
145  using KeyAndObjRef_t = decltype(makeKeyAndObjRef(*(util::begin(objs))));
146 
147  //
148  // working data: "map" of objects by key
149  //
150  std::vector<KeyAndObjRef_t> KeysAndObjRefs;
151  KeysAndObjRefs.reserve(objs.size());
153  util::begin(objs), util::end(objs), std::back_inserter(KeysAndObjRefs),
154  makeKeyAndObjRef
155  );
156 
157  // sort the map by key
158  std::sort(
159  util::begin(KeysAndObjRefs), util::end(KeysAndObjRefs),
160  details::makeTupleElementOp<0U>(keySort)
161  );
162 
163  //
164  // cluster the objects in the map by key proximity
165  //
166  Clusters_t clusters;
167  if (KeysAndObjRefs.empty()) return clusters;
168 
169  auto iKeyAndObjRef = util::begin(KeysAndObjRefs);
170  auto const end = util::end(KeysAndObjRefs);
171 
172  auto startCluster = [](auto& keyAndObjRef)
173  {
174  return
175  std::make_pair(keyAndObjRef.first, Cluster_t{ keyAndObjRef.second });
176  };
177  auto addToCluster = [](auto& cluster, auto& keyAndObjRef)
178  { cluster.second.push_back(std::move(keyAndObjRef.second)); };
179 
180  auto currentCluster = startCluster(*iKeyAndObjRef);
181  while(++iKeyAndObjRef != end) {
182 
183  if (sameGroup(iKeyAndObjRef->first, currentCluster.first)) {
184  addToCluster(currentCluster, *iKeyAndObjRef);
185  }
186  else {
187  clusters.push_back(std::move(currentCluster.second));
188  currentCluster = startCluster(*iKeyAndObjRef);
189  }
190 
191  } // while
192  clusters.push_back(std::move(currentCluster.second));
193 
194  return clusters;
195 
196 } // util::clusterBy(Coll, KeyOp, CmpOp, RefOp, KeySortOp)
197 
198 
199 // -----------------------------------------------------------------------------
200 template <typename Coll, typename KeyOp, typename CmpOp, typename KeySortOp>
201 auto util::clusterBy
202  (Coll const& objs, KeyOp keyFunc, CmpOp sameGroup, KeySortOp keySort)
203 {
204  return clusterBy(objs,
205  std::move(keyFunc), std::move(sameGroup),
206  [](auto const& obj){ return obj; },
207  std::move(keySort)
208  );
209 } // util::clusterBy(Coll, KeyOp, CmpOp, KeySortOp)
210 
211 
212 // -----------------------------------------------------------------------------
213 
214 #endif // ICARUSALG_UTILITIES_SIMPLECLUSTERING_H
static constexpr Sample_t transform(Sample_t sample)
TupleElementOp< I, KeySort > makeTupleElementOp(KeySort keySort)
process_name cluster
Definition: cheaterreco.fcl:51
process_name gaushit a
decltype(auto) constexpr end(T &&obj)
ADL-aware version of std::end.
Definition: StdUtils.h:77
auto end(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:585
auto clusterBy(Coll const &objs, KeyOp keyFunc, CmpOp sameGroup, RefOp objRef, KeySortOp keySort)
Performs a simple clustering.
decltype(auto) constexpr begin(T &&obj)
ADL-aware version of std::begin.
Definition: StdUtils.h:72
float A
Definition: dedx.py:137