All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DBScanAlg.cxx
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 //
3 // \file DBScanAlg.cxx
4 //
5 // kinga.partyka@yale.edu
6 //
7 // This algorithm finds clusters of hits, they can be of arbitrary shape.You need to specify 2(3) parameters:
8 // epsilon, epsilon2 and MinPoints as explained in the corresponding xml file.In my comments a 'point' reference
9 // appears quite often. A 'point' is basically a simple hit which only contains wire and time information. This
10 // algorithm is based on DBSCAN(Density Based Spatial Clustering of Applications with Noise): M. Ester, H.-P. Kriegel,
11 // J. Sander, and X. Xu, A density-based algorithm for discovering clusters in large spatial databases with noise,
12 // Second International Conference on Knowledge Discovery and Data Mining, pp. 226-231, AAAI Press. 1996.
13 // ( Some of this code is from "Antonio Gulli's coding playground")
14 ////////////////////////////////////////////////////////////////////////
15 
16 //Framework includes:
17 #include "art/Framework/Services/Registry/ServiceHandle.h"
18 #include "fhiclcpp/ParameterSet.h"
19 #include "messagefacility/MessageLogger/MessageLogger.h"
20 
23 #include "larcorealg/CoreUtils/NumericUtils.h" // util::absDiff()
27 #include "RStarTree/RStarBoundingBox.h"
29 
30 #include <cmath>
31 #include <cstdlib>
32 
33 //----------------------------------------------------------
34 // RStarTree stuff
35 //----------------------------------------------------------
38 {
39  BoundingBox bb;
40  bb.edges[0].first = x - std::abs(dx);
41  bb.edges[0].second = x + std::abs(dx);
42 
43  bb.edges[1].first = y - std::abs(dy);
44  bb.edges[1].second = y + std::abs(dy);
45  return bb;
46 }
47 
48 //----------------------------------------------------------
49 // Set Visitor
50 //
51 // collects accepted leafs in the std::set sResult and the std::vector
52 // vResult
53 struct Visitor {
54  unsigned int count;
55  std::vector<unsigned int> vResult;
56  std::set<unsigned int> sResult;
57  const bool ContinueVisiting;
59  void
60  operator()(const RTree::Leaf* const leaf)
61  {
62  vResult.push_back(leaf->leaf);
63  sResult.insert(leaf->leaf);
64  count++;
65  }
66 };
67 
68 //----------------------------------------------------------
69 // Ellipse acceptor
70 //
71 // Roughly quivalent to what FindNeighbors was doing before, except
72 // that it doesn't handle dead wires
73 struct AcceptEllipse {
75  double r[2];
76  double c[2]; ///< center of the bounding box
77  double d[2]; ///< half
78  explicit AcceptEllipse(const BoundingBox& b, double r1, double r2) : m_bound(b), r(), c()
79  {
80  r[0] = r1;
81  r[1] = r2;
82  c[0] = (m_bound.edges[0].second + m_bound.edges[0].first) / 2.0;
83  c[1] = (m_bound.edges[1].second + m_bound.edges[1].first) / 2.0;
84  d[0] = (m_bound.edges[0].second - m_bound.edges[0].first) / 2.0;
85  d[1] = (m_bound.edges[1].second - m_bound.edges[1].first) / 2.0;
86  }
87  bool
88  operator()(const RTree::Node* const node) const
89  {
90  // At the node level we use a rectangualr overlap condition
91  return m_bound.overlaps(node->bound);
92  }
93  bool
94  operator()(const RTree::Leaf* const leaf) const
95  {
96  // At the leaf level we have to more careful
97  double C[2], D[2];
98  C[0] = (leaf->bound.edges[0].second + leaf->bound.edges[0].first) / 2.0;
99  C[1] = (leaf->bound.edges[1].second + leaf->bound.edges[1].first) / 2.0;
100  D[0] = (leaf->bound.edges[0].second - leaf->bound.edges[0].first) / 2.0;
101  D[1] = (leaf->bound.edges[1].second - leaf->bound.edges[1].first) / 2.0;
102  double t = 0;
103  for (int i = 0; i < 2; ++i) {
104  // This is only approximate, it will accept a few classes of
105  // non-overlapping ellipses
106  t += ((c[i] - C[i]) * (c[i] - C[i])) / ((d[i] + D[i]) * (d[i] + D[i]));
107  }
108  return (t < 1);
109  }
110 
111 private:
112  static const BoundingBox EmptyBoundingBox; // for uninitialized bounds
113 
115  {
116  r[0] = r[1] = 1.0;
117  c[0] = (m_bound.edges[0].second - m_bound.edges[0].first) / 2.0;
118  c[1] = (m_bound.edges[1].second - m_bound.edges[1].first) / 2.0;
119  }
120 };
121 
123 
124 //----------------------------------------------------------
125 // FindNeighbors acceptor
126 //
127 // Exactly equivalent to what FindNeighbors was doing before (assuming
128 // that points are preresented with no width in wire-space and with
129 // width in time-space
130 //
131 // We're going to make this work with nodal bounding boxes by
132 // accepting on center-inside the box or comparing to the nearest
133 // point on the edge using the point-to-point comparison function and
134 // a the maximum time-width.
137  double fEps[2];
138  double fMaxWidth;
139  double fWireDist;
140  std::vector<unsigned int>& fBadWireSum;
142  double eps,
143  double eps2,
144  double maxWidth,
145  double wireDist,
146  std::vector<unsigned int>& badWireSum)
147  : fBound(b), fEps(), fMaxWidth(maxWidth), fWireDist(wireDist), fBadWireSum(badWireSum)
148  {
149  fEps[0] = eps;
150  fEps[1] = eps2;
151  }
152  // return a point-like box at the center of b
154  center(const BoundingBox& b) const
155  {
156  BoundingBox c;
157  c.edges[0].first = c.edges[0].second = (b.edges[0].first + b.edges[0].second) / 2.0;
158  c.edges[1].first = c.edges[1].second = (b.edges[1].first + b.edges[1].second) / 2.0;
159  return c;
160  }
162  center() const
163  {
164  return center(fBound);
165  }
166  // Here we implement the findNeighbor algorithm from point to point
167  bool
168  isNear(const BoundingBox& b) const
169  {
170  // Precomupation of a few things...box centers, wire bridging
171  // quantities, etc...
172  double bCenter0 = center(b).edges[0].first;
173  double bCenter1 = center(b).edges[1].first;
174  double tCenter0 = center().edges[0].first; // "t" is for test-point
175  double tCenter1 = center().edges[1].first;
176  // widths in the time direction
177  double bWidth = std::abs(b.edges[1].second - b.edges[1].first);
178  double tWidth = std::abs(fBound.edges[1].second - fBound.edges[1].first);
179  // bad channel counting
180  unsigned int wire1 = (unsigned int)(tCenter0 / fWireDist + 0.5);
181  unsigned int wire2 = (unsigned int)(bCenter0 / fWireDist + 0.5);
182  // Clamp the wize number to something resonably.
183  ///\todo activating these should throw a warning or something
184  if (wire1 < fBadWireSum.size()) wire1 = fBadWireSum.size();
185  if (wire2 < fBadWireSum.size()) wire2 = fBadWireSum.size();
186  // The getSimilarity[2] wirestobridge calculation is asymmetric,
187  // but is plugged into the cache symmetrically.I am assuming that
188  // this is OK because the wires that are hit cannot be bad.
189  unsigned int wirestobridge = util::absDiff(fBadWireSum[wire1], fBadWireSum[wire2]);
190  double cmtobridge = wirestobridge * fWireDist;
191 
192  double sim = std::abs(tCenter0 - bCenter0) - cmtobridge;
193  sim *= sim; // square it
194 
195  if (std::abs(tCenter0 - bCenter0) > 1e-10) {
196  cmtobridge *= std::abs((tCenter1 - bCenter1) / (tCenter0 - bCenter0));
197  }
198  double sim2 = std::abs(tCenter1 - bCenter1) - cmtobridge;
199  sim2 *= sim2; // square it
200 
201  double k = 0.1;
202  double WFactor = (exp(4.6 * ((tWidth * tWidth) + (bWidth * bWidth)))) * k; // ??
203  // We clamp WFactor on [ 1.0, 6.25 ]
204  if (WFactor < 1.0) WFactor = 1.0;
205  if (WFactor > 6.25) WFactor = 6.25;
206 
207  // Now we implement the test...see FindNeighbors
208  return (((sim) / (fEps[0] * fEps[0])) + ((sim2) / (fEps[1] * fEps[1] * (WFactor))) <= 1);
209  }
211  nearestPoint(const BoundingBox& b) const
212  {
213  BoundingBox n;
214  BoundingBox c = center();
215  for (int i = 0; i < 2; ++i) {
216  // The work for finding the nearest point is the same in both
217  // dimensions
218  if (b.edges[i].first > c.edges[i].second) {
219  // Our point is lower than the low edge of the box
220  n.edges[i].first = n.edges[i].second = b.edges[i].first;
221  }
222  else if (b.edges[0].second < c.edges[0].first) {
223  // Our point is higher than the high edge of the box
224  n.edges[i].first = n.edges[i].second = b.edges[i].second;
225  }
226  else {
227  // In this dimension our point lies within the boxes bounds
228  n.edges[i].first = n.edges[i].second = c.edges[i].first;
229  }
230  }
231  // Now give the time dimension a width
232  n.edges[1].first -= fMaxWidth / 2.0;
233  n.edges[1].second += fMaxWidth / 2.0;
234  return n;
235  }
236  bool
237  operator()(const RTree::Node* const node) const
238  {
239  // if the our point overlaps the bounding box, accept immediately
240  if (fBound.overlaps(node->bound)) return true;
241  // No overlap, so compare to the nearest point on the bounding box
242  // under the assumption that the maximum width applies for that
243  // point
244  return isNear(nearestPoint(node->bound));
245  }
246  bool
247  operator()(const RTree::Leaf* const leaf) const
248  {
249  return isNear(leaf->bound);
250  }
251 };
252 
253 namespace cluster {
254  const unsigned int kNO_CLUSTER = UINT_MAX;
255  const unsigned int kNOISE_CLUSTER = UINT_MAX - 1;
256 }
257 
258 //----------------------------------------------------------
259 // DBScanAlg stuff
260 //----------------------------------------------------------
261 cluster::DBScanAlg::DBScanAlg(fhicl::ParameterSet const& p)
262 {
263  fEps = p.get<double>("eps");
264  fEps2 = p.get<double>("epstwo");
265  fMinPts = p.get<int>("minPts");
266  fClusterMethod = p.get<int>("Method");
267  fDistanceMetric = p.get<int>("Metric");
268 }
269 
270 //----------------------------------------------------------
271 void
274  const std::vector<art::Ptr<recob::Hit>>& allhits,
275  std::set<uint32_t> badChannels,
276  const std::vector<geo::WireID>& wireids)
277 {
278  if (wireids.size() && wireids.size() != allhits.size()) {
279  throw cet::exception("DBScanAlg") << "allhits size = " << allhits.size()
280  << " wireids size = " << wireids.size() << " do not match\n";
281  }
282  // clear all the data member vectors for the new set of hits
283  fps.clear();
284  fpointId_to_clusterId.clear();
285  fnoise.clear();
286  fvisited.clear();
287  fsim.clear();
288  fsim2.clear();
289  fsim3.clear();
290  fclusters.clear();
291  fWirePitch.clear();
292 
293  fBadChannels = badChannels;
294  fBadWireSum.clear();
295 
296  // Clear the RTree
297  fRTree.Remove(RTree::AcceptAny(), RTree::RemoveLeaf());
298  // and the bounds list
299  fRect.clear();
300 
301  //------------------------------------------------------------------
302  // Determine spacing between wires (different for each detector)
303  ///get 2 first wires and find their spacing (wire_dist)
304 
305  art::ServiceHandle<geo::Geometry const> geom;
306 
307  for (size_t p = 0; p < geom->Nplanes(); ++p)
308  fWirePitch.push_back(geom->WirePitch(p));
309 
310  // Collect the bad wire list into a useful form
311  if (fClusterMethod) { // Using the R*-tree
312  fBadWireSum.resize(geom->Nchannels());
313  unsigned int count = 0;
314  for (unsigned int i = 0; i < fBadWireSum.size(); ++i) {
315  count += fBadChannels.count(i);
316  fBadWireSum[i] = count;
317  }
318  }
319 
320  // Collect the hits in a useful form,
321  // and take note of the maximum time width
322  fMaxWidth = 0.0;
323  for (unsigned int j = 0; j < allhits.size(); ++j) {
324  int dims = 3; //our point is defined by 3 elements:wire#,center of the hit, and the hit width
325  std::vector<double> p(dims);
326 
327  double tickToDist = detProp.DriftVelocity(detProp.Efield(), detProp.Temperature());
328  tickToDist *= 1.e-3 * sampling_rate(clockData); // 1e-3 is conversion of 1/us to 1/ns
329  if (!wireids.size())
330  p[0] = (allhits[j]->WireID().Wire) * fWirePitch[allhits[j]->WireID().Plane];
331  else
332  p[0] = (wireids[j].Wire) * fWirePitch[allhits[j]->WireID().Plane];
333  p[1] = allhits[j]->PeakTime() * tickToDist;
334  p[2] = 2. * allhits[j]->RMS() * tickToDist; //width of a hit in cm
335 
336  // check on the maximum width condition
337  if (p[2] > fMaxWidth) fMaxWidth = p[2];
338 
339  fps.push_back(p);
340 
341  if (fClusterMethod) { // Using the R*-tree
342  // Convert these same values into dbsPoints to feed into the R*-tree
343  dbsPoint pp(p[0], p[1], 0.0, p[2] / 2.0); // note dividing by two
344  fRTree.Insert(j, pp.bounds());
345  // Keep a parallel list already made up. We could use fps instead, but...
346  fRect.push_back(pp);
347  }
348  }
349 
350  fpointId_to_clusterId.resize(fps.size(), kNO_CLUSTER); // Not zero as before!
351  fnoise.resize(fps.size(), false);
352  fvisited.resize(fps.size(), false);
353 
354  if (fClusterMethod) { // Using the R*-tree
355  Visitor visitor = fRTree.Query(RTree::AcceptAny(), Visitor());
356  mf::LogInfo("DBscan") << "InitScan: hits RTree loaded with " << visitor.count << " items.";
357  }
358  mf::LogInfo("DBscan") << "InitScan: hits vector size is " << fps.size();
359 
360  return;
361 }
362 
363 //----------------------------------------------------------
364 double
365 cluster::DBScanAlg::getSimilarity(const std::vector<double> v1, const std::vector<double> v2)
366 {
367 
368  //for Euclidean distance comment everything out except this-->>>
369  // return std::sqrt((v2[1]-v1[1])*(v2[1]-v1[1])+(v2[0]-v1[0])*(v2[0]-v1[0]));
370  //------------------------------------------------------------------------
371  // return std::abs( v2[0]-v1[0]); //for rectangle
372  //----------------------------------------------------------------------
373  //Manhattan distance:
374  //return std::abs(v1[0]-v2[0])+std::abs(v1[1]-v2[1]);
375 
376  /// \todo this code assumes that all planes have the same wire pitch
377  double wire_dist = fWirePitch[0];
378 
379  unsigned int wire1 =
380  (unsigned int)(v1[0] / wire_dist + 0.5); //to make sure to get desired integer
381  unsigned int wire2 = (unsigned int)(v2[0] / wire_dist + 0.5);
382  int wirestobridge = 0;
383 
384  if (wire1 > wire2) {
385  unsigned int wire = wire1;
386  wire1 = wire2;
387  wire2 = wire;
388  }
389 
390  for (unsigned int i = wire1; i < wire2; i++) {
391  if (fBadChannels.find(i) != fBadChannels.end()) wirestobridge++;
392  }
393 
394  double cmtobridge = wirestobridge * wire_dist;
395  //---------------------------------------------------------------------
396  return ((std::abs(v2[0] - v1[0]) - cmtobridge) *
397  (std::abs(v2[0] - v1[0]) - cmtobridge)); //for ellipse
398 }
399 
400 //----------------------------------------------------------------
401 double
402 cluster::DBScanAlg::getSimilarity2(const std::vector<double> v1, const std::vector<double> v2)
403 {
404 
405  //-------------------------------------------
406  //return std::abs( v2[1]-v1[1]);//for rectangle
407  //------------------------------------------
408 
409  /// \todo this code assumes all planes have the same wire pitch
410  double wire_dist = fWirePitch[0];
411 
412  unsigned int wire1 =
413  (unsigned int)(v1[0] / wire_dist + 0.5); //to make sure to get desired integer
414  unsigned int wire2 = (unsigned int)(v2[0] / wire_dist + 0.5);
415  int wirestobridge = 0;
416 
417  if (wire1 > wire2) {
418  unsigned int wire = wire1;
419  wire1 = wire2;
420  wire2 = wire;
421  }
422 
423  for (unsigned int i = wire1; i < wire2; i++) {
424  if (fBadChannels.find(i) != fBadChannels.end()) wirestobridge++;
425  }
426 
427  double cmtobridge = wirestobridge * wire_dist;
428 
429  if (std::abs(v2[0] - v1[0]) > 1e-10) {
430  cmtobridge *= std::abs((v2[1] - v1[1]) / (v2[0] - v1[0]));
431  }
432  else
433  cmtobridge = 0;
434 
435  return ((std::abs(v2[1] - v1[1]) - cmtobridge) *
436  (std::abs(v2[1] - v1[1]) - cmtobridge)); //for ellipse
437 }
438 
439 //----------------------------------------------------------------
440 double
441 cluster::DBScanAlg::getWidthFactor(const std::vector<double> v1, const std::vector<double> v2)
442 {
443 
444  //double k=0.13; //this number was determined by looking at flat muon hits' widths.
445  //The average width of these hits in cm is 0.505, so 4*2*(w1^2)=2.04
446  //where w1=w2=0.505, e^2.044= 7.69. In order not to change the distance
447  //in time direction of the ellipse we want to make it equal to 1 for
448  //these hits. Thus the k factor is k=1/7.69=0.13//for coeff=4
449 
450  //..................................................
451  double k = 0.1; //for 4.5 coeff
452  double WFactor = (exp(4.6 * ((v1[2] * v1[2]) + (v2[2] * v2[2])))) * k;
453  //........................................................
454  // Let's try something different:
455  if (WFactor > 1) {
456  if (WFactor < 6.25)
457  return WFactor; //remember that we are increasing the distance in
458  //eps2 as std::sqrt of this number (i.e std::sqrt(6.25))
459  else
460  return 6.25;
461  }
462  else
463  return 1.0;
464 }
465 
466 //----------------------------------------------------------------
467 //\todo this is O(n) in the number of hits, while the high performance
468 // claimed for DBSCAN relies on it being O(log n)!
469 std::vector<unsigned int>
470 cluster::DBScanAlg::findNeighbors(unsigned int pid, double threshold, double threshold2)
471 {
472  std::vector<unsigned int> ne;
473 
474  for (int unsigned j = 0; j < fsim.size(); j++) {
475  if ((pid != j) &&
476  (((fsim[pid][j]) / (threshold * threshold)) +
477  ((fsim2[pid][j]) / (threshold2 * threshold2 * (fsim3[pid][j])))) < 1) { //ellipse
478  ne.push_back(j);
479  }
480  } // end loop over fsim
481 
482  return ne;
483 }
484 
485 //-----------------------------------------------------------------
486 void
488 {
489  int size = fps.size();
490  fsim.resize(size, std::vector<double>(size));
491  for (int i = 0; i < size; i++) {
492  for (int j = i + 1; j < size; j++) {
493  fsim[j][i] = fsim[i][j] = getSimilarity(fps[i], fps[j]);
494  }
495  }
496 }
497 
498 //------------------------------------------------------------------
499 void
501 {
502  int size = fps.size();
503  fsim2.resize(size, std::vector<double>(size));
504  for (int i = 0; i < size; i++) {
505  for (int j = i + 1; j < size; j++) {
506  fsim2[j][i] = fsim2[i][j] = getSimilarity2(fps[i], fps[j]);
507  }
508  }
509 }
510 
511 //------------------------------------------------------------------
512 void
514 {
515  int size = fps.size();
516  fsim3.resize(size, std::vector<double>(size));
517 
518  for (int i = 0; i < size; i++) {
519  for (int j = i + 1; j < size; j++) {
520  fsim3[j][i] = fsim3[i][j] = getWidthFactor(fps[i], fps[j]);
521  }
522  }
523 }
524 
525 //----------------------------------------------------------------
526 /////////////////////////////////////////////////////////////////
527 // This is the algorithm that finds clusters:
528 // Run the selected clustering algorithm
529 void
531 {
532  switch (fClusterMethod) {
533  case 2: return run_dbscan_cluster();
534  case 1: return run_FN_cluster();
535  default:
536  computeSimilarity(); // watch out for this, they are *slow*
537  computeSimilarity2(); // "
538  computeWidthFactor(); // "
539  return run_FN_naive_cluster();
540  }
541 }
542 
543 //----------------------------------------------------------------
544 /////////////////////////////////////////////////////////////////
545 // This is the algorithm that finds clusters:
546 //
547 // DWM's implementation of DBScanAlg as much like the paper as possible
548 void
550 {
551  unsigned int cid = 0;
552  // foreach pid
553  for (size_t pid = 0; pid < fps.size(); pid++) {
554  // not already visited
555  if (fpointId_to_clusterId[pid] == kNO_CLUSTER) {
556  if (ExpandCluster(pid, cid)) { cid++; }
557  } // if (!visited
558  } // for
559  // END DBSCAN
560 
561  // Construct clusters, count noise, etc..
562  int noise = 0;
563  fclusters.resize(cid);
564  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
565  if (fpointId_to_clusterId[y] == kNO_CLUSTER) {
566  // This shouldn't happen...all points should be clasified by now!
567  mf::LogWarning("DBscan") << "Unclassified point!";
568  }
569  else if (fpointId_to_clusterId[y] == kNOISE_CLUSTER) {
570  ++noise;
571  }
572  else {
573  unsigned int c = fpointId_to_clusterId[y];
574  if (c >= cid) {
575  mf::LogWarning("DBscan") << "Point in cluster " << c << " when only " << cid
576  << " clusters wer found [0-" << cid - 1 << "]";
577  }
578  fclusters[c].push_back(y);
579  }
580  }
581  mf::LogInfo("DBscan") << "DWM (R*-tree): Found " << cid << " clusters...";
582  for (unsigned int c = 0; c < cid; ++c) {
583  mf::LogVerbatim("DBscan") << "\t"
584  << "Cluster " << c << ":\t" << fclusters[c].size();
585  }
586  mf::LogVerbatim("DBscan") << "\t"
587  << "...and " << noise << " noise points.";
588 }
589 
590 //----------------------------------------------------------------
591 // Find the neighbos of the given point
592 std::set<unsigned int>
594 {
595  dbsPoint region(fRect[point]);
596  Visitor visitor = fRTree.Query(AcceptFindNeighbors(region.bounds(),
597  fEps,
598  fEps2,
599  fMaxWidth,
600  fWirePitch[0], //\todo
601  fBadWireSum), // assumes
602  Visitor()); // equal
603  // pitch
604  return visitor.sResult;
605 }
606 //----------------------------------------------------------------
607 // Find the neighbos of the given point
608 std::vector<unsigned int>
610 {
611  dbsPoint region(fRect[point]);
612  Visitor visitor = fRTree.Query(AcceptFindNeighbors(region.bounds(),
613  fEps,
614  fEps2,
615  fMaxWidth,
616  fWirePitch[0], //\todo
617  fBadWireSum), // assumes
618  Visitor()); // equal
619  // pitch
620  std::vector<unsigned int>& v = visitor.vResult;
621  // find neighbors insures that the called point is not in the
622  // returned and this is intended as a drop-in replacement, so insure
623  // this condition
624  v.erase(std::remove(v.begin(), v.end(), point), v.end());
625  return v;
626 }
627 
628 //----------------------------------------------------------------
629 // Try to make a new cluster on the basis of point
630 bool
631 cluster::DBScanAlg::ExpandCluster(unsigned int point, unsigned int clusterID)
632 {
633  /* GetSetOfPoints for point*/
634  std::set<unsigned int> seeds = RegionQuery(point);
635 
636  // not enough support -> mark as noise
637  if (seeds.size() < fMinPts) {
638  fpointId_to_clusterId[point] = kNOISE_CLUSTER;
639  return false;
640  }
641  else {
642  // Add to the currecnt cluster
643  fpointId_to_clusterId[point] = clusterID;
644  for (std::set<unsigned int>::iterator itr = seeds.begin(); itr != seeds.end(); itr++) {
645  fpointId_to_clusterId[*itr] = clusterID;
646  }
647  seeds.erase(point);
648  while (!seeds.empty()) {
649  unsigned int currentP = *(seeds.begin());
650  std::set<unsigned int> result = RegionQuery(currentP);
651 
652  if (result.size() >= fMinPts) {
653  for (std::set<unsigned int>::iterator itr = result.begin(); itr != result.end(); itr++) {
654  unsigned int resultP = *itr;
655  // not already assigned to a cluster
656  if (fpointId_to_clusterId[resultP] == kNO_CLUSTER ||
657  fpointId_to_clusterId[resultP] == kNOISE_CLUSTER) {
658  if (fpointId_to_clusterId[resultP] == kNO_CLUSTER) { seeds.insert(resultP); }
659  fpointId_to_clusterId[resultP] = clusterID;
660  } // unclassified or noise
661  } // for
662  } // enough support
663  seeds.erase(currentP);
664  } // while
665  return true;
666  }
667 }
668 
669 //----------------------------------------------------------------
670 /////////////////////////////////////////////////////////////////
671 // This is the algorithm that finds clusters:
672 //
673 // The original findNeignbor based code converted to use a R*-tree,
674 // but not rearranged
675 void
677 {
678 
679  unsigned int cid = 0;
680  // foreach pid
681  for (size_t pid = 0; pid < fps.size(); pid++) {
682  // not already visited
683  if (!fvisited[pid]) {
684 
685  fvisited[pid] = true;
686  // get the neighbors
687  std::vector<unsigned int> ne = RegionQuery_vector(pid);
688 
689  // not enough support -> mark as noise
690  if (ne.size() < fMinPts) { fnoise[pid] = true; }
691  else {
692  // Add p to current cluster
693 
694  std::vector<unsigned int> c; // a new cluster
695 
696  c.push_back(pid); // assign pid to cluster
697  fpointId_to_clusterId[pid] = cid;
698  // go to neighbors
699  for (size_t i = 0; i < ne.size(); ++i) {
700  unsigned int nPid = ne[i];
701 
702  // not already visited
703  if (!fvisited[nPid]) {
704  fvisited[nPid] = true;
705  // go to neighbors
706  std::vector<unsigned int> ne1 = RegionQuery_vector(nPid);
707  // enough support
708  if (ne1.size() >= fMinPts) {
709 
710  // join
711 
712  for (size_t i = 0; i < ne1.size(); ++i) {
713  // join neighbord
714  ne.push_back(ne1[i]);
715  }
716  }
717  }
718 
719  // not already assigned to a cluster
720  if (fpointId_to_clusterId[nPid] == kNO_CLUSTER) {
721  c.push_back(nPid);
722  fpointId_to_clusterId[nPid] = cid;
723  }
724  }
725 
726  fclusters.push_back(c);
727 
728  cid++;
729  }
730  } // if (!visited
731  } // for
732 
733  int noise = 0;
734  // no_hits=fnoise.size();
735 
736  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
737  if (fpointId_to_clusterId[y] == kNO_CLUSTER) noise++;
738  }
739  mf::LogInfo("DBscan") << "FindNeighbors (R*-tree): Found " << cid << " clusters...";
740  for (unsigned int c = 0; c < cid; ++c) {
741  mf::LogVerbatim("DBscan") << "\t"
742  << "Cluster " << c << ":\t" << fclusters[c].size();
743  }
744  mf::LogVerbatim("DBscan") << "\t"
745  << "...and " << noise << " noise points.";
746 }
747 
748 //----------------------------------------------------------------
749 /////////////////////////////////////////////////////////////////
750 // This is the algorithm that finds clusters:
751 //
752 // The original findNeighrbor-based code.
753 void
755 {
756 
757  unsigned int cid = 0;
758  // foreach pid
759  for (size_t pid = 0; pid < fps.size(); ++pid) {
760  // not already visited
761  if (!fvisited[pid]) {
762 
763  fvisited[pid] = true;
764  // get the neighbors
765  std::vector<unsigned int> ne = findNeighbors(pid, fEps, fEps2);
766 
767  // not enough support -> mark as noise
768  if (ne.size() < fMinPts) { fnoise[pid] = true; }
769  else {
770  // Add p to current cluster
771 
772  std::vector<unsigned int> c; // a new cluster
773 
774  c.push_back(pid); // assign pid to cluster
775  fpointId_to_clusterId[pid] = cid;
776  // go to neighbors
777  for (size_t i = 0; i < ne.size(); ++i) {
778  unsigned int nPid = ne[i];
779 
780  // not already visited
781  if (!fvisited[nPid]) {
782  fvisited[nPid] = true;
783  // go to neighbors
784  std::vector<unsigned int> ne1 = findNeighbors(nPid, fEps, fEps2);
785  // enough support
786  if (ne1.size() >= fMinPts) {
787 
788  // join
789 
790  for (unsigned int i = 0; i < ne1.size(); i++) {
791  // join neighbord
792  ne.push_back(ne1[i]);
793  }
794  }
795  }
796 
797  // not already assigned to a cluster
798  if (fpointId_to_clusterId[nPid] == kNO_CLUSTER) {
799  c.push_back(nPid);
800  fpointId_to_clusterId[nPid] = cid;
801  }
802  }
803 
804  fclusters.push_back(c);
805 
806  cid++;
807  }
808  } // if (!visited
809  } // for
810 
811  int noise = 0;
812 
813  for (size_t y = 0; y < fpointId_to_clusterId.size(); ++y) {
814  if (fpointId_to_clusterId[y] == kNO_CLUSTER) ++noise;
815  }
816  mf::LogInfo("DBscan") << "FindNeighbors (naive): Found " << cid << " clusters...";
817  for (unsigned int c = 0; c < cid; ++c) {
818  mf::LogVerbatim("DBscan") << "\t"
819  << "Cluster " << c << ":\t" << fclusters[c].size() << " points";
820  }
821  mf::LogVerbatim("DBscan") << "\t"
822  << "...and " << noise << " noise points.";
823 }
double getSimilarity(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:365
const unsigned int kNO_CLUSTER
Definition: DBScanAlg.cxx:254
Functions to help with numbers.
Utilities related to art service access.
unsigned int count
Definition: DBScanAlg.cxx:54
void computeSimilarity()
Definition: DBScanAlg.cxx:487
void computeWidthFactor()
Definition: DBScanAlg.cxx:513
process_name cluster
Definition: cheaterreco.fcl:51
bool operator()(const RTree::Leaf *const leaf) const
Definition: DBScanAlg.cxx:247
Declaration of signal hit object.
pdgs p
Definition: selectors.fcl:22
AcceptFindNeighbors(const BoundingBox &b, double eps, double eps2, double maxWidth, double wireDist, std::vector< unsigned int > &badWireSum)
Definition: DBScanAlg.cxx:141
double Temperature() const
In kelvin.
std::vector< unsigned int > & fBadWireSum
Definition: DBScanAlg.cxx:140
static const BoundingBox EmptyBoundingBox
Definition: DBScanAlg.cxx:112
std::size_t size(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:561
void run_dbscan_cluster()
Definition: DBScanAlg.cxx:549
std::set< unsigned int > RegionQuery(unsigned int point)
Definition: DBScanAlg.cxx:593
double dy
Definition: DBScanAlg.h:37
void run_FN_naive_cluster()
Definition: DBScanAlg.cxx:754
double d[2]
half
Definition: DBScanAlg.cxx:77
bool isNear(const BoundingBox &b) const
Definition: DBScanAlg.cxx:168
double Efield(unsigned int planegap=0) const
kV/cm
bool operator()(const RTree::Leaf *const leaf) const
Definition: DBScanAlg.cxx:94
double c[2]
center of the bounding box
Definition: DBScanAlg.cxx:76
auto vector(Vector const &v)
Returns a manipulator which will print the specified array.
Definition: DumpUtils.h:265
const unsigned int kNOISE_CLUSTER
Definition: DBScanAlg.cxx:255
T abs(T value)
process_name opflash particleana ie ie y
double r[2]
Definition: DBScanAlg.cxx:75
IDparameter< geo::WireID > WireID
Member type of validated geo::WireID parameter.
double dx
Definition: DBScanAlg.h:37
unsigned int fClusterMethod
Which clustering method to use.
Definition: DBScanAlg.h:91
RTree::BoundingBox BoundingBox
Definition: DBScanAlg.h:33
std::set< unsigned int > sResult
Definition: DBScanAlg.cxx:56
const BoundingBox & fBound
Definition: DBScanAlg.cxx:136
void operator()(const RTree::Leaf *const leaf)
Definition: DBScanAlg.cxx:60
BoundingBox bounds() const
Definition: DBScanAlg.cxx:37
const BoundingBox & m_bound
Definition: DBScanAlg.cxx:74
BoundingBox center(const BoundingBox &b) const
Definition: DBScanAlg.cxx:154
bool operator()(const RTree::Node *const node) const
Definition: DBScanAlg.cxx:88
std::vector< unsigned int > vResult
Definition: DBScanAlg.cxx:55
double DriftVelocity(double efield=0., double temperature=0.) const
cm/us
DBScanAlg(fhicl::ParameterSet const &pset)
Definition: DBScanAlg.cxx:261
double getWidthFactor(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:441
bool operator()(const RTree::Node *const node) const
Definition: DBScanAlg.cxx:237
constexpr auto absDiff(A const &a, B const &b)
Returns the absolute value of the difference between two values.
Definition: NumericUtils.h:43
void computeSimilarity2()
Definition: DBScanAlg.cxx:500
std::vector< unsigned int > findNeighbors(unsigned int pid, double threshold, double threshold2)
Definition: DBScanAlg.cxx:470
AcceptEllipse(const BoundingBox &b, double r1, double r2)
Definition: DBScanAlg.cxx:78
std::vector< TrajPoint > seeds
Definition: DataStructs.cxx:14
std::vector< unsigned int > RegionQuery_vector(unsigned int point)
Definition: DBScanAlg.cxx:609
void InitScan(const detinfo::DetectorClocksData &clockData, const detinfo::DetectorPropertiesData &detProp, const std::vector< art::Ptr< recob::Hit >> &allhits, std::set< uint32_t > badChannels, const std::vector< geo::WireID > &wireids=std::vector< geo::WireID >())
Definition: DBScanAlg.cxx:272
bool ExpandCluster(unsigned int point, unsigned int clusterID)
Definition: DBScanAlg.cxx:631
unsigned int fMinPts
Definition: DBScanAlg.h:89
Contains all timing reference information for the detector.
double x
Definition: DBScanAlg.h:36
do i e
double y
Definition: DBScanAlg.h:36
double getSimilarity2(const std::vector< double > v1, const std::vector< double > v2)
Definition: DBScanAlg.cxx:402
pdgs k
Definition: selectors.fcl:22
std::size_t count(Cont const &cont)
double sampling_rate(DetectorClocksData const &data)
Returns the period of the TPC readout electronics clock.
BoundingBox center() const
Definition: DBScanAlg.cxx:162
BEGIN_PROLOG don t mess with this pandoraTrackGausCryoW true
art framework interface to geometry description
auto const detProp
const bool ContinueVisiting
Definition: DBScanAlg.cxx:57
unsigned int fDistanceMetric
Which distance metric to use.
Definition: DBScanAlg.h:92
BoundingBox nearestPoint(const BoundingBox &b) const
Definition: DBScanAlg.cxx:211