All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Public Member Functions | Public Attributes | Private Member Functions | List of all members
lib.glob.NTupleGlob Class Reference
Inheritance diagram for lib.glob.NTupleGlob:

Public Member Functions

def __init__
 
def dataframe
 
def histogram
 

Public Attributes

 glob
 
 branches
 

Private Member Functions

def _hadd
 
def _load_histogram
 

Detailed Description

Definition at line 126 of file glob.py.

Constructor & Destructor Documentation

def lib.glob.NTupleGlob.__init__ (   self,
  g,
  branches 
)

Definition at line 127 of file glob.py.

def __init__(self, g, branches):
    """Remember which files to read and which branches to load from them.

    Args:
        g: Either a ``list``, which is stored unchanged, or any other
            value, which is handed to ``glob.glob`` and replaced by the
            list of matches it returns.
        branches: Stored unchanged as ``self.branches``.
    """
    # Only an actual list bypasses pattern expansion.
    self.glob = g if isinstance(g, list) else glob.glob(g)
    self.branches = branches

Member Function Documentation

def lib.glob.NTupleGlob._hadd (   self,
  hs 
)
private

Definition at line 243 of file glob.py.

def _hadd(self, *hs):
    """Add several histograms together.

    Args:
        *hs: Two-element pairs. The first elements are summed
            element-wise along axis 0; presumably each pair is
            ``(counts, bin_edges)`` as produced by ``np.histogram`` --
            TODO confirm against the producer of these pairs.

    Returns:
        A tuple ``(summed_first_elements, second_element_of_first_pair)``.
        The second elements of the remaining pairs are ignored.
    """
    contents = []
    for bin_contents, _unused in hs:
        contents.append(bin_contents)
    total = np.sum(contents, axis=0)
    # The second element is taken from the first pair only.
    return total, hs[0][1]
def lib.glob.NTupleGlob._load_histogram (   self,
  cryo,
  run,
  valname,
  whenname,
  hist_dict 
)
private

Definition at line 247 of file glob.py.

def _load_histogram(self, cryo, run, valname, whenname, hist_dict):
    """Look up a stored histogram, creating the nesting on the way down.

    ``hist_dict`` is descended through the keys ``cryo``, ``run`` and
    ``valname`` in that order. Any level that does not exist yet is
    created as an empty dict, so ``hist_dict`` is mutated by this call.

    Returns:
        The value stored under ``whenname`` at the innermost level, or
        ``None`` when nothing is stored there.
    """
    node = hist_dict
    for key in (cryo, run, valname):
        # Creates the level when absent, then steps into it.
        node = node.setdefault(key, {})
    return node.get(whenname)
258 
259 
260 
def _load_histogram
Definition: glob.py:247
def lib.glob.NTupleGlob.dataframe (   self,
  branches = None,
  maxfile = None,
  nproc = 1,
  f = None 
)

Definition at line 134 of file glob.py.

def dataframe(self, branches=None, maxfile=None, nproc=1, f=None):
    """Load every file of the glob and return one concatenated DataFrame.

    Args:
        branches: Branches to hand to the loader; ``None`` means use
            ``self.branches``.
        maxfile: When truthy, only the first ``maxfile`` files are read.
        nproc: Number of worker processes, or the string ``"auto"`` to use
            ``multiprocessing.cpu_count()``.
        f: Forwarded unchanged to ``_loaddf`` as the last element of each
            task tuple (its meaning is defined by ``_loaddf``).

    Returns:
        The per-file frames concatenated along axis 0 and re-indexed so
        that ``entry`` is unique across files, sorted by index, with the
        ``__ntuple`` column removed.
    """
    worker_count = multiprocessing.cpu_count() if nproc == "auto" else nproc
    wanted = self.branches if branches is None else branches
    files = self.glob[:maxfile] if maxfile else self.glob

    # One task per file: (path, branches, 2 * position, f). How the third
    # element is used is up to _loaddf.
    tasks = [(path, wanted, idx * 2, f) for idx, path in enumerate(files)]

    with Pool(processes=worker_count) as pool:
        loaded = pool.imap_unordered(_loaddf, tasks)
        frames = list(tqdm(loaded, total=len(tasks), unit="file", delay=5))

    merged = pd.concat(frames, axis=0, ignore_index=False)

    # Rebuild the index so that "__ntuple" is no longer needed: every
    # distinct (__ntuple, entry) pair gets its own running group number,
    # which becomes the new "entry". Index levels beyond the first two are
    # kept as they are (assumes the first two levels are "__ntuple" and
    # "entry" -- TODO confirm against _loaddf).
    inner_levels = merged.index.names[2:]
    merged = merged.reset_index()
    merged["entry"] = merged.groupby(["__ntuple", "entry"]).ngroup()
    merged.set_index(["entry"] + inner_levels, inplace=True, verify_integrity=True)
    merged.sort_index(inplace=True)
    del merged["__ntuple"]

    return merged
auto enumerate(Iterables &&...iterables)
Range-for loop helper tracking the number of iteration.
Definition: enumerate.h:69
def lib.glob.NTupleGlob.histogram (   self,
  var,
  bins,
  when = NTupleProc(),
  flatten_runs = False,
  flatten_cryo = False,
  maxfile = None,
  nproc = 1 
)

Definition at line 162 of file glob.py.

def histogram(self, var, bins, when=NTupleProc(), flatten_runs=False, flatten_cryo=False, maxfile=None, nproc=1):
    """Histogram variables over all files of the glob and merge the results.

    Each file is processed by ``_process`` in a worker pool. Every worker
    result is a nested mapping ``cryo -> run -> variable -> when`` whose
    leaves are histograms; leaves with the same four keys are added
    together with ``_hadd``.

    Args:
        var: A variable or a list of variables; a single value is wrapped
            in a list before being sent to the workers.
        bins: Forwarded unchanged to ``_process``.
        when: A selection or a list of selections; a single value is
            wrapped in a list before being sent to the workers.
        flatten_runs: When true, the run level is removed by adding the
            histograms of all runs.
        flatten_cryo: When true, the cryostat level is removed by adding
            the ``"E"`` and ``"W"`` histograms (presumably the East and
            West cryostats -- TODO confirm).
        maxfile: When truthy, only the first ``maxfile`` files are read.
        nproc: Number of worker processes, or the string ``"auto"`` to use
            ``multiprocessing.cpu_count()``.

    Returns:
        A nested dict of histograms keyed, from the outside in, by
        cryostat (unless ``flatten_cryo``), run (unless ``flatten_runs``),
        variable name and when-name.

    Raises:
        KeyError: If flattening is requested and ``"E"`` or ``"W"`` (or a
            matching run/variable/when entry) is missing from the merged
            result.
        IndexError: If ``flatten_runs`` is requested and there is no run.

    NOTE(review): the previous version contained, after an unconditional
    ``return ret``, a block that replaced each when-level dict by its
    ``"None"`` entry when a single falsy ``when`` was given. That block
    could never execute; it has been removed here so the returned
    structure is exactly what callers already receive. Re-introduce it
    deliberately if that collapsing is wanted.
    """
    if nproc == "auto":
        nproc = multiprocessing.cpu_count()

    if not isinstance(var, list):
        var = [var]
    if not isinstance(when, list):
        when = [when]

    thisglob = self.glob[:maxfile] if maxfile else self.glob
    globdata = [(f, self.branches, var, when, bins) for f in thisglob]

    # Merge the per-file results into one cryo -> run -> var -> when tree.
    ret = {}
    with Pool(processes=nproc) as pool:
        for hists in tqdm(pool.imap_unordered(_process, globdata), total=len(globdata), unit="file", delay=5):
            for cname, by_run in hists.items():
                for runname, by_var in by_run.items():
                    for varname, by_when in by_var.items():
                        for whenname, new_hist in by_when.items():
                            # Also creates the intermediate levels in ret.
                            hist = self._load_histogram(cname, runname, varname, whenname, ret)
                            if hist is not None:
                                new_hist = self._hadd(hist, new_hist)
                            ret[cname][runname][varname][whenname] = new_hist

    if flatten_cryo:
        # Add "W" onto "E"; the key structure is taken from the "E" side.
        merged = {}
        for runname, by_val in ret["E"].items():
            merged[runname] = {}
            for valname, by_when in by_val.items():
                merged[runname][valname] = {
                    whenname: self._hadd(hist, ret["W"][runname][valname][whenname])
                    for whenname, hist in by_when.items()
                }
        ret = merged

    if flatten_runs:
        if flatten_cryo:
            flat = {}
            pairs = [(ret, flat)]
        else:
            flat = {"E": {}, "W": {}}
            pairs = [(ret["E"], flat["E"]), (ret["W"], flat["W"])]

        for hists, makeflat in pairs:
            # The first run defines which variables and selections exist.
            run0 = list(hists)[0]
            for valname, by_when in hists[run0].items():
                makeflat[valname] = {
                    whenname: self._hadd(*[hists[runname][valname][whenname] for runname in hists])
                    for whenname in by_when
                }
        ret = flat

    return ret
def _load_histogram
Definition: glob.py:247
auto zip(Iterables &&...iterables)
Range-for loop helper iterating across many collections at the same time.
Definition: zip.h:295
list
Definition: file_to_url.sh:28

Member Data Documentation

lib.glob.NTupleGlob.branches

Definition at line 132 of file glob.py.

lib.glob.NTupleGlob.glob

Definition at line 129 of file glob.py.


The documentation for this class was generated from the following file: