All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
rootstat.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 ######################################################################
3 #
4 # Name: rootstat.py
5 #
6 # Purpose: Analyze art root file and dump object statistics.
7 #
8 # Created: 27-Nov-2012 Herbert Greenlee
9 #
10 # Usage:
11 #
12 # stat.py <options> [@filelist] [file1 file2 ...]
13 #
14 # Options:
15 #
16 # [-h|--help] - Print help message.
17 # --level n - Branch level (default 1). Use --level 1 to see top
18 # branches only. Use --level 2 to also see subbranches.
19 # --nfile n - Number of files to analyze (default all).
20 # --all - Print analysis of each file (default is only summary).
21 # --s1 - Sort branches by uncompressed size.
22 # --s2 - Sort branches by compressed size (default).
23 # --s3 - Sort branches by name.
24 #
25 # Arguments:
26 #
27 # @filelist - File list containing one input file per line.
28 # file1 file2 ... - Input files.
29 #
30 ######################################################################
31 
32 from __future__ import absolute_import
33 from __future__ import print_function
34 import sys, os
35 import project_utilities
36 import larbatch_posix
37 
# Import the ROOT module while hiding our command line options from it,
# then globally turn off root warnings (only errors and above are shown).

myargv = sys.argv

# Expose only the program name to ROOT for the duration of the import.
sys.argv = myargv[:1]

# Drop TERM from the environment before ROOT is imported.
# NOTE(review): presumably keeps ROOT from touching the terminal -- confirm.
os.environ.pop('TERM', None)

import ROOT
ROOT.gErrorIgnoreLevel = ROOT.kError

# Restore the full command line for our own argument parsing.
sys.argv = myargv
49 
50 # Print help.
51 
def help():
    """Print the usage section of this script's header comment.

    The script file named by sys.argv[0] is read line by line.  Printing
    starts at the first line whose characters 2-8 are 'stat.py' (the usage
    line of the header comment) and stops at the next line beginning with
    '######'.  The leading two comment characters are stripped from each
    printed line; lines of two characters or fewer are printed as blank
    lines.

    Returns None.
    """

    doprint = False

    # Use a context manager so the script file is always closed.

    with open(sys.argv[0]) as script:
        for line in script:
            if line[2:9] == 'stat.py':
                doprint = True
            elif line[0:6] == '######' and doprint:
                doprint = False
            if doprint:
                if len(line) > 2:

                    # The line already ends in a newline, so print nothing
                    # after it (a trailing space would indent the next line).

                    print(line[2:], end='')
                else:
                    print()
69 
70 # Analyze root file.
71 
def analyze(root, level, gtrees, gbranches, doprint, sorttype):
    """Accumulate tree and branch statistics for one open root file.

    Arguments:

    root      - Open root file object (GetListOfKeys() and Get() are used).
    level     - Branch level.  0 = trees only; 1 = the '.obj' subbranches
                of art::Wrapper<T> branches of the Events tree; greater
                than 1 = also the subbranches of each '.obj' subbranch.
    gtrees    - Dictionary {tree name: number of entries}.  Updated in
                place by adding the entries found in this file.
    gbranches - Dictionary {branch name: [total bytes, zipped bytes]}.
                Updated in place by adding the sizes found in this file.
                The totals over all '.obj' subbranches are added under
                the key 'All branches'.
    doprint   - If true, print the analysis of this file.
    sorttype  - Order of printed branches: 1 = by total bytes,
                2 = by zipped bytes, anything else = by name.

    Returns None.
    """

    # Find the trees in this file, remembering the Events tree.

    trees = {}
    events = None
    for key in root.GetListOfKeys():
        objname = key.GetName()
        if objname not in trees:
            obj = root.Get(objname)
            if obj and obj.InheritsFrom('TTree'):
                trees[objname] = obj
                if objname == 'Events':
                    events = obj

    # Print summary of trees and remember information about trees.

    if doprint:
        print('\nTrees:\n')
    for key in sorted(trees):
        nentry = trees[key].GetEntriesFast()
        if doprint:
            print('%s has %d entries.' % (key, nentry))
        gtrees[key] = gtrees.get(key, 0) + nentry

    # If level is zero, we are done (don't analyze branches).  This is
    # checked before the branch header is printed, so that no header is
    # printed without a table following it.

    if level == 0:
        return

    # Print summary of branches in Events tree.

    if doprint:
        print('\nBranches of Events tree:\n')

    if not events:
        return

    if doprint:

        # Header columns use the same widths as the data rows below.

        print('%14s%14s%8s %s' % ('Total bytes', 'Zipped bytes',
                                  'Comp.', 'Branch name'))
        print('%14s%14s%8s %s' % ('-----------', '------------',
                                  '-----', '-----------'))

    # Rows are kept in a list (not a dictionary keyed by size), so that
    # branches with equal sizes do not overwrite each other.

    rows = []
    ntotall = 0
    nzipall = 0

    # Loop over branches of Events tree.

    for branch in events.GetListOfBranches():

        # Only look at data products (class art::Wrapper<T>).

        if not branch.GetClass().GetName().startswith('art::Wrapper<'):
            continue

        # Loop over subbranches.

        for subbranch in branch.GetListOfBranches():
            name = subbranch.GetName()

            # Only look at '.obj' subbranch (wrapped object).

            if not name.endswith('.obj'):
                continue

            ntot = subbranch.GetTotBytes("*")
            nzip = subbranch.GetZipBytes("*")
            ntotall = ntotall + ntot
            nzipall = nzipall + nzip
            rows.append((ntot, nzip, name))
            _accumulate_branch(gbranches, name, ntot, nzip)

            # Loop over subsubbranches (attributes of wrapped object).
            # These are listed, but not added to the overall totals.

            if level > 1:
                for subsubbranch in subbranch.GetListOfBranches():
                    subname = subsubbranch.GetName()
                    subtot = subsubbranch.GetTotBytes("*")
                    subzip = subsubbranch.GetZipBytes("*")
                    rows.append((subtot, subzip, subname))
                    _accumulate_branch(gbranches, subname, subtot, subzip)

    name = 'All branches'
    if doprint:

        # Print sorted information about branches.

        for ntot, nzip, rowname in _sorted_branch_rows(rows, sorttype):
            print('%14d%14d%8.2f %s' % (
                ntot, nzip, _compression_factor(ntot, nzip), rowname))

        # Do summary of all branches.

        print('%14d%14d%8.2f %s' % (
            ntotall, nzipall, _compression_factor(ntotall, nzipall), name))

        # Print average event size.

        nev = events.GetEntriesFast()
        if nev != 0:
            nevtot = 1.e-6 * float(ntotall) / float(nev)
            nevzip = 1.e-6 * float(nzipall) / float(nev)
        else:
            nevtot = 0.
            nevzip = 0.
        print()
        print('%10d events.' % nev)
        print('%7.2f Mb average size per event.' % nevtot)
        print('%7.2f Mb average zipped size per event.' % nevzip)

    # Remember the totals of this file (done whether or not we print).

    _accumulate_branch(gbranches, name, ntotall, nzipall)

    # Done.

    return


def _compression_factor(ntot, nzip):
    """Return ntot / nzip as a float, or 0. if nzip is zero."""
    if nzip != 0:
        return float(ntot) / float(nzip)
    return 0.


def _accumulate_branch(gbranches, name, ntot, nzip):
    """Add ntot and nzip to the [total, zipped] entry gbranches[name]."""
    if name in gbranches:
        gbranches[name][0] = gbranches[name][0] + ntot
        gbranches[name][1] = gbranches[name][1] + nzip
    else:
        gbranches[name] = [ntot, nzip]


def _sorted_branch_rows(rows, sorttype):
    """Return (ntot, nzip, name) rows in ascending sorttype order.

    Rows with equal sizes are ordered by name.
    """
    if sorttype == 1:
        return sorted(rows, key=lambda row: (row[0], row[2]))
    if sorttype == 2:
        return sorted(rows, key=lambda row: (row[1], row[2]))
    return sorted(rows, key=lambda row: row[2])
247 
248 # Main program.
249 
def main(argv):
    """Parse the command line, analyze the input files and print a summary.

    Arguments:

    argv - Full command line (argv[0] is the program name).  See the
           header comment of this file for options and arguments.

    Returns the exit status: 0 on success (or after printing help),
    1 for an unknown option, a non-integer value for --level or --nfile,
    a missing file list or input file, or an input file that can not be
    opened.
    """

    # Parse arguments.

    input_files = []
    level = 1
    nfilemax = 0
    print_all = 0
    sorttype = 2
    sort_flags = {'--s1': 1, '--s2': 2, '--s3': 3}

    args = argv[1:]
    while len(args) > 0:
        arg = args[0]
        if arg == '-h' or arg == '--help':

            # Help.

            help()
            return 0

        elif arg in ('--level', '--nfile') and len(args) > 1:

            # Analyze level or number of files (integer valued options).

            try:
                value = int(args[1])
            except ValueError:
                print('Option %s requires an integer value, not %s'
                      % (arg, args[1]))
                return 1
            if arg == '--level':
                level = value
            else:
                nfilemax = value
            del args[0:2]

        elif arg == '--all':

            # All files flag.

            print_all = 1
            del args[0]

        elif arg in sort_flags:

            # Sort flag.

            sorttype = sort_flags[arg]
            del args[0]

        elif arg.startswith('-'):

            # Unknown option (or value option given without a value).
            # startswith is used so that an empty argument is not an error
            # here.

            print('Unknown option %s' % arg)
            return 1

        elif arg.startswith('@'):

            # Read in file list to input files, skipping blank lines.

            filelistname = arg[1:]
            if larbatch_posix.exists(filelistname):
                for filename in larbatch_posix.readlines(filelistname):
                    filename = filename.strip()
                    if filename:
                        input_files.append(filename)
            else:
                print('File list %s does not exist.' % filelistname)
                return 1
            del args[0]

        else:

            # Add single file to input files.

            input_files.append(arg)
            del args[0]

    # Loop over input files.

    gtrees = {}
    gbranches = {}
    nfile = 0

    for input_file in input_files:

        if nfilemax > 0 and nfile >= nfilemax:
            break
        nfile = nfile + 1

        if not larbatch_posix.exists(input_file):
            print('Input file %s does not exist.' % input_file)
            return 1

        print('\nOpening %s' % input_file)
        root = ROOT.TFile.Open(input_file)

        # TFile.Open gives back a null object on failure, so test the
        # object itself before calling methods on it.

        if not root or not root.IsOpen() or root.IsZombie():
            print('Failed to open %s' % input_file)
            return 1

        # Analyze this file, and always close it afterwards so that open
        # files do not pile up when many inputs are analyzed.

        try:
            analyze(root, level, gtrees, gbranches, print_all, sorttype)
        finally:
            root.Close()

    print('\n%d files analyzed.' % nfile)

    # Print summary of trees.

    print('\nTrees from all files:\n')
    for key in sorted(gtrees):
        print('%s has %d total entries.' % (key, gtrees[key]))

    # Print summary of branches.

    allname = 'All branches'
    ntot = 0
    nzip = 0

    if level > 0:
        print('\nBranches of Events tree from all files:\n')

        # Header columns use the same widths as the data rows below.

        print('%14s%14s%8s %s' % ('Total bytes', 'Zipped bytes',
                                  'Comp.', 'Branch name'))
        print('%14s%14s%8s %s' % ('-----------', '------------',
                                  '-----', '-----------'))

        # Rows are kept in a list (not a dictionary keyed by size), so
        # that branches with equal sizes do not overwrite each other.
        # Rows with equal sizes are ordered by name.

        rows = [(sizes[0], sizes[1], key)
                for key, sizes in gbranches.items() if key != allname]
        if sorttype == 1:
            rows.sort(key=lambda row: (row[0], row[2]))
        elif sorttype == 2:
            rows.sort(key=lambda row: (row[1], row[2]))
        else:
            rows.sort(key=lambda row: row[2])

        # Print sorted information about branches.

        for rowtot, rowzip, name in rows:
            if rowzip != 0:
                comp = float(rowtot) / float(rowzip)
            else:
                comp = 0.
            print('%14d%14d%8.2f %s' % (rowtot, rowzip, comp, name))

        # Print totals of all branches.

        if allname in gbranches:
            ntot = gbranches[allname][0]
            nzip = gbranches[allname][1]
            if nzip != 0:
                comp = float(ntot) / float(nzip)
            else:
                comp = 0.
            print('%14d%14d%8.2f %s' % (ntot, nzip, comp, allname))

    # Print number of events and, if branches were analyzed, the average
    # event size.  ntot and nzip are defined for every level.

    if 'Events' in gtrees:
        nev = gtrees['Events']
        print()
        print('%10d events.' % nev)
        if level > 0:
            if nev != 0:
                nevtot = 1.e-6 * float(ntot) / float(nev)
                nevzip = 1.e-6 * float(nzip) / float(nev)
            else:
                nevtot = 0.
                nevzip = 0.
            print('%7.2f Mb average size per event.' % nevtot)
            print('%7.2f Mb average zipped size per event.' % nevzip)

    # Done.

    return 0
439 
440 # Invoke main program.
441 
if __name__ == '__main__':

    # Run the main program and hand its return value to the shell as the
    # exit status.

    status = main(sys.argv)
    sys.exit(status)
do one_file $F done echo for F in find $TOP name CMakeLists txt print
def help
Definition: rootstat.py:52
def main
Definition: rootstat.py:250
def analyze
Definition: rootstat.py:72
open(RACETRACK) or die("Could not open file $RACETRACK for writing")