All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SortModuleTimes.py
Go to the documentation of this file.
1 #!/usr/bin/env python2
2 #
3 # Brief: parses and outputs timing information from art logs
4 # Author: petrillo@fnal.gov
5 # Date: 201403??
6 #
7 # Run with '--help' argument for usage instructions.
8 #
9 # Version:
10 # 1.0 (petrillo@fnal.gov)
11 # first version
12 # 1.1 (petrillo@fnal.gov)
13 # support for compressed input files; added command line interface
14 # 1.2 (petrillo@fnal.gov)
15 # new mode to expose all the events
16 # 1.3 (petrillo@fnal.gov)
17 # permissive option to ignore errors in the input
18 # 1.4 (20140618, petrillo@fnal.gov)
19 # updating from optparse to argparse
20 # 1.5 (20140711, petrillo@fnal.gov)
21 # improved parsing relying on end-of-event markers; using python 2.7
22 #
23 
24 import sys, os
25 import math
26 import gzip
27 try: import bz2
28 except ImportError: pass
29 from collections import OrderedDict
30 
31 
# version string for the command line parser ("%(prog)s" is expanded by argparse)
Version = "%(prog)s 1.5"
# module description, also used as description by the command line parser
__doc__ = (
    "Prints statistics of the module timings based on the information from"
    " the Timing service."
)
34 
35 #
36 # statistics collection
37 #
def signed_sqrt(value):
    """Returns the square root of the magnitude of value, with the sign of value.

    In formula: sign(x) * sqrt(abs(x)).
    """
    if value < 0.:
        return -math.sqrt(-value)
    return math.sqrt(value)
# signed_sqrt()
43 
44 
class Stats:
    """Statistics collector.

    This class accumulates statistics on a single variable x.
    A new entry is added by add(), which accepts an optional weight.
    At any time, the following information about the sample is available:
    - n(): number of additions
    - weights(): total weight (matches n() unless weights are specified)
    - sum(): weighted sum of x
    - min(): minimum value of x seen so far (None if no entries yet)
    - max(): maximum value of x seen so far (None if no entries yet)
    - sumsq(): weighted sum of x^2
    - average(): weighted average of x (0 if the total weight is 0)
    - sqaverage(): weighted average of x^2 (0 if the total weight is 0)
    - rms2(): sqaverage() minus the square of average()
    - rms(): the signed square root of rms2()
    - stdev(): rms() with the n/(n-1) correction (0 with fewer than two entries)
    - stdevp(): an alias for rms()

    If the object is constructed with bFloat = False, the accumulators start
    as integers and stay integers until a real value or weight is add()ed.
    """
    def __init__(self, bFloat = True):
        self.clear(bFloat)

    def clear(self, bFloat = True):
        """Resets all the accumulators (to real zeros unless bFloat is false)."""
        zero = 0. if bFloat else 0
        self.e_n = 0
        self.e_w = zero
        self.e_sum = zero
        self.e_sumsq = zero
        self.e_min = None
        self.e_max = None
    # clear()

    def add(self, value, weight=1):
        """Adds a new item to the sample, with the specified weight.

        The accumulators stay integral only if both value and weight are
        integral (and the object was cleared with bFloat = False).
        """
        self.e_n += 1
        self.e_w += weight
        self.e_sum += weight * value
        self.e_sumsq += weight * value**2
        if self.e_min is None or value < self.e_min:
            self.e_min = value
        if self.e_max is None or value > self.e_max:
            self.e_max = value
    # add()

    def n(self):
        return self.e_n

    def weights(self):
        return self.e_w

    def sum(self):
        return self.e_sum

    def min(self):
        return self.e_min

    def max(self):
        return self.e_max

    def sumsq(self):
        return self.e_sumsq

    def average(self):
        if self.e_w == 0.:
            return 0.
        return float(self.e_sum)/self.e_w

    def sqaverage(self):
        if self.e_w == 0.:
            return 0.
        return float(self.e_sumsq)/self.e_w

    def rms2(self):
        return self.sqaverage() - self.average()**2

    def rms(self):
        # rms2() may come out negative; the sign is carried to the result
        return signed_sqrt(self.rms2())

    def stdev(self):
        if self.e_n < 2:
            return 0.
        return self.rms() * math.sqrt(float(self.e_n)/(self.e_n-1))

    def stdevp(self):
        return self.rms()
# class Stats
118 
119 
class EventKeyClass(tuple):
    """Event identifier: a tuple of run, subrun and event numbers."""
    def run(self):
        return self[0]

    def subRun(self):
        return self[1]

    def event(self):
        return self[2]

    def __str__(self):
        fields = (self.run(), self.subRun(), self.event())
        return "run %d subRun %d event %d" % fields
    # __str__()
# class EventKeyClass
131 
132 
class ModuleKeyClass(tuple):
    """Module identifier: a tuple with the instance (first) and the name (second)."""
    def name(self):
        return self[1]

    def instance(self):
        return self[0]

    def __str__(self):
        # printed as "name[instance]"
        return "%s[%s]" % (self.name(), self.instance())
# class ModuleKeyClass
140 
141 
class EntryDataClass(object):
    """A flexible data structure for per-event information.

    The object is associated to a specific, unique event.
    It can represent either the execution of the full event, or of a specific
    module on that event.
    The object gathers custom data (the keyword arguments of the constructor),
    which can be read back as attributes; the standard data members are:
    - time (default: None): seconds elapsed by the event
    - module (default: not defined): the module identification
    If time is None, we assume this event was never completed.
    The presence of a module data member implies that this object describes a
    module execution rather than the whole event.
    """
    def __init__(self, eventKey, **kargs):
        self.data = kargs
        self.data.setdefault('time', None)
        self.eventKey = eventKey
    # __init__()

    def __getattr__(self, attrName):
        # This is called only when the regular attribute lookup fails.
        # The data dictionary is fetched from __dict__ directly: on an instance
        # which has no `data` yet (e.g. while being copied or unpickled)
        # `self.data` would call __getattr__ again, recursing without end.
        try: return self.__dict__['data'][attrName]
        except KeyError: raise AttributeError(attrName)
    # __getattr__()

    def time(self):
        """Returns the stored time (None if not available)."""
        try: return self.data['time']
        except KeyError: return None
    # time()

    def isModule(self):
        """Returns whether a (non-empty) module identification is stored."""
        try: return bool(self.module)
        except AttributeError: return False
    # isModule()

    def isEvent(self):
        """Returns whether the entry describes a whole event (no module)."""
        return not self.isModule()

    def isMissing(self):
        """Returns whether no time information is available."""
        return self.time() is None

    def SetMissing(self):
        """Removes the time information from the entry."""
        self.data['time'] = None

    def __str__(self):
        s = str(self.eventKey)
        if self.isModule(): s += " module " + str(self.module)
        else: s += " event"
        s += ": "
        if self.time() is None: s += "(n/a)"
        else: s += "%g s" % self.time()
        return s
    # __str__()

# class EntryDataClass
194 
195 
class TimeModuleStatsClass(Stats):
    """Collects statistics about execution time.

    This class collects statistics about execution time of a module or the whole
    event.
    The timing information is added by add() function, with as argument an
    instance of EntryDataClass.
    Optionally, the object can keep track of all the entries separately.
    The order of insertion of the events is also recorded.
    By default, this does not happen and only statistics are stored.

    The sample can be forcibly filled with empty entries. The idea is that one
    event is added to the sample only when the information about its timing is
    available. If we are tracking the event keys, we can check if we have all
    the events and, if some event keys are missing, we can add an empty entry for
    them so that we have the correct number of entries in the sample.
    This is achieved by a call to complete().
    Note that, to keep the events in the correct order, one should check if the
    previous event is present or not, and complete() with it, before adding a
    new event. If the completion is performed after the new event is added, the
    previous event will be added after the new one, when complete() is actually
    called.
    """
    def __init__(self, moduleKey, bTrackEntries = False):
        """Constructor: specifies the module we collect information about.

        If the flag bTrackEntries is true, all the added events are stored singly.
        """
        Stats.__init__(self)
        self.key = moduleKey
        # None means "not tracking"; an empty dictionary means "no entries yet"
        self.entries = OrderedDict() if bTrackEntries else None
    # __init__()

    def add(self, data):
        """Adds a time to the sample.

        The argument data is an instance of EntryDataClass, that includes both
        event identification and timing information.
        Its time() is used as the value of the statistic; if the entry has no time
        (None), the event information is considered to be missing.

        Returns False if the events are tracked and the event key of data is
        already present (data is then ignored), True otherwise.
        """
        if self.entries is not None:
            if data.eventKey in self.entries: return False
            self.entries[data.eventKey] = data
        # if
        if not data.isMissing(): Stats.add(self, data.time())
        return True
    # add()

    def complete(self, eventKeys):
        """Makes sure that an entry for each of the keys in eventKeys is present.

        For event keys already known, nothing happens. For new event keys, an
        empty entry is added at the end of the list, with no time information.
        Note that the events are added at the bottom of the list, in the relative
        order in eventKeys.
        When more than one entry is already stored, only the last key in
        eventKeys is considered.

        If we are not tracking the events, nothing happens ever.
        Returns the number of entries that were added.
        """
        if self.entries is None: return 0
        # NOTE(review): presumably the earlier keys are skipped because they were
        # already completed by previous calls -- confirm with the callers
        if (len(self.entries) > 1): eventKeys = list(eventKeys)[-1:]
        res = 0
        for eventKey in eventKeys:
            if self.add(EntryDataClass(eventKey)): res += 1
        return res
    # complete()

    def getEvents(self):
        """Returns the list of known event keys (if tracking the events)."""
        # an actual list is returned, so that the result can be sliced
        return [] if self.entries is None else list(self.entries.keys())
    # getEvents()

    def getEntries(self):
        """Returns a list of the event statistics (if tracking the events)."""
        return [] if self.entries is None else list(self.entries.values())
    # getEntries()

    def nEntries(self):
        """Returns the number of recorded entries (throws if not tracking)."""
        return len(self.entries)
    # nEntries()

    def nEvents(self):
        """Returns the number of valid entries (events with timing)."""
        return self.n()
    # nEvents()

    def hasEmptyData(self):
        """Returns whether there are entries without timing information.

        Note: throws if not tracking events.
        """
        return self.nEntries() > self.nEvents()
    # hasEmptyData()

    def FormatStatsAsList(self, format_ = None):
        """Prints the collected information into a list.

        The list of strings includes a statistics ID (based on the key), an
        average time, a relative RMS in percent, the total time, the number of
        events with timing information and the timing extrema.
        If there are no events with timing, or their total time is zero, the
        list contains only the ID and "n/a".

        The format dictionary can contain format directives, for future use (no
        format directive is currently supported).
        """
        name = str(self.key)
        # a null total time is also excluded, since the relative RMS would
        # require a division by the (null) average
        if (self.n() == 0) or (self.sum() == 0.):
            return [ name, "n/a" ]
        return [
            name,
            "%g\"" % self.average(),
            "(RMS %4.1f%%)" % (self.rms() / self.average() * 100.),
            "total %g\"" % self.sum(), "(%d events:" % self.n(),
            "%g" % self.min(), "- %g)" % self.max(),
            ]
    # FormatStatsAsList()

    def FormatTimesAsList(self, format_ = None):
        """Prints the collected information into a list.

        The list of strings includes a statistics ID (based on the key), and
        a time entry for each of the events stored ("n/a" for the events
        with missing time).
        If the events are not tracked, or none is stored, only the ID is listed.
        The format dictionary can contain format directives; the ones supported
        so far are:
        - 'max_events' (int): limit the number of events to the first max_events
          (by default, all the available entries are printed)
        - 'format' (string, default: '%g'): the C-style formatting string for the
          numeric timings
        """
        if format_ is None: format_ = {}
        name = str(self.key)
        if not self.entries: return [ name, ]

        nEntries = len(self.entries)
        n = max(0, min(nEntries, format_.get('max_events', nEntries)))
        format_str = format_.get('format', '%g')

        output = [ name, ]
        for entry in list(self.entries.values())[:n]:
            if entry is None or entry.isMissing(): output.append("n/a")
            else: output.append(format_str % entry.time())
        # for
        return output
    # FormatTimesAsList()

# class TimeModuleStatsClass
345 
346 
class JobStatsClass:
    """A class collecting timing information from different modules.

    This is mostly a dictionary structure, but it is sorted.
    The supported interface includes access by key (dictionary-like) or by
    position (list-like).
    The stored objects are expected to provide a `key` attribute and a n()
    method returning the number of events they have collected.
    """
    def __init__(self, jobName = None):
        self.name = jobName
        self.moduleList = []   # statistics objects, in insertion order
        self.moduleStats = {}  # the same objects, by their key
    # __init__()

    def MaxEvents(self):
        """Returns the largest n() among the stored statistics (0 if none)."""
        # positions not assigned yet hold None, and are skipped
        counts = [ stats.n() for stats in self.moduleList if stats is not None ]
        return max(counts) if counts else 0
    # MaxEvents()

    def MinEvents(self):
        """Returns the smallest n() among the stored statistics (0 if none)."""
        counts = [ stats.n() for stats in self.moduleList if stats is not None ]
        return min(counts) if counts else 0
    # MinEvents()


    # replicate some list/dictionary interface
    def __iter__(self): return iter(self.moduleList)
    def __len__(self): return len(self.moduleList)
    def __getitem__(self, key):
        """Returns the statistics at position key (if integer) or with that key."""
        if isinstance(key, int): return self.moduleList.__getitem__(key)
        else: return self.moduleStats.__getitem__(key)
    # __getitem__()

    def __setitem__(self, key, value):
        """Stores the statistics object value.

        If key is an integer, value is stored at that position: the list is
        extended with None placeholders as needed, and a RuntimeError is raised
        if the position is already taken by statistics with a different key.
        Otherwise, value replaces the statistics currently registered with key,
        or it is appended to the list if there is none.
        """
        if isinstance(key, int):
            index = key
            if index < len(self.moduleList):
                current = self.moduleList[index]
                # a None placeholder can be overwritten by any statistics object
                if (current is not None) and (current.key != value.key):
                    raise RuntimeError(
                        "Trying to overwrite stats of module %s at #%d with module %s"
                        % (current.key, index, value.key)
                        )
                # if key mismatch
            else:
                self.moduleList.extend([ None ] * (index - len(self.moduleList) + 1))
            key = value.key
        else:
            try:
                index = self.moduleList.index(self.moduleStats[key])
            except KeyError: # new stats
                index = len(self.moduleList)
                self.moduleList.append(None)
            #
        # if ... else
        self.moduleStats[key] = value
        self.moduleList[index] = value
    # __setitem__()
# class JobStatsClass
404 
405 
406 #
407 # format parsing
408 #
class FormatError(RuntimeError):
    """Error raised when an input line does not have the expected format.

    The keyword arguments of the constructor are stored in the `data`
    dictionary of the exception.
    """
    def __init__(self, msg, **kargs):
        super(FormatError, self).__init__(msg)
        self.data = kargs
    # __init__()
# class FormatError
415 
def ParseTimeModuleLine(line):
    """Parses a line to extract module timing information.

    The line must be known to contain module timing information.
    The function returns a EntryDataClass including the timing information, or
    raises a FormatError if the line has no valid format. The exception data
    includes the type of line ("Module") and the event and module keys (each
    None if it could not be parsed).

    Format 1 (20140226):

    TimeModule> run: 1 subRun: 0 event: 10 beziertrackercc BezierTrackerModule 0.231838
    """
    Tokens = line.split()

    ModuleKey = None
    EventKey = None
    time = None

    # Format 1 parsing:
    try:
        EventKey = EventKeyClass((int(Tokens[2]), int(Tokens[4]), int(Tokens[6])))
        ModuleKey = ModuleKeyClass((Tokens[7], Tokens[8]))
        time = float(Tokens[9])
    except (IndexError, ValueError) as e: # missing token, or not a number
        raise FormatError(
            "TimeModule format not recognized: '%s' (%s)" % (line, str(e)),
            type="Module", event=EventKey, module=ModuleKey
            )
    # try ... except

    # validation of Format 1
    if (Tokens[0] != 'TimeModule>') \
      or (Tokens[1] != 'run:') \
      or (Tokens[3] != 'subRun:') \
      or (Tokens[5] != 'event:') \
      or (len(Tokens) != 10) \
      :
        raise FormatError(
            "TimeModule format not recognized: '%s'" % line,
            type="Module", event=EventKey, module=ModuleKey
            )
    # if

    return EntryDataClass(EventKey, module=ModuleKey, time=time)
# ParseTimeModuleLine()
458 
459 
def ParseTimeEventLine(line):
    """Parses a line to extract event timing information.

    The line must be known to contain event timing information.
    The function returns a EntryDataClass including the timing information, or
    raises a FormatError if the line has no valid format. The exception data
    includes the type of line ("Event") and the event key (None if it could not
    be parsed).

    Format 1 (20140226):

    TimeEvent> run: 1 subRun: 0 event: 10 0.231838
    """
    Tokens = line.split()

    EventKey = None
    time = None

    # Format 1 parsing:
    try:
        EventKey = EventKeyClass((int(Tokens[2]), int(Tokens[4]), int(Tokens[6])))
        time = float(Tokens[7])
    except (IndexError, ValueError) as e: # missing token, or not a number
        raise FormatError(
            "TimeEvent format not recognized: '%s' (%s)" % (line, str(e)),
            type="Event", event=EventKey
            )
    # try ... except

    # validation of Format 1
    if (Tokens[0] != 'TimeEvent>') \
      or (Tokens[1] != 'run:') \
      or (Tokens[3] != 'subRun:') \
      or (Tokens[5] != 'event:') \
      or (len(Tokens) != 8) \
      :
        raise FormatError("TimeEvent format not recognized: '%s'" % line,
            type="Event", event=EventKey)
    # if

    return EntryDataClass(EventKey, time=time)
# ParseTimeEventLine()
497 
498 
def OPEN(Path, mode = 'r'):
    """Opens a file, choosing how to open it from the suffix of its path.

    Paths ending with '.bz2' are opened with bz2.BZ2File, the ones ending with
    '.gz' with gzip.GzipFile, all the others with the plain open().
    Support for modes other than 'r' (read-only) are questionable.
    """
    if Path.endswith('.bz2'):
        opener = bz2.BZ2File
    elif Path.endswith('.gz'):
        opener = gzip.GzipFile
    else:
        opener = open
    return opener(Path, mode)
# OPEN()
508 
509 
def ParseInputFile(InputFilePath, AllStats, EventStats, options):
    """Parses a log file.

    The art log file at InputFilePath is parsed.
    The per-module statistics are added to the existing in AllStats (an instance
    of JobStatsClass), creating new ones as needed. Similarly, per-event
    statistics are added to EventStats (a TimeModuleStatsClass instance).

    options class can contain the following members:
    - Permissive (default: false): do not bail out when a format error is found;
      the entry is typically skipped. This often happens because the output line
      of the timing information is interrupted by some other output.
    - MaxEvents (default: all events): collect statistics for at most MaxEvents
      events (always the first ones); a negative value means no limit
    - CheckDuplicates (default: false): enables the single-event tracking, that
      allows to check for duplicates

    It returns the number of errors encountered.
    A FormatError is raised at the first format error unless options.Permissive
    is set. When MaxEvents events have been collected, NoMoreInput is raised
    (that name is looked up at module level, and must be defined there).
    The input file is closed in any case.
    """
    def CompleteEvent(CurrentEvent, EventStats, AllStats):
        """Make sure that CurrentEvent is known to all stats."""
        EventStats.complete(( CurrentEvent, ))
        for ModuleStats in AllStats:
            ModuleStats.complete(EventStats.getEvents())
    # CompleteEvent()

    def ReportFormatError(error, lineNo):
        """Prints on the error stream a message describing the format error."""
        msg = "Format error on '%s'@%d" % (InputFilePath, lineNo)
        # each piece of information is optional in the exception data
        for template, field in (
          (" (%s)", 'type'),
          (", for event %s", 'event'),
          (", module %s", 'module'),
          ):
            try: msg += template % str(error.data[field])
            except KeyError: pass
        # for
        sys.stderr.write(msg + "\n")
    # ReportFormatError()


    LogFile = OPEN(InputFilePath, 'r')

    nErrors = 0
    LastLine = None
    CurrentEvent = None
    try:
        for iLine, line in enumerate(LogFile):

            line = line.strip()
            if line == LastLine: continue # duplicate line
            LastLine = line

            if line.startswith("TimeModule> "):

                try:
                    TimeData = ParseTimeModuleLine(line)
                except FormatError as e:
                    nErrors += 1
                    ReportFormatError(e, iLine + 1)
                    if not options.Permissive: raise
                    continue
                # try ... except

                try:
                    ModuleStats = AllStats[TimeData.module]
                except KeyError: # first time we meet this module
                    ModuleStats = TimeModuleStatsClass \
                      (TimeData.module, bTrackEntries=options.CheckDuplicates)
                    AllStats[TimeData.module] = ModuleStats
                #

                ModuleStats.add(TimeData)
            elif line.startswith("TimeEvent> "):
                try:
                    TimeData = ParseTimeEventLine(line)
                except FormatError as e:
                    nErrors += 1
                    ReportFormatError(e, iLine + 1)
                    if not options.Permissive: raise
                    continue
                # try ... except

                EventStats.add(TimeData)
                if (options.MaxEvents >= 0) \
                  and (EventStats.n() >= options.MaxEvents):
                    if CurrentEvent: CompleteEvent(CurrentEvent, EventStats, AllStats)
                    raise NoMoreInput
            else:
                continue # not a line with timing information

            # on event change, the event being left is completed in all the stats
            if CurrentEvent != TimeData.eventKey:
                if CurrentEvent: CompleteEvent(CurrentEvent, EventStats, AllStats)
                CurrentEvent = TimeData.eventKey
            # if
        # for line in log file
        if CurrentEvent: CompleteEvent(CurrentEvent, EventStats, AllStats)
    finally:
        LogFile.close()

    return nErrors
# ParseInputFile()
611 
612 
613 #
614 # output
615 #
616 
class MaxItemLengthsClass:
    """A list with the maximum length of items seen.

    Facilitates the correct sizing of a table in text mode.

    When a list of strings is add()ed, for each position in the list the length
    of the string in that position is compared to the maximum one seen so far in
    that position, and that maximum value is updated if proper.
    The positions where no item has been seen yet hold None.
    """
    def __init__(self, n = 0):
        self.maxlength = [ None ] * n

    def add(self, items):
        """Updates the maximum lengths with the ones of the specified items.

        The length of each item is the one of its conversion to string.
        """
        for iItem, item in enumerate(items):
            if iItem >= len(self.maxlength): # new position
                self.maxlength.extend([ None ] * (iItem + 1 - len(self.maxlength)))
            itemlength = len(str(item))
            maxlength = self.maxlength[iItem]
            # None (nothing seen yet) is checked explicitly instead of relying
            # on how it compares to a number
            if (maxlength is None) or (maxlength < itemlength):
                self.maxlength[iItem] = itemlength
        # for
    # add()

    def __len__(self): return len(self.maxlength)
    def __iter__(self): return iter(self.maxlength)
    def __getitem__(self, index): return self.maxlength[index]

# class MaxItemLengthsClass
647 
648 
def CenterString(s, w, f = ' '):
    """Returns the string s centered in a width w, padded by f on both sides.

    If the padding can't be split evenly, the extra filler goes to the right.
    If s is longer than w, s itself is returned.
    """
    padding = max(0, w - len(s))
    leftFillerWidth = padding // 2
    return f * leftFillerWidth + s + f * (padding - leftFillerWidth)
# CenterString()
654 
def LeftString(s, w, f = ' '):
    """Returns s followed by as many f as needed to fill a width w."""
    missing = w - len(s)
    if missing <= 0:
        return s
    return s + f * missing
658 
def RightString(s, w, f = ' '):
    """Returns s preceded by as many f as needed to fill a width w."""
    missing = w - len(s)
    if missing <= 0:
        return s
    return f * missing + s
662 
def JustifyString(s, w, f = ' '):
    """Recomputes the spaces between the words in s so that they fill a width w.

    The original spacing is lost. The string is split in words at each
    occurrence of the character f (empty words are dropped), and the same
    character is used to create the filling spaces between the words.
    At least one filling character is put between two words, so the string can
    result longer than w if the content is too long.
    A string with at most one word is centered instead.
    """
    assert len(f) == 1
    tokens = [ token for token in s.split(f) if token ]
    if len(tokens) <= 1: return CenterString(s, w, f=f)

    # example: 6 words, 7 spaces (in 5 spacers)
    nSpacers = len(tokens) - 1
    totalSpace = max(nSpacers, w - sum(map(len, tokens)))
    assignedSpace = 0
    justified = tokens[0]
    for iSpacer, token in enumerate(tokens[1:], 1):
        # integer arithmetic: the space assigned so far always matches the
        # (floored) fair share, and the last spacer takes whatever is left
        # (example: 1 + 1 + 2 + 1 + 2)
        tokenSpace = (totalSpace * iSpacer) // nSpacers - assignedSpace
        justified += f * tokenSpace + token
        assignedSpace += tokenSpace
    # for
    return justified
# JustifyString()
689 
690 
class TabularAlignmentClass:
    """Formats list of data in a table.

    Rows are added with AddRow() and AddData(). Each row is formatted according
    to a list of format specifications, selected among the ones registered with
    SetRowFormats() by asking each registered row selector how well it matches
    the row: the best match wins.
    """
    def __init__(self, specs = ( None, )):
        """
        Each format specification applies to one item in each row.
        If no format specification is supplied for an item, the last used format
        is applied. By default, that is a plain conversion to string.
        """
        self.tabledata = []
        self.formats = {}
        if specs: self.SetDefaultFormats(specs)
    # __init__()

    # types considered to be format strings
    try: _StringTypes = basestring # Python 2: both str and unicode
    except NameError: _StringTypes = str

    class LineIdentifierClass:
        """Base class for row selectors: it matches no row.

        A row selector is called with the index of the row and the whole table
        data; it returns None if the row does not match, or a number expressing
        the strength of the match.
        """
        def __init__(self): pass
        def __call__(self, iLine, rawdata): return None
    # class LineIdentifierClass

    class CatchAllLines(LineIdentifierClass):
        """Row selector weakly matching (strength 1) any row."""
        def __call__(self, iLine, rawdata): return 1
    # class CatchAllLines

    class LineNo(LineIdentifierClass):
        """Row selector matching the rows with the specified number(s).

        A negative row number counts from the end of the table.
        """
        def __init__(self, lineno, success_factor = 5.):
            TabularAlignmentClass.LineIdentifierClass.__init__(self)
            if isinstance(lineno, int): self.lineno = [ lineno ]
            else: self.lineno = lineno
            self.success_factor = success_factor
        # __init__()

        def matchLine(self, lineno, iLine, rawdata):
            if lineno < 0: lineno = len(rawdata) + lineno
            return iLine == lineno
        # matchLine

        def __call__(self, iLine, rawdata):
            success = 0.
            for lineno in self.lineno:
                if self.matchLine(lineno, iLine, rawdata): success += 1.
            if success == 0: return None
            if self.success_factor == 0.: return 1.
            else: return success * self.success_factor
        # __call__()
    # class LineNo

    class FormatNotSupported(Exception): pass

    def ParseFormatSpec(self, spec):
        """Returns a dictionary describing the format specification spec.

        The specification can be None (plain conversion to string), a format
        string, or a dictionary of directives ('format' defaults to str).
        The specification itself is not modified.
        Any other type of specification raises FormatNotSupported.
        """
        if spec is None: return { 'format': str }
        if isinstance(spec, self._StringTypes): return { 'format': spec }
        if isinstance(spec, dict):
            SpecData = dict(spec) # do not touch the dictionary of the caller
            SpecData.setdefault('format', str)
            return SpecData
        raise TabularAlignmentClass.FormatNotSupported(spec)
    # ParseFormatSpec()

    def SetRowFormats(self, rowSelector, specs):
        """Registers the format specifications for the rows matching rowSelector.

        Raises RuntimeError if any of the specifications is not supported.
        """
        # parse the format specifications
        formats = []
        for iSpec, spec in enumerate(specs):
            try:
                formats.append(self.ParseFormatSpec(spec))
            except TabularAlignmentClass.FormatNotSupported as e:
                raise RuntimeError("Format specification %r (#%d) not supported."
                  % (str(e), iSpec))
        # for specifications
        self.formats[rowSelector] = formats
    # SetRowFormats()

    def SetDefaultFormats(self, specs):
        """Registers the format specifications for the rows with no better match."""
        self.SetRowFormats(TabularAlignmentClass.CatchAllLines(), specs)

    def AddData(self, data): self.tabledata.extend(data)
    def AddRow(self, *row_data): self.tabledata.append(row_data)


    def SelectFormat(self, iLine):
        """Returns the formats of the selector best matching row iLine, or None."""
        success = None
        bestFormat = None
        for lineMatcher, format_ in self.formats.items():
            match_success = lineMatcher(iLine, self.tabledata)
            if match_success is None: continue # no match at all
            if (success is not None) and (match_success <= success): continue
            bestFormat = format_
            success = match_success
        # for
        return bestFormat
    # SelectFormat()


    def FormatTable(self):
        """Returns the table as a list of rows, each a list of padded strings.

        Each column is as wide as the longest of its formatted items.
        """
        DefaultSpec = { 'format': str }

        def ItemSpec(RowFormats, iItem):
            """The format of the item, or the last available one for the row."""
            if iItem < len(RowFormats): return RowFormats[iItem]
            if RowFormats: return RowFormats[-1]
            return DefaultSpec
        # ItemSpec()

        # select the formats for all lines (none at all if no selector matches)
        AllFormats = [
            self.SelectFormat(iRow) or [] for iRow in range(len(self.tabledata))
            ]

        # format all the items
        ItemLengths = MaxItemLengthsClass()
        TableContent = []
        for iRow, rowdata in enumerate(self.tabledata):
            LineContent = []
            for iItem, itemdata in enumerate(rowdata):
                Formatter = ItemSpec(AllFormats[iRow], iItem)['format']
                if isinstance(Formatter, self._StringTypes):
                    ItemContent = Formatter % itemdata
                elif callable(Formatter):
                    ItemContent = Formatter(itemdata)
                else:
                    raise RuntimeError("Formatter %r (#%d) not supported."
                      % (Formatter, iItem))
                # if ... else
                LineContent.append(ItemContent)
            # for items
            ItemLengths.add(LineContent)
            TableContent.append(LineContent)
        # for rows

        # pad the objects
        for iRow, rowdata in enumerate(TableContent):
            for iItem, item in enumerate(rowdata):
                Spec = ItemSpec(AllFormats[iRow], iItem)

                fieldWidth = ItemLengths[iItem]
                alignment = Spec.get('align', 'left')
                if alignment == 'right':
                    alignedItem = RightString(item, fieldWidth)
                elif alignment == 'justified':
                    alignedItem = JustifyString(item, fieldWidth)
                elif alignment == 'center':
                    alignedItem = CenterString(item, fieldWidth)
                else: # if alignment == 'left':
                    alignedItem = LeftString(item, fieldWidth)
                if Spec.get('truncate', True): alignedItem = alignedItem[:fieldWidth]

                rowdata[iItem] = alignedItem
            # for items
        # for rows
        return TableContent
    # FormatTable()

    def ToStrings(self, separator = " "):
        """Returns a list with one string per formatted row."""
        return [ separator.join(RowContent) for RowContent in self.FormatTable() ]

    def Print(self, stream = None):
        """Prints the formatted table on stream (default: standard output)."""
        if stream is None: stream = sys.stdout
        stream.write("\n".join(self.ToStrings()) + "\n")

# class TabularAlignmentClass
851 
852 
853 ################################################################################
854 ### main program
855 ###
if __name__ == "__main__":
    import argparse

    ###
    ### parse command line arguments
    ###
    Parser = argparse.ArgumentParser(description=__doc__)
    Parser.set_defaults(PresentMode="ModTable")

    # positional arguments
    Parser.add_argument("LogFiles", metavar="LogFile", nargs="+",
      help="log file to be parsed")

    # options
    Parser.add_argument("--eventtable", dest="PresentMode", action="store_const",
      const="EventTable",
      help="print a table with the time of each module on each event")
    Parser.add_argument("--allowduplicates", '-D', dest="CheckDuplicates",
      action="store_false", help="do not check for duplicate entries")
    Parser.add_argument("--maxevents", dest="MaxEvents", type=int, default=-1,
      help="limit the number of parsed events to this (negative: no limit)")
    Parser.add_argument("--permissive", dest="Permissive", action="store_true",
      help="treats input errors as non-fatal [%(default)s]")
    Parser.add_argument('--version', action='version', version=Version)

    options = Parser.parse_args()

    # the event table needs the single entries to be tracked
    if options.PresentMode in ( 'EventTable', ):
        options.CheckDuplicates = True

    ###
    ### parse all inputs, collect the information
    ###

    # per-module statistics
    AllStats = JobStatsClass( )
    # per-event statistics
    EventStats = TimeModuleStatsClass \
      ("=== events ===", bTrackEntries=options.CheckDuplicates)

    # raised by the parsing when the requested number of events is reached
    class NoMoreInput(Exception): pass

    nErrors = 0
    try:
        if options.MaxEvents == 0: raise NoMoreInput # wow, that was quick!
        for LogFilePath in options.LogFiles:
            nErrors += ParseInputFile(LogFilePath, AllStats, EventStats, options)

    except NoMoreInput: pass

    # give a bit of separation between error messages and actual output
    if nErrors > 0: sys.stderr.write("\n")

    ###
    ### print the results
    ###
    # the number of entries is available only if the events are being tracked
    if options.CheckDuplicates: nEventEntries = EventStats.nEntries()
    else: nEventEntries = EventStats.nEvents()
    if (AllStats.MaxEvents() == 0) and (nEventEntries == 0):
        sys.stdout.write("No time statistics found.\n")
        sys.exit(1)
    # if

    OutputTable = TabularAlignmentClass()

    # present results
    if options.PresentMode == "ModTable":
        # fill the module stat data into the table
        OutputTable.AddData([ stats.FormatStatsAsList() for stats in AllStats ])
        # then the event data
        OutputTable.AddRow(*EventStats.FormatStatsAsList())
    elif options.PresentMode == "EventTable":
        # set some table formatting options
        OutputTable.SetRowFormats \
          (OutputTable.LineNo(0), [ None, { 'align': 'center' }])
        # header row
        OutputTable.AddRow("Module", *range(AllStats.MaxEvents()))
        # fill the module stat data into the table
        OutputTable.AddData([ stats.FormatTimesAsList() for stats in AllStats ])
        # then the event data
        OutputTable.AddRow(*EventStats.FormatTimesAsList())
    else:
        raise RuntimeError("Presentation mode %r not known" % options.PresentMode)

    OutputTable.Print()

    ###
    ### say goodbye
    ###
    if nErrors > 0:
        sys.stderr.write("%d errors were found in the input files.\n" % nErrors)
    # the exit status is capped so that it can't wrap around to 0 (success)
    sys.exit(min(nErrors, 255))
# main
static constexpr bool
auto enumerate(Iterables &&...iterables)
Range-for loop helper tracking the number of iteration.
Definition: enumerate.h:69
S join(S const &sep, Coll const &s)
Returns a concatenation of strings in s separated by sep.