Classes | Functions | Variables
sortDataLoggerFiles Namespace Reference

Classes

class  CycleCompareClass
 
class  FileNameParser
 
class  FileInfoClass
 
class  MinimumAccumulator
 

Functions

def findFirstCycle
 
def extractFirstEvent
 
def detectFirstLogger
 
def buildFileIndex
 

Variables

string __doc__
 
string __author__ = 'Gianluca Petrillo (petrillo@slac.stanford.edu)'
 
string __date__ = 'February 22, 2022'
 
string __version__ = '1.3'
 
tuple parser = argparse.ArgumentParser(description=__doc__)
 
string help = 'input file lists [one from stdin by default]'
 
tuple duplGroup = parser.add_argument_group(title="duplicate file options")
 
string action = "store_true"
 
tuple args = parser.parse_args()
 
 printDuplicates = args.printduplicates
 
 skipDuplicates = args.skipDuplicates
 
 makeDuplicateList = args.duplicatelist
 
list sources = args.inputFiles if args.inputFiles else [ "<stdin>" ]
 
tuple inputFiles
 
list preComments = []
 
list postComments = []
 
list fileInfo = []
 
list sourceNames = []
 
tuple isSingleFile = isinstance(file_, list)
 
tuple info = FileInfoClass(line, source=( iSource, None if isSingleFile else iLine + 1 ))
 
tuple Streams = list(set( info.stream for info in fileInfo ))
 
tuple firstPassFiles
 
tuple firstLogger = detectFirstLogger(firstPassFiles)
 
int nDuplicates = 0
 
tuple fileIndex = buildFileIndex(fileInfo)
 
list uniqueFiles = []
 
list duplicateFiles = []
 
list mainInfo = fileList[0]
 
list firstSource = mainInfo.source[0]
 
 fileListContent = uniqueFiles if skipDuplicates else fileInfo
 
tuple outputFile = open(args.output, 'w')
 

Function Documentation

def sortDataLoggerFiles.buildFileIndex (   fileInfo)

Definition at line 310 of file sortDataLoggerFiles.py.

310 def buildFileIndex(
311   fileInfo: "list with information from all files",
312   ) -> "a dictionary: { key -> list of files }":
313 
314   fileKey = lambda info: ( info.run, info.pass_, info.dataLogger, info.stream, )
315   index = {}
316   for info in fileInfo:
317     index.setdefault(fileKey(info), []).append(info)
318   return index
319 # buildFileIndex()
320 
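For illustration, a minimal usage sketch of the grouping above, assuming buildFileIndex() as listed is in scope (for instance pasted into the same interpreter session); the Info tuple is a hypothetical stand-in for FileInfoClass carrying only the fields the key uses:

from collections import namedtuple

# hypothetical stand-in for FileInfoClass (illustration only)
Info = namedtuple("Info", "run pass_ dataLogger stream path")

files = [
  Info(9999, 1, 3, "offbeam", "a.root"),
  Info(9999, 1, 3, "offbeam", "b.root"),  # same (run, pass, logger, stream) key: a duplicate
  Info(9999, 1, 4, "offbeam", "c.root"),
]
index = buildFileIndex(files)
assert len(index[(9999, 1, 3, "offbeam")]) == 2  # duplicates are grouped under the same key
assert len(index[(9999, 1, 4, "offbeam")]) == 1
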
def sortDataLoggerFiles.detectFirstLogger (   fileInfo)

Definition at line 285 of file sortDataLoggerFiles.py.

286 def detectFirstLogger(fileInfo):
287   # in the end, we don't need a stream-aware algorithm to determine which
288   # data logger received the first event, as long as we have all relevant
289   # streams represented
290   lowestEvent = MinimumAccumulator()
291   for stream, files in fileInfo.items():
292     if not len(files): continue
293     for info in files:
294       firstEvent = extractFirstEvent(info.pathToXRootD())
295       if firstEvent is not None:
296         lowestEvent.add(info, key=firstEvent)
297         if firstEvent == 1: break # can't get lower than this!
298     # for files
299   # for
300   try: firstLogger = lowestEvent.min().dataLogger
301   except AttributeError:
302     # this is in general a problem because it implies that we are failing to
303     # correctly parse the list of input files
304     raise RuntimeError("No data found for the first data logger pass.")
305   logging.debug("Detected first logger: %d", firstLogger)
306   return firstLogger
307 # detectFirstLogger()
308 
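MinimumAccumulator is referenced above but not documented on this page. The following is a minimal sketch of the behaviour detectFirstLogger() relies on: keep the item with the smallest key, and have min() return None when nothing was added, so that the subsequent .dataLogger access raises AttributeError. The class actually shipped in the script may differ in detail:

class MinimumAccumulator:
  # minimal sketch, not necessarily the script's implementation
  def __init__(self):
    self._best = None  # (key, item) pair, or None while empty
  def add(self, item, key):
    if self._best is None or key < self._best[0]:
      self._best = ( key, item )
  def min(self):
    return self._best[1] if self._best is not None else None

acc = MinimumAccumulator()
acc.add("logger 3", key=42)  # hypothetical entry: first event number 42
acc.add("logger 1", key=7)   # hypothetical entry: first event number 7
assert acc.min() == "logger 1"  # the entry with the lowest first event wins
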
def sortDataLoggerFiles.extractFirstEvent (   filePath)

Definition at line 260 of file sortDataLoggerFiles.py.

261 def extractFirstEvent(filePath):
262   try: import ROOT
263   except ImportError:
264     raise RuntimeError("""ROOT python module could not be loaded.
265   In this condition, you'll have to skip the autodetection of the first logger
266   by explicitly specifying its number as option to the script."""
267     )
268   # try ... except
269   logging.debug("Opening '%s' for event number check...", filePath)
270   srcFile = ROOT.TFile.Open(filePath, "READ")
271   if not srcFile:
272     raise RuntimeError \
273       ("Failed to open '%s' for event number extraction." % filePath)
274   #
275   try: firstEvent = next(iter(srcFile.Events)) # go PyROOT
276   except StopIteration:
277     logging.debug("File '%s' appears to contain no events.", filePath)
278     return None
279   firstEventNumber = firstEvent.EventAuxiliary.event() # keep going PyROOT
280 
281   logging.debug("First event from '%s': %d", filePath, firstEventNumber)
282   return firstEventNumber
283 # extractFirstEvent()
284 
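A usage sketch, assuming PyROOT is available and extractFirstEvent() above is in scope; the file name is purely hypothetical:

# hypothetical art/ROOT file containing an "Events" tree
firstEvent = extractFirstEvent("run9999_dl3.root")
if firstEvent is None:
  print("the file contains no events")
else:
  print("first event number:", firstEvent)
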
def sortDataLoggerFiles.findFirstCycle (   files,
  stream 
)

Definition at line 240 of file sortDataLoggerFiles.py.

241 def findFirstCycle(files, stream):
242   firstLogger = None
243   firstPassFiles = []
244   wrapped = False
245   for info in files:
246     if info.stream != stream: continue
247     if firstLogger == info.dataLogger: break # cycle completed
248     if wrapped and info.dataLogger > firstLogger: break # cycle completed
249 
250     if firstLogger is None: firstLogger = info.dataLogger
251     elif not wrapped and info.dataLogger < firstLogger: wrapped = True
252 
253     firstPassFiles.append(info)
254     logging.debug("Added cycle %d logger %d stream %s to first cycle list",
255       info.pass_, info.dataLogger, info.stream)
256   # for
257   return firstPassFiles
258 # findFirstCycle()
259 
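To illustrate the wrap-around logic, a sketch assuming findFirstCycle() above is in scope (pasted into the same session, with logging imported); the entries are hypothetical stand-ins for FileInfoClass objects carrying only the fields the function touches:

from collections import namedtuple

Info = namedtuple("Info", "stream dataLogger pass_")

# loggers deliver files in acquisition order 3, 4, 1, 2; logger 3 then opens the second cycle
files = [ Info("offbeam", n, 1) for n in (3, 4, 1, 2) ] \
      + [ Info("offbeam", n, 2) for n in (3, 4, 1, 2) ]

firstCycle = findFirstCycle(files, "offbeam")
assert [ info.dataLogger for info in firstCycle ] == [ 3, 4, 1, 2 ]  # stops when logger 3 reappears
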

Variable Documentation

string sortDataLoggerFiles.__author__ = 'Gianluca Petrillo (petrillo@slac.stanford.edu)'

Definition at line 48 of file sortDataLoggerFiles.py.

string sortDataLoggerFiles.__date__ = 'February 22, 2022'

Definition at line 49 of file sortDataLoggerFiles.py.

string sortDataLoggerFiles.__doc__
Initial value:
1 = """Sorts a list of data logger output files.
2 
3 File paths are read from all the specified file lists in sequence, or from
4 standard input if no file list is specified.
5 
6 If a line is encountered that does not match the typical file name pattern,
7 that line is ignored and a warning is printed.
8 
9 Comments and empty lines at the beginning of the first file list are printed
10 at the top of the output as they are. All other comments and empty lines are
11 printed at the end of the output.
12 
13 Note that it is possible to sort "in place" by specifying the same file list as
14 input and output.
15 
16 Duplicate files are files on the same run, data logger cycle and data logger
17 number. By default, only the first of the duplicate files is written into the
18 output list, and only the number of duplicates is printed. Options allow to
19 write a detailed list of duplicate files on screen and on disk, or not to check
20 for duplication altogether.
21 
22 """

Definition at line 25 of file sortDataLoggerFiles.py.

string sortDataLoggerFiles.__version__ = '1.3'

Definition at line 50 of file sortDataLoggerFiles.py.

string sortDataLoggerFiles.action = "store_true"

Definition at line 345 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.args = parser.parse_args()

Definition at line 364 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.duplGroup = parser.add_argument_group(title="duplicate file options")

Definition at line 341 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.duplicateFiles = []

Definition at line 433 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.fileIndex = buildFileIndex(fileInfo)

Definition at line 431 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.fileInfo = []

Definition at line 388 of file sortDataLoggerFiles.py.

sortDataLoggerFiles.fileListContent = uniqueFiles if skipDuplicates else fileInfo

Definition at line 466 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.firstLogger = detectFirstLogger(firstPassFiles)

Definition at line 419 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.firstPassFiles
Initial value:
1 = dict( ( stream, findFirstCycle(fileInfo, stream) )
2  for stream in Streams )

Definition at line 416 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.firstSource = mainInfo.source[0]

Definition at line 442 of file sortDataLoggerFiles.py.

string sortDataLoggerFiles.help = 'input file lists [one from stdin by default]'

Definition at line 331 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.info = FileInfoClass(line, source=( iSource, None if isSingleFile else iLine + 1 ))

Definition at line 393 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.inputFiles
Initial value:
1 = (
2  [ file_ ] if file_.endswith('.root') else open(file_, 'r')
3  for file_ in args.inputFiles
4  )

Definition at line 379 of file sortDataLoggerFiles.py.
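For illustration, a sketch of what this generator expression does, with hypothetical file names: an argument ending in .root is wrapped into a one-element list, anything else is opened as a text file containing a file list:

# hypothetical inputs: one ROOT file given directly, one text file listing further files
inputFileArgs = [ "run9999_dl3.root", "filelist.txt" ]

inputFiles = (
  [ file_ ] if file_.endswith('.root') else open(file_, 'r')
  for file_ in inputFileArgs
  )
for source in inputFiles:
  for line in source:
    print(line.strip())
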

tuple sortDataLoggerFiles.isSingleFile = isinstance(file_, list)

Definition at line 391 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.mainInfo = fileList[0]

Definition at line 436 of file sortDataLoggerFiles.py.

sortDataLoggerFiles.makeDuplicateList = args.duplicatelist

Definition at line 373 of file sortDataLoggerFiles.py.

int sortDataLoggerFiles.nDuplicates = 0

Definition at line 430 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.outputFile = open(args.output, 'w')

Definition at line 476 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.parser = argparse.ArgumentParser(description=__doc__)

Definition at line 327 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.postComments = []

Definition at line 387 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.preComments = []

Definition at line 386 of file sortDataLoggerFiles.py.

sortDataLoggerFiles.printDuplicates = args.printduplicates

Definition at line 371 of file sortDataLoggerFiles.py.

sortDataLoggerFiles.skipDuplicates = args.skipDuplicates

Definition at line 372 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.sourceNames = []

Definition at line 389 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.sources = args.inputFiles if args.inputFiles else [ "<stdin>" ]

Definition at line 376 of file sortDataLoggerFiles.py.

tuple sortDataLoggerFiles.Streams = list(set( info.stream for info in fileInfo ))

Definition at line 407 of file sortDataLoggerFiles.py.

list sortDataLoggerFiles.uniqueFiles = []

Definition at line 432 of file sortDataLoggerFiles.py.