All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Functions | Variables
validate_in_job Namespace Reference

Functions

def check_root_file
 
def check_root
 
def import_samweb
 
def main
 

Variables

 samweb = None
 
 extractor_dict = None
 
 proxy_ok = False
 

Function Documentation

def validate_in_job.check_root (   outdir,
  logdir,
  data_file_types 
)

Definition at line 94 of file validate_in_job.py.

def check_root(outdir, logdir, data_file_types):

    # This method looks for files in outdir with file types (extensions)
    # matching data_file_types.  For each match it calls check_root_file,
    # which checks for the existence of an Events TTree via precalculated
    # json metadata.
    #
    # Returns a 3-tuple containing the following information.
    #  1.  Total number of events in art root files (-1 if no art root
    #      files were found).
    #  2.  A list of 3-tuples with an entry for each art root file.
    #      The 3-tuple contains the following information.
    #       a) Filename (full path).
    #       b) Number of events.
    #       c) Stream name.
    #  3.  A list of histogram root files (full paths).

    nev = -1
    roots = []
    hists = []

    print('Checking root files in directory %s.' % outdir)
    filenames = os.listdir(outdir)
    for filename in filenames:
        name, ext = os.path.splitext(filename)
        if len(ext) > 0 and ext[1:] in data_file_types:
            path = os.path.join(outdir, filename)
            nevroot, stream = check_root_file(path, logdir)
            if nevroot >= 0:

                # Art root file with a valid event count.
                # First valid file switches the total from the "no art
                # root files" sentinel (-1) to an actual count.

                if nev < 0:
                    nev = 0
                nev = nev + nevroot

                # Reuse the already-computed full path (the original
                # recomputed os.path.join here and in the elif branch).

                roots.append((path, nevroot, stream))

            elif nevroot == -1:

                # Valid data file, not an art root file.

                hists.append(path)

            else:

                # Found a .root file that is not openable.
                # Print a warning, but don't trigger any other error.

                print('Warning: File %s in directory %s is not a valid root file.' % (filename, outdir))

    # Done.

    return (nev, roots, hists)
do one_file $F done echo for F in find $TOP name CMakeLists txt print
def validate_in_job.check_root_file (   path,
  logdir 
)

Definition at line 43 of file validate_in_job.py.

def check_root_file(path, logdir):

    # Check the specified root file.
    #
    # The event count and stream name are read from the precalculated
    # json metadata file "<logdir>/<basename(path)>.json", if present;
    # the root file itself is never opened here.
    #
    # Returns a 2-tuple (nevroot, stream), where nevroot is:
    #  >= 0 - Number of events (art root file).
    #    -1 - File exists and metadata was readable, but contained no
    #         event count (valid non-art data file).
    #    -2 - File missing, or metadata missing/unreadable.

    result = (-2, '')

    # First check if root file exists (error if not).

    if not project_utilities.safeexist(path):
        return result

    # See if we have precalculated metadata for this root file.

    json_path = os.path.join(logdir, os.path.basename(path) + '.json')
    if project_utilities.safeexist(json_path):

        # Get number of events from precalculated metadata.

        try:
            lines = project_utilities.saferead(json_path)
            s = ''
            for line in lines:
                s = s + line

            # Convert json string to python dictionary.

            md = json.loads(s)

            # If we get this far, say the file was at least openable.

            result = (-1, '')

            # Extract number of events and stream name from metadata.

            if len(list(md.keys())) > 0:
                nevroot = -1
                stream = ''
                if 'events' in md:
                    nevroot = int(md['events'])
                if 'data_stream' in md:
                    stream = md['data_stream']
                result = (nevroot, stream)

        except Exception:

            # Narrowed from a bare "except:" so KeyboardInterrupt and
            # SystemExit are not silently swallowed.  Any read/parse
            # failure leaves the "unreadable" sentinel.

            result = (-2, '')
    return result
93 # Check data files in the specified directory.
list
Definition: file_to_url.sh:28
def validate_in_job.import_samweb ( )

Definition at line 143 of file validate_in_job.py.

def import_samweb():

    # Get initialized samweb object, if not already done.
    #
    # The samweb client is cached in the module-level "samweb" global,
    # and the experiment-specific metadata extractor class is imported
    # into the module-level name "expMetaData" as a side effect.

    global samweb
    global extractor_dict
    global expMetaData

    # Identity comparison with None (was "samweb == None").

    if samweb is None:
        samweb = project_utilities.samweb()
        from extractor_dict import expMetaData
155 
156 # Main program.
def validate_in_job.main ( )

Definition at line 157 of file validate_in_job.py.

def main():

    # Validate the output of a single batch job.
    #
    # Parses command-line options (--dir, --logfiledir, --outdir,
    # --declare, --copy, --maintain_parentage, --data_file_type), checks
    # the lar exit status and the root files produced by the job, writes
    # bookkeeping list files in the current directory, and optionally
    # declares files to SAM and copies them to the dropbox.
    #
    # Returns 0 on success, nonzero on any validation or declaration
    # failure.

    ana = 0
    nproc = 0

    import_samweb()

    # Parse arguments.
    checkdir = ''
    logdir = ''
    outdir = ''
    declare_file = 0
    copy_to_dropbox = 0
    maintain_parentage = 0
    data_file_types = []
    args = sys.argv[1:]
    while len(args) > 0:

        if args[0] == '--dir' and len(args) > 1:
            checkdir = args[1]
            del args[0:2]
        elif args[0] == '--logfiledir' and len(args) > 1:
            logdir = args[1]
            del args[0:2]
        elif args[0] == '--outdir' and len(args) > 1:
            outdir = args[1]
            del args[0:2]
        elif args[0] == '--declare' and len(args) > 1:
            declare_file = int(args[1])
            del args[0:2]
        elif args[0] == '--copy' and len(args) > 1:
            copy_to_dropbox = int(args[1])
            del args[0:2]
        elif args[0] == '--maintain_parentage' and len(args) > 1:
            maintain_parentage = int(args[1])
            del args[0:2]
        elif args[0] == '--data_file_type' and len(args) > 1:
            data_file_types.append(args[1])
            del args[0:2]
        else:
            print('Unknown option %s' % args[0])
            return 1

    # Add default data_file_types.

    if len(data_file_types) == 0:
        data_file_types.append('root')

    status = 0  # Global status code to tell us everything is ok.

    print("Do decleration in job: %d" % declare_file)

    # Check lar exit status (if any).
    stat_filename = os.path.join(logdir, 'lar.stat')
    if project_utilities.safeexist(stat_filename):
        try:
            status = int(project_utilities.saferead(stat_filename)[0].strip())
            if status != 0:
                print('Job in subdirectory %s ended with non-zero exit status %d.' % (checkdir, status))
                status = 1

        except Exception:
            # Narrowed from a bare "except:" (unreadable/garbled lar.stat).
            print('Bad file lar.stat in subdirectory %s.' % checkdir)
            status = 1

    if checkdir == '':
        print('No directory specified (use the --dir option.) Exiting.')
        return 1
    if logdir == '':
        print('No log file directory specified (use the --logfiledir option.) Exiting.')
        return 1

    nevts, rootfiles, hists = check_root(checkdir, logdir, data_file_types)

    # Set flag to do analysis-style validation if all of the following are true:
    #
    # 1.  There is at least one valid histogram file.
    # 2.  The total number of artroot files and artroot events is zero.

    if len(hists) > 0 and len(rootfiles) == 0 and nevts <= 0:
        ana = 1

    if not ana:
        if len(rootfiles) == 0 or nevts < 0:
            print('Problem with root file(s) in %s.' % checkdir)
            status = 1

    elif nevts < -1 or len(hists) == 0:
        print('Problem with analysis root file(s) in %s.' % checkdir)
        status = 1

    # Then we need to loop over rootfiles and hists because those are good.
    # Then we could make a list of those and check that the file in question for
    # declaration is in that list.  Also require that the lar exit code is good
    # for declaration.

    validate_list = open('validate.list', 'w')
    file_list = open('files.list', 'w')
    ana_file_list = open('filesana.list', 'w')

    events_list = open('events.list', 'w')

    # Will be empty if the checks succeed.
    bad_list = open('bad.list', 'w')
    missing_list = open('missing_files.list', 'w')

    # Print summary.

    if ana:
        print("%d processes completed successfully." % nproc)
        print("%d total good histogram files." % len(hists))

    else:
        print("%d total good events." % nevts)
        print("%d total good root files." % len(rootfiles))
        print("%d total good histogram files." % len(hists))

    # Per-stream output list files, opened lazily and keyed by stream name.
    file_list_stream = {}

    # Generate bookkeeping files pertaining to artroot files.

    for rootfile in rootfiles:

        rootpath = rootfile[0]
        nev = rootfile[1]
        streamname = rootfile[2]

        # Make sure root file names do not exceed 200 characters.
        rootname = os.path.basename(rootpath)
        if len(rootname) >= 200:
            print('Filename %s in subdirectory %s is longer than 200 characters.' % (
                rootname, outdir))
            status = 1

        if streamname not in file_list_stream:
            file_list_stream[streamname] = open('files_%s.list' % streamname, 'w')
        validate_list.write(rootpath + '\n')
        file_on_scratch = os.path.join(outdir, os.path.basename(rootpath))
        file_list.write(file_on_scratch + '\n')
        file_list_stream[streamname].write(file_on_scratch + '\n')
        events_list.write('%s %d \n' % (file_on_scratch, nev))

    # Generate bookkeeping files pertaining to analysis files.

    for histfile in hists:
        validate_list.write(histfile + '\n')
        file_on_scratch = os.path.join(outdir, os.path.basename(histfile))
        ana_file_list.write(file_on_scratch + '\n')

    validate_list.close()
    file_list.close()
    ana_file_list.close()
    for streamname in list(file_list_stream.keys()):
        file_list_stream[streamname].close()
    events_list.close()

    # Decide at this point if all the checks are ok.
    # Write to missing_files.list first.
    missing_list.write('%d \n' % status)

    # Bug fix: missing_list was opened but never closed, so its contents
    # were not guaranteed to be flushed before the process exited.
    missing_list.close()

    if status == 0:
        bad_list.close()

        # Begin SAM declaration.

        if declare_file:

            # Declare artroot files.

            for rootfile in rootfiles:

                rootpath = rootfile[0]
                fn = os.path.basename(rootpath)
                declare_ok = False

                # Decide if we need to declare this file.
                # It is OK if the file is already declared.
                # In that case, do not try to declare it again.

                try:
                    md = samweb.getMetadata(fn)
                    if len(md) > 0:
                        declare_ok = True
                        print('File %s is already declared.' % fn)
                except Exception:
                    # Lookup failure means the file is not declared yet.
                    declare_ok = False

                if not declare_ok:
                    print('Declaring %s' % fn)
                    expSpecificMetaData = expMetaData(project_utilities.get_experiment(), rootpath)
                    md = expSpecificMetaData.getmetadata()

                    # Decide if we want to override the internal parentage metadata.

                    if maintain_parentage == 1:

                        # Delete the old parents, if any.

                        if 'parents' in md:
                            del md['parents']

                        # Change the parentage of the file based on its parents
                        # and aunts from condor_lar.

                        jobs_parents = os.getenv('JOBS_PARENTS', '').split(" ")
                        jobs_aunts = os.getenv('JOBS_AUNTS', '').split(" ")
                        if jobs_parents[0] != '':
                            md['parents'] = [{'file_name': parent} for parent in jobs_parents]
                        if jobs_aunts[0] != '':
                            for aunt in jobs_aunts:
                                mixparent_dict = {'file_name': aunt}
                                if 'parents' not in md:
                                    md['parents'] = []
                                md['parents'].append(mixparent_dict)

                    if len(md) > 0:
                        project_utilities.test_kca()

                        # Make lack of parent files a nonfatal error.
                        # This should probably be removed at some point.

                        try:
                            samweb.declareFile(md=md)
                            declare_ok = True

                        except samweb_cli.exceptions.SAMWebHTTPError as e:
                            print(e)
                            print('SAM declare failed.')
                            return 1

                        except Exception:
                            print('SAM declare failed.')
                            return 1

                    else:
                        print('No sam metadata found for %s.' % fn)
                        declare_ok = False
                        status = 1

                if copy_to_dropbox == 1 and declare_ok:
                    print("Copying to Dropbox")
                    dropbox_dir = project_utilities.get_dropbox(fn)
                    rootPath = os.path.join(dropbox_dir, fn)
                    # NOTE(review): jsonPath is computed but never used in
                    # this branch -- confirm whether the json file should
                    # also be copied.
                    jsonPath = rootPath + ".json"
                    ifdh_cp(rootpath, rootPath)

            # Declare histogram files.

            for histpath in hists:

                declare_ok = False
                fn = os.path.basename(histpath)

                # Decide if we need to declare this file.
                # It is OK if the file is already declared.
                # In that case, do not try to declare it again.

                try:
                    md = samweb.getMetadata(fn)
                    if len(md) > 0:
                        declare_ok = True
                        print('File %s is already declared.' % fn)
                except Exception:
                    declare_ok = False

                if not declare_ok:
                    print('Declaring %s' % fn)
                    json_file = os.path.join(logdir, fn + '.json')

                    # Get metadata from json.

                    md = {}
                    if project_utilities.safeexist(json_file):
                        mdlines = project_utilities.saferead(json_file)
                        mdtext = ''
                        for line in mdlines:
                            mdtext = mdtext + line
                        try:
                            md = json.loads(mdtext)
                        except Exception:
                            # Unparseable metadata: fall through with empty md,
                            # which is reported as "no sam metadata" below.
                            md = {}

                    if maintain_parentage == 1:

                        # Delete the old parents, if any.

                        if 'parents' in md:
                            del md['parents']

                        # Change the parentage of the file based on its parents
                        # and aunts from condor_lar.

                        jobs_parents = os.getenv('JOBS_PARENTS', '').split(" ")
                        jobs_aunts = os.getenv('JOBS_AUNTS', '').split(" ")
                        if jobs_parents[0] != '':
                            md['parents'] = [{'file_name': parent} for parent in jobs_parents]
                        if jobs_aunts[0] != '':
                            for aunt in jobs_aunts:
                                mixparent_dict = {'file_name': aunt}
                                if 'parents' not in md:
                                    md['parents'] = []
                                md['parents'].append(mixparent_dict)

                    if len(md) > 0 and 'file_type' in md:
                        project_utilities.test_kca()

                        # Make lack of parent files a nonfatal error.
                        # This should probably be removed at some point.

                        try:
                            samweb.declareFile(md=md)
                            declare_ok = True

                        except samweb_cli.exceptions.SAMWebHTTPError as e:
                            print(e)
                            print('SAM declare failed.')
                            declare_ok = False

                        except Exception:
                            print('SAM declare failed.')
                            declare_ok = False

                    else:
                        print('No sam metadata found for %s.' % fn)
                        declare_ok = False

                if copy_to_dropbox == 1 and declare_ok:
                    print("Copying to Dropbox")
                    dropbox_dir = project_utilities.get_dropbox(fn)
                    rootPath = dropbox_dir + "/" + fn
                    jsonPath = rootPath + ".json"
                    ifdh_cp(histpath, rootPath)

        return status

    # Something went wrong, so make a list of bad directories and
    # potentially missing files.
    else:
        # First get the subdir name on pnfs.  This contains the job id.
        dir_on_scratch = os.path.basename(outdir)
        print('Dir on scratch ' + dir_on_scratch)
        bad_list.write('%s \n' % dir_on_scratch)
        bad_list.close()
        return status
504 
then if[["$THISISATEST"==1]]
Definition: neoSmazza.sh:95
do one_file $F done echo for F in find $TOP name CMakeLists txt print
def write
Definition: util.py:23
print OUTPUT<< EOF;< setup name="Default"version="1.0">< worldref="volWorld"/></setup ></gdml > EOF close(OUTPUT)
list
Definition: file_to_url.sh:28
open(RACETRACK) or die("Could not open file $RACETRACK for writing")

Variable Documentation

validate_in_job.extractor_dict = None

Definition at line 33 of file validate_in_job.py.

validate_in_job.proxy_ok = False

Definition at line 34 of file validate_in_job.py.

validate_in_job.samweb = None

Definition at line 32 of file validate_in_job.py.