All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sbndutil/python/extractor_dict.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 import sys
3 import getopt
4 import os
5 import subprocess
6 from subprocess import Popen, PIPE
7 import time
8 import samweb_cli
9 #from samweb_client.utility import fileEnstoreChecksum
10 import ast
11 import project_utilities
12 import root_metadata
13 
14 
def getmetadata(inputfile):
    """Build a SAM metadata dictionary for ``inputfile``.

    Runs ``sam_metadata_dumper -H`` on the file (on a local copy when
    ``project_utilities.path_to_local`` provides one, otherwise on the URL
    from ``project_utilities.path_to_url``), parses its output, and adds the
    file name, size and checksum.

    Parameters:
        inputfile: path of the file to describe.  It is also passed to
            ``os.path.getsize`` and ``root_metadata.fileEnstoreChecksum``.

    Returns:
        dict mapping SAM metadata keys to values.  Fields that the dumper
        does not report are left out: ``runs`` is only present when both the
        run number and run type were found, and ``application`` only holds
        the family/name/version entries that were found.

    Raises:
        RuntimeError: if ``sam_metadata_dumper`` exits with a non-zero status.
    """
    # Dumper fields that are copied verbatim under a different key.
    direct_fields = {
        'group': 'group',
        'eventCount': 'event_count',
        'fclName': 'fcl.name',
        'fclVersion': 'fcl.version',
        'fileFormat': 'file_format',
        'ubProjectStage': 'ub_project.stage',
        'ubProjectVersion': 'ub_project.version',
        'lastEvent': 'last_event',
        'firstEvent': 'first_event',
        'fileType': 'file_type',
        'ubProjectName': 'ub_project.name',
    }
    # Dumper fields holding epoch seconds that are reformatted as timestamps.
    time_fields = {'startTime': 'start_time', 'endTime': 'end_time'}
    # Dumper fields that end up inside the nested 'application' entry.
    application_fields = {
        'applicationFamily': 'family',
        'process_name': 'name',
        'applicationVersion': 'version',
    }

    # Set up the experiment name for samweb Python API
    samweb = samweb_cli.SAMWebClient(
        experiment=project_utilities.get_experiment())

    # Extract metadata into a pipe.
    local = project_utilities.path_to_local(inputfile)
    target = local if local != '' else project_utilities.path_to_url(inputfile)
    try:
        proc = subprocess.Popen(
            ["sam_metadata_dumper", "-H", target], stdout=subprocess.PIPE)
        # communicate() drains the pipe, closes it and reaps the child.
        output = proc.communicate()[0]
    finally:
        # Remove the temporary local copy even if the dumper could not run.
        if local != '' and local != inputfile:
            os.remove(local)

    if proc.returncode != 0:
        raise RuntimeError(
            'sam_metadata_dumper exited with status %d for %s'
            % (proc.returncode, inputfile))

    # On Python 3 the pipe delivers bytes; the parsing below needs text.
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    lines = output.splitlines()
    num_lines = len(lines)

    md = {}
    parents = []
    application = {}
    run = None
    run_type = None
    gen = False

    # Read the columns from the dumper output and fill the dictionary.
    for line_number, line in enumerate(lines, 1):
        # The first three and the last two lines are not metadata fields.
        if line_number < 4 or line_number > num_lines - 2:
            continue
        columns = [col.strip() for col in line.split(" ")]
        if len(columns) < 2:
            # Blank or malformed line: no field name to look at.
            continue
        field = columns[1]
        value = columns[-1]

        if field == 'dataTier':
            md['data_tier'] = value
            if value == 'generated':
                gen = True
        elif field in time_fields:
            # Zero-padded so the result is a well-formed timestamp.
            md[time_fields[field]] = time.strftime(
                '%Y-%m-%dT%H:%M:%S', time.localtime(int(value)))
        elif field in direct_fields:
            md[direct_fields[field]] = value
        elif field == 'run':
            run = value
        elif field == 'runType':
            run_type = value
        elif field in application_fields:
            application[application_fields[field]] = value
        elif field == 'parent':
            parents.append({'file_name': value})

    # Get the other meta data field parameters
    md['file_name'] = inputfile.split("/")[-1]
    md['file_size'] = os.path.getsize(inputfile)
    md['crc'] = root_metadata.fileEnstoreChecksum(inputfile)
    if run is not None and run_type is not None:
        md['runs'] = [[run, run_type]]
    if application:
        md['application'] = application
    md['parents'] = parents

    # If ub_project.name is not in the internal metadata:
    #  - for generator files, take it from the fcl file name (without the
    #    '.fcl' part);
    #  - for all other stages, take it from the first parent's metadata.
    have_project_name = 'ub_project.name' in md
    if gen:
        # Generator files are declared without parents.
        md['parents'] = []
        if not have_project_name and 'fcl.name' in md:
            md['ub_project.name'] = md['fcl.name'].split(".fcl")[0]
    elif not have_project_name and parents:
        mdparent = samweb.getMetadata(parents[0]['file_name'])
        if 'ub_project.name' in mdparent:
            md['ub_project.name'] = mdparent['ub_project.name']

    return md
128 
129 
if __name__ == "__main__":
    # Script entry point: print the metadata of the file named on the
    # command line as indented, key-sorted JSON.
    #
    # json is imported here, in the only block that uses it, instead of
    # being reached through samweb_cli's module namespace.
    import json

    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s <inputfile>\n" % sys.argv[0])
        sys.exit(1)

    md = getmetadata(str(sys.argv[1]))
    # json.dumps returns the text; its formatting options are passed by
    # keyword (Python 3 rejects extra positional arguments).
    mdtext = json.dumps(md, indent=2, sort_keys=True)
    print(mdtext)
    sys.exit(0)
do one_file $F done echo for F in find $TOP name CMakeLists txt print