All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Functions | Variables
extractor_dict Namespace Reference

Functions

def getmetadata
 

Variables

tuple md = getmetadata(str(sys.argv[1]))
 
tuple mdtext = samweb_cli.json.dumps(md, sys.stdout, indent=2, sort_keys=True)
 

Function Documentation

def extractor_dict.getmetadata (   inputfile)

Definition at line 15 of file sbndutil/python/extractor_dict.py.

15 
# sam_metadata_dumper keys that are copied unchanged into a SAM metadata field.
_DIRECT_FIELDS = {
    'group': 'group',
    'eventCount': 'event_count',
    'fclName': 'fcl.name',
    'fclVersion': 'fcl.version',
    'fileFormat': 'file_format',
    'ubProjectStage': 'ub_project.stage',
    'ubProjectVersion': 'ub_project.version',
    'lastEvent': 'last_event',
    'firstEvent': 'first_event',
    'fileType': 'file_type',
    'ubProjectName': 'ub_project.name',
}

# sam_metadata_dumper keys needed to build the 'runs' and 'application' entries.
_REQUIRED_FIELDS = (
    'run',
    'runType',
    'applicationFamily',
    'applicationVersion',
    'process_name',
)


def _format_timestamp(seconds):
    """Render epoch seconds as a local-time 'Y-M-DTh:m:s' string.

    The fields are intentionally not zero-padded so the output stays
    identical to what this module has always produced.
    """
    return '%d-%d-%dT%d:%d:%d' % tuple(time.localtime(int(seconds))[:6])


def getmetadata(inputfile):
    """Build a metadata dictionary for ``inputfile``.

    Runs ``sam_metadata_dumper -H`` on the file (on a local copy when
    ``project_utilities.path_to_local`` provides one, otherwise on the URL
    from ``project_utilities.path_to_url``), parses the key/value lines of
    its output and adds file name, size, checksum, run, application and
    parent information.

    Args:
        inputfile: Path of the file to describe.

    Returns:
        dict: The metadata keyed by SAM field name.

    Raises:
        RuntimeError: If the dumper output lacks any of the run or
            application fields needed to build the 'runs' and
            'application' entries.
        ValueError: If a start or end time value is not an integer.
    """
    # Set up the experiment name for the samweb Python API.
    samweb = samweb_cli.SAMWebClient(
        experiment=project_utilities.get_experiment())

    # Extract the metadata through a pipe.
    local = project_utilities.path_to_local(inputfile)
    source = local if local != '' else project_utilities.path_to_url(inputfile)
    try:
        # universal_newlines=True yields text lines on both Python 2 and 3,
        # so the str-based splitting below works on either interpreter.
        proc = subprocess.Popen(
            ["sam_metadata_dumper", "-H", source],
            stdout=subprocess.PIPE, universal_newlines=True)
        try:
            lines = proc.stdout.readlines()
        finally:
            # Close the pipe and reap the child so no zombie is left behind.
            proc.stdout.close()
            proc.wait()
    finally:
        # Remove the temporary local copy even if the dumper call failed.
        if local != '' and local != inputfile:
            os.remove(local)

    md = {}
    parents = []
    required = {}
    is_generated = False

    # Only lines 4 .. (N - 2), counted from 1, are parsed as key/value rows;
    # the first three and the last two lines of the output are skipped.
    last_data_line = len(lines) - 2

    # Read the columns from the output and fill the dictionary.
    for lineno, line in enumerate(lines, 1):
        if lineno < 4 or lineno > last_data_line:
            continue
        columns = [col.strip() for col in line.split(" ")]
        if len(columns) < 2:
            # Blank or space-free line: there is no key column to inspect.
            continue
        key, value = columns[1], columns[-1]
        if key == 'dataTier':
            md['data_tier'] = value
            if value == 'generated':
                is_generated = True
        elif key == 'endTime':
            md['end_time'] = _format_timestamp(value)
        elif key == 'startTime':
            md['start_time'] = _format_timestamp(value)
        elif key in _DIRECT_FIELDS:
            md[_DIRECT_FIELDS[key]] = value
        elif key in _REQUIRED_FIELDS:
            required[key] = value
        elif key == 'parent':
            parents.append({'file_name': value})

    # Fail with a clear message instead of an UnboundLocalError further down.
    missing = [name for name in _REQUIRED_FIELDS if name not in required]
    if missing:
        raise RuntimeError(
            'sam_metadata_dumper output for %s is missing field(s): %s'
            % (inputfile, ', '.join(missing)))

    # Get the other metadata field parameters.
    md['file_name'] = inputfile.split("/")[-1]
    md['file_size'] = os.path.getsize(inputfile)
    # NOTE(review): an earlier comment here said the checksum is skipped for
    # dCache files, but the call is unconditional -- confirm how
    # root_metadata.fileEnstoreChecksum treats such paths.
    md['crc'] = root_metadata.fileEnstoreChecksum(inputfile)
    md['runs'] = [[required['run'], required['runType']]]
    md['application'] = {'family': required['applicationFamily'],
                         'name': required['process_name'],
                         'version': required['applicationVersion']}
    md['parents'] = parents

    # If ub_project.name is not in the internal metadata:
    #   - for generator files, derive it from the fcl file name (without the
    #     '.fcl' part) and drop any parents;
    #   - for all other stages, take it from the first parent's metadata.
    if is_generated:
        md['parents'] = []
        if 'ub_project.name' not in md:
            md['ub_project.name'] = md['fcl.name'].split(".fcl")[0]
    elif 'ub_project.name' not in md and parents:
        # Checking that parents is non-empty avoids an IndexError for files
        # that list no parent at all.
        mdparent = samweb.getMetadata(parents[0]['file_name'])
        if 'ub_project.name' in mdparent:
            md['ub_project.name'] = mdparent['ub_project.name']

    return md
129 

Variable Documentation

tuple extractor_dict.md = getmetadata(str(sys.argv[1]))

Definition at line 131 of file sbndutil/python/extractor_dict.py.

tuple extractor_dict.mdtext = samweb_cli.json.dumps(md, sys.stdout, indent=2, sort_keys=True)

Definition at line 133 of file sbndutil/python/extractor_dict.py.