All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MetadataSBN_service.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 // Name: MetadataSBN_service.cc.
3 //
4 // Purpose: generate SBN-specific sam metadata for root Tfiles (histogram or ntuple files).
5 //
6 // FCL parameters: Experiment: Experiment name ("sbnd" or "icarus").
7 // JSONFileName: Name of generated .json file(s).
8 // dataTier: Data tier(s).
9 // for ntuples, dataTier = root-tuple;
10 // for histos, dataTier = root-histogram
11 // (default value: root-tuple)
12 // fileFormat: This is currently specified by the user,
13 // the fileFormat for Tfiles is "root" (default value: root)
14 // Merge: Merge flag.
15 // 1 - Set merge.merge = 1 and merge.merged = 0
16 // 0 - Set merge.merge = 0 and merge.merged = 0
17 // -1 - Do not generate merge parameters.
18 // POTModuleLabel - POTSummary module label (default "generator").
19 //
20 // Parameters JSONFileName, dataTier, and fileFormat can be single
21 // strings or sequences of strings. In case of sequences of length
22 // greater than one, multiple json files will be generated. Sequences
23 // must be equal length.
24 //
25 // Other notes: 1. This service uses the ART's standard file_catalog_metadata service
26 // to extract some of the common (common to both ART and TFile outputs)
27 // job-specific metadata parameters, so, it is important to call this
28 // service in your fcl file
29 // stick this line in your "services" section of fcl file:
30 // FileCatalogMetadata: @local::art_file_catalog_mc
31 //
32 // 2. When you call FileCatalogMetadata service in your fcl file, and if
33 // you have (art) root Output section in your fcl file, and if you do not
34 // have "dataTier" specified in that section, then this service will throw
35 // an exception. To avoid this, either remove the entire root Output section
36 // in your fcl file (and remove art stream output from your end_paths) or
37 // include appropriate dataTier information in the section. If you are only
38 // running analysis job, best way is to not include any art root Output section.
39 //
40 // 3. This service is exclusively written to work with production (in other
41 // words for jobs submitted through grid). Some of the metadata parameters
42 // (output TFileName, filesize, Project related details) are captured/updated
43 // during and/or after the workflow.
44 //
45 //
46 // Created: 21-Feb-2018, D. Brailsford
47 // based on the SBND version by T. Junk which is based on the
48 // MicroBooNE example by S. Gollapinni
49 //
50 ////////////////////////////////////////////////////////////////////////
51 
52 #include <algorithm>
53 #include <ctime>
54 #include <iomanip>
55 #include <iostream>
56 #include <fstream>
57 #include <sstream>
58 #include <string>
59 #include <vector>
60 
64 
65 #include "art/Framework/Principal/Event.h"
66 #include "art/Framework/Principal/SubRun.h"
67 #include "art/Framework/Services/Registry/ServiceHandle.h"
68 #include "art/Framework/Services/Registry/ServiceDefinitionMacros.h"
69 #include "art/Framework/Services/System/FileCatalogMetadata.h"
70 #include "art/Framework/Services/System/TriggerNamesService.h"
71 #include "art/Utilities/OutputFileInfo.h"
72 #include "cetlib_except/exception.h"
73 #include "messagefacility/MessageLogger/MessageLogger.h"
74 
75 #include "TROOT.h"
76 #include "TFile.h"
77 #include "TTimeStamp.h"
78 
79 using namespace std;
80 
81 //--------------------------------------------------------------------
82 
83 // Constructor.
84 util::MetadataSBN::MetadataSBN(fhicl::ParameterSet const& pset,
85  art::ActivityRegistry& reg):
86  fFileStats{"", art::ServiceHandle<art::TriggerNamesService const>{}->getProcessName()}
87 {
88  // Insist on configuring Experiment from the fcl file (ideally) or the
89  // environment.
90  const char* expt = getenv("EXPERIMENT");
91  if(expt) fExperiment = pset.get<std::string>("Experiment", expt); else fExperiment = pset.get<std::string>("Experiment");
92  std::transform(fExperiment.begin(), fExperiment.end(), fExperiment.begin(), [](unsigned char c){return std::tolower(c);});
93 
94  // Get scalar/vector parameters.
95  if(pset.is_key_to_atom("JSONFileName"))
96  fJSONFileName.push_back(pset.get<std::string>("JSONFileName"));
97  else if(pset.is_key_to_sequence("JSONFileName"))
98  fJSONFileName = pset.get<std::vector<std::string> >("JSONFileName");
99 
100  if(pset.is_key_to_atom("dataTier"))
101  fDataTier.push_back(pset.get<std::string>("dataTier"));
102  else if(pset.is_key_to_sequence("dataTier"))
103  fDataTier = pset.get<std::vector<std::string> >("dataTier");
104  if(fDataTier.size() != fJSONFileName.size())
105  throw cet::exception("MetadataSBN") << "FCL sequence size mismatch.\n";
106 
107  if(pset.is_key_to_atom("fileFormat"))
108  fFileFormat.push_back(pset.get<std::string>("fileFormat"));
109  else if(pset.is_key_to_sequence("fileFormat"))
110  fFileFormat = pset.get<std::vector<std::string> >("fileFormat");
111  if(fFileFormat.size() != fJSONFileName.size())
112  throw cet::exception("MetadataSBN") << "FCL sequence size mismatch.\n";
113 
114  if(pset.has_key("Merge")) {
115  if(pset.is_key_to_atom("Merge"))
116  fMerge.push_back(pset.get<int>("Merge"));
117  else if(pset.is_key_to_sequence("Merge"))
118  fMerge = pset.get<std::vector<int> >("Merge");
119  }
120  fPOTModuleLabel = pset.get<std::string>("POTModuleLabel", "generator");
121 
122  reg.sPostBeginJob.watch(this, &MetadataSBN::postBeginJob);
123  reg.sPostOpenFile.watch(this, &MetadataSBN::postOpenInputFile);
124  reg.sPostCloseFile.watch(this, &MetadataSBN::postCloseInputFile);
125  reg.sPostProcessEvent.watch(this, &MetadataSBN::postEvent);
126  reg.sPostBeginSubRun.watch(this, &MetadataSBN::postBeginSubRun);
127  reg.sPostEndSubRun.watch(this, &MetadataSBN::postEndSubRun);
128 
129  // get metadata from the FileCatalogMetadataSBN service, which is filled on its construction
130  art::ServiceHandle<util::FileCatalogMetadataSBN> paramhandle;
131  md.fFCLName = paramhandle->GetFCLName();
132  md.fProjectName = paramhandle->GetProjectName();
133  md.fProjectStage = paramhandle->GetProjectStage();
134  md.fProjectVersion = paramhandle->GetProjectVersion();
135  md.fProjectSoftware = paramhandle->GetProjectSoftware();
136  md.fProductionName = paramhandle->GetProductionName();
137  md.fProductionType = paramhandle->GetProductionType();
138  md.merge = -1;
139  md.fTotPOT = 0.;
140 }
141 
/// Strip one pair of enclosing double quotes from a string.
///
/// @param s  Input string (taken by value and modified in place).
/// @return   `s` without its first and last character when both are `"`;
///           otherwise `s` unchanged.
std::string UnQuoteString(std::string s)
{
  const bool isQuoted = s.size() >= 2 && s.front() == '"' && s.back() == '"';
  if(isQuoted){
    s.pop_back();
    s.erase(s.begin());
  }
  return s;
}
150 
151 void MaybeCopyFromMap(const std::map<std::string, std::string>& in,
152  const std::string& key,
153  std::string& out)
154 {
155  const auto it = in.find(key);
156  if(it == in.end()){
157  out = "";
158  }
159  else{
160  out = UnQuoteString(it->second);
161  }
162 }
163 
164 void MaybeCopyToMap(const std::string& in,
165  const std::string& key,
166  std::map<std::string, std::string>& out)
167 {
168  if(!in.empty()) out[key] = UnQuoteString(in);
169 }
170 
171 //--------------------------------------------------------------------
172 // PostBeginJob callback.
173 // Insert per-job metadata via Metadata service.
// NOTE(review): the signature line of this function is absent from this
// listing (the embedded numbering jumps from 173 to 175). The constructor
// registers &MetadataSBN::postBeginJob on sPostBeginJob, so this is presumably
// that callback's body -- confirm against the original source.
//
// Records the job start time and copies a fixed set of keys from art's
// FileCatalogMetadata service into md / frunType.
175 {
176  // get the start time
177  md.fstart_time = time(0);
178 
179  // Get art metadata service and extract parameters from there
180  art::ServiceHandle<art::FileCatalogMetadata> artmds;
181 
182  art::FileCatalogMetadata::collection_type artmd;
183  artmds->getMetadata(artmd);
184 
  // Re-key the (name, value) collection as a map, stripping enclosing quotes
  // from every value.
185  std::map<std::string, std::string> mdmap;
186  for(const auto& d: artmd)
187  mdmap[d.first] = UnQuoteString(d.second);
188 
189  // if a certain parameter/key is not found, assign an empty string value to it
  // md.fapplication is filled as (family, name, version), in that tuple order.
190  MaybeCopyFromMap(mdmap, "application.family", std::get<0>(md.fapplication));
191  MaybeCopyFromMap(mdmap, "art.process_name", std::get<1>(md.fapplication));
192  MaybeCopyFromMap(mdmap, "application.version", std::get<2>(md.fapplication));
193  MaybeCopyFromMap(mdmap, "group", md.fgroup);
194  MaybeCopyFromMap(mdmap, "file_type", md.ffile_type);
195  MaybeCopyFromMap(mdmap, "art.run_type", frunType);
196 }
197 
198 
199 //--------------------------------------------------------------------
200 // PostOpenFile callback.
201 void util::MetadataSBN::postOpenInputFile(std::string const& fn)
202 {
203  // save parent input files here
204  // 08/06 DBrailsford: Only save the parent string if the string is filled. The string still exists (with 0 characters) for generation stage files. See redmine issue 20124
205  if (fn.length() > 0) md.fParents.insert(fn);
206  fFileStats.recordInputFile(fn);
207 }
208 
209 //--------------------------------------------------------------------
210 // PostEvent callback.
211 void util::MetadataSBN::postEvent(art::Event const& evt, art::ScheduleContext)
212 {
213  art::RunNumber_t run = evt.run();
214  art::SubRunNumber_t subrun = evt.subRun();
215  art::EventNumber_t event = evt.event();
216  art::SubRunID srid = evt.id().subRunID();
217 
218  // save run, subrun and runType information once every subrun
219  if (fSubRunNumbers.count(srid) == 0){
220  fSubRunNumbers.insert(srid);
221  md.fruns.push_back(make_tuple(run, subrun, frunType));
222  }
223 
224  // save the first event
225  if (md.fevent_count == 0) md.ffirst_event = event;
226  md.flast_event = event;
227  // event counter
228  ++md.fevent_count;
229 
230 }
231 
232 //--------------------------------------------------------------------
233 // PostSubRun callback.
234 void util::MetadataSBN::postBeginSubRun(art::SubRun const& sr)
235 {
236  art::RunNumber_t run = sr.run();
237  art::SubRunNumber_t subrun = sr.subRun();
238  art::SubRunID srid = sr.id();
239 
240  // save run, subrun and runType information once every subrun
241  if (fSubRunNumbers.count(srid) == 0){
242  fSubRunNumbers.insert(srid);
243  md.fruns.push_back(make_tuple(run, subrun, frunType));
244  }
245 }
246 
247 //--------------------------------------------------------------------
248 // PostEndSubRun callback.
249 void util::MetadataSBN::postEndSubRun(art::SubRun const& sr)
250 {
251  art::Handle< sumdata::POTSummary > potListHandle;
252  double fTotPOT = 0;
253  if(sr.getByLabel(fPOTModuleLabel,potListHandle)){
254  fTotPOT+=potListHandle->totpot;
255  }
256 
257  md.fTotPOT += fTotPOT;
258 }
259 
260 
261 //--------------------------------------------------------------------
/// Turn a string into a JSON value.
///
/// Text that already looks like a JSON object (`{...}`) or array (`[...]`) is
/// trusted and returned unchanged. Anything else is emitted as a JSON string:
/// wrapped in double quotes, with embedded double quotes, backslashes and
/// control characters (below 0x20) escaped so the result stays valid JSON.
///
/// @param s  Raw value.
/// @return   `s` itself for dict/list-shaped input, otherwise the quoted and
///           escaped form of `s`.
std::string Escape(const std::string& s)
{
  // If it's formatted as a dict or list, trust it's already formatted
  if(s.size() >= 2 &&
     ((s.front() == '{' && s.back() == '}') || (s.front() == '[' && s.back() == ']'))) return s;

  // otherwise quote it, escaping whatever would terminate or corrupt the string
  static const char hexDigits[] = "0123456789abcdef";

  std::string quoted;
  quoted.reserve(s.size() + 2);
  quoted += '"';
  for(const char c : s){
    switch(c){
      case '"':  quoted += "\\\""; break;
      case '\\': quoted += "\\\\"; break;
      case '\b': quoted += "\\b";  break;
      case '\f': quoted += "\\f";  break;
      case '\n': quoted += "\\n";  break;
      case '\r': quoted += "\\r";  break;
      case '\t': quoted += "\\t";  break;
      default: {
        const unsigned char uc = static_cast<unsigned char>(c);
        if(uc < 0x20){
          // remaining control characters have no short form: use \u00XX
          quoted += "\\u00";
          quoted += hexDigits[(uc >> 4) & 0xF];
          quoted += hexDigits[uc & 0xF];
        }
        else{
          quoted += c;
        }
      }
    }
  }
  quoted += '"';
  return quoted;
}
270 
271 //--------------------------------------------------------------------
// NOTE(review): the signature line of this function is absent from this
// listing (the embedded numbering jumps from 271 to 273). The body reads
// md.fParents and is called as GetParentsString() from GetMetadataMaps --
// confirm the exact signature against the original source.
//
// Builds the text of a JSON-style list with one {"file_name": ...} object per
// entry of md.fParents. Returns an empty string when there are no parents.
// Side effect: prints every parent to std::cout.
273 {
274  if(md.fParents.empty()) return "";
275 
  // c counts the parents handled so far; it is used to detect the last one.
276  unsigned int c = 0;
277 
278  std::string ret = "[\n";
279  for(auto parent: md.fParents) {
280  std::cout<<"Parent " << c << ": " << parent << std::endl;
281  c++;
  // Keep only the part after the last '/', i.e. drop any directory prefix.
282  size_t n = parent.find_last_of('/');
283  size_t f1 = (n == std::string::npos ? 0 : n+1);
284  ret += " {\n \"file_name\": \"" + parent.substr(f1) + "\"\n }";
  // No separating comma after the final element.
285  if(md.fParents.size() == 1 || c == md.fParents.size()) ret += "\n";
286  else ret += ",\n";
287  }
288 
289  ret += " ]";
290  return ret;
291 }
292 
293 //--------------------------------------------------------------------
// NOTE(review): the signature line of this function is absent from this
// listing (the embedded numbering jumps from 293 to 295). The body reads
// md.fruns and is called as GetRunsString() from GetMetadataMaps -- confirm
// the exact signature against the original source.
//
// Builds the text of a JSON-style list of [run, subrun, "run type"] triples,
// one per entry of md.fruns. With no entries the result is an empty list.
295 {
  // c counts the entries handled so far; it is used to detect the last one.
296  unsigned int c = 0;
297 
298  std::string ret = "[\n";
299  for(auto&t :md.fruns){
300  c++;
  // Tuple layout: <0> run number, <1> subrun number, <2> run type string.
301  ret += " [\n " + std::to_string(std::get<0>(t)) + ",\n " + std::to_string(std::get<1>(t)) + ",\n \"" + std::get<2>(t) + "\"\n ]";
  // No separating comma after the final element.
302  if(md.fruns.size() == 1 || c == md.fruns.size()) ret += "\n";
303  else ret += ",\n";
304  }
305  ret += " ]";
306  return ret;
307 }
308 
309 //--------------------------------------------------------------------
310 void util::MetadataSBN::GetMetadataMaps(std::map<std::string, std::string>& strs,
311  std::map<std::string, int>& ints,
312  std::map<std::string, double>& doubles,
313  std::map<std::string, std::string>& objs)
314 {
315  strs.clear(); ints.clear(); doubles.clear(); objs.clear();
316 
317  objs["application"] = "{\"family\": \""+std::get<0>(md.fapplication)+"\", \"name\": \""+std::get<1>(md.fapplication)+"\", \"version\": \""+std::get<2>(md.fapplication)+"\"}";
318 
319  if(!md.fParents.empty()) objs["parents"] = GetParentsString();
320  if(!md.fruns.empty()) objs["runs"] = GetRunsString();
321 
322  // convert start and end times into time format: Year-Month-DayTHours:Minutes:Seconds
323  char endbuf[80], startbuf[80];
324  struct tm tstruct;
325  tstruct = *localtime(&md.fend_time);
326  strftime(endbuf,sizeof(endbuf),"%Y-%m-%dT%H:%M:%S",&tstruct);
327  tstruct = *localtime(&md.fstart_time);
328  strftime(startbuf,sizeof(startbuf),"%Y-%m-%dT%H:%M:%S",&tstruct);
329 
330  strs["start_time"] = startbuf;
331  strs["end_time"] = endbuf;
332 
333  strs["data_tier"] = md.fdata_tier;
334  ints["event_count"] = md.fevent_count;
335  strs["file_format"] = md.ffile_format;
336  ints["first_event"] = md.ffirst_event;
337  ints["last_event"] = md.flast_event;
338 
339  const std::string proj = fExperiment+"_project";
340  MaybeCopyToMap(md.fFCLName, "fcl.name", strs);
341  MaybeCopyToMap(md.fProjectName, proj+".name", strs);
342  MaybeCopyToMap(md.fProjectStage, proj+".stage", strs);
343  MaybeCopyToMap(md.fProjectVersion, proj+".version", strs);
344  MaybeCopyToMap(md.fProjectSoftware, proj+".software", strs);
345  MaybeCopyToMap(md.fProductionName, "production.name", strs);
346  MaybeCopyToMap(md.fProductionType, "production.type", strs);
347 
348  MaybeCopyToMap(md.fgroup, "group", strs);
349  MaybeCopyToMap(md.ffile_type, "file_type", strs);
350 
351  if(md.merge >= 0) {
352  ints["merge.merge"] = (md.merge==0 ? 0 : 1);
353  ints["merge.merged"] = 0;
354  }
355  doubles["mc.pot"] = md.fTotPOT;
356 }
357 
358 //--------------------------------------------------------------------
359 // PostCloseFile callback.
// NOTE(review): the signature line of this function is absent from this
// listing (the embedded numbering jumps from 359 to 361). The constructor
// registers &MetadataSBN::postCloseInputFile on sPostCloseFile, so this is
// presumably that callback's body -- confirm against the original source.
//
// Stamps the end time, then for every configured JSON file name fills in the
// per-file fields of md, collects the metadata maps and writes them out as a
// single JSON object.
361 {
362  //update end time
363  md.fend_time = time(0);
364 
365  // Loop over files.
366 
367  for(unsigned int i=0; i<fJSONFileName.size(); ++i) {
368 
369  // Update per-file metadata.
  // fDataTier and fFileFormat were checked in the constructor to have the
  // same length as fJSONFileName; fMerge may be shorter, in which case the
  // merge flag falls back to -1.
370 
371  md.fdata_tier = fDataTier[i];
372  md.ffile_format = fFileFormat[i];
373  if(fMerge.size() > i)
374  md.merge = fMerge[i];
375  else
376  md.merge = -1;
377 
378 
379  std::map<std::string, std::string> strs;
380  std::map<std::string, int> ints;
381  std::map<std::string, double> doubles;
382  std::map<std::string, std::string> objs;
383  GetMetadataMaps(strs, ints, doubles, objs);
384 
385  // open a json file and write everything from the struct md complying to the
386  // samweb json format. This json file holds the below information temporarily.
387  // If you submitted a grid job invoking this service, the information from
388  // this file is appended to a final json file and this file will be removed
389 
  // An empty file name means "write nothing" for this entry.
  // NOTE(review): the stream is not checked after open(); a failure to open
  // the file goes unnoticed here.
390  if(!fJSONFileName[i].empty()){
391  std::ofstream jsonfile;
392  jsonfile.open(fJSONFileName[i]);
393  jsonfile << "{\n";
394 
  // objs values are written verbatim (they are already JSON text). "once"
  // suppresses the separator before the first entry only.
395  bool once = true;
396  for(auto& it: objs){
397  if(!once) jsonfile << ",\n";
398  once = false;
399  jsonfile << " \"" << it.first << "\": " << it.second;
400  }
  // The remaining loops always emit a leading ",\n", which relies on objs
  // having contributed at least one entry (GetMetadataMaps always sets
  // "application").
401  for(auto& it: strs){
402  // Have to escape string outputs
  // NOTE(review): the value is only wrapped in double quotes; quotes or
  // backslashes inside it.second are written as-is.
403  jsonfile << ",\n \"" << it.first << "\": \"" << it.second << "\"";
404  }
405  for(auto& it: ints){
406  jsonfile << ",\n \"" << it.first << "\": " << it.second;
407  }
408  for(auto& it: doubles){
409  jsonfile << ",\n \"" << it.first << "\": " << it.second;
410  }
411 
412  jsonfile<<"\n}\n";
413  jsonfile.close();
414  }
415  }
416 
417  fFileStats.recordFileClose();
418  //TODO figure out how to make the name identical to the TFile
419  //std::string new_name = fRenamer.maybeRenameFile("myjson.json",fJSONFileName);
420 }
421 
// Service definition macro applied to util::MetadataSBN; presumably supplied
// by art's ServiceDefinitionMacros.h (included above) -- confirm against the
// art documentation.
422 DEFINE_ART_SERVICE(util::MetadataSBN)
void GetMetadataMaps(std::map< std::string, std::string > &strs, std::map< std::string, int > &ints, std::map< std::string, double > &doubles, std::map< std::string, std::string > &objs)
double std(const std::vector< short > &wf, const double ped_mean, size_t start, size_t nsample)
Definition: UtilFunc.cxx:42
static constexpr Sample_t transform(Sample_t sample)
void postEndSubRun(art::SubRun const &subrun)
void postEvent(art::Event const &ev, art::ScheduleContext)
std::string Escape(const std::string &s)
void postOpenInputFile(std::string const &fn)
MetadataSBN(fhicl::ParameterSet const &pset, art::ActivityRegistry &reg)
void postBeginSubRun(art::SubRun const &subrun)
std::string GetParentsString() const
if &&[-z"$BASH_VERSION"] then echo Attempting to switch to bash bash shellSwitch exit fi &&["$1"= 'shellSwitch'] shift declare a IncludeDirectives for Dir in
std::string to_string(WindowPattern const &pattern)
then echo File list $list not found else cat $list while read file do echo $file sed s
Definition: file_to_url.sh:60
void MaybeCopyFromMap(const std::map< std::string, std::string > &in, const std::string &key, std::string &out)
void MaybeCopyToMap(const std::string &in, const std::string &key, std::map< std::string, std::string > &out)
std::string GetRunsString() const
TCEvent evt
Definition: DataStructs.cxx:8
bool empty(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:555
BEGIN_PROLOG could also be cout
std::string UnQuoteString(std::string s)
Un-quote quoted strings.