All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TFileMetadataICARUS_service.cc
Go to the documentation of this file.
1 ////////////////////////////////////////////////////////////////////////
2 // Name: TFileMetadataICARUS_service.cc.
3 //
4 // Purpose: generate ICARUS-specific sam metadata for root Tfiles (histogram or ntuple files).
5 //
6 // FCL parameters: GenerateTFileMetadata: This needs to be set to "true" in the fcl file
7 // to generate metadata (default value: false)
8 // dataTier: Currently this needs to be specified by the user
9 // for ntuples, dataTier = root-tuple;
10 // for histos, dataTier = root-histogram
11 // (default value: root-tuple)
12 // fileFormat: This is currently specified by the user,
13 // the fileFormat for Tfiles is "root" (default value: root)
14 //
15 // Other notes: 1. This service uses the ART's standard file_catalog_metadata service
16 // to extract some of the common (common to both ART and TFile outputs)
17 // job-specific metadata parameters, so, it is important to call this
18 // service in your fcl file
19 // stick this line in your "services" section of fcl file:
20 // FileCatalogMetadata: @local::art_file_catalog_mc
21 //
22 // 2. When you call FileCatalogMetadata service in your fcl file, and if
23 // you have (art) root Output section in your fcl file, and if you do not
24 // have "dataTier" specified in that section, then this service will throw
25 // an exception. To avoid this, either remove the entire root Output section
26 // in your fcl file (and remove art stream output from your end_paths) or
27 // include appropriate dataTier information in the section. If you are only
28 // running an analysis job, the best way is to not include any art root Output section.
29 //
30 // 3. This service is exclusively written to work with production (in other
31 // words for jobs submitted through grid). Some of the metadata parameters
32 // (output TFileName, filesize, Project related details) are captured/updated
33 // during and/or after the workflow.
34 //
35 //
36 // Created: 30-Dec-2019, M. Wospakrik
37 // based on the SBND version by D. Brailsford which is
38 // based on the ProtoDUNE version by T. Junk which is
39 // based on the MicroBooNE version by S. Gollapinni
40 //
41 ////////////////////////////////////////////////////////////////////////
42 
43 #include <algorithm>
44 #include <ctime>
45 #include <iomanip>
46 #include <iostream>
47 #include <fstream>
48 #include <sstream>
49 #include <stdio.h>
50 #include <string>
51 #include <vector>
52 #include <numeric> // std::accumulate
53 
56 
57 #include "art_root_io/RootDB/SQLite3Wrapper.h"
58 #include "art_root_io/RootDB/SQLErrMsg.h"
59 #include "art/Framework/Principal/Event.h"
60 #include "art/Framework/Principal/SubRun.h"
61 #include "art/Framework/Services/Registry/ServiceHandle.h"
62 #include "art/Framework/Services/Registry/ServiceDefinitionMacros.h"
63 #include "art/Framework/Services/System/FileCatalogMetadata.h"
64 #include "art/Framework/Services/System/TriggerNamesService.h"
65 #include "art/Utilities/OutputFileInfo.h"
66 #include "cetlib_except/exception.h"
67 #include "messagefacility/MessageLogger/MessageLogger.h"
68 
69 #include "TROOT.h"
70 #include "TFile.h"
71 #include "TTimeStamp.h"
72 
73 using namespace std;
74 
75 //--------------------------------------------------------------------
76 
77 // Constructor.
79  art::ActivityRegistry& reg):
80  fGenerateTFileMetadata{pset.get<bool>("GenerateTFileMetadata")},
81  fJSONFileName{pset.get<std::string>("JSONFileName")},
82  fFileStats{"", art::ServiceHandle<art::TriggerNamesService const>{}->getProcessName()}
83 {
85  md.fdata_tier = pset.get<std::string>("dataTier");
86  md.ffile_format = pset.get<std::string>("fileFormat");
87 
88  reg.sPostBeginJob.watch(this, &TFileMetadataICARUS::postBeginJob);
89  reg.sPostOpenFile.watch(this, &TFileMetadataICARUS::postOpenInputFile);
90  reg.sPostCloseFile.watch(this, &TFileMetadataICARUS::postCloseInputFile);
91  reg.sPostProcessEvent.watch(this, &TFileMetadataICARUS::postEvent);
92  reg.sPostBeginSubRun.watch(this, &TFileMetadataICARUS::postBeginSubRun);
93  }
94 }
95 
96 //--------------------------------------------------------------------
97 // PostBeginJob callback.
98 // Insert per-job metadata via TFileMetadata service.
100 {
101  std::cout << "TFileMetadataICARUS postBeginJob() begin" << std::endl;
102  // get the start time
103  md.fstart_time = time(0);
104 
105 // Get art metadata service and extract parameters from there
106  art::ServiceHandle<art::FileCatalogMetadata> artmds;
107 
108  art::FileCatalogMetadata::collection_type artmd;
109  artmds->getMetadata(artmd);
110 
111  for(auto const & d : artmd)
112  mdmap[d.first] = d.second;
113 
114  std::map<std::string,std::string>::iterator it;
115 
116 // if a certain parameter/key is not found, assign a quoted blank value (" ") to it
117 
118  if ((it=mdmap.find("applicationFamily"))!=mdmap.end()) std::get<0>(md.fapplication) = it->second;
119  else std::get<0>(md.fapplication) = "\" \"";
120 
121  if ((it=mdmap.find("process_name"))!=mdmap.end()) std::get<1>(md.fapplication) = it->second;
122  else std::get<1>(md.fapplication) = "\" \"";
123 
124  if ((it=mdmap.find("applicationVersion"))!=mdmap.end()) std::get<2>(md.fapplication) = it->second;
125  else std::get<2>(md.fapplication) = "\" \"";
126 
127  if ((it=mdmap.find("group"))!=mdmap.end()) md.fgroup = it->second;
128  else md.fgroup = "\" \"";
129 
130  if ((it=mdmap.find("file_type"))!=mdmap.end()) md.ffile_type = it->second;
131  else md.ffile_type = "\" \"";
132 
133  if ((it=mdmap.find("runType"))!=mdmap.end()) frunType = it->second;
134  else frunType = "\" \"";
135  std::cout << "TFileMetadataICARUS postBeginJob() end" << std::endl;
136 }
137 
138 
139 //--------------------------------------------------------------------
140 // PostOpenInputFile callback.
142 {
143  std::cout << "TFileMetadataICARUS postOpenInputFile() begin" << std::endl;
144  // save parent input files here
145  // 08/06 DBrailsford: Only save the parent string if the string is filled. The string still exists (with 0 characters) for generation stage files. See redmine issue 20124
146  if (fn.length() > 0) md.fParents.insert(fn);
147  fFileStats.recordInputFile(fn);
148  std::cout << "TFileMetadataICARUS postOpenInputFile() end" << std::endl;
149 }
150 
151 //--------------------------------------------------------------------
152 // PostEvent callback.
153 void icarusutil::TFileMetadataICARUS::postEvent(art::Event const& evt, art::ScheduleContext)
154 {
155  std::cout << "TFileMetadataICARUS postEvent() begin" << std::endl;
156  art::RunNumber_t run = evt.run();
157  art::SubRunNumber_t subrun = evt.subRun();
158  art::EventNumber_t event = evt.event();
159  art::SubRunID srid = evt.id().subRunID();
160 
161  // save run, subrun and runType information once every subrun
162  if (fSubRunNumbers.count(srid) == 0){
163  fSubRunNumbers.insert(srid);
164  md.fruns.push_back(make_tuple(run, subrun, frunType));
165  }
166 
167  // save the first event
168  if (md.fevent_count == 0) md.ffirst_event = event;
169  md.flast_event = event;
170  // event counter
171  ++md.fevent_count;
172 
173  std::cout << "TFileMetadataICARUS postEvent() end" << std::endl;
174 }
175 
176 //--------------------------------------------------------------------
177 // PostBeginSubRun callback.
179 {
180  std::cout << "TFileMetadataICARUS postBeginSubRun() begin" << std::endl;
181  art::RunNumber_t run = sr.run();
182  art::SubRunNumber_t subrun = sr.subRun();
183  art::SubRunID srid = sr.id();
184 
185  // save run, subrun and runType information once every subrun
186  if (fSubRunNumbers.count(srid) == 0){
187  fSubRunNumbers.insert(srid);
188  md.fruns.push_back(make_tuple(run, subrun, frunType));
189  }
190  std::cout << "TFileMetadataICARUS postBeginSubRun() end" << std::endl;
191 }
192 
193 //--------------------------------------------------------------------
194 // PostCloseInputFile callback.
196 {
197  std::cout << "TFileMetadataICARUS postCloseInputFile() begin" << std::endl;
198  // get metadata from the FileCatalogMetadataICARUS service, which is filled on its construction
199 
200  art::ServiceHandle<icarusutil::FileCatalogMetadataICARUS> paramhandle;
201  md.fFCLName = paramhandle->GetFCLName();
202  md.fProjectName = paramhandle->GetProjectName();
203  md.fProjectStage = paramhandle->GetProjectStage();
204  md.fProjectVersion = paramhandle->GetProjectVersion();
205  md.fProjectSoftware = paramhandle->GetProjectSoftware();
206  md.fProductionName = paramhandle->GetProductionName();
207  md.fProductionType = paramhandle->GetProductionType();
208 
209  //update end time
210  md.fend_time = time(0);
211 
212  // convert start and end times into time format: Year-Month-DayTHours:Minutes:Seconds
213  char endbuf[80], startbuf[80];
214  struct tm tstruct;
215  tstruct = *localtime(&md.fend_time);
216  strftime(endbuf,sizeof(endbuf),"%Y-%m-%dT%H:%M:%S",&tstruct);
217  tstruct = *localtime(&md.fstart_time);
218  strftime(startbuf,sizeof(startbuf),"%Y-%m-%dT%H:%M:%S",&tstruct);
219 
220 // open a json file and write everything from the struct md complying with the
221  // samweb json format. This json file holds the below information temporarily.
222  // If you submitted a grid job invoking this service, the information from
223  // this file is appended to a final json file and this file will be removed
224 
225  std::ofstream jsonfile;
226  jsonfile.open(fJSONFileName);
227  jsonfile<<"{\n \"application\": {\n \"family\": "<<std::get<0>(md.fapplication)<<",\n \"name\": ";
228  jsonfile<<std::get<1>(md.fapplication)<<",\n \"version\": "<<std::get<2>(md.fapplication)<<"\n },\n ";
229  jsonfile<<"\"data_tier\": \""<<md.fdata_tier<<"\",\n ";
230  jsonfile<<"\"event_count\": "<<md.fevent_count<<",\n ";
231  jsonfile<<"\"file_format\": \""<<md.ffile_format<<"\",\n ";
232  jsonfile<<"\"file_type\": "<<md.ffile_type<<",\n ";
233  jsonfile<<"\"first_event\": "<<md.ffirst_event<<",\n ";
234  jsonfile<<"\"group\": "<<md.fgroup<<",\n ";
235  jsonfile<<"\"last_event\": "<<md.flast_event<<",\n ";
236  unsigned int c=0;
237  //08/06 DBrailsford. Only create the parent json object if there are parent names in the set.
238  if (md.fParents.size() > 0){
239  jsonfile<<"\"parents\": [\n";
240  for(auto parent : md.fParents) {
241  std::cout<<"Parent " << c << ": " << parent << std::endl;
242  c++;
243  size_t n = parent.find_last_of('/');
244  size_t f1 = (n == std::string::npos ? 0 : n+1);
245  jsonfile<<" {\n \"file_name\": \""<<parent.substr(f1)<<"\"\n }";
246  if (md.fParents.size()==1 || c==md.fParents.size()) jsonfile<<"\n";
247  else jsonfile<<",\n";
248  }
249  jsonfile<<" ],\n ";
250  c=0;
251  }
252  jsonfile<<"\"runs\": [\n";
253  for(auto &t : md.fruns){
254  c++;
255  jsonfile<<" [\n "<<std::get<0>(t)<<",\n "<<std::get<1>(t)<<",\n "<<std::get<2>(t)<<"\n ]";
256  if (md.fruns.size()==1 || c==md.fruns.size()) jsonfile<<"\n";
257  else jsonfile<<",\n";
258  }
259  jsonfile<<" ],\n";
260 
261  if (md.fFCLName!="") jsonfile << "\"fcl.name\": \"" << md.fFCLName << "\",\n";
262  if (md.fProjectName!="") jsonfile << "\"icarus_project.name\": \"" << md.fProjectName << "\",\n";
263  if (md.fProjectStage!="") jsonfile << "\"icarus_project.stage\": \"" << md.fProjectStage << "\",\n";
264  if (md.fProjectVersion!="") jsonfile << "\"icarus_project.version\": \"" << md.fProjectVersion << "\",\n";
265  if (md.fProjectSoftware!="") jsonfile << "\"icarus_project.software\": \"" << md.fProjectSoftware << "\",\n";
266  if (md.fProductionName!="") jsonfile << "\"production.name\": \"" << md.fProductionName << "\",\n";
267  if (md.fProductionType!="") jsonfile << "\"production.type\": \"" << md.fProductionType << "\",\n";
268 
269  // put these at the end because we know they'll be there and the last one needs to not have a comma
270  jsonfile<<"\"start_time\": \""<<startbuf<<"\",\n";
271  jsonfile<<"\"end_time\": \""<<endbuf<<"\"\n";
272 
273 
274  jsonfile<<"}\n";
275  jsonfile.close();
276 
277  fFileStats.recordFileClose();
278  //TODO figure out how to make the name identical to the TFile
279  //std::string new_name = fRenamer.maybeRenameFile("myjson.json",fJSONFileName);
280  std::cout << "TFileMetadataICARUS postCloseInputFile() end" << std::endl;
281 }
282 
283 DEFINE_ART_SERVICE(icarusutil::TFileMetadataICARUS)
TFileMetadataICARUS(fhicl::ParameterSet const &pset, art::ActivityRegistry &reg)
double std(const std::vector< short > &wf, const double ped_mean, size_t start, size_t nsample)
Definition: UtilFunc.cxx:42
then if[["$THISISATEST"==1]]
Definition: neoSmazza.sh:95
std::set< art::SubRunID > fSubRunNumbers
std::tuple< std::string, std::string, std::string > fapplication
void postBeginSubRun(art::SubRun const &subrun)
void postOpenInputFile(std::string const &fn)
std::vector< std::tuple< art::RunNumber_t, art::SubRunNumber_t, std::string > > fruns
art::FileStatsCollector fFileStats
void postEvent(art::Event const &ev, art::ScheduleContext)
TCEvent evt
Definition: DataStructs.cxx:8
std::map< std::string, std::string > mdmap
BEGIN_PROLOG could also be cout