All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sbndpoms_caf_concatenation.sh
Go to the documentation of this file.
#!/bin/bash

# Define the output directories
# These will be appended with metadata info
WORKDIR="/pnfs/sbnd/scratch/$USER/mcp"
OUTDIR="/pnfs/sbnd/persistent/$USER/mcp"
# Concatenate files together up to 1GB
MAXSIZE=1000000000

# Display a usage synopsis on stdout.
show_help()
{
  cat <<EOF
Usage: ${0##*/} --defname NAME [--workdir DIR] [--outdir DIR] [--size BYTES] [--flatten] [--clean]

Options:
  -h, -?, --help   Show this help and exit.
  --defname NAME   Input SAM definition (mandatory).
  --workdir DIR    Base working directory (current value: $WORKDIR).
  --outdir DIR     Base output directory (current value: $OUTDIR).
  --size BYTES     Maximum size of a concatenated file (current value: $MAXSIZE).
  --flatten        Also make a flat CAF from every concatenated CAF.
  --clean          Clean all previous outputs and exit.
EOF
}

# Parse the command line into the global settings DEFNAME, WORKDIR, OUTDIR,
# MAXSIZE, FLATTEN and CLEAN.
#   Arguments: the script's command-line arguments ("$@").
#   Exits 0 after printing the help, 1 on a missing option argument or an
#   unknown option.
parse_args()
{
  while :
  do
    case "${1-}" in
      -h|-\?|--help)
        show_help
        exit
        ;;
      --defname|--workdir|--outdir|--size)
        # These options take an argument; ensure it has been specified.
        if [ -z "${2-}" ]
        then
          echo "$0 ERROR: ${1#--} requires a non-empty option argument." >&2
          exit 1
        fi
        case "$1" in
          --defname) DEFNAME="$2" ;;
          --workdir) WORKDIR="$2" ;;
          --outdir)  OUTDIR="$2" ;;
          --size)    MAXSIZE="$2" ;;
        esac
        shift 2
        ;;
      --flatten) # Plain switch, takes no argument.
        FLATTEN=true
        shift
        ;;
      --clean) # Plain switch, takes no argument.
        CLEAN=true
        shift
        ;;
      -?*)
        # A mistyped option used to end parsing silently, so every option
        # after it was ignored. Refuse to run instead.
        echo "$0 ERROR: unknown option: $1" >&2
        exit 1
        ;;
      *)
        # No more options.
        break
        ;;
    esac
  done
}

parse_args "$@"
67 
# Abort with exit status 2 when a mandatory setting is empty.
#   $1 - setting name as shown in the error message
#   $2 - value to check
requireSetting()
{
  if [ -z "$2" ]
  then
    echo "$0 ERROR: $1 is mandatory" >&2
    exit 2
  fi
}

requireSetting defname "$DEFNAME"
requireSetting workdir "$WORKDIR"
requireSetting outdir "$OUTDIR"
requireSetting size "$MAXSIZE"

# MAXSIZE is fed to integer arithmetic (and used as a dividend for the slice
# limit), so anything other than a positive decimal integer would only fail
# much later with an obscure arithmetic error.
if ! [[ "$MAXSIZE" =~ ^[1-9][0-9]*$ ]]
then
  echo "$0 ERROR: size must be a positive integer number of bytes (got '$MAXSIZE')" >&2
  exit 2
fi

# Summarise the configuration this run will use.
echo "Running with settings:"
echo " Definition: $DEFNAME"
echo " Workdir: $WORKDIR"
echo " Outdir: $OUTDIR"
echo " Max Size: $(numfmt --to=si "$MAXSIZE")"
if [ -n "$FLATTEN" ]
then
  echo " Making Flat CAFS"
fi
if [ -n "$CLEAN" ]
then
  echo " Cleaning all outputs"
fi
105 
# Make sure the SAM definition named $1 is free to be (re)created.
# If it already exists: with CLEAN set, remove its files from storage, retire
# them in SAM and delete the definition; otherwise exit with status 3.
_prepareSamDefinition()
{
  local defName="$1"
  local file location
  local -a files=() locations=()

  # Fixed-string, whole-line match: a plain grep would also match any longer
  # definition name that merely contains this one.
  if samweb -e sbnd list-definitions | grep -Fx -- "$defName" > /dev/null
  then
    if [ -n "$CLEAN" ]
    then
      echo "Deleting SAM Definition $defName"
      mapfile -t files < <(samweb -e sbnd list-definition-files "$defName")
      for file in "${files[@]}"
      do
        [ -n "$file" ] || continue
        mapfile -t locations < <(samweb -e sbnd locate-file "$file" | sed 's/enstore://')
        for location in "${locations[@]}"
        do
          [ -n "$location" ] || continue
          ifdh rm "$location/$file"
        done
        samweb -e sbnd retire-file "$file"
      done
      samweb -e sbnd delete-definition "$defName"
    else
      echo "SAM Definition $defName already present"
      exit 3
    fi
  else
    echo "Creating SAM Definition $defName"
  fi
}

# Check one of the run directories.
#   $1 - label used in the messages (WORKDIR or OUTDIR)
#   $2 - directory path
# An empty directory is accepted. A non-empty one is removed when CLEAN is
# set, otherwise the script exits with status 3.
_prepareDirectory()
{
  local label="$1"
  local dir="$2"

  if [ -z "$(ls -A "$dir")" ]
  then
    echo "$label: $dir"
  elif [ -n "$CLEAN" ]
  then
    echo "Deleting $label: $dir"
    ifdh rmdir "$dir" --force=srm
  else
    echo "Not Empty: $dir"
    exit 3
  fi
}

# Derive the output definition names and directories from the metadata of the
# first input file, and make sure none of the outputs exist yet (or remove
# them when CLEAN is set).
#   Reads:  DEFNAME, WORKDIR, OUTDIR, FLATTEN, CLEAN
#   Writes: FILENAME, MDPRODUCTIONDEFNAME, FLATMDPRODUCTIONDEFNAME, MDFILETYPE,
#           MDPRODUCTIONTYPE, MDPRODUCTIONNAME, MDSBNDPROJECTNAME,
#           MDSBNDPROJECTVERSION, MDSBNDPROJECTSTAGE, WORKDIR, OUTDIR
#           and the file base.json in the current directory.
#   Exits 3 when an output already exists and CLEAN is not set, when the input
#   definition has no files, or when the output name equals the input name.
prepare()
{
  # Assume that the metadata is roughly the same in all files, so take the first
  FILENAME=$(samweb -e sbnd list-files "defname: $DEFNAME with limit 1")
  if [ -z "$FILENAME" ]
  then
    echo "$0 ERROR: no files found in definition $DEFNAME" >&2
    exit 3
  fi
  samweb -e sbnd get-metadata "$FILENAME" --json > base.json

  # Lets change "caf" to "concat_caf" for the stage and defname
  MDPRODUCTIONDEFNAME=${DEFNAME/caf/concat_caf}
  if [ "$MDPRODUCTIONDEFNAME" = "$DEFNAME" ]
  then
    # Without "caf" in the name the substitution is a no-op, and --clean
    # would then delete the files of the input definition itself.
    echo "$0 ERROR: defname '$DEFNAME' does not contain 'caf', cannot derive the output definition name" >&2
    exit 3
  fi

  # Now we want to set some global vars to define the output path
  MDFILETYPE=$(jq -r '."file_type"' base.json)
  MDPRODUCTIONTYPE=$(jq -r '."production.type"' base.json)
  MDPRODUCTIONNAME=$(jq -r '."production.name"' base.json)
  MDSBNDPROJECTNAME=$(jq -r '."sbnd_project.name"' base.json)
  MDSBNDPROJECTVERSION=$(jq -r '."sbnd_project.version"' base.json)
  MDSBNDPROJECTSTAGE=$(jq -r '."sbnd_project.stage"' base.json)

  _prepareSamDefinition "$MDPRODUCTIONDEFNAME"

  if [ -n "$FLATTEN" ]
  then
    FLATMDPRODUCTIONDEFNAME=${MDPRODUCTIONDEFNAME/concat_caf/flat_caf}
    _prepareSamDefinition "$FLATMDPRODUCTIONDEFNAME"
  fi

  # Both directories get the same metadata-derived sub-path appended.
  local mdPath="$MDFILETYPE/$MDPRODUCTIONTYPE/$MDPRODUCTIONNAME/$MDSBNDPROJECTNAME/$MDSBNDPROJECTVERSION/$MDSBNDPROJECTSTAGE"
  OUTDIR="$OUTDIR/$mdPath"
  WORKDIR="$WORKDIR/$mdPath"

  mkdir -p "$OUTDIR"
  mkdir -p "$WORKDIR"

  _prepareDirectory WORKDIR "$WORKDIR"
  _prepareDirectory OUTDIR "$OUTDIR"
}
195 
# Give one freshly produced file in $WORKDIR a unique name, copy it to
# $OUTDIR and declare it to SAM.
#   $1 - file name inside $WORKDIR (e.g. concat_caf_3.root)
#   $2 - path of the JSON metadata file to write and declare
#   $3 - name of the global variable that receives the unique file name
# Exits 3 if the renamed file cannot be identified unambiguously or if the
# file is not found at the location SAM reports afterwards.
_registerCafOutput()
{
  local baseName="$1"
  local jsonFile="$2"
  local resultVar="$3"
  local pattern="${baseName%.*}*.root"
  local uniqueName
  local -a matches=()

  extractCAFMetadata "$WORKDIR/$baseName" > "$jsonFile"

  # Steal the ifdh and sam commands from sbndpoms_genfclwithrunnumber_maker.sh (Thanks Dom)
  # SAM needs the file to have a unique name
  ifdh addOutputFile "$WORKDIR/$baseName"
  ifdh renameOutput unique

  # Bit annoying but we now need to find the ROOT file again as ifdh doesn't
  # tell us what the unique name is. Search once and reuse the result.
  mapfile -t matches < <(find "$WORKDIR" -name "$pattern")
  if [[ ${#matches[@]} -ne 1 ]]
  then
    echo "Found incorrect number of matching files for pattern: $pattern"
    if [[ ${#matches[@]} -gt 0 ]]
    then
      printf '%s\n' "${matches[@]}"
    fi
    echo "Exiting"
    exit 3
  fi

  # Exactly one pattern match, so assume that is the correct one
  uniqueName=$(basename "${matches[0]}")
  echo "$baseName renamed to $uniqueName"

  # Copy the file to the output directory (most likely dcache)
  ifdh copyBackOutput "$OUTDIR"
  # Clear up
  ifdh cleanup

  sbndpoms_metadata_extractor.sh "$WORKDIR/$uniqueName"

  samweb -e sbnd declare-file "$jsonFile"
  samweb -e sbnd add-file-location "$uniqueName" "$OUTDIR"

  # Check that the file exists where SAM says it is; this tells us whether
  # the declaration and location were actually recorded.
  if [[ ! -f "$(samweb -e sbnd locate-file "$uniqueName" | sed 's/enstore://')/$uniqueName" ]]
  then
    echo "Output file $uniqueName not found in outdir $OUTDIR"
    exit 3
  fi

  echo "$uniqueName declared and located by SAM"

  # Hand the unique name back through the caller-chosen global variable.
  printf -v "$resultVar" '%s' "$uniqueName"
}

# TODO put this into a proper script and pass arguments rather than rely on global variables
# This would allow us to run this in a background process and multithread the process
# Or submit a grid job etc.
#
# Concatenate the CAFs of the SAM definition $SLICEDEFNAME into one file,
# register it with SAM and, when FLATTEN is set, also produce and register a
# flat CAF made from the concatenated file.
#   Reads:  SLICENUM, SLICEDEFNAME, WORKDIR, OUTDIR, FLATTEN
#   Writes: CONCATNAME, CONCATFILE, JSONFILE, UNIQUEOUTCONCATNAME and, when
#           flattening, FLATNAME, FLATFILE, FLATJSONFILE, UNIQUEOUTFLATNAME
#   Exits 3 on any registration failure.
doConcat()
{
  # Create the output
  CONCATNAME="concat_caf_${SLICENUM}.root"
  CONCATFILE="$WORKDIR/$CONCATNAME"
  JSONFILE="${CONCATFILE}.json"

  echo "Creating $CONCATNAME"

  # Let ROOT run its magic
  concat_cafs "$SLICEDEFNAME" "$CONCATFILE"

  _registerCafOutput "$CONCATNAME" "$JSONFILE" UNIQUEOUTCONCATNAME

  if [ -n "$FLATTEN" ]
  then
    echo "Flattening $UNIQUEOUTCONCATNAME"

    FLATNAME="flat_caf_${SLICENUM}.root"
    FLATFILE="$WORKDIR/$FLATNAME"
    FLATJSONFILE="${FLATFILE}.json"
    # The flat CAF is made from the copy that was written to the output directory.
    flatten_caf "$OUTDIR/$UNIQUEOUTCONCATNAME" "$FLATFILE"

    _registerCafOutput "$FLATNAME" "$FLATJSONFILE" UNIQUEOUTFLATNAME
  fi
}
301 
prepare

# With --clean the run only removes previous outputs (done inside prepare).
if [ -n "$CLEAN" ]
then
  exit 0
fi

declare -i CONCATCOUNT=0
declare -i SLICENUM=0
declare -i FILECOUNTER=0

# Total size and number of files in the dataset
DEFSIZE=$(samweb -e sbnd list-definition-files --summary "$DEFNAME" | grep "Total size" | tr -dc '0-9')
FILECOUNT=$(samweb -e sbnd count-definition-files "$DEFNAME")

# Both numbers are used as divisors/dividends below; stop early on an empty
# definition or unparsable samweb output instead of dividing by zero.
if ! [[ "$DEFSIZE" =~ ^[0-9]+$ && "$FILECOUNT" =~ ^[0-9]+$ ]] || (( FILECOUNT == 0 ))
then
  echo "$0 ERROR: could not get a usable size/file count for definition $DEFNAME (size: '$DEFSIZE', count: '$FILECOUNT')" >&2
  exit 3
fi

# Work out the (average) size of each file; never let it be 0
FILESIZE=$(( DEFSIZE / FILECOUNT ))
if (( FILESIZE < 1 ))
then
  FILESIZE=1
fi
# Work out the max number of files to concat together; a single file larger
# than MAXSIZE still has to go somewhere, so the limit is at least 1
SLICELIMIT=$(( MAXSIZE / FILESIZE ))
if (( SLICELIMIT < 1 ))
then
  SLICELIMIT=1
fi
# Work out the number of concats this will result in (rounded up)
CONCATCOUNT=$(( (FILECOUNT + SLICELIMIT - 1) / SLICELIMIT ))
# Spread the files evenly between the CONCATS (rounded up)
FILELIMIT=$(( (FILECOUNT + CONCATCOUNT - 1) / CONCATCOUNT ))
# Recompute the count from the final per-slice limit so that the last slice
# can never start past the end of the dataset (i.e. never be empty)
CONCATCOUNT=$(( (FILECOUNT + FILELIMIT - 1) / FILELIMIT ))

echo "DefSize: $DEFSIZE and FileCount: $FILECOUNT"
echo "FileSize: $FILESIZE and FileLimit: $FILELIMIT"
echo "ConcatCount: $CONCATCOUNT"

while [[ $SLICENUM -lt $CONCATCOUNT ]]
do
  FILECOUNTER=$(( SLICENUM * FILELIMIT ))
  echo "Creating Slice: $SLICENUM, starting from file: $FILECOUNTER"

  # NOTE(review): these per-slice definitions are never deleted, not even by
  # --clean; confirm whether they should be removed after doConcat.
  SLICEDEFNAME=${DEFNAME}_Slice${SLICENUM}
  samweb -e sbnd create-definition "$SLICEDEFNAME" "defname: $DEFNAME with limit $FILELIMIT with offset $FILECOUNTER"

  doConcat

  # SLICENUM is declared as an integer, so += adds rather than appends
  SLICENUM+=1
done

# Create a definition with the output files
samweb -e sbnd create-definition "$MDPRODUCTIONDEFNAME" "file_name like concat_caf_%.root and file_type $MDFILETYPE and production.type $MDPRODUCTIONTYPE and production.name $MDPRODUCTIONNAME and sbnd_project.name $MDSBNDPROJECTNAME and sbnd_project.version $MDSBNDPROJECTVERSION and sbnd_project.stage $MDSBNDPROJECTSTAGE and ischildof: ( defname: $DEFNAME ) and file_format concat_caf"
echo "Created Concat SAM definition: $MDPRODUCTIONDEFNAME: $(samweb -e sbnd list-definition-files --summary "$MDPRODUCTIONDEFNAME")"

if [ -n "$FLATTEN" ]
then
  samweb -e sbnd create-definition "$FLATMDPRODUCTIONDEFNAME" "file_name like flat_caf_%.root and file_type $MDFILETYPE and production.type $MDPRODUCTIONTYPE and production.name $MDPRODUCTIONNAME and sbnd_project.name $MDSBNDPROJECTNAME and sbnd_project.version $MDSBNDPROJECTVERSION and sbnd_project.stage $MDSBNDPROJECTSTAGE and ischildof: ( defname: $DEFNAME ) and file_format flat_caf"
  echo "Created Flat SAM definition: $FLATMDPRODUCTIONDEFNAME: $(samweb -e sbnd list-definition-files --summary "$FLATMDPRODUCTIONDEFNAME")"
fi
process_name opflash particleana ie ie ie z
then OUTDIR
then WORKDIR
* file
Definition: file_to_url.sh:69
pdgs p
Definition: selectors.fcl:22
#define the
then source cvmfs dune opensciencegrid org products dune setup_dune sh exit elif[-f/grid/fermiapp/products/dune/setup_dune_fermiapp.sh]
std::size_t size(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:561
shift
Definition: fcl_checks.sh:26
usage
Definition: doGit.sh:21
then echo Sam station was not specified(use option--sam_station)." exit 1 fi if [ x$SAM_GROUP
process_name gaushit a
while getopts h
return match has_match and(match.match_pdg==11 or match.match_pdg==-11)
then shift fi
g grid
esac voms proxy info all source grid fermiapp products common etc setups sh source cvmfs oasis opensciencegrid org fermilab products larsoft setup setup ifdhc echo Here is the your environment in this job
Definition: run_job.sh:29
if &&[-z"$BASH_VERSION"] then echo Attempting to switch to bash bash shellSwitch exit fi &&["$1"= 'shellSwitch'] shift declare a IncludeDirectives for Dir in
do i e
flatten FLATTEN
stream1 can override from command line with o or output services user sbnd
then echo fcl name
echo Invalid option
Definition: TrainMVA.sh:17
size if["$2"] then MAXSIZE
defname if["$2"] then DEFNAME
basic_json<> json
default JSON class
Definition: json.hpp:2445
std::size_t count(Cont const &cont)
esac echo uname r
list
Definition: file_to_url.sh:28