All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
runFilesFromSAM.sh
Go to the documentation of this file.
1 #!/usr/bin/env bash
2 #
3 # Creates file lists from a run number, or SAM definition, or just SAM files.
4 #
5 # Run with `--help` for usage instructions
6 #
7 # Author: Gianluca Petrillo (petrillo@slac.fnal.gov)
8 # Date: March 2021
9 #
10 # Changes:
11 # 20220720 (petrillo@slac.stanford.edu) [1.4]
12 # added --locate option
13 # 20220406 (petrillo@slac.stanford.edu) [1.3]
14 # --max option now only converts that many files
15 # 20220126 (petrillo@slac.stanford.edu) [1.2]
16 # added --stream options
17 # 20210429 (petrillo@slac.stanford.edu) [1.1]
18 # added --max option
19 # 20210411 (petrillo@slac.stanford.edu) [1.0]
20 # first public version
21 #
22 #
23 
# Name and version of the script, as printed by `--help` and `--version`.
SCRIPTNAME="$(basename "$0")"
SCRIPTVERSION="1.4" # must match the most recent entry of the change log above

# Values recognised for the file type, URL schema, storage location and stream.
declare -r RawType='raw'
declare -r DecodeType='decoded'
declare -r XRootDschema='root'
declare -r LocateSchema='locate'
declare -r dCacheLocation='dcache'
declare -r TapeLocation='enstore'
declare -r BNBstream='bnb'
declare -r NuMIstream='numi'
declare -r AnyStream=''

# Settings used when the corresponding command line option is not specified.
declare -r DefaultType="$DecodeType"
declare -r DefaultSchema="$XRootDschema"
declare -r DefaultLocation="$dCacheLocation"
declare -r DefaultDecoderStageName="stage0" # used to be 'decoder' up to a certain time
declare -r DefaultStream="$AnyStream"

# Name pattern of the per-specification file lists (option `-O`).
declare -r DefaultOutputPattern="%TYPE%-run%RUN%%DASHSTREAM%%DASHSCHEMA%%DASHLOCATION%%DASHLIMIT%.filelist"
44 
45 # ------------------------------------------------------------------------------
# Succeeds if the variable whose name is passed as $1 contains anything but
# zeroes: an unset, empty or all-`0` value counts as "flag not set".
function isFlagSet() {
  local FlagName="$1"
  local FlagValue="${!FlagName}"
  [[ -n "${FlagValue//0}" ]]
} # isFlagSet()
50 
# Succeeds if the verbosity in the global DEBUG is at least the level in $1
# (level 1 if no argument is given).
function isDebugging() {
  local -i Threshold="${1:-1}"
  [[ "${DEBUG}" -ge "${Threshold}" ]]
} # isDebugging()
55 
# DBGN LEVEL MESSAGE...
# Prints MESSAGE on the error stream, tagged with LEVEL, if the debugging
# verbosity is at least LEVEL.
function DBGN() {
  local -i MsgLevel="$1"
  shift
  isDebugging "$MsgLevel" && STDERR "DBG[${MsgLevel}]| $*"
} # DBGN()

# DBG MESSAGE...
# Same as `DBGN 1 MESSAGE...`.
function DBG() {
  DBGN 1 "$@"
} # DBG()
62 
# Prints all the arguments, joined into a single line, on the error stream.
function STDERR() {
  echo "$*"
} >&2
64 
# Prints an error message on the error stream.
function ERROR() {
  STDERR "ERROR: $*"
} # ERROR()

# Prints a warning message on the error stream.
function WARN() {
  STDERR "WARNING: $*"
} # WARN()

# Prints a message on the error stream, unless the DoQuiet flag is set.
function INFO() {
  if ! isFlagSet DoQuiet ; then
    STDERR "$*"
  fi
} # INFO()
68 
# FATAL CODE MESSAGE...
# Prints MESSAGE on the error stream and terminates the shell with exit CODE.
function FATAL() {
  local -i ExitCode="$1"
  STDERR "FATAL(${ExitCode}): ${*:2}"
  exit "$ExitCode"
} # FATAL()
75 
# LASTFATAL MESSAGE...
# If the command executed just before this call failed, terminates via FATAL
# with that exit code and MESSAGE; otherwise it does nothing.
function LASTFATAL() {
  local -i LastCode=$? # must be the first statement, before $? is overwritten
  if [[ $LastCode != 0 ]]; then
    FATAL "$LastCode" "$@"
  fi
} # LASTFATAL()
80 
81 
# Prints the usage instructions on standard output.
# The text interpolates the script name and the names/defaults of the option
# values, so those global constants must be defined before the call.
function PrintHelp() {

  cat <<EOH
Queries SAM for all the files in a run and translates each file into a URL.

Usage: ${SCRIPTNAME} [options] Spec [Spec ...] > run.filelist

Each specification may be a run number, a SAM definition (preceded by \`@\`,
e.g. \`@icarus_run005300_raw\`) or a file name.
In all cases, run, definition or file, they must be known to SAM.

Options:
--type=<${RawType}|${DecodeType}|...> [${DefaultType}]
--decode , --decoded , -D
--raw , -R
--stage=STAGE
 select the type of files to query (raw from DAQ, decoded...);
 if the stage is explicitly selected, it is used as constraint in SAM query
--schema=<${XRootDschema}|...> [${DefaultSchema}]
--xrootd , --root , -X
--locate
 select the type of URL (XRootD, ...); the option \`--locate\` and the
 special schema value <${LocateSchema}> will cause the query to be done via
 SAM \`locate-file\` command instead of the default \`get-file-access-url\`
--location=<${dCacheLocation}|${TapeLocation}> [${DefaultLocation}]
--tape , --enstore , -T
--disk , --dcache , -C
 select the storage type (no effect for \`locate\` schema)
--stream=<${BNBstream}|${NuMIstream}|...> [${DefaultStream}]
--bnb
--numi
--allstreams
 select the stream (if empty, all streams are included)


--output=OUTPUTFILE
 use OUTPUTFILE for all output file lists
--outputdir=OUTPUTDIR
 prepend all output filelist paths with this directory
-O
 create a list per run, with the pattern specified by \`--outputpattern\`:
--outputpattern=PATTERN [${DefaultOutputPattern}]
 use PATTERN for the standard output file list (see option \`-O\` above);
 the following tags in PATTERN are replaced: \`%RUN%\` by the run number;
 \`%STREAM%\` by the name of the data stream/beam;
 \`%SCHEMA%\` by the URL schema; \`%LOCATION%\` by the storage location;
 \`%TYPE%\` by the file content type; \`%LIMIT%\` by the number of requested
 entry, only if \`--max\` option is specified;
 and all tags prepended by \`DASH\` (e.g. \`%DASHTYPE%\`) are replaced
 by a dash (\`-\`) and the value of the tag only if the content of that tag
 is not empty
--max=LIMIT
 retrieves only the first LIMIT files from SAM (only when querying run numbers);
 in all cases, it translates only LIMIT files into a location; 0 means no limit
--experiment=NAME
 experiment name (and SAM station) passed to SAM
--quiet , -q
 do not print non-fatal information on screen while running
--debug[=LEVEL]
 enable debugging printout (optionally with the specified verbosity LEVEL)
--version, -V
 print the version of this script
--help , -h , -?
 print this help message

EOH

} # PrintHelp()
149 
150 
# Prints the name and version of the script on standard output, surrounded by
# empty lines.
function PrintVersion() {
  printf '\n%s version %s\n\n' "${SCRIPTNAME}" "${SCRIPTVERSION}"
} # PrintVersion()
158 
159 
160 # ------------------------------------------------------------------------------
# AddPathToList PATH
# Appends PATH as a new line to the file named by the global OutputFile or,
# if no output file is set, prints it on standard output.
function AddPathToList() {

  local Entry="$1"

  DBGN 2 "Adding: '${Entry}'"
  if [[ -z "$OutputFile" ]]; then
    echo "$Entry"
    return
  fi
  echo "$Entry" >> "$OutputFile"

} # AddPathToList()
173 
174 
# Paths of the temporary files to be removed by Cleanup().
declare -a TempFiles

# DeclareTempFile TAG
# Creates an empty temporary file whose name includes TAG (a run number or any
# other specification), records it in TempFiles and prints its path.
# Terminates the shell (via LASTFATAL) if the file can't be created.
#
# NOTE: when the path is captured with `$(DeclareTempFile ...)` the function
# runs in a subshell, so both the TempFiles update and a fatal exit are
# confined to that subshell: the caller needs to check the exit code and
# register the returned path itself.
function DeclareTempFile() {
  local Run="$1"

  # with `--tmpdir` a slash in the template denotes a subdirectory, which
  # would need to exist already: flatten any path separator in the tag
  local TemplateName="${SCRIPTNAME%.sh}-run${Run//\//_}.tmp.XXXXXX"
  local TempFile
  TempFile="$(mktemp --tmpdir "$TemplateName")"
  LASTFATAL "Failed to create a temporary file (after '${TemplateName}') for run ${Run}."
  TempFiles+=( "$TempFile" )

  echo "$TempFile"

} # DeclareTempFile()
188 
# Removes all the temporary files recorded in TempFiles.
# Does nothing if no file has been recorded.
function Cleanup() {
  (( ${#TempFiles[@]} > 0 )) || return 0
  # `--` keeps a name starting with a dash from being parsed as an option
  rm -f -- "${TempFiles[@]}"
} # Cleanup()
192 
193 
# Succeeds if the argument is made only of (one or more) decimal digits.
function isRunNumber() {
  [[ -n "$1" && "$1" != *[![:digit:]]* ]]
}

# Succeeds if the argument starts with the `@` that marks a SAM definition.
function isSAMdefName() {
  [[ "$1" == '@'* ]]
}

# Succeeds if the argument ends with the `.root` suffix.
function isSAMfile() {
  [[ "$1" == *'.root' ]]
}
# SpecType SPEC
# Prints the kind of specification SPEC is (`run`, `SAMdef` or `SAMfile`) and
# succeeds; prints nothing and fails if SPEC matches none of them.
function SpecType() {
  local Spec="$1"
  # "kind:test function" pairs, tried in this order: an indexed array is used
  # because the iteration order of associative array keys is unspecified, and
  # a specification may pass more than one test (e.g. `@definition.root`)
  local -ar Tests=(
    'run:isRunNumber'
    'SAMdef:isSAMdefName'
    'SAMfile:isSAMfile'
  )
  local Entry
  for Entry in "${Tests[@]}" ; do
    "${Entry#*:}" "$Spec" || continue
    echo "${Entry%%:*}"
    return 0
  done
  return 1
} # SpecType()
212 
213 
# BuildOutputFilePath PATTERN RUN
# Prints the output file path obtained from PATTERN by replacing each
# `%NAME%` tag with the value of the matching setting, and each `%DASHNAME%`
# tag with a dash followed by that value (or with nothing if the value is
# empty). RUN supplies the `RUN` tags; the others are read from the global
# variables Stream, Type, Schema, Location and Limit.
# The path is prefixed by the directory in the global OutputDir, if set.
function BuildOutputFilePath() {
  local Result="$1"
  local Run="$2" # read by name in the loop below

  local Field Tag Value Dashed
  for Field in Run Stream Type Schema Location Limit ; do
    Tag="${Field^^}"
    Value="${!Field}"
    Dashed="${Value:+-${Value}}"
    Result="${Result//%${Tag}%/${Value}}"
    Result="${Result//%DASH${Tag}%/${Dashed}}"
  done

  local Prefix=''
  [[ -n "$OutputDir" ]] && Prefix="${OutputDir%/}/"
  echo "${Prefix}${Result}"
} # BuildOutputFilePath()
234 
235 
236 # ------------------------------------------------------------------------------
# RunSAM ARGUMENT...
# Executes `samweb` with the specified arguments, preceded by an
# `--experiment` option when the global Experiment is not empty.
# The command is also printed as a debugging message.
function RunSAM() {
  local -a SAMcommand=( 'samweb' )
  [[ -n "$Experiment" ]] && SAMcommand+=( "--experiment=${Experiment}" )
  SAMcommand+=( "$@" )

  DBG "${SAMcommand[@]}"
  "${SAMcommand[@]}"

} # RunSAM()
244 
245 
246 # ------------------------------------------------------------------------------
# getFileAccessURL FILENAME [SCHEMA] [LOCATION]
# Runs the SAM `get-file-access-url` command on FILENAME, adding a `--schema`
# and a `--location` option for each of the two values that is not empty.
function getFileAccessURL() {
  local SAMfileName="$1"
  local WantedSchema="$2"
  local WantedLocation="$3"

  local -a Options=()
  [[ -n "$WantedSchema" ]] && Options+=( "--schema=${WantedSchema}" )
  [[ -n "$WantedLocation" ]] && Options+=( "--location=${WantedLocation}" )

  RunSAM get-file-access-url "${Options[@]}" "$SAMfileName"

} # getFileAccessURL()
255 
256 
257 # ------------------------------------------------------------------------------
# locateFile FILENAME
# Runs the SAM `locate-file` command on FILENAME.
function locateFile() {
  RunSAM locate-file "$1"
} # locateFile()
264 
265 
266 # ------------------------------------------------------------------------------
# Command line parsing: arguments starting with a dash are options, all the
# others are collected as specifications (their type is checked later).
declare -a Specs
declare -i UseDefaultOutputFile=0 DoQuiet=0
declare -i DoVersion=0 DoHelp=0 # initialised so values inherited from the environment are ignored
declare OutputFile OutputDir Experiment
declare Type="$DefaultType"
declare Schema="$DefaultSchema"
declare Location="$DefaultLocation"
declare Stream="$DefaultStream"
declare OutputPattern="$DefaultOutputPattern"
declare EntryLimit=0 # 0 = no limits
declare -i iParam
for (( iParam=1 ; iParam <= $# ; ++iParam )); do
  Param="${!iParam}"
  if [[ "${Param:0:1}" == '-' ]]; then
    case "$Param" in
      ( '--raw' | '-R' ) Type="$RawType" ;;
      ( '--decoded' | '--decode' | '-D' ) Type="$DecodeType" ;;
      ( '--type='* ) Type="${Param#--*=}" ;;
      ( '--stage='* ) Type="${Param#--*=}" ;;

      ( '--schema='* | '--scheme='* ) Schema="${Param#--*=}" ;;
      ( '--xrootd' | '--XRootD' | '--root' | '--ROOT' | '-X' ) Schema="$XRootDschema" ;;
      ( '--locate' ) Schema="$LocateSchema" ;;

      ( '--loc='* | '--location='* ) Location="${Param#--*=}" ;;
      # `--disk` is advertised by the help message together with `--dcache`
      ( '--disk' | '--dcache' | '--dCache' | '-C' ) Location="$dCacheLocation" ;;
      ( '--tape' | '--enstore' | '-T' ) Location="$TapeLocation" ;;

      ( '--stream='* ) Stream="${Param#--*=}" ;;
      ( '--bnb' | '--BNB' ) Stream="$BNBstream" ;;
      ( '--numi' | '--NuMI' | '--NUMI' ) Stream="$NuMIstream" ;;
      ( '--allstreams' ) Stream="$AnyStream" ;;

      ( "--output="* ) OutputFile="${Param#--*=}" ;;
      ( "--outputpattern="* ) OutputPattern="${Param#--*=}" ;;
      ( "--outputdir="* ) OutputDir="${Param#--*=}" ;;
      ( "-O" ) UseDefaultOutputFile=1 ;;
      ( "--max="* | "--limit="* ) EntryLimit="${Param#--*=}" ;;
      ( "--experiment="* ) Experiment="${Param#--*=}" ;;

      ( '--debug' ) DEBUG=1 ;;
      ( '--debug='* ) DEBUG="${Param#--*=}" ;;
      ( '--quiet' | '-q' ) DoQuiet=1 ;;
      ( '--version' | '-V' ) DoVersion=1 ;;
      ( '--help' | '-h' | '-?' ) DoHelp=1 ;;

      ( * )
        PrintHelp
        echo
        FATAL 1 "Invalid option #${iParam} ('${Param}')."
        ;;
    esac
  else
    Specs+=( "$Param" )
  fi
done

# The limit is used in numeric comparisons, where a non-numeric value would
# silently evaluate to 0 (no limit): reject it instead. An empty value still
# means "no limit"; a leading zero is not read as octal.
if [[ ! "${EntryLimit:-0}" =~ ^[[:digit:]]+$ ]]; then
  FATAL 1 "Invalid limit '${EntryLimit}': a non-negative integer is required."
fi
EntryLimit=$(( 10#${EntryLimit:-0} ))
324 
# A single output file and one output file per specification can't be
# requested at the same time.
if isFlagSet UseDefaultOutputFile && [[ -n "$OutputFile" ]]; then
  FATAL 1 "Options \`-O\` and \`--output\` (value: '${OutputFile}') are exclusive."
fi

# Version and help requests print their message and then end the script;
# an error code set earlier, if any, is preserved.
if isFlagSet DoVersion ; then
  PrintVersion
  [[ "${ExitWithCode:-0}" -gt 0 ]] || ExitWithCode=0
fi

if isFlagSet DoHelp ; then
  PrintHelp
  [[ "${ExitWithCode:-0}" -gt 0 ]] || ExitWithCode=0
fi

[[ -n "$ExitWithCode" ]] && exit "$ExitWithCode"
340 
341 
# Remove the temporary files however the script ends.
trap Cleanup EXIT

# Tag for the output file name pattern; left unset when there is no limit.
if [[ "$EntryLimit" -gt 0 ]]; then
  Limit="max${EntryLimit}"
fi

# Constraints appended to the SAM query used for run numbers.
declare Constraints=''
case "${Type,,}" in
  ( 'raw' )
    Constraints+=" and data_tier=raw"
    ;;
  ( 'decoded' )
    Stage="$DefaultDecoderStageName"
    Constraints+=" and icarus_project.stage=${Stage}"
    ;;
  ( * )
    # any other type is used verbatim as the name of the processing stage
    Stage="$Type"
    Constraints+=" and icarus_project.stage=${Stage}"
    ;;
esac
if [[ -n "$Stream" ]]; then
  Constraints+=" and sbn_dm.beam_type=${Stream}"
fi
if [[ "$EntryLimit" -gt 0 ]]; then
  Constraints+=" with limit ${EntryLimit}"
fi
362 
363 
# Main loop: for each specification the list of its SAM files is collected
# into a temporary file, then each file is translated into a location which
# is added to the output list.
declare -i nErrors=0

declare Spec
if [[ -n "$OutputDir" ]]; then
  mkdir -p "$OutputDir" || FATAL 2 "Failed to create the output directory '${OutputDir}'."
fi
if [[ -n "$OutputFile" ]]; then
  # a relative path is placed under the output directory, if one was given
  # (with no directory, prepending "/" would move the file to the root)
  [[ "${OutputFile:0:1}" == '/' ]] || OutputFile="${OutputDir:+"${OutputDir%/}/"}${OutputFile}"
  rm -f -- "$OutputFile"
fi
for Spec in "${Specs[@]}" ; do

  # classify first, so that an unsupported specification is skipped before
  # any file is created or removed on its behalf
  declare SpecKind
  SpecKind="$(SpecType "$Spec")"
  if [[ -z "$SpecKind" ]]; then
    ERROR "Unknown type for specification '${Spec}'."
    nErrors+=1
    continue
  fi

  # DeclareTempFile runs in a subshell here: its exit code needs to be checked
  # and the file registered in this shell for Cleanup to remove it
  declare FileList
  FileList="$(DeclareTempFile "$Spec")"
  LASTFATAL "Failed to create the temporary file list for '${Spec}'."
  TempFiles+=( "$FileList" )

  if isFlagSet UseDefaultOutputFile ; then
    OutputFile="$(BuildOutputFilePath "$OutputPattern" "$Spec")"
    rm -f -- "$OutputFile"
  fi

  unset Run SAMdefName
  case "$SpecKind" in
    ( 'run' )
      Run="$Spec"
      JobTag="Run ${Run}"
      declare Query="run_number=${Run}${Constraints}"
      RunSAM list-files "$Query" > "$FileList"
      LASTFATAL "getting run ${Run} file list (SAM query: '${Query}')."
      ;;
    ( 'SAMdef' )
      SAMdefName="${Spec:1}"
      JobTag="SAM definition ${SAMdefName}"
      RunSAM list-definition-files "$SAMdefName" > "$FileList"
      LASTFATAL "getting ${JobTag} file list."
      ;;
    ( 'SAMfile' )
      FileName="$Spec"
      JobTag="SAM-declared file '${FileName}'"
      echo "$FileName" > "$FileList"
      ;;
    ( * )
      # SpecType reported a kind that this loop does not handle
      ERROR "Unknown type for specification '${Spec}'."
      nErrors+=1
      continue
      ;;
  esac

  declare -i nFiles
  nFiles="$(wc -l < "$FileList")"
  declare FileName
  declare -i iFile=0
  declare -a FileURL
  declare URLs
  INFO "${JobTag}: ${nFiles} files${OutputFile:+" => '${OutputFile}'"}"
  while read -r FileName ; do
    [[ -n "$FileName" ]] || continue
    if [[ "$EntryLimit" -gt 0 && $iFile -ge "$EntryLimit" ]]; then
      INFO "Limit of ${EntryLimit} reached."
      break
    fi
    (( ++iFile ))
    INFO "[${iFile}/${nFiles}] '${FileName}'"
    # standard input is detached so that the query can't consume the file
    # list this loop is reading from
    case "$Schema" in
      ( "$LocateSchema" )
        URLs="$(locateFile "$FileName" < /dev/null)"
        ;;
      ( * )
        URLs="$(getFileAccessURL "$FileName" "$Schema" "$Location" < /dev/null)"
        ;;
    esac
    LASTFATAL "getting file '${FileName}' location from SAM."
    # split the answer on blanks and newlines, without pathname expansion;
    # `read` reports the end of input as a failure, which is expected here
    IFS=$' \t\n' read -r -d '' -a FileURL <<< "$URLs" || true
    [[ "${#FileURL[@]}" == 0 ]] && FATAL 2 "failed getting file '${FileName}' location from SAM."
    [[ "${#FileURL[@]}" -gt 1 ]] && WARN "File '${FileName}' matched ${#FileURL[@]} locations (only the first one included):$(printf -- "\n- '%s'" "${FileURL[@]}")"
    AddPathToList "${FileURL[0]}"
  done < "$FileList"

done

[[ $nErrors -gt 0 ]] && FATAL 1 "${nErrors} error(s) accumulated while processing."

exit 0
xrootd XRootD root ROOT X Schema
Experiment
Definition: Experiment.hh:13
finds tracks best matching by with limits
then echo FATAL
function LASTFATAL()
Definition: utilities.sh:25
then echo ERROR
Definition: grid_setup.sh:42
esac done echo Signal files are
Definition: TrainMVA.sh:25
* file
Definition: file_to_url.sh:69
#define the
do one_file $F done echo for F in find $TOP name CMakeLists txt print
SCRIPTNAME
Definition: publish.sh:21
function isFlagSet()
Definition: utilities.sh:7
then echo echo For and will not be changed by echo further linking echo echo B echo The symbol is in the uninitialized data multiple common symbols may appear with the echo same name If the symbol is defined the common echo symbols are treated as undefined references For more echo details on common see the discussion of warn common echo in *Note Linker see the discussion of warn common echo in *Note Linker such as a global int variable echo as opposed to a large global array echo echo I echo The symbol is an indirect reference to another symbol This echo is a GNU extension to the a out object file format which is echo rarely used echo echo N echo The symbol is a debugging symbol echo echo R echo The symbol is in a read only data section echo echo S echo The symbol is in an uninitialized data section for small echo objects echo echo T echo The symbol is in the the normal defined echo symbol is used with no error When a weak undefined symbol echo is linked and the symbol is not the value of the echo weak symbol becomes zero with no error echo echo W echo The symbol is a weak symbol that has not been specifically echo tagged as a weak object symbol When a weak defined symbol echo is linked with a normal defined the normal defined echo symbol is used with no error When a weak undefined symbol echo is linked and the symbol is not the value of the echo weak symbol becomes zero with no error echo echo echo The symbol is a stabs symbol in an a out object file In echo this the next values printed are the stabs other echo the stabs desc and the stab type Stabs symbols are echo used to hold debugging information For more echo see *Note or object file format specific echo echo For Mac OS X
function STDERR()
Definition: utilities.sh:17
shift
Definition: fcl_checks.sh:26
dcache dCache C Location
then echo Sam station was not specified(use option--sam_station)." exit 1 fi if [ x$SAM_GROUP
process_name gaushit a
while getopts h
then local
void decode(std::any const &src, Interval< Args...> &iv)
Decodes an interval.
BEGIN_PROLOG V
BEGIN_PROLOG dataFFTHistosEW root
BEGIN_PROLOG gatesFromTracksW gatesFromTracksE streams
return match has_match and(match.match_pdg==11 or match.match_pdg==-11)
BEGIN_PROLOG FileName
Definition: filemuons.fcl:10
then echo echo For and will not be changed by echo further linking echo echo B echo The symbol is in the uninitialized data multiple common symbols may appear with the echo same name If the symbol is defined the common echo symbols are treated as undefined references For more echo details on common see the discussion of warn common echo in *Note Linker see the discussion of warn common echo in *Note Linker such as a global int variable echo as opposed to a large global array echo echo I echo The symbol is an indirect reference to another symbol This echo is a GNU extension to the a out object file format which is echo rarely used echo echo N echo The symbol is a debugging symbol echo echo R echo The symbol is in a read only data section echo echo S echo The symbol is in an uninitialized data section for small echo objects echo echo T echo The symbol is in the the normal defined echo symbol is used with no error When a weak undefined symbol echo is linked and the symbol is not the value of the echo weak symbol becomes zero with no error echo echo W echo The symbol is a weak symbol that has not been specifically echo tagged as a weak object symbol When a weak defined symbol echo is linked with a normal defined the normal defined echo symbol is used with no error When a weak undefined symbol echo is linked and the symbol is not the value of the echo weak symbol becomes zero with no error echo echo echo The symbol is a stabs symbol in an a out object file In echo this the next values printed are the stabs other echo the stabs desc and the stab type Stabs symbols are echo used to hold debugging information For more information
then shift fi
then echo Work directory not specified exit fi echo Work directory
if &&[-z"$BASH_VERSION"] then echo Attempting to switch to bash bash shellSwitch exit fi &&["$1"= 'shellSwitch'] shift declare a IncludeDirectives for Dir in
then echo File list $list not found else cat $list while read file do echo $file sed s
Definition: file_to_url.sh:60
decoded decode D Type
BEGIN_PROLOG sequence::SlidingWindowTriggerPatternsOppositeWindows END_PROLOG simSlidingORM6O6 effSlidingORW output
then echo echo For and will not be changed by echo further linking echo echo B echo The symbol is in the uninitialized data multiple common symbols may appear with the echo same name If the symbol is defined the common echo symbols are treated as undefined references For more echo details on common see the discussion of warn common echo in *Note Linker options
do i e
then echo fcl name
echo Invalid option
Definition: TrainMVA.sh:17
temporary value
BEGIN_PROLOG triggeremu_data_config_icarus settings sequence::triggeremu_data_config_icarus settings PMTADCthresholds sequence::triggeremu_data_config_icarus settings PMTADCthresholds sequence::triggeremu_data_config_icarus settings PMTADCthresholds Pattern
float A
Definition: dedx.py:137
bool empty(FixedBins< T, C > const &) noexcept
Definition: FixedBins.h:555
esac echo uname r
do Param
list
Definition: file_to_url.sh:28
bnb BNB Stream
experiment
Definition: ffttest.sh:3