(file) Return to dorebinsmooth CVS log (file) (dir) Up to [Development] / JSOC / proj / globalhs / scripts

File: [Development] / JSOC / proj / globalhs / scripts / dorebinsmooth (download)
Revision: 1.3, Wed Jun 18 08:09:13 2014 UTC (8 years, 11 months ago) by tplarson
Branch: MAIN
CVS Tags: Ver_8-5
Changes since 1.2: +1 -1 lines
force use of jsoc/cvs/Development executables

#!/bin/tcsh -f
#
# dorebinsmooth: split a time range into chunks, write one jrebinsmooth job
# script per chunk, submit the scripts with qsub and wait for them to finish.
# Parameters are given on the command line as name=value words; each word is
# handed to set, so it becomes a shell variable of that name.

# always use the executables of the jsoc/cvs/Development tree
setenv JSOCROOT /home/jsoc/cvs/Development/JSOC
setenv PATH ${JSOCROOT}/bin/${JSOC_MACHINE}:${JSOCROOT}/proj/globalhs/scripts:${PATH}

# number of jobs will be ntimechunks = (totalsecs / chunksecs) rounded up

# directory given to qsub for the stdout and stderr files of the jobs
set qsubtmp=/tmp27/${USER}/qsubtmp
mkdir -p $qsubtmp

# turn every command line word into a shell variable assignment
set i = 1
while ($#argv >= $i)
  set $argv[$i]
  @ i = $i + 1
end

# an optional label becomes a .label suffix on the job script names
if ($?label) then
  set suff=.$label
else
  set suff=''
endif

# batch queue: j.q unless a queue parameter was given
if ($?queue) then
  set q=$queue
else
  set q=j.q
endif

# jobthreshold is passed on to waittosubmit; it defaults to 60
if (! $?jobthreshold) then
  set jobthreshold=60
endif

# for queue a.q the batch commands are replaced by their variants ending in 2
if (a.q == $q) then
  alias qsub         qsub2
  alias qstat        qstat2
  alias waittosubmit waittosubmit2
endif

# starttime and totaltime have no default and must be supplied
if ($?starttime == 0) then
  echo must specify parameter starttime
  exit 1
endif

if ($?totaltime == 0) then
  echo must specify parameter totaltime
  exit 1
endif

# durcon converts totaltime into totalsecs; a nonzero status means bad format
set totalsecs = `durcon $totaltime`
if ($status != 0) then
  echo incorrect format for parameter totaltime
  exit 1
endif

# length of one chunk, 1d unless timechunk was given
if ($?timechunk == 0) then
  set timechunk = 1d
endif
set chunksecs = `durcon $timechunk`
if ($status != 0) then
  echo incorrect format for parameter timechunk
  exit 1
endif

# number of whole chunks; bc truncates the quotient
set ntimechunks = `echo "$totalsecs / $chunksecs" | bc`

# log when, where and with which arguments this run was started
date
echo $PWD
echo $0 $argv

# the parameter template rbs.parms.blank must exist and be non-empty
if (! -es rbs.parms.blank) then
  echo parameter file blank missing: rbs.parms.blank is required
  exit 1
endif

# parms.tmp: the template without comment lines, one whitespace-separated
# word per line, blank lines removed
grep -v "^[[:space:]]*#" rbs.parms.blank | sed s@"[[:space:]]"@"\n"@g | grep -v '^[[:space:]]*$'  > parms.tmp

# input series: the value of the in= word with quote characters removed,
# cut off at the first [
set in = `grep '^in=' parms.tmp | sed -e s/\'//g -e s/\"//g | awk -F '[=[]' '{print $2}'`
if ("$in" == "") then
  # without a series name every later query and computation would fail
  echo no in= parameter found in rbs.parms.blank
  exit 1
endif

# field 5 of the t_rec_epoch keyword line printed by show_info -j
set epoch = `show_info -j $in | grep -i '^keyword:t_rec_epoch' | cut -d, -f5`
if ("$epoch" == "") then
  echo cannot determine t_rec_epoch of input series $in
  exit 1
endif
set epochsecs = `time_convert o=jsoc time=$epoch`

# field 5 of the t_rec_step keyword line printed by show_info -j
set trecstep = `show_info -j $in | grep -i '^keyword:t_rec_step' | cut -d, -f5`
if ("$trecstep" == "") then
  echo cannot determine t_rec_step of input series $in
  exit 1
endif

# number of record indices covered by one chunk; bc truncates the quotient
set indexchunk = `echo "$chunksecs / $trecstep" | bc`

# if durcon accepts starttime, its result is added to epochsecs; otherwise
# starttime is converted with time_convert
durcon $starttime >& /dev/null
if ($status == 0) then
  set startsecs = `durcon $starttime` 
  set startsecs = `echo "$startsecs + $epochsecs" | bc`
else
  set startsecs = `time_convert o=jsoc time=$starttime`
endif

# record index of the start of the range and of the first record after it
set firstindex     = `echo "($startsecs - $epochsecs)/$trecstep" | bc`
set nextfirstindex = `echo "($startsecs + $totalsecs - $epochsecs)/$trecstep" | bc`

# Write one job script subrbs.FIRSTINDEX.INDEX plus suffix, and one parameter
# file rbs.parms.INDEX, for each of the ntimechunks full chunks. Each job
# script sets PATH, changes to the current directory, runs jrebinsmooth on
# its parameter file with output in rbs.log.INDEX, and records the exit
# status in jrebinsmooth.exitstatus.INDEX.
set i=0
while ($i < $ntimechunks)
  @ index = $firstindex + $i * $indexchunk
  set subfile = subrbs.$firstindex.$index$suff
  echo '#\!/bin/csh' > $subfile
  echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
  echo 'cd' $PWD >> $subfile
  # XXXX in the template is replaced by #INDEX/COUNT
  set interval = '#'$index'/'$indexchunk
  sed s@XXXX@$interval@g parms.tmp > rbs.parms.$index
  echo \(time jrebinsmooth @rbs.parms.$index\) '>&' rbs.log.$index >> $subfile
  echo echo \$status '>&' jrebinsmooth.exitstatus.$index >> $subfile
  @ i++
end

# First index not covered by the full chunks. It is computed from
# ntimechunks instead of being carried over from the loop, so that it is
# defined even when the loop body never ran because ntimechunks is 0.
@ index = $firstindex + $ntimechunks * $indexchunk
if ($index < $nextfirstindex) then
  # one extra, shorter job covers the records up to nextfirstindex;
  # indexchunk itself is left untouched because the submit loop needs it
  @ lastchunk = $nextfirstindex - $index
  set subfile = subrbs.$firstindex.$index$suff
  echo '#\!/bin/csh' > $subfile
  echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
  echo 'cd' $PWD >> $subfile
  set interval = '#'$index'/'$lastchunk
  sed s@XXXX@$interval@g parms.tmp > rbs.parms.$index
  echo \(time jrebinsmooth @rbs.parms.$index\) '>&' rbs.log.$index >> $subfile
  echo echo \$status '>&' jrebinsmooth.exitstatus.$index >> $subfile
  # the extra job counts as one more chunk for the submit loop
  @ ntimechunks++
endif

echo $ntimechunks job scripts created

# waittosubmit is called once before the whole batch is handed to qsub
waittosubmit $jobthreshold subrbs

# submit the job scripts in index order; the names are rebuilt the same way
# they were built when the scripts were written
set i = 0
while ($i < $ntimechunks)
  @ index = $firstindex + $i * $indexchunk
  set subfile = subrbs.$firstindex.$index$suff
  qsub -q $q -e $qsubtmp -o $qsubtmp $subfile
  @ i = $i + 1
end

echo jobs submitted, start waiting

# poll qstat once a minute until it lists no job of this run any more
while (1)
  set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subrbs.$firstindex | grep "$suff" | wc -l`
  if ($njobsrunning <= 0) break
  sleep 60
end

#check for errors here
# expectedlist: indices that have a parameter file; ranlist: indices that
# have a log file. An index found in only one of the two lists goes into
# rerunlist.
set expectedlist = `/bin/ls rbs.parms.[0-9]* | cut -d'.' -f 3`
set ranlist   = `/bin/ls rbs.log.* | cut -d'.' -f 3`
set rerunlist = `echo $expectedlist $ranlist | sed s/" "/"\n"/g | sort -n | uniq -u`

# errlist1: indices whose recorded exit status is not exactly 0. The -x
# option makes grep compare the whole line, so that a status such as 10 or
# 130, which merely contains the digit 0, is not taken for success.
set errlist1 = `grep -Hvx 0 jrebinsmooth.exitstatus.* | awk -F '[.:]' '{print $3}'`
# errlist2: indices whose log does not have a successful completion count
# starting with 1
set errlist2 = `grep -Hc "successful completion" rbs.log.* | grep -v :1 | awk -F '[.:]' '{print $3}'`
set errlist = `echo $errlist1 $errlist2 | sed s/" "/"\n"/g | sort -n | uniq`

if ($#rerunlist || $#errlist) then
  echo some jobs fail, rerunning
  # keep a record of what failed in the first pass
  echo rerunlist: $rerunlist > faillog
  echo errlist1: $errlist1 >> faillog
  echo errlist2: $errlist2 >> faillog
  waittosubmit $jobthreshold subrbs
  foreach ind ($errlist $rerunlist)
    qsub -q $q -e $qsubtmp -o $qsubtmp subrbs.$firstindex.$ind$suff
  end

  # wait for the resubmitted jobs, polling qstat once a minute
  set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subrbs.$firstindex | grep "$suff" | wc -l`
  while($njobsrunning > 0)
    sleep 60
    set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subrbs.$firstindex | grep "$suff" | wc -l`
  end

  # same checks as above, applied to the results of the second pass
  set ranlist   = `/bin/ls rbs.log.* | cut -d'.' -f 3`
  set rerunlist = `echo $expectedlist $ranlist | sed s/" "/"\n"/g | sort -n | uniq -u`
  set errlist1 = `grep -Hvx 0 jrebinsmooth.exitstatus.* | awk -F '[.:]' '{print $3}'`
  set errlist2 = `grep -Hc "successful completion" rbs.log.* | grep -v :1 | awk -F '[.:]' '{print $3}'`

  # only one rerun is attempted
  if ($#errlist1 || $#errlist2 || $#rerunlist) then
    echo some jobs still fail, i give up
    exit 1
  endif
endif

echo successful completion

# the expanded parameter template is only removed after a successful run
rm parms.tmp

exit 0

Karen Tian
Powered by
ViewCVS 0.9.4