(file) Return to dorepeatpow CVS log (file) (dir) Up to [Development] / JSOC / proj / globalhs / scripts

File: [Development] / JSOC / proj / globalhs / scripts / dorepeatpow (download)
Revision: 1.5, Mon Apr 16 18:10:13 2018 UTC (5 years, 5 months ago) by baldner
Branch: MAIN
CVS Tags: Ver_9-3, Ver_9-2
Changes since 1.4: +3 -1 lines
Added JSOC_DBUSER variable to qsub scripts.

#!/bin/tcsh -f

# this script is a hybrid of doglobalhs, dogapfill and dopow
# its purpose is to repeat the generation of power spectra (or other outputs)
# without repeating the detrending and gapfilling

# Put the JSOC binaries and the globalhs helper scripts at the front of PATH.
setenv JSOCROOT /home/jsoc/cvs/Development/JSOC
setenv PATH ${JSOCROOT}/bin/${JSOC_MACHINE}:${JSOCROOT}/proj/globalhs/scripts:${PATH}

# Let checkglobalhsargs inspect the arguments before anything else is done.
checkglobalhsargs $argv
if ($status != 0) then
  echo parameter check fails
  exit 1
endif

# Hand every argument to set unchanged, so that each name=value argument
# becomes a shell variable of that name.
set i = 1
while ($i <= $#argv)
  set $argv[$i]
  @ i = $i + 1
end

# starttime is mandatory.
if ($?starttime == 0) then
  echo must specify parameter starttime
  exit 1
endif

# totaltime is mandatory and must be accepted by durcon; the durcon output
# is kept in totalsecs.
if ($?totaltime == 0) then
  echo must specify parameter totaltime
  exit 1
endif
set totalsecs = `durcon $totaltime`
if ($status != 0) then
  echo incorrect format for parameter totaltime
  exit 1
endif

# label is mandatory; it also forms the suffix of the job script names.
if ($?label == 0) then
  echo must specify parameter label
  exit 1
endif
set suff = .$label

# Defaults for the l range and for the number of l values per chunk.
if (! $?lmin) then
  set lmin = 0
endif
if (! $?lmax) then
  set lmax = 300
endif
if (! $?lchunk) then
  set lchunk = 10
endif
# Number of job scripts submitted at once; pownjobs overrides the default.
# The block form of if is required here: a one-line if would expand the
# variable in its command even when it is not set.
set njobs = 20
if ($?pownjobs) then
  set njobs = $pownjobs
endif

# Threshold passed to waittosubmit before each round of submissions;
# powjobthreshold overrides the default.
set jobthreshold = 25
if ($?powjobthreshold) then
  set jobthreshold = $powjobthreshold
endif

# Directory handed to qsub for the stdout and stderr files of the jobs.
set qsubtmp = /tmp29/$USER/qsubtmp
mkdir -p $qsubtmp

# Queue to submit to; powqueue overrides the default.
set q = j.q
if ($?powqueue) then
  set q = $powqueue
endif

# For the k.q queue the 2-suffixed variants of the queue tools are used.
if ($q == k.q) then
  alias qsub         qsub2
  alias qstat        qstat2
  alias waittosubmit waittosubmit2
endif

# Work area: topdir/USER/globalhs_work/label/starttime_totaltime/lmin-lmax
if (! $?topdir) then
  set topdir = /tmp29
endif
set workdir = /$topdir/$USER/globalhs_work/$label/${starttime}_$totaltime/$lmin-$lmax

# Announce the run on stdout.
echo `date` on $HOST
echo $PWD
echo $0 $argv

# All further work happens in the repeatpow subdirectory of the work area.
set rundir = $workdir/repeatpow
mkdir -p $rundir
cd $rundir

# Fetch the parameter template that belongs to this label and insist on a
# non-empty copy.
set template = $JSOCROOT/proj/globalhs/scripts/parmtemplates/tsf.parms.$label
cp $template tsf.parms.blank
if (! -es tsf.parms.blank) then
  echo parameter file blank missing: tsf.parms.blank is required
  exit 1
endif

# Record the same run information at the end of the log file.
set logfile = repeatpowlog
echo `date` on $HOST >> $logfile
echo $PWD >> $logfile
echo $0 $argv >> $logfile

# Template lines that start (after optional whitespace) with ## carry settings
# for this script: the text following the leading ## is written to
# script.parms, and every whitespace-separated word of that file is then
# passed to set, one word at a time.
grep "^[[:space:]]*##" tsf.parms.blank | awk -F '##' '{print $2}' > script.parms
set vars = `cat script.parms`
@ i = 1
while ($i <= $#vars )
  set $vars[$i]
@ i++
end

# Drop the comment lines of the template, break the rest at every whitespace
# character and discard blank lines, leaving one token per line in parms.tmp0.
# NOTE(review): relies on sed turning \n in the replacement into a newline.
grep -v "^[[:space:]]*#" tsf.parms.blank | sed s@"[[:space:]]"@"\n"@g | grep -v '^[[:space:]]*$'  > parms.tmp0
# in and gapfile: value of the in= and GAPFILE= tokens with quote characters
# removed, cut off at the first [ (second field when splitting at = and [).
set in = `cat parms.tmp0 | grep ^in= | sed s/\'//g | sed s/\"//g | awk -F '[=[]' '{print $2}'`
set gapfile = `cat parms.tmp0 | grep ^GAPFILE= | sed s/\'//g | sed s/\"//g | awk -F '[=[]' '{print $2}'`
# tsout: the part of the tsout token between the first and second = sign.
set tsout = `cat parms.tmp0 | grep ^tsout | cut -d= -f2`
# Build parms.tmp: remove the tsout token, replace the in value by the tsout
# value and the gapfile value by gapsmade, and drop every token containing
# SECTIONFILE, DETREND (this includes CDETREND) or IFILL.
# NOTE(review): gapsmade is not assigned in the visible part of this script;
# presumably it comes from a ## line of the template or from an argument.
# The in and gapfile values are used as sed patterns, so regex characters in
# them are not taken literally.
cat parms.tmp0 | grep -v ^tsout | sed s/$in/$tsout/g | sed s/$gapfile/$gapsmade/g | grep -v SECTIONFILE | grep -v DETREND | grep -v IFILL > parms.tmp
# Append fixed CDETREND and IFILL settings in place of the removed ones.
echo CDETREND=0.0 >> parms.tmp
echo IFILL=0 >> parms.tmp
rm parms.tmp0

# Read time keywords of the series named by in from show_info -j output; in
# each case the fifth comma-separated field of the matching line is used.
# NOTE(review): the t_step pattern is a prefix match and would also select any
# other keyword whose name starts with t_step - confirm there is only one.
#set in = `cat parms.tmp | grep ^in= | sed s/\'//g | sed s/\"//g | awk -F '[=[]' '{print $2}'`
set epoch = `show_info -j $in | grep -i '^keyword:t_start_epoch' | cut -d, -f5`
set epochsecs = `time_convert o=jsoc time=$epoch`
set tstartstep = `show_info -j $in | grep -i '^keyword:t_start_step' | cut -d, -f5`
set cadence = `show_info -j $in | grep -i '^keyword:t_step' | cut -d, -f5`
# Number of time steps in totaltime (bc integer division of totalsecs by cadence).
set ndt   = `echo "$totalsecs / $cadence" | bc`

# starttime is accepted in two forms.  If durcon rejects it, it is converted
# with time_convert and taken as startsecs directly; if durcon accepts it, the
# durcon result is treated as an offset that is added to epochsecs.
# firstday is computed in both branches but not referenced again in this script.
durcon $starttime >& /dev/null
if ($status) then
  set startsecs = `time_convert o=jsoc time=$starttime`
  set firstday  = `echo "($startsecs - $epochsecs)/86400" | bc` 
else
  set startsecs = `durcon $starttime` 
  set firstday = `echo "$startsecs / 86400" | bc`
  set startsecs = `echo "$startsecs + $epochsecs" | bc`
endif

# firstindex: start offset from the epoch in units of tstartstep; it becomes
# part of the job script names.
set firstindex = `echo "($startsecs - $epochsecs)/$tstartstep" | bc`
# Chunk numbers of the first and last chunk of lchunk consecutive l values,
# and the number of chunks nlc.
@ lchunkfirst = $lmin / $lchunk
@ lchunklast  = $lmax / $lchunk
@ nlc = ($lchunklast - $lchunkfirst) + 1

# Write one parameter file per chunk of l values and spread the jtsfiddle
# invocations over at most njobs job scripts.
@ lc = $lchunkfirst
while ($lc <= $lchunklast)
# l range of this chunk, clipped to lmin and lmax
@ lfirst = $lc * $lchunk
@ llast  = $lfirst + $lchunk - 1
  if ($lfirst < $lmin) set lfirst = $lmin
  if ($llast  > $lmax) set llast  = $lmax

# Chunks are assigned to job scripts round robin: r is the chunk position
# modulo njobs.
@ r = ( $lc - $lchunkfirst ) % $njobs
  set subfile = subpow.$firstindex.$lmin-$lmax.$r$suff
  # The first njobs chunks each start a new job script (overwriting any old
  # one) with a header: interpreter line, PATH, JSOC_DBUSER and a cd to the
  # current directory.  JSOC_MACHINE is written literally and therefore
  # expanded when the job runs; JSOC_DBUSER is expanded now and must be set.
  if ( ( $lc - $lchunkfirst ) < $njobs) then
    echo '#\!/bin/csh' > $subfile
    echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
    echo 'setenv JSOC_DBUSER' $JSOC_DBUSER >> $subfile
    echo 'cd' $PWD >> $subfile
  endif

  # Fill in the placeholders of parms.tmp: XXXX is replaced by starttime,
  # MMMM by llast, NNNN by lfirst and TTTT by ndt.
  cat parms.tmp | sed s@XXXX@$starttime@g | sed s/MMMM/$llast/g | sed s/NNNN/$lfirst/g | sed s/TTTT/$ndt/g > tsf.parms.$lfirst-$llast
  # Append the timed jtsfiddle run, logging to tsf.log.lfirst-llast, and a
  # line that records the resulting status in jtsfiddle.exitstatus.lfirst-llast.
  echo \(time jtsfiddle @tsf.parms.$lfirst-$llast\) '>&' tsf.log.$lfirst-$llast >> $subfile
  echo echo \$status '>&' jtsfiddle.exitstatus.$lfirst-$llast >> $subfile

@ lc++
end

# Wait for waittosubmit to return, then hand one job script per slot to qsub.
# No more scripts than chunks were written, so njobs is capped at nlc.
waittosubmit $jobthreshold subpow >>& $logfile
if ($nlc < $njobs) set njobs = $nlc
set i = 0
while ($i < $njobs)
  set subfile = subpow.$firstindex.$lmin-$lmax.$i$suff
  qsub -q $q -e $qsubtmp -o $qsubtmp $subfile >>& $logfile
  @ i = $i + 1
end

echo jobs submitted, start waiting >> $logfile

# Poll qstat once a minute until it lists no job of this run any more; jobs
# are recognised by firstindex, the l range and the label suffix in the
# full job name.
while (1)
  set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subpow.$firstindex | grep $lmin-$lmax | grep "$suff" | wc -l`
  if ($njobsrunning <= 0) break
  sleep 60
end

# Check the outcome of the first round and rerun failed chunks once.
# A chunk is identified by the third dot-separated field of its file names,
# that is lfirst-llast.
#   expectedlist  chunks that have a parameter file
#   ranlist       chunks that have a log file
#   rerunlist     chunks that occur only once in both lists together, that is
#                 chunks with a parameter file but without a log
set expectedlist = `/bin/ls tsf.parms.[0-9]* | cut -d'.' -f3`
set ranlist      = `/bin/ls tsf.log.* | cut -d'.' -f3`
set rerunlist = `echo $expectedlist $ranlist | sed s/" "/"\n"/g | sort | uniq -u`

# errlist1: chunks whose recorded exit status is anything but exactly 0.
# The -x flag makes grep match whole lines only; a plain grep -v 0 would also
# accept every status that merely contains the digit 0, such as 10 or 100.
set errlist1 = `grep -Hvx 0 jtsfiddle.exitstatus.* | awk -F '[.:]' '{print $3}'`
# errlist2: chunks whose log does not show a count of completion messages
# starting with 1.  The -H flag keeps the file name prefix even when only one
# log file exists; without it grep -c prints a bare count, the awk field is
# empty and a failed single chunk goes unnoticed.
set errlist2 = `grep -Hc "successful completion" tsf.log.* | grep -v :1 | awk -F '[.:]' '{print $3}'`
set errlist = `echo $errlist1 $errlist2 | sed s/" "/"\n"/g | sort | uniq`
# Chunks to run again.  sort -u keeps a chunk that is in both rerunlist and
# errlist from being scheduled in two job scripts at once.
set list = `echo $rerunlist $errlist | sed s/" "/"\n"/g | sort -u`

if ($#list) then
  echo some jobs fail, rerunning >> $logfile
  echo rerunlist: $rerunlist > faillog
  echo errlist1: $errlist1 >> faillog
  echo errlist2: $errlist2 >> faillog
  # Keep the job scripts of the first round out of the way.
  mkdir -p subsave
  mv subpow.* subsave
  # Distribute the chunks of list round robin over at most njobs new job
  # scripts.  i counts from 0 for the script number r and is incremented
  # before it is used as the 1-based index into list.
  set i=0
  while ($i < $#list)
  @ r = $i % $njobs
    set subfile = subpow.$firstindex.$lmin-$lmax.$r$suff
    if ( $i < $njobs) then
      echo '#\!/bin/csh' > $subfile
      echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
      echo 'setenv JSOC_DBUSER' $JSOC_DBUSER >> $subfile
      echo 'cd' $PWD >> $subfile
    endif
  @ i++
    echo \(time jtsfiddle @tsf.parms.$list[$i]\) '>&' tsf.log.$list[$i] >> $subfile
    echo echo \$status '>&' jtsfiddle.exitstatus.$list[$i] >> $subfile
  end

  # Submit the rerun scripts; there are no more of them than chunks in list.
  waittosubmit $jobthreshold subpow >>& $logfile
  set i=0
  if ($#list < $njobs) set njobs = $#list
  while ($i < $njobs)
    set subfile = subpow.$firstindex.$lmin-$lmax.$i$suff
    qsub -q $q -e $qsubtmp -o $qsubtmp $subfile >>& $logfile
  @ i++
  end

  # Poll qstat once a minute until no job of this run is listed any more.
  set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subpow.$firstindex | grep $lmin-$lmax | grep "$suff" | wc -l`
  while($njobsrunning > 0)
    sleep 60
    set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subpow.$firstindex | grep $lmin-$lmax | grep "$suff" | wc -l`
  end

  # Same checks as after the first round, with the same -x and -H safeguards.
  set ranlist   = `/bin/ls tsf.log.*   | cut -d'.' -f 3`
  set rerunlist = `echo $expectedlist $ranlist | sed s/" "/"\n"/g | sort | uniq -u`
  set errlist1  = `grep -Hvx 0 jtsfiddle.exitstatus.* | awk -F '[.:]' '{print $3}'`
  set errlist2  = `grep -Hc "successful completion" tsf.log.* | grep -v :1 | awk -F '[.:]' '{print $3}'`

  # A second failure is final: note it in the log, report on stdout and stop.
  if ($#errlist1 || $#errlist2 || $#rerunlist) then
    echo some jobs still fail, i give up >> $logfile
    echo failure of repeatpow for starttime = $starttime, totaltime= $totaltime, check $logfile
    exit 1
  endif
endif

# All jtsfiddle runs are through; note it in the log and drop the helper files.
echo successful completion >> $logfile

rm -rf subsave script.parms

# TAG clause for the record queries that follow: a TAG= token of the template
# or, if there is none, a bracketed piece of the in= value that contains TAG.
set tagclause = `grep -v '^[[:space:]]*#' tsf.parms.blank | sed s@"[[:space:]]"@"\n"@g | grep ^TAG=`
if ($#tagclause == 0) then
  set tagclause = `grep -v '^[[:space:]]*#' tsf.parms.blank | sed s@"[[:space:]]"@"\n"@g | grep ^in= | cut -d= -f2- | sed s@"\["@"\n"@g | sed s@]@"\n"@g | grep TAG`
endif

# Number of time steps, this time with the t_step value reported for the
# series named by the tsout token of the template.
set totalsecs = `durcon $totaltime`
set out = `grep -v "^[[:space:]]*#" tsf.parms.blank | sed s@"[[:space:]]"@"\n"@g | grep ^tsout= | cut -d= -f2`
set cadence = `show_info -j $out | grep -i '^keyword:t_step' | cut -d, -f5`
set ndt = `echo "$totalsecs / $cadence" | bc`

# Number of l values in the requested range.  The parentheses are kept on
# purpose so the result does not depend on how the shell groups - and +.
@ nts = ($lmax - $lmin) + 1

# Verify every output type the template asks for.  For each name in outkeys
# that occurs as a name= token in the template, show_info is queried for the
# records of this run and the number of data.fits paths in its output must
# equal nts.
# To support another output type, add its token name to outkeys and the
# matching description to outdescs at the same position.
set outkeys  = (powout fftout fft1out mavgout)
set outdescs = ("power spectra" ffts fft1s "m-averaged spectra")
set k = 0
foreach key ($outkeys)
  @ k++
  set out = `cat tsf.parms.blank | grep -v "^[[:space:]]*#" | sed s@"[[:space:]]"@"\n"@g | grep ^$key= | cut -d= -f2`
  if ($#out != 0) then
    show_info $out\[$starttime]\[$lmin-$lmax]\[]\[$ndt]\[$tagclause] DRMS_RETENTION=100 -APiR >& $key.tmp
    if (`grep -c "/SUM.*/data\.fits" $key.tmp` != $nts) then
      echo failure of repeatpow for starttime = $starttime, totaltime = $totaltime : some $outdescs[$k] missing
      exit 1
    endif
  endif
end

# Everything checked out: remove the per-chunk files and report success.
rm subpow.* tsf.parms.[0-9]* tsf.log.* jtsfiddle.exitstatus.*
echo successful completion for starttime = $starttime, totaltime = $totaltime

exit 0

Karen Tian
Powered by
ViewCVS 0.9.4