(file) Return to doylmsht CVS log (file) (dir) Up to [Development] / JSOC / proj / globalhs / scripts

File: [Development] / JSOC / proj / globalhs / scripts / doylmsht (download)
Revision: 1.5, Wed Feb 8 01:12:45 2017 UTC (16 months, 1 week ago) by tplarson
Branch: MAIN
CVS Tags: Ver_9-1, Ver_9-0
Changes since 1.4: +16 -19 lines
update tag handling

#!/bin/tcsh -f

# doylmsht: split a jv2ts run into chunks, write one parameter file per chunk,
# bundle the chunks into batch scripts, submit them with qsub, wait for them to
# finish, and resubmit once any chunk that did not complete cleanly.
#
# Arguments are name=value pairs: tag is required, deltal and segment are optional.
# Optional environment overrides: GLOBALHS_V2TNJOBS, GLOBALHS_V2TJOBTHRESHOLD,
# GLOBALHS_LABEL, GLOBALHS_V2TQUEUE.

# put the JSOC binaries and the globalhs scripts at the front of the search path
setenv PATH $JSOCROOT/bin/$JSOC_MACHINE':'$JSOCROOT/proj/globalhs/scripts':'$PATH

# njobs: how many batch scripts the chunks are spread over and submitted at once
if ($?GLOBALHS_V2TNJOBS) then
  set njobs=$GLOBALHS_V2TNJOBS
else
  set njobs=20
endif

# jobthreshold: the next batch is only submitted once the total number of
# sht jobs has dropped below this value
if ($?GLOBALHS_V2TJOBTHRESHOLD) then
  set jobthreshold=$GLOBALHS_V2TJOBTHRESHOLD
else
  set jobthreshold=40
endif

# suff: job name suffix, taken from a label inherited through the environment
# and left empty when no label was inherited
if ($?GLOBALHS_LABEL) then
  set suff=.$GLOBALHS_LABEL
else
  set suff=''
endif

# directory passed to qsub as the location for the stdout and stderr files of each job
set qsubtmp=/tmp27/$USER/qsubtmp
mkdir -p $qsubtmp

# queue to submit to: GLOBALHS_V2TQUEUE wins, otherwise k.q
if ($?GLOBALHS_V2TQUEUE) then
  set q=$GLOBALHS_V2TQUEUE
else
  set q=k.q
endif

# on k.q the variants with the 2 suffix replace the plain queue commands
if ($q == k.q) then
  alias waittosubmit waittosubmit2
  alias qstat qstat2
  alias qsub qsub2
endif

# every command line argument is handed to set, so name=value becomes a shell variable
set i = 1
while ($i <= $#argv)
  set $argv[$i]
  @ i = $i + 1
end

# tag has no default and must have been supplied
if (! $?tag) then
  echo must specify parameter tag
  exit 1
endif

# defaults for the optional parameters
if (! $?deltal) then
  set deltal = 6
endif
if (! $?segment) then
  set segment = vradsum
endif

# tag2 identifies this run in batch script names and in the TTTT placeholder
set tag2 = ${tag}_${segment}

# log banner: time, host, working directory and the command line as invoked
echo `date` on $HOST
echo $PWD
echo $0 $argv

# the parameter blank must exist and be non-empty
if (! -es v2t.parms.blank) then
  echo parameter file blank missing: v2t.parms.blank is required
  exit 1
endif

# drop comment lines, put every whitespace-separated token on its own line and
# discard empty lines, leaving one token per line in parms.tmp
grep -v "^[[:space:]]*#" v2t.parms.blank | sed s@"[[:space:]]"@"\n"@g | grep -v '^[[:space:]]*$' > parms.tmp

# text between in= and the first [ with single quotes removed, presumably the input series name
# NOTE(review): the pattern in= is not anchored, so any token ending in in= also matches - confirm the blank has only one
set in = `cat parms.tmp | grep in= | sed s/"'"/""/g | awk -F '[=[]' '{print $2}'`
# fifth comma-separated field of the t_rec_step keyword line printed by show_info -j
# presumably the record step in seconds - TODO confirm
set trecstep = `show_info -j $in | grep -i '^keyword:t_rec_step' | cut -d, -f5`
# value of TTOTAL from the blank, passed through durcon - presumably converted to seconds
set ttotal = `cat parms.tmp | grep TTOTAL= | cut -d= -f2`
set totalsecs = `durcon $ttotal`
# number of record steps per chunk, computed by bc as totalsecs divided by trecstep
set ndt = `echo "$totalsecs/$trecstep" | bc`

# the ylm parameter blank sits one directory up and must be non-empty
if (! -es ../ylm.parms.blank) then
  echo parameter file blank missing: ylm.parms.blank is required
  exit 1
endif

# TSTART as given on an uncommented line of the blank
set tstart = `grep -v "^[[:space:]]*#" ../ylm.parms.blank | grep TSTART= | cut -d= -f2`
if ($#tstart == 0) then
  # not in the blank: use the third field of the line that follows the
  # TSTART entry in the output of mkylms -H
  set line = `mkylms -H | grep -n "TSTART (time)" | cut -d: -f1`
  @ line = $line + 1
  set tstart = `mkylms -H | sed -n ${line}p | awk '{print $3}'`
endif
set startsecs = `time_convert o=jsoc time=$tstart`

# the MODELIST file named in the blank must exist next to it and be non-empty
set file = `grep -v "^[[:space:]]*#" ../ylm.parms.blank | grep MODELIST= | cut -d= -f2`
if (! -es ../$file) then
  echo MODELIST file $file is missing
  exit 1
endif

# nim lines in the MODELIST file, ndt of them per chunk, rounded up
set nim = `wc -l < ../$file`
@ nchunks = ( $nim - 1 ) / $ndt
@ nchunks++
#not necessary if while is replaced by foreach below
#if ($njobs > $nchunks) set njobs=$nchunks

# write one parameter file per chunk and distribute the chunks over the batch scripts
set i=0
while ($i < $nchunks)
  # start time of chunk i: startsecs advanced by i chunks of ndt steps of trecstep each
  set isecs = `echo "$startsecs + ($i * $ndt * $trecstep)" | bc`
  set istart = `time_convert o=cal zone=tai s=$isecs`
  # chunk i covers lines index through index2 of the MODELIST file, clipped to its last line
  @ index = ($i * $ndt) + 1
  @ index2 = $index + $ndt - 1
  if ($index2 > $nim) set index2 = $nim
  # lmin: first column of the first line of the chunk, lowered by deltal but never below 0
  set lmin = `sed -n $index'p' ../$file | awk '{print $1}'`
  @ lmin = $lmin - $deltal
  if ($lmin < 0) set lmin=0
  # lmax: first column of the last line of the chunk, raised by deltal
  set lmax = `sed -n $index2'p' ../$file | awk '{print $1}'`
  @ lmax = $lmax + $deltal
  # fill in the placeholders of the blank: RRRR record range, MMMM lmax, NNNN lmin,
  # SSSS start time, GGGG segment, TTTT tag2
  # NOTE(review): the range is written as ndt followed by m, where ndt is a count of record steps - confirm the unit
  cat parms.tmp | sed s@RRRR@$istart/$ndt'm'@ | sed s/MMMM/$lmax/ | sed s/NNNN/$lmin/ | sed s/SSSS/$istart/ | sed s/GGGG/$segment/g | sed s/TTTT/$tag2/g  > v2t.parms.$i
  # chunk i is appended to batch script number i modulo njobs
  @ r = $i % $njobs
  set subfile = subt.$r$suff'_'$tag2
  # the first njobs chunks each start a new batch script: interpreter line, PATH and working directory
  if ($i < $njobs) then
    echo '#\!/bin/csh' > $subfile
    echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
    echo 'cd' $PWD >> $subfile
  endif
  # timed jv2ts run with all output in v2t.log.i, followed by its exit status in jv2ts.exitstatus.i
  echo \(time jv2ts @v2t.parms.$i\) '>&' v2t.log.$i >> $subfile
  echo echo \$status '>&' jv2ts.exitstatus.$i >> $subfile
  @ i++
end

# wait for room in the queue, then submit every batch script of this run
waittosubmit $jobthreshold subt
foreach sub (`/bin/ls subt.*_$tag2`)
  qsub -q $q -e $qsubtmp -o $qsubtmp $sub
end

#set i=0
#while ($i < $njobs)
#  qsub -q $q -e $qsubtmp -o $qsubtmp subt.$i$suff
#@ i++
#end

echo jobs submitted, start waiting

# poll once a minute until qstat lists no job whose full name matches this run
while (1)
  set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subt | grep ${suff}_${tag2} | wc -l`
  if ($njobsrunning <= 0) break
  sleep 60
end

# chunks that have a parameter file but no log: after merging both index lists,
# those are the indices that occur only once
set expectedlist = `/bin/ls v2t.parms.[0-9]* | cut -d'.' -f 3`
set ranlist   = `/bin/ls v2t.log.* | cut -d'.' -f 3`
set rerunlist = `echo $expectedlist $ranlist | sed s/" "/"\n"/g | sort -n | uniq -u`

# chunks whose recorded exit status is anything other than exactly 0
# -x matches whole lines, so a status such as 10 or 101 is not taken for success
set errlist1 = `grep -Hvx 0 jv2ts.exitstatus.* | awk -F '[.:]' '{print $3}'`
# chunks whose log does not hold exactly one successful completion line
# the count is anchored at the end of the line, so 10 through 19 are not taken for 1
set errlist2 = `grep -Hc "successful completion" v2t.log.* | grep -v ':1$' | awk -F '[.:]' '{print $3}'`
set errlist = `echo $errlist1 $errlist2 | sed s/" "/"\n"/g | sort -n | uniq`

# everything that needs another attempt, with duplicates removed so that a chunk
# present in both lists is not queued twice
set list = `echo $rerunlist $errlist | sed s/" "/"\n"/g | sort -n | uniq`
# rerun pass: every chunk in list gets exactly one more attempt
if ($#list) then
  echo some jobs fail, rerunning
  # record why each chunk was selected
  echo rerunlist: $rerunlist > faillog.$segment
  echo errlist1: $errlist1 >> faillog.$segment
  echo errlist2: $errlist2 >> faillog.$segment
  # move the batch scripts of the first pass aside so that only the new ones are submitted below
  mkdir -p savesubt
  mv subt.*_$tag2 savesubt

  #now have to start with 1 because that is the first index of list
  # entry i goes to batch script i modulo njobs, so each of the first njobs entries starts a new script
  set i=1
  while ($i <= $#list)
    @ r = $i % $njobs
    set subfile = subt.$r$suff'_'$tag2
    if ($i <= $njobs) then
      echo '#\!/bin/csh' > $subfile
      echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
      echo 'cd' $PWD >> $subfile
    endif
    # same job lines as in the first pass, so the log and exit status of the chunk are overwritten
    echo \(time jv2ts @v2t.parms.$list[$i]\) '>&' v2t.log.$list[$i] >> $subfile
    echo echo \$status '>&' jv2ts.exitstatus.$list[$i] >> $subfile
    @ i++
  end

  waittosubmit $jobthreshold subt
  set sublist=`/bin/ls subt.*_$tag2`
  foreach sub ($sublist)
    qsub -q $q -e $qsubtmp -o $qsubtmp $sub
  end

  # poll once a minute until qstat lists no job of this run
  set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subt | grep $suff'_'$tag2 | wc -l`
  while($njobsrunning > 0)
    sleep 60
    set njobsrunning = `qstat -r -u $USER | grep "Full jobname:" | grep subt | grep $suff'_'$tag2 | wc -l`
  end

  # repeat the checks of the first pass
  # only numbered parameter files count as expected: a plain v2t.parms.* would also pick up
  # v2t.parms.blank, which never has a log and would make every rerun look like a failure
  set expectedlist = `/bin/ls v2t.parms.[0-9]* | cut -d'.' -f 3`
  set ranlist   = `/bin/ls v2t.log.* | cut -d'.' -f 3`
  set rerunlist = `echo $expectedlist $ranlist | sed s/" "/"\n"/g | sort -n | uniq -u`

  # -x matches whole lines, so a status such as 10 or 101 is not taken for success
  set errlist1 = `grep -Hvx 0 jv2ts.exitstatus.* | awk -F '[.:]' '{print $3}'`
  # the count is anchored at the end of the line, so 10 through 19 are not taken for 1
  set errlist2 = `grep -Hc "successful completion" v2t.log.* | grep -v ':1$' | awk -F '[.:]' '{print $3}'`
  set errlist = `echo $errlist1 $errlist2 | sed s/" "/"\n"/g | sort -n | uniq`

  # a second failure is final
  if ($#rerunlist || $#errlist) then
    echo some jobs still fail, i give up
    exit 1
  endif
endif

# every chunk finished cleanly: remove the scratch token list and report success
# the failure exits above leave parms.tmp in place
rm parms.tmp

echo successful completion
exit 0

Karen Tian
Powered by
ViewCVS 0.9.4