(file) Return to domkylms CVS log (file) (dir) Up to [Development] / JSOC / proj / globalhs / scripts

File: [Development] / JSOC / proj / globalhs / scripts / domkylms (download)
Revision: 1.5, Mon Apr 16 18:11:00 2018 UTC (5 years, 5 months ago) by baldner
Branch: MAIN
CVS Tags: Ver_9-4, Ver_9-3, Ver_9-2
Changes since 1.4: +1 -0 lines
Added JSOC_DBUSER variable to qsub scripts.

#!/bin/tcsh -f

# Search the JSOC binaries for this machine type and the globalhs helper
# scripts before anything already on PATH.  Braces keep tcsh from reading the
# ':' separators as variable modifiers.
setenv PATH ${JSOCROOT}/bin/${JSOC_MACHINE}:${JSOCROOT}/proj/globalhs/scripts:${PATH}

# submit next batch of jobs when total number of sht jobs drops below jobthreshold
# (GLOBALHS_YLMJOBTHRESHOLD overrides the default of 5)
if ($?GLOBALHS_YLMJOBTHRESHOLD) then
  set jobthreshold = $GLOBALHS_YLMJOBTHRESHOLD
else
  set jobthreshold = 5
endif

#number of jobs will be nchunk

# Job-name suffix: ".<label>" when GLOBALHS_LABEL was inherited from the
# environment, otherwise empty.
if ($?GLOBALHS_LABEL) then
  set suff = .$GLOBALHS_LABEL
else
  set suff = ''
endif

# Queue to submit to; GLOBALHS_YLMQUEUE overrides the default k.q.
if ($?GLOBALHS_YLMQUEUE) then
  set q = $GLOBALHS_YLMQUEUE
else
  set q = k.q
endif

# Per-user directory that receives the stdout/stderr files written by qsub.
set qsubtmp = /tmp29/$USER/qsubtmp
mkdir -p $qsubtmp

# On k.q the queue tools are replaced by their "2" variants (qsub2, qstat2,
# waittosubmit2), which are not defined in this file.
if ($q == k.q) then
  alias qsub qsub2
  alias qstat qstat2
  alias waittosubmit waittosubmit2
endif

# Every command-line word is handed to "set" as-is, so arguments are expected
# in name=value form (e.g. xoffset=0.5 ndt=100) and become shell variables.
foreach assignment ($argv)
  set $assignment
end

# Defaults for any geometry parameter that was not given on the command line.
if ($?xoffset == 0) set xoffset = 0.0
if ($?yoffset == 0) set yoffset = 0.0
if ($?pangle == 0) set pangle = 0.0
if ($?bangle == 0) set bangle = 0.0
if ($?obsdist == 0) set obsdist = 1.000
# ndt = 0 means "do not split": a single job is generated further down.
if ($?ndt == 0) set ndt = 0

# Tag that encodes the parameter set; it becomes part of every job-script name.
set tag = x${xoffset}_y${yoffset}_p${pangle}_b${bangle}_d${obsdist}

# Record when, where and how this script was invoked.
echo `date` on $HOST
echo $PWD
echo $0 $argv

# The parameter template must exist and be non-empty.
if (! -es ylm.parms.blank) then
  echo parameter file blank missing: ylm.parms.blank is required
  exit 1
endif

# Strip blank lines and comment lines from the template; parms.tmp is the
# working copy that the per-chunk parameter files are generated from.
grep -v '^[[:space:]]*$' ylm.parms.blank | grep -v "^[[:space:]]*#"  > parms.tmp

# Name of the MODELIST file as given in the template.
set file = `grep MODELIST= parms.tmp | cut -d= -f2`

# Without exactly one value the file test below would abort with a tcsh
# expression error instead of a readable message, so check that first.
if ($#file != 1) then
  echo MODELIST entry is missing or not unique in ylm.parms.blank
  exit 1
endif

if (! -es $file) then
  echo MODELIST file $file is missing
  exit 1
endif

# Number of lines in the MODELIST file.
set nim = `wc -l < $file`

# ndt comes from the command line (default 0); an earlier version read NDT=
# from the parameter file instead:
#set ndt = `cat parms.tmp | grep NDT= | cut -d= -f2`
#if ($#ndt == 0) set ndt = 0

# One job per ndt MODELIST lines, rounded up; ndt == 0 means a single job.
if ($ndt != 0) then
  @ nchunks = ( ( $nim - 1 ) / $ndt ) + 1
else
  @ nchunks = 1
endif

# Write one job script (suby.<i><suff>_<tag>) and one parameter file
# (ylm.parms.<i>) per chunk.
#DTOFF will be ignored if NDT=0 (which may be forced in ylm.parms.blank)
set i=0
while ($i < $nchunks)
  # Offset of this chunk: i * ndt.
  @ dtoff = $i * $ndt
  set subfile=suby.$i$suff'_'$tag

  # Drop results left by an earlier run in this directory.  Otherwise a chunk
  # whose job never runs would be judged by the old log and exit status and
  # could be reported as successful.
  rm -f ylm.log.$i mkylms.exitstatus.$i

  echo '#\!/bin/csh' > $subfile
  # $JSOC_MACHINE is written unexpanded so it is resolved where the job runs;
  # $JSOCROOT, $JSOC_DBUSER and $PWD are expanded now, from this environment.
  echo 'setenv PATH' $JSOCROOT'/bin/$JSOC_MACHINE' >> $subfile
  echo 'setenv JSOC_DBUSER' $JSOC_DBUSER >> $subfile
  echo 'cd' $PWD >> $subfile

  # Fill in the XXXX/YYYY/PPPP/BBBB/DDDD/TTTT placeholders of the template.
  sed -e s/XXXX/$xoffset/g -e s/YYYY/$yoffset/g -e s/PPPP/$pangle/g \
      -e s/BBBB/$bangle/g -e s/DDDD/$obsdist/g -e s/TTTT/$ndt/g \
      parms.tmp > ylm.parms.$i

  # The job logs to ylm.log.<i> and records mkylms' exit status in
  # mkylms.exitstatus.<i>; both files are inspected after the jobs finish.
  echo \(time mkylms @ylm.parms.$i DTOFF=$dtoff\) '>&' ylm.log.$i >> $subfile
  echo echo \$status '>&' mkylms.exitstatus.$i >> $subfile
  @ i++
end

# Hold off until waittosubmit returns (it is given the job threshold and the
# "suby" job-name prefix), then submit one job per chunk.  qsub's own
# stdout/stderr files go to $qsubtmp.
waittosubmit $jobthreshold suby
@ chunk = 0
while ($chunk < $nchunks)
  set jobscript = suby.${chunk}${suff}_${tag}
  qsub -q $q -e $qsubtmp -o $qsubtmp $jobscript
  @ chunk++
end

echo jobs submitted, start waiting

# Poll qstat once a minute until none of this user's jobs has a full job name
# containing both "suby" and this run's <suff>_<tag>.
set jobpattern = ${suff}_${tag}
set njobs = 1
while ($njobs > 0)
  set njobs = `qstat -r -u $USER | grep "Full jobname:" | grep suby | grep -c $jobpattern`
  if ($njobs > 0) sleep 60
end

# Chunk numbers that have a parameter file but no log (or the reverse):
# "uniq -u" keeps the numbers that occur in only one of the two lists.
set expectedlist = `/bin/ls ylm.parms.[0-9]* | cut -d'.' -f 3`
set ranlist   = `/bin/ls ylm.log.* | cut -d'.' -f 3`
set rerunlist = `echo $expectedlist $ranlist | tr ' ' '\n' | sort -n | uniq -u`

# errlist1: chunks whose recorded exit status is anything other than exactly 0.
# -x matches whole lines; without it any status containing the digit 0
# (10, 20, 130, ...) would be taken for success.
set errlist1 = `grep -Hvx 0 mkylms.exitstatus.* | awk -F '[.:]' '{print $3}'`
# errlist2: chunks whose log's count of "successful completion" lines does not
# begin with 1.
set errlist2 = `grep -Hc "successful completion" ylm.log.* | grep -v :1 | awk -F '[.:]' '{print $3}'`
# Union of both error lists, without duplicates.
set errlist = `echo $errlist1 $errlist2 | tr ' ' '\n' | sort -n | uniq`

# If any chunk is missing a log or reported an error, resubmit those chunks
# once, wait for them, and check again.  A second failure ends the script.
if ($#rerunlist || $#errlist) then
  echo some jobs fail, rerunning
  # Keep a record of what was resubmitted and why.
  echo rerunlist: $rerunlist > faillog
  echo errlist1: $errlist1 >> faillog
  echo errlist2: $errlist2 >> faillog
  waittosubmit $jobthreshold suby
  foreach i ($errlist $rerunlist)
    qsub -q $q -e $qsubtmp -o $qsubtmp suby.$i$suff'_'$tag
  end

  # Poll once a minute until no matching job is listed by qstat.
  set njobs = `qstat -r -u $USER | grep "Full jobname:" | grep suby | grep $suff'_'$tag | wc -l`
  while($njobs > 0)
    sleep 60
    set njobs = `qstat -r -u $USER | grep "Full jobname:" | grep suby | grep $suff'_'$tag | wc -l`
  end

  # Chunk numbers present in only one of the parameter-file and log lists.
  set expectedlist = `/bin/ls ylm.parms.[0-9]* | cut -d'.' -f 3`
  set ranlist   = `/bin/ls ylm.log.* | cut -d'.' -f 3`
  set rerunlist = `echo $expectedlist $ranlist | tr ' ' '\n' | sort -n | uniq -u`

  # -x: only a line that is exactly "0" counts as success; without it any
  # status containing the digit 0 (10, 20, 130, ...) would pass.
  set errlist1 = `grep -Hvx 0 mkylms.exitstatus.* | awk -F '[.:]' '{print $3}'`
  # Logs whose count of "successful completion" lines does not begin with 1.
  set errlist2 = `grep -Hc "successful completion" ylm.log.* | grep -v :1 | awk -F '[.:]' '{print $3}'`
  set errlist = `echo $errlist1 $errlist2 | tr ' ' '\n' | sort -n | uniq`

  if ($#rerunlist || $#errlist) then
    echo some jobs still fail, i give up
    exit 1
  endif
endif

# Success path only: remove the stripped working copy of ylm.parms.blank.
# The "exit 1" branches earlier in the script leave parms.tmp in place.
rm parms.tmp

# NOTE(review): this script itself greps job logs for the phrase
# "successful completion"; presumably a caller checks this output the same
# way, so keep the wording unchanged - confirm against the caller.
echo successful completion
exit 0

Karen Tian
Powered by
ViewCVS 0.9.4