![]() ![]() |
![]() |
File: [Development] / JSOC / proj / workflow / BigRedButton.csh
(download)
/
(as text)
Revision: 1.2, Tue May 14 18:46:16 2013 UTC (10 years ago) by jeneen Branch: MAIN CVS Tags: Ver_LATEST, Ver_9-5, Ver_9-41, Ver_9-4, Ver_9-3, Ver_9-2, Ver_9-1, Ver_9-0, Ver_8-8, Ver_8-7, Ver_8-6, Ver_8-5, Ver_8-4, Ver_8-3, Ver_8-2, Ver_8-12, Ver_8-11, Ver_8-10, Ver_8-1, HEAD Changes since 1.1: +4 -2 lines updating so pipeline will run from Development |
#! /bin/csh -f ### BigRedButton.csh should be run as jsocprod on n04 clean up gates and tasks ### after a major failure in production processing: ### ### 1. Stop the gatekeeper ### 2. Kill all taskmanagers running on n04 ### 3. Delete all tickets and miscellaneous files from all gates and tasks ### 4. Kill anything running in qsub ### 5. Check for valid low and high keys in gates ### 6. Run the cleanup script and delete failed directories to make it easier to check gates and tasks ### 7 Restart the gatekeeper set user = $USER if ( $user != 'jsocprod' ) then echo "" echo "Must run as user jsocprod on n04." echo "" exit endif set WORKFLOW_DATA = /home/jsoc/pipeline #set WORKFLOW_ROOT = /home/phil/jsoc/proj/workflow set WORKFLOW_ROOT = /home/jsoc/cvs/Development/JSOC/proj/workflow set TASKS = $WORKFLOW_DATA/tasks set GATES = $WORKFLOW_GATES/gates set echo # 1 # rm $WORKFLOW_DATA/Keep_running # 2 # @ TM_num = `ps -ef | grep taskmanager.csh | wc -l` while ( $TM_num > 0 ) foreach TM ( `ps -ef | grep taskmanager.csh | awk '{print $2}'` ) kill -9 $TM end e@ TM_num = `ps -ef | grep taskmanager.csh | wc -l` while ( `ps -ef | grep taskmanager.csh | wc -l` > 0 ) end # 3 # cd $TASKS foreach task ( * ) rm -rf $task/active/* echo 0 > $task/state rm $task/active/$task'-root'/pending_tickets/* end cd $GATES foreach gate ( * ) rm $gate/active_tickets/* rm $gate/new_tickets/* end ## 4 ## foreach QSub ( `qstat | grep jsocprod | egrep '(OBS|VEC|NRT|IMG|MSK|FITS|keiji)' | awk '{print $1}'` ) qdel $Qsub end ## 5 ## cd $WORKFLOW_ROOT ./cleanup.csh cd $TASKS foreach task ( * ) rm -rf $task/archive/failed/* end ## 6 ## cd $GATES foreach gate ( * ) echo $gate cat $gate/low cat $gate/high echo "" end ## 7 ## #/home/phil/jsoc/proj/workflow/gatekeeper.restart >> /home/jsoc/pipeline/restart.log & /home/jsoc/cvs/Development/JSOC/proj/workflow/gatekeeper.restart >> /home/jsoc/pipeline/restart.log & echo "1. Check for bad low high times in gates (should be the last thing on the screen)." echo "2. Make sure there are no taskmanagers running." echo "3. Check gates and tasks (chechgates.csh | more, etc)." echo "4. Restart failed tickets or run maketickets to get things running again."
Karen Tian |
Powered by ViewCVS 0.9.4 |