(file) Return to exportmanage.pl CVS log (file) (dir) Up to [Development] / JSOC / proj / util / scripts

File: [Development] / JSOC / proj / util / scripts / exportmanage.pl (download)
Revision: 1.29, Tue Sep 15 15:29:06 2020 UTC (3 years ago) by arta
Branch: MAIN
CVS Tags: Ver_LATEST, Ver_9-5, Ver_9-41, HEAD
Changes since 1.28: +1 -1 lines
remove not needed guard which is always true because next_hour_to_run is always updated during a run so that it is later than last_time_run

#!/home/jsoc/bin/linux_x86_64/activeperl

# Here's how to run this script (from scratch)
#  ssh jsoc@j0
#  cd /home/jsoc/exports
#  rm keep_running
#  exportmanage.pl -jsocdev procser=jsoc.export_procs &
# To start the web version for jsoc.stanford.edu repeat the above but do:
#  rm keep_running_web
#  exportmanage.pl -jsocweb procser=jsoc.export_procs&
# At some point, I'll add the production version of the script, which
#  would then be run as "exportmanage.pl -jsocpro &"
# To start the debug test version for jsoc2 do:
#  rm keep_running_test
#  exportmanage.pl -jsoctest procser=jsoc.export_procs &

# To test the entire export workflow:
#  1. Run export manage like so:
#     su <USER>
#        where <USER> is the user whose $PATH contains the paths to the modules/scripts that have new code to be tested.
#        The manager program makes shell scripts that call programs and scripts by calling the program/script base file
#        name (not the full path to the program/script). So the actual program/script that runs will be the one
#        that the shell resolves using the $PATH variable. For example, if the user arta makes
#        changes to the jsoc_export_as_fits in arta's home directory, and arta's $PATH contains a pointer to the binaries
#        in arta's home directory, then arta should run 'su arta' before continuing. IMPORTANT - <USER> will need
#        permissions to write into /home/jsoc/exports/tmp and /home/jsoc/exports/logs. Generally <USER> will be a
#        member of the group jsoc, so this requirement will be met.
#     cd /home/jsoc/exports
#     /home/jsoc/cvs/Development/JSOC/proj/util/scripts/exportmanage.pl -root <ROOT> -dbuser <DBUSER> -dbhost <DBHOST> -manager <MANAGER> -runflag <RFLAG> &
#        where <ROOT> is the CVS code tree root containing <MANAGER>
#        and <DBUSER> is the PG user who the manager connects as (defaults to "production"). IMPORTANT - the manager will
#           write records to tables that require elevated permissions. Most likely, you'll need to connect to the database
#           as user production to do this. This means that you'll need to place the password for user production in
#           your .pgpass file.
#        and <DBHOST> is the host of the database server (defaults to "hmidb"). For internal exports, should be "hmidb", for exports from public db, should be "hmidb2"
#        and <MANAGER> is the name of the manager program (defaults to "jsoc_export_manage"). IMPORTANT - you should include
#           a "-t" flag. This will cause the manager program to run in test mode, which means that it will process records
#           in jsoc.export_new that contain the special test status of 12 (instead of the regular status of 2).
#        and <RFLAG> is the file flag that keeps this script running in a loop (defaults to keep_running in cdir)
#
#        Example : /home/jsoc/cvs/Development/JSOC/proj/util/scripts/exportmanage.pl -root /home/arta/cvs/JSOC -dbuser production -dbhost hmidb2 -manager "jsoc_export_manage -t procser=jsoc.export_procs" -runflag keepruntest.txt -logflag Test &
#
#  2. Point a browser at http://jsoc.stanford.edu/ajax/exportdatatest.html and export something.
#
# In order for jsoc_export_manage to properly start a queuing-system script, the environment must contain a "pointer" to the cluster queuing system.
# In the past, this needed to be done by exportmanage.pl; at some point, it was moved to a script that started exportmanage.pl. In both cases, it
# was difficult to ensure that the environment always got set. Now the environment is set by jsoc_export_manage itself: it sources a bash script
# that sets the environment immediately before running qsub.

use strict;
use warnings;

use FileHandle;
use Fcntl ':flock';
use DateTime qw(compare strftime now);
use DateTime::Format::Strptime;
use FindBin qw($RealBin);
use lib "$RealBin/../../../localization";
use drmsparams;

# Fixed locations, notification settings and operation names used by the export daemon.
use constant {
    # directory under which the daemon log (logs/exportlog-<flag>.txt) is written
    kExportDir => "/home/jsoc/exports",

    # space-separated recipients of notification mail
    # (previously "arta\@sun.stanford.edu jeneen\@sun.stanford.edu phil\@sun.stanford.edu")
    kMailList => "arta\@sun.stanford.edu",

    # bodies of the notification mails
    kMailMessage1 => "exportmanage.pl could not start jsoc_export_manage. This is a critical failure.\nYou should probably contact Art, who was also notified and should respond shortly.\n",
    kMailMessage2 => "jsoc_export_manage died in response to an unhandled signal (e.g., a segfault).\n",
    kMailMessage3 => "Could not open log export-daemon log file for writing.\nThis is not a critical failure, but we should\nfix this so that we can track-down future export problems more easily.\nContact Art.\n",

    # keys under which messages are stored in the message queue
    kMsgType1 => "msgtype1",
    kMsgType2 => "msgtype2",
    kMsgType3 => "msgtype3",

    kMsgQInterval     => 600, # 10 minutes, at least, between message inserts
    kMsgQSendInterval => 120, # 2 minutes between mailing of messages

    # suffixes used in the daemon log file name
    kLogFlagInt => "int",
    kLogFlagExt => "ext",

    # values passed to the manager program as op=<value>
    JEM_OPERATION_CLEAN_HASHES           => "clean_hashes",
    JEM_OPERATION_CLEAN_PENDING_REQUESTS => "clean_requests",
    MD5_SERIES                           => "jsoc.export_md5",
};

# local-time hours of the day used to schedule the cleaning operations
my(@CLEAN_HASHES_TIMES) = (0, 6, 12, 18);

# Site configuration (project module drmsparams); supplies the default DB name and server.
# Direct method-call syntax is used instead of the indirect-object form "new drmsparams".
my($config) = drmsparams->new;

# Run-flag files for the three daemon flavours. The file holds the PID of the
# daemon that owns it; the daemon keeps looping while it still owns the file.
my($kINTERNALFLAG) = "/home/jsoc/exports/keep_running";
my($kWEBFLAG) = "/home/jsoc/exports/keep_running_web";
my($kTESTFLAG) = "/home/jsoc/exports/keep_running_test";

# Settings applied by -jsocdev
my($kJSOCDEV_ROOT) = "/home/jsoc/cvs/Development/JSOC";
my($kJSOCDEV_DBUSER) = "production";
my($kJSOCDEV_DBNAME) = $config->get('DBNAME');
my($kJSOCDEV_DBHOST) = $config->get('SERVER');
my($kJSOCDEV_MANAGE) = "jsoc_export_manage";
use constant kProcInfoSeriesDev => "jsoc.export_procs";

# Settings applied by -jsocpro
my($kJSOCPRO_ROOT) = "/home/jsoc/cvs/JSOC";
my($kJSOCPRO_DBUSER) = "production";
my($kJSOCPRO_DBNAME) = $config->get('DBNAME');
my($kJSOCPRO_DBHOST) = $config->get('SERVER');
my($kJSOCPRO_MANAGE) = "jsoc_export_manage";
use constant kProcInfoSeriesPro => "jsoc.export_procs";

# Settings applied by -jsocweb
my($kJSOCWEB_ROOT) = "/home/jsoc/cvs/Development/JSOC";
my($kJSOCWEB_DBUSER) = "production";
my($kJSOCWEB_DBNAME) = "jsoc";
my($kJSOCWEB_DBHOST) = "hmidb2";
my($kJSOCWEB_MANAGE) = "jsoc_export_manage";
use constant kProcInfoSeriesWeb => "jsoc.export_procs";

# Settings applied by -jsoctest
my($kJSOCTEST_ROOT) = "/home/jsoc/cvs/Development/JSOC";
my($kJSOCTEST_DBUSER) = "phil";
my($kJSOCTEST_DBNAME) = $config->get('DBNAME');
my($kJSOCTEST_DBHOST) = $config->get('SERVER');
my($kJSOCTEST_MANAGE) = "jsoc_export_manage_test";
use constant kProcInfoSeriesTst => "jsoc.export_procs";

# Values in effect when no command-line option overrides them.
my($runningflag) = $kINTERNALFLAG;          # run-flag file
my($arg);                                   # current command-line argument
my($root);                                  # code tree root; exported as JSOCROOT
my($dbhost) = $config->get('SERVER');       # passed to the manager as JSOC_DBHOST=
my($dbname) = $config->get('DBNAME');       # exported as JSOC_DBNAME
my($dbuser) = "production";                 # exported as JSOC_DBUSER
my($binpath);                               # set only by the -jsoc* presets
my($manage) = "jsoc_export_manage";         # manager program (may include arguments)
my($logfile);
my($daemonlog);                             # path of the daemon log file
my($lckfh);                                 # handle holding the exclusive lock
my($msg);                                   # scratch buffer for log messages
my($logflag);                               # suffix used in the daemon log file name
my($procser);                               # passed to the manager as procser=

# Parse the command line.
#
# Options taking a value:  -root, -dbhost, -dbuser, -dbname, -manager,
#                          -runflag, -logflag, -procser
# Presets (no value):      -jsocdev, -jsocpro, -jsocweb, -jsoctest
#
# Options are applied in the order given, so a later option overrides an
# earlier one. Arguments that match none of the above are ignored.

# value option => reference to the variable that receives the next argument
my(%value_option) = (
    "-root"    => \$root,
    "-dbhost"  => \$dbhost,
    "-dbuser"  => \$dbuser,
    "-dbname"  => \$dbname,
    "-manager" => \$manage,
    "-runflag" => \$runningflag,
    "-logflag" => \$logflag,
    "-procser" => \$procser,
);

# preset option => complete group of settings it applies
my(%preset) = (
    "-jsocdev" => {
        root    => $kJSOCDEV_ROOT,
        dbuser  => $kJSOCDEV_DBUSER,
        dbname  => $kJSOCDEV_DBNAME,
        dbhost  => $kJSOCDEV_DBHOST,
        manage  => $kJSOCDEV_MANAGE,
        runflag => $kINTERNALFLAG,
        logflag => kLogFlagInt(),
        procser => kProcInfoSeriesDev(),
    },
    "-jsocpro" => {
        root    => $kJSOCPRO_ROOT,
        dbuser  => $kJSOCPRO_DBUSER,
        dbname  => $kJSOCPRO_DBNAME,
        dbhost  => $kJSOCPRO_DBHOST,
        manage  => $kJSOCPRO_MANAGE,
        runflag => $kINTERNALFLAG,
        logflag => kLogFlagInt(),
        procser => kProcInfoSeriesPro(),
    },
    "-jsocweb" => {
        root    => $kJSOCWEB_ROOT,
        dbuser  => $kJSOCWEB_DBUSER,
        dbname  => $kJSOCWEB_DBNAME,
        dbhost  => $kJSOCWEB_DBHOST,
        manage  => $kJSOCWEB_MANAGE,
        runflag => $kWEBFLAG,
        logflag => kLogFlagExt(),
        procser => kProcInfoSeriesWeb(),
    },
    "-jsoctest" => {
        root    => $kJSOCTEST_ROOT,
        dbuser  => $kJSOCTEST_DBUSER,
        dbname  => $kJSOCTEST_DBNAME,
        dbhost  => $kJSOCTEST_DBHOST,
        manage  => $kJSOCTEST_MANAGE,
        runflag => $kTESTFLAG,
        logflag => "Test",
        procser => kProcInfoSeriesTst(),
    },
);

# Test for definedness, not truth: an argument of "0" or "" must not end
# parsing early and silently drop the remaining options.
while (defined($arg = shift(@ARGV)))
{
    if (exists($value_option{$arg}))
    {
        # the value is the next argument (undef if the command line ends here)
        ${$value_option{$arg}} = shift(@ARGV);
    }
    elsif (exists($preset{$arg}))
    {
        my($settings) = $preset{$arg};

        $root = $settings->{root};
        # only the presets set $binpath; a bare -root leaves it undefined
        $binpath = "$root/bin";
        $dbuser = $settings->{dbuser};
        $dbname = $settings->{dbname};
        $dbhost = $settings->{dbhost};
        $manage = $settings->{manage};
        $runningflag = $settings->{runflag};
        $logflag = $settings->{logflag};
        $procser = $settings->{procser};
    }
}

# Only run on j0.Stanford.EDU
# if ($ENV{HOSTNAME} ne "j0.Stanford.EDU") {
#    die "I will only run on j0.Stanford.EDU\n";
#}

# Don't run if somebody is already managing the export
# Take an exclusive, non-blocking lock on "<runflag>.lck". The handle stays
# open (and the lock held) until the script releases it at shutdown.
$lckfh = FileHandle->new("$runningflag.lck", "w");

# A lock file that cannot be opened is a different problem from a lock held by
# another daemon; report it as such instead of claiming "already running".
unless (defined($lckfh))
{
   die "Can't manage export; unable to open lock file $runningflag.lck: $!\n";
}

unless (flock($lckfh, LOCK_EX|LOCK_NB))
{
   print "$0 is already running. Exiting.\n";
   exit(2);
}

#if (-e $runningflag)
#{
#    die "Can't manage export; another process is already managing it.\n";
#}

# Turn the bin directory chosen by a preset into the machine-specific prefix
# that is put in front of the manager program name. Without a preset the
# manager is found through $PATH (empty prefix).
if (defined($binpath))
{
    $binpath = "$binpath/$ENV{JSOC_MACHINE}/";
}
else
{
    $binpath = "";
}

#local $ENV{"PATH"} = "$binpath:$ENV{\"PATH\"}";
#local $ENV{"PATH"} = "$scrpath:$ENV{\"PATH\"}";

# Environment inherited by the manager program.
local $ENV{"JSOCROOT"} = $root;
local $ENV{"JSOC_DBUSER"} = $dbuser;
local $ENV{"JSOC_DBNAME"} = $dbname;

# Record our PID in the run flag. The file is written directly rather than
# through `echo $$ > file`, so the path given with -runflag is never
# interpreted by a shell, and a failure to write is reported.
my($runflagfh) = FileHandle->new($runningflag, "w");

if (defined($runflagfh))
{
    $runflagfh->print("$$\n");
    $runflagfh->close();
}
else
{
    # Not fatal here: without an owned run flag the main loop makes one pass and stops.
    print STDERR "Unable to write PID to run flag $runningflag: $!\n";
}

$daemonlog = kExportDir . "/logs/exportlog-${logflag}.txt";

# State shared by the main loop and the shutdown code.
my($rout, $cmd);                     # output of / command line for the manager program
my($dlogfh);                         # daemon log handle; opened on demand by PrintToLog()
my($datenow);
my($current_datetime, $current_datetime_str);
my($strp_hour);                      # parser for 'YYYYMMDD_H' time strings
my($next_hour_to_run_index);         # index into @CLEAN_HASHES_TIMES
my($next_hour_to_run);               # DateTime after which the cleaning operations run next
my($last_time_run);                  # DateTime of the last successful cleaning, or undef
my($err) = 0;                        # exit status of this script
my($msgq) = {lastsend => time(), msgs => {}};   # pending notification mails


# make time-string parser (all times are local)
$strp_hour = DateTime::Format::Strptime->new(pattern => '%Y%m%d_%H', locale => 'en_US', time_zone => 'local');
$current_datetime = DateTime->now(time_zone => 'local');

my($started_at) = $current_datetime->strftime('%Y-%m-%d %T');
$msg = "Started by $ENV{'USER'} at $started_at on machine $ENV{'HOST'} using $dbhost.\n";

# first call to PrintToLog() opens log file
PrintToLog(\$dlogfh, $daemonlog, $msg);

# start the cleaning schedule at the first configured hour of today; no cleaning has run yet
$next_hour_to_run_index = 0;
my($today) = $current_datetime->strftime('%Y%m%d');
$next_hour_to_run = $strp_hour->parse_datetime($today . '_' . $CLEAN_HASHES_TIMES[$next_hour_to_run_index]);
$last_time_run = undef;

# Main service loop. Each iteration:
#   1. runs the manager program once and logs its output;
#   2. if cleaning is due (never run yet, or the scheduled time has passed),
#      runs the two cleaning operations and, when both ran, advances the schedule;
#   3. mails any pending notifications and closes the daemon log;
#   4. sleeps 2 seconds and repeats while this process still owns the run flag.
while (1)
{
    # print "running $cmd.\n";
    $cmd = "$binpath" . "$manage JSOC_DBHOST=$dbhost procser=$procser";
    $rout = qx($cmd 2>&1);

    if ($? == -1)
    {
        # the command could not be started at all
        QueueMessage($msgq, &kMsgType1, "Export Daemon Execution Failure!!", &kMailMessage1);
    }
    elsif ($? & 127)
    {
        # jsoc_export_manage died in response to an unhandled signal
        my($sig) = $? & 127;

        QueueMessage($msgq, &kMsgType1, "Export Daemon Execution Failure!!", &kMailMessage2, "DB Host: $dbhost\n", "Unhandled signal: $sig.\n");
    }
    elsif (($? >> 8) != 0)
    {
        # jsoc_export_manage returned with an error code; the shift must be
        # evaluated outside the string, otherwise the raw wait status followed
        # by the literal text " >> 8" is logged
        my($exit_code) = $? >> 8;

        $msg = "$manage returned with a non-zero code of $exit_code.\n";
        PrintToLog(\$dlogfh, $daemonlog, $msg);
    }

    if (defined($rout) && length($rout) > 0)
    {
        $msg = "$rout\n";
        PrintToLog(\$dlogfh, $daemonlog, $msg);
    }

    # clean hashes from MD5_SERIES series
    $current_datetime = DateTime->now(time_zone => 'local');

    if (!defined($last_time_run) || DateTime->compare($current_datetime, $next_hour_to_run) > 0)
    {
        my($clean_success) = 0;
        my($next_day);


        # clean MD5 hashes
        $cmd = "$binpath" . "$manage JSOC_DBHOST=$dbhost op=" . JEM_OPERATION_CLEAN_HASHES;
        $rout = qx($cmd 2>&1);

        if ($? == -1)
        {
            QueueMessage($msgq, &kMsgType1, "Export Daemon Execution Failure!!", &kMailMessage1, "failure to clean MD5 hashes table");
        }
        elsif ($? & 127)
        {
            # jsoc_export_manage died in response to an unhandled signal
            my($sig) = $? & 127;

            QueueMessage($msgq, &kMsgType1, "Export Daemon Execution Failure!!", &kMailMessage2, "failure to clean MD5 hashes table", "DB Host: $dbhost\n", "Unhandled signal: $sig.\n");
        }
        else
        {
            # NOTE(review): a non-zero exit code is also counted as success here - confirm that is intended
            $clean_success = 1;
            $msg = "cleaned MD5 hashes\n";
            PrintToLog(\$dlogfh, $daemonlog, $msg);
        }

        if (defined($rout) && length($rout) > 0)
        {
            $msg = "$rout\n";
            PrintToLog(\$dlogfh, $daemonlog, $msg);
        }

        if ($clean_success)
        {
            # clean pending-requests table
            $clean_success = 0;

            $cmd = "$binpath" . "$manage JSOC_DBHOST=$dbhost op=" . JEM_OPERATION_CLEAN_PENDING_REQUESTS;
            $rout = qx($cmd 2>&1);

            if ($? == -1)
            {
                QueueMessage($msgq, &kMsgType1, "Export Daemon Execution Failure!!", &kMailMessage1, "failure to clean pending-requests table");
            }
            elsif ($? & 127)
            {
                # jsoc_export_manage died in response to an unhandled signal
                my($sig) = $? & 127;

                QueueMessage($msgq, &kMsgType1, "Export Daemon Execution Failure!!", &kMailMessage2, "failure to clean pending-requests table", "DB Host: $dbhost\n", "Unhandled signal: $sig.\n");
            }
            else
            {
                # NOTE(review): a non-zero exit code is also counted as success here - confirm that is intended
                $clean_success = 1;
                $msg = "cleaned pending requests\n";
                PrintToLog(\$dlogfh, $daemonlog, $msg);
            }

            if (defined($rout) && length($rout) > 0)
            {
                $msg = "$rout\n";
                PrintToLog(\$dlogfh, $daemonlog, $msg);
            }
        }

        if ($clean_success)
        {
            # update the variables that control the next time cleaning is run - if either of the
            # cleaning tasks fails, then a new attempt will be made each loop iteration until
            # both cleaning tasks complete
            $last_time_run = $current_datetime;
            $msg = "updating next clean time; current time is " . $current_datetime->strftime('%Y-%m-%d %T') . "\n";
            PrintToLog(\$dlogfh, $daemonlog, $msg);

            # step through the configured hours until one lies after the time of
            # this run; when today's hours are used up, wrap to the first hour of
            # the following day
            while (1)
            {
                $next_hour_to_run_index++;

                if ($next_hour_to_run_index > scalar(@CLEAN_HASHES_TIMES) - 1)
                {
                    # set $next_hour_to_run to first run time next day; set index to 0
                    $next_day = ($last_time_run + DateTime::Duration->new( days => 1 ))->strftime('%Y%m%d');
                    $next_hour_to_run_index = 0;
                    $next_hour_to_run = $strp_hour->parse_datetime($next_day . '_' . $CLEAN_HASHES_TIMES[$next_hour_to_run_index]);
                    last;
                }

                $next_hour_to_run = $strp_hour->parse_datetime($last_time_run->strftime('%Y%m%d') . '_' . $CLEAN_HASHES_TIMES[$next_hour_to_run_index]);

                if (DateTime->compare($next_hour_to_run, $last_time_run) > 0)
                {
                    last;
                }
            }

            $msg = "next hour to run is $next_hour_to_run\n";
            PrintToLog(\$dlogfh, $daemonlog, $msg);
        }
    }

    SendPendingMessages($msgq);

    # the log is closed every iteration; the next PrintToLog() call reopens it
    CloseDLog(\$dlogfh);

    if (KeepRunning($runningflag))
    {
        sleep(2);
    }
    else
    {
        # the run flag was removed or no longer holds our PID: stop
        last;
    }
} # while forever

# Shutdown: log the stop, close the log, remove the run flag if it is still
# ours, and release the lock.

# `date` output ends with a newline; strip it so the log entry stays on one
# line instead of breaking just before the final period.
my($stopped_at) = `date`;
$stopped_at = "" unless defined($stopped_at);
chomp($stopped_at);

$msg = "Stopped by $ENV{'USER'} at " . $stopped_at . ".\n";
# the loop closed the log, so this call reopens it
PrintToLog(\$dlogfh, $daemonlog, $msg);

if (defined($dlogfh))
{
    CloseDLog(\$dlogfh);
}


# Don't leave junk laying about
CleanRunFlag($runningflag);

# release the exclusive file lock
flock($lckfh, LOCK_UN);
$lckfh->close;

exit($err);

# END
# Report whether the run-flag file belongs to this process.
#
#   $_[0] - path of the run-flag file
#
# Returns true only when the file exists, can be read, and its first line is
# the PID of the current process. A missing, unreadable, empty or malformed
# file yields false (without "uninitialized value" warnings).
sub IOwnRunFlag
{
   my($file) = $_[0];
   my($iownit) = 0;
   my($line);
   my($fh);

   if (-e $file)
   {
      # three-argument open with a lexical handle: the file name can never be
      # taken as an open mode, and no global handle is left behind
      if (open($fh, "<", $file))
      {
         $line = <$fh>;
         close($fh);

         # an empty file gives undef; anything that is not a plain number
         # cannot be our PID
         if (defined($line) && $line =~ /^\s*(\d+)\s*$/)
         {
            $iownit = ($1 == $$) ? 1 : 0;
         }
      }
   }

   return $iownit;
}

# Decide whether the daemon loop should continue.
#
#   first argument - path of the run-flag file
#
# Returns whatever IOwnRunFlag() reports for that file: the daemon keeps
# running for as long as it still owns the run flag.
sub KeepRunning
{
   my($runflag) = @_;
   my $still_mine = IOwnRunFlag($runflag);

   return $still_mine;
}

# Remove the run-flag file, but only when IOwnRunFlag() says it belongs to
# this process; a flag owned by another process is left untouched.
#
#   first argument - path of the run-flag file
sub CleanRunFlag
{
   my($runflag) = @_;

   unlink($runflag) if IOwnRunFlag($runflag);
}

# Make sure the daemon log is open for appending.
#
#   1st argument - reference to the scalar holding the FileHandle object
#   2nd argument - path of the log file
#   3rd argument - (optional) message queue; when given, a notification is
#                  queued if the log cannot be opened
#
# Returns 0 when the handle is open (already, or newly opened), 1 when the
# log file could not be opened.
sub GetDLogFH
{
    my($rfh, $dlog, $msgq) = @_;

    # nothing to do when a handle is already in place
    return 0 if defined($$rfh);

    $$rfh = FileHandle->new(">>$dlog");
    return 0 if defined($$rfh);

    # NOTE(review): queued under kMsgType1 although the text is kMailMessage3 -
    # confirm whether kMsgType3 was intended
    QueueMessage($msgq, &kMsgType1, "Export Daemon Log Unavailable", &kMailMessage3) if defined($msgq);

    return 1;
}

# Append one timestamped entry to the daemon log, opening the log first if
# it is not open.
#
#   1st argument - reference to the scalar holding the FileHandle object
#   2nd argument - path of the log file
#   3rd argument - message text (callers supply the trailing newline)
#
# The entry is written as "[ YYYY-MM-DD HH:MM:SS ] <message>" in local time.
# When the log cannot be opened the message is dropped.
sub PrintToLog
{
    my($rfh) = shift; # reference to filehandle object
    my($dlog) = shift;
    my($msg) = shift;
    my($timestamp);

    unless (GetDLogFH($rfh, $dlog))
    {
        # take the time once and use it; the previous code fetched the current
        # time twice and discarded the first value
        $timestamp = DateTime->now(time_zone => 'local')->strftime('%Y-%m-%d %T');
        $$rfh->print("[ " . $timestamp . " ] " . $msg);
    }
}

# Close the daemon log, if it is open, and clear the caller's handle so that
# the next logging call opens the file again.
#
#   first argument - reference to the scalar holding the FileHandle object
sub CloseDLog
{
    my($rfh) = @_;

    # no handle, nothing to close
    return unless defined($$rfh);

    $$rfh->close();
    $$rfh = undef;
}

# Mail every message waiting in the queue, at most once per
# kMsgQSendInterval seconds.
#
#   first argument - message queue: {lastsend => <unix seconds>, msgs => {<id> => {...}}}
#
# Each queued message is mailed to the kMailList recipients with its stored
# subject. A message whose ntimes is greater than 1 has ntimes decremented and
# stays queued for the next round; otherwise it is removed. lastsend is
# updated after a round. Dies if the mail program cannot be started.
sub SendPendingMessages
{
    my($msgs) = shift;
    my($entry);
    my($mailfh);

    if (time() - $msgs->{lastsend} > kMsgQSendInterval())
    {
        # Check for pending messages. The queue passed in by the caller is used
        # throughout (the previous code read the file-level $msgq here).
        # keys() returns a copy, so deleting entries inside the loop is safe.
        foreach my $imsg (keys(%{$msgs->{msgs}}))
        {
            $entry = $msgs->{msgs}->{$imsg};

            # list-form pipe open: no shell is involved, so the subject cannot
            # be mangled by shell quoting; kMailList is space-separated
            open($mailfh, '|-', '/bin/mail', '-s', $entry->{subj}, split(' ', kMailList())) || die "Couldn't open 'mail' pipe.\n";
            print {$mailfh} $entry->{msg};
            close($mailfh);

            if ($entry->{ntimes} > 1)
            {
                $entry->{ntimes} = $entry->{ntimes} - 1;
            }
            else
            {
                delete($msgs->{msgs}->{$imsg});
            }
        }

        $msgs->{lastsend} = time();
    }
}

# Message queue (to MAIL warnings and notices):
#   key - id
#   val - hash : {instime => 10292392, subj => "Export failure", msg => "export failure", ntimes => 5}
#     where instime is unix seconds identifying time message was (last) inserted into queue.
#           subj is the mail subject
#           msg is the message to mail
#           ntimes is the number of times to send message out.
#
# QueueMessage(queue, id, subject, text...)
#   Adds a message to the queue. The text arguments are concatenated to form
#   the mail body. If a message with the same id is already queued, its subject
#   and body are kept and ntimes is incremented, but only when more than
#   kMsgQInterval seconds have passed since the last insert for that id.
sub QueueMessage
{
    my($msgq) = shift;
    my($type) = shift;
    my($subj) = shift;
    my(@msg) = @_;
    my($now) = time();

    if (exists($msgq->{msgs}->{$type}))
    {
        my($entry) = $msgq->{msgs}->{$type};

        # Message already exists. Don't add to queue until some time elapses.
        if ($now - $entry->{instime} > kMsgQInterval())
        {
            $entry->{ntimes} = $entry->{ntimes} + 1;

            # restart the interval; otherwise every later call would pass the
            # test above and add yet another send
            $entry->{instime} = $now;
        }
    }
    else
    {
        $msgq->{msgs}->{$type} = {instime => $now, subj => $subj, msg => join('', @msg), ntimes => 1};
    }
}

__DATA__

Karen Tian
Powered by
ViewCVS 0.9.4