/usr/bin/slon_watchdog is in slony1-2-bin 2.2.3-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
#
# Author: Christopher Browne
# Copyright 2004-2009 Afilias Canada
use Getopt::Long;
# Defaults.  These are deliberately package globals (no `my`): the rest of
# this script reads them, and the files loaded with `require` below may
# too -- presumably slon-tools.pm consults some of them; verify there.
$CONFIG_FILE      = '/etc/slony1/slon_tools.conf';
$SHOW_USAGE       = 0;
$WATCHDOG_VERBOSE = 1;

# Usage text, printed (via die) for --help and for every argument error.
my $USAGE =
"Usage: slon_watchdog [--config file] node# sleep_seconds
--config file Location of the slon_tools.conf file
sleep_seconds Number of seconds for the watchdog process to sleep
between checks
";

# Read command-line options.  GetOptions returns false when it meets an
# unknown or malformed option; treat that as a usage error rather than
# warning and carrying on with whatever is left in @ARGV.
GetOptions("config=s" => \$CONFIG_FILE,
           "help"     => \$SHOW_USAGE)
    or die $USAGE;

# Exactly two positional arguments are required: node and sleep_seconds.
if ($SHOW_USAGE or scalar(@ARGV) != 2) {
    die $USAGE;
}

# Load the shared helpers and the cluster configuration.  $LOGDIR and
# $CLUSTER_NAME used below are not assigned anywhere in this script, so
# they are expected to come from one of these two files.
require '/usr/share/slony1/slon-tools.pm';
require $CONFIG_FILE;

$node  = $ARGV[0];
$sleep = $ARGV[1];

# Accept the node either as a bare number ("3") or as "node3"; normalise
# to $node = "node3" and $nodenum = 3.
if ($node =~ /^(?:node)?(\d+)$/) {
    $nodenum = $1;
    $node    = "node$nodenum";
} else {
    die $USAGE;
}

# The sleep time must be a positive number.  A non-numeric or zero value
# would make the main loop call sleep() with 0 and spin without pausing
# (rand(0) behaves like rand(1), and sleep truncates to whole seconds).
unless ($sleep =~ /\A\d+(?:\.\d+)?\z/ and $sleep > 0) {
    die $USAGE;
}

# Log destinations: one shared watchdog log, and a per-node error log.
my $logfile     = "$LOGDIR/slon-watchdog.log";
my $logfile_err = "$LOGDIR/slon-$CLUSTER_NAME-$node.err";

log_to_file( $logfile , "Invoking watchdog for $CLUSTER_NAME node $nodenum , sleep time = $sleep +/- " . int($sleep/2) . " seconds");
# Quote a string so /bin/sh sees it as exactly one word, whatever spaces or
# metacharacters it contains: wrap it in single quotes and rewrite each
# embedded single quote as '\''.
my $sh_quote = sub {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
};

# Main watchdog loop; runs until the process is killed.  Each pass asks
# get_pid() for the slon PID of $node.  If there is none, the node is
# restarted through slonik and start_slon(); otherwise the PID is logged
# when $WATCHDOG_VERBOSE is set.  get_pid() and start_slon() are defined
# outside this script (presumably in slon-tools.pm -- confirm there).
while (1) {
    $pid = get_pid($node);
    if (!($pid)) {
        my $message = qq{WATCHDOG: No Slon is running for $CLUSTER_NAME node $nodenum!
You ought to check the postmaster and slon for evidence of a crash!
I'm going to restart slon for $node...
};
        # log to error log as original routine
        log_to_file( $logfile_err , $message );
        # log to watchdog log
        log_to_file( $logfile , "No Slon running for $CLUSTER_NAME node $nodenum, restarting slon");

        # First, restart the node using slonik.  The pipeline needs a shell,
        # so every path interpolated into it is quoted.  $node is not quoted
        # because it is always "node<digits>" by the time the loop runs.
        my $generator = '/usr/bin/slonik_restart_node';
        if ($CONFIG_FILE ne "") {
            $generator .= ' --config=' . $sh_quote->($CONFIG_FILE);
        }
        my $err_log = $sh_quote->($logfile_err);
        my $status  = system "($generator $node | /usr/bin/slonik) >> $err_log 2>> $err_log";
        if ($status != 0) {
            # Record the failure but still try to start slon below, as before.
            log_to_file( $logfile_err , "WATCHDOG: slonik restart for $node failed (wait status $status)" );
        }

        # Next, restart the slon process to service the node
        start_slon($nodenum);
        $pid = get_pid($node);
        if ($pid) {
            log_to_file( $logfile_err , "WATCHDOG: Restarted slon for the $CLUSTER_NAME cluster $node, new PID $pid" );
        } else {
            # Do not claim success with an empty PID; the next pass retries.
            log_to_file( $logfile_err , "WATCHDOG: slon for the $CLUSTER_NAME cluster $node has no PID after restart attempt" );
        }
    } else {
        if ( $WATCHDOG_VERBOSE ) {
            log_to_file( $logfile , "Found slon daemon for $CLUSTER_NAME $node, PID $pid, Sleeping for $sleep +/- " . int($sleep/2) . " seconds" );
        }
    }
    # Sleep for a random time in [$sleep/2, 3*$sleep/2) so the checks do not
    # fall on a fixed period.
    sleep $sleep + (rand($sleep) - $sleep/2);
}
# Append one timestamped line to a log file.
#
#   $logfile - path of the file to append to (created if it does not exist)
#   $message - text to record; one trailing newline, if present, is dropped
#
# The line written is "<output of the date command> | <message>\n".
#
# Returns 1 when the line was written and the file closed cleanly.  On any
# failure it warns on STDERR and returns 0; it never dies, so a logging
# problem cannot stop the caller.
sub log_to_file {
    my ($logfile, $message) = @_;
    chomp $message;

    # Keep using date(1) so the timestamp format of existing logs is unchanged.
    my $date = `date`;
    chomp $date;

    # Three-argument open with a lexical handle: the path is used verbatim
    # (no whitespace stripping, no mode characters read from the name) and
    # the handle cannot collide with a bareword handle used elsewhere.
    my $fh;
    unless (open($fh, '>>', $logfile)) {
        warn "log_to_file: cannot append to $logfile: $!\n";
        return 0;
    }
    print {$fh} $date, " | ", $message, "\n";

    # Buffered write errors (e.g. a full disk) only surface at close.
    unless (close($fh)) {
        warn "log_to_file: error closing $logfile: $!\n";
        return 0;
    }
    return 1;
}
|