/usr/bin/slon_watchdog2 is in slony1-2-bin 2.2.4-3.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#!/usr/bin/perl
#
# Author: Christopher Browne
# Copyright 2004-2009 Afilias Canada
use Getopt::Long;
# Startup: set defaults, parse the command line, load the Slony tool
# library and cluster configuration, validate the arguments, then log the
# invocation and wait one interval before the first health check.

# Defaults.  These are package globals rather than lexicals.
# NOTE(review): presumably the files required below read or override
# them -- confirm against slon-tools.pm and the config file.
$CONFIG_FILE = '/etc/slony1/slon_tools.conf';
$SHOW_USAGE = 0;
$WATCHDOG_VERBOSE = 1;

my $USAGE =
"Usage: slon_watchdog2 [--config file] node# sleep_seconds
--config file Location of the slon_tools.conf file
sleep_seconds Number of seconds for the watchdog process to sleep
between checks
";

# Read command-line options.  GetOptions returns false when it meets an
# option it does not know; treat that as a usage error instead of
# carrying on with a command line that was only partly understood.
GetOptions("config=s" => \$CONFIG_FILE,
           "help"     => \$SHOW_USAGE)
    or die $USAGE;

# Exactly two positional arguments are required: the node and the interval.
if ($SHOW_USAGE or scalar(@ARGV) != 2) {
    die $USAGE;
}

require '/usr/share/slony1/slon-tools.pm';
require $CONFIG_FILE;

$node = $ARGV[0];
$sleep = $ARGV[1];

# Accept either "3" or "node3"; keep both the "nodeN" name and the bare number.
if ($node =~ /^(?:node)?(\d+)$/) {
    $node = "node$1";
    $nodenum = $1;
}
else {
    die $USAGE;
}

# The interval must be a number of at least one second.  A non-numeric or
# zero value would make every sleep return immediately, turning the
# watchdog into a busy loop that kills and restarts slon continuously.
unless ($sleep =~ /^\d+(?:\.\d+)?$/ and $sleep >= 1) {
    die "sleep_seconds must be a number of seconds >= 1\n" . $USAGE;
}

my $logfile = "$LOGDIR/slon-watchdog.log";
log_to_file( $logfile , "Invoking watchdog for $CLUSTER_NAME node $nodenum, sleep time = $sleep +/- " . int($sleep/2) . " seconds");

# When the slon daemon has only just been started it may not have had time
# to begin synchronization, and the watchdog would kill it without mercy.
# So sleep first, to give slony time to do its job.
sleep $sleep;
# Quote a string so it can be embedded as a single word in a /bin/sh
# command line: wrap it in single quotes and escape embedded single quotes.
my $shell_quote = sub {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
};

# Main watchdog loop.  Each pass:
#   1. checks whether the node looks healthy (status query, falling back
#      to the "is subscribing" check),
#   2. checks whether a slon PID is known for the node,
#   3. if there is no PID, submits a "restart node" request through slonik,
#   4. if there is no PID or the node is unhealthy, stops any running slon
#      and starts a new one,
#   5. sleeps for the configured interval plus or minus up to half of it.
while (1) {
    my $status = query_slony_status($nodenum);    # See where the node stands
    my $events_ok;
    if ($status =~ /^\s*t\s*\|/) {
        $events_ok = 1;
        if ($WATCHDOG_VERBOSE) {
            log_to_file( $logfile , "Query_slony_status returns true for $CLUSTER_NAME node $nodenum" );
        }
    }
    else {
        # The status query did not report true; the node still counts as
        # healthy when the subscription check mentions SUBSCRIBE_SET.
        my $subscribing = node_is_subscribing($nodenum);
        $events_ok = ($subscribing =~ /SUBSCRIBE_SET/) ? 1 : 0;
        if ($WATCHDOG_VERBOSE) {
            my $answer = $events_ok ? "YES" : "NO";
            log_to_file( $logfile , "Query_slony_status returns false for $CLUSTER_NAME node $nodenum, node_is_subscribing : $answer" );
        }
    }

    my $pid = get_pid($node);    # See if the slon process is alive

    # Restart when slon is not running, or is running but not healthy.
    my $restart = (!$pid || !$events_ok) ? 1 : 0;

    # Submit a "restart node" command only when no slon PID was found.
    #
    # TODO: this used to depend on the slon log ending with
    # "FATAL localListenThread: Another slon daemon is serving this node
    # already", but that message is no longer generated in that form and
    # there may be a better way to tell whether the node restart is needed.
    # Until then, always kick, as slon_watchdog.pl does.
    my $kick = $pid ? 0 : 1;

    # If the node needs a swift kick in the "RESTART", then submit that to slonik
    if ($kick) {
        log_to_file($logfile,"submit slonik to restart $CLUSTER_NAME node $nodenum");

        # Build the pipeline once.  Paths are shell-quoted so that spaces or
        # shell metacharacters in the config or log path cannot break (or
        # alter) the command; $node is already constrained to "node<digits>".
        my $config_arg = ($CONFIG_FILE ne "")
            ? " --config=" . $shell_quote->($CONFIG_FILE)
            : "";
        my $quoted_log = $shell_quote->($logfile);
        my $command = "(/usr/bin/slonik_restart_node$config_arg $node | /usr/bin/slonik) >> $quoted_log 2>&1";

        # Record a failed restart request instead of ignoring it.
        my $rc = system $command;
        if ($rc == -1) {
            log_to_file($logfile,"could not run slonik restart for $CLUSTER_NAME node $nodenum: $!");
        }
        elsif ($rc != 0) {
            log_to_file($logfile,"slonik restart for $CLUSTER_NAME node $nodenum failed, wait status $rc");
        }
    }

    if ($restart) {
        if ($pid) {
            log_to_file($logfile,"terminate slon daemon for $CLUSTER_NAME node $nodenum, PID $pid");
            # Ask slon to stop: SIGINT first, then SIGTERM.
            kill 'INT', $pid;
            sleep 3;
            kill 'TERM', $pid;
            sleep 3;
            # SIGKILL is deliberately not used: if slon is killed with 9 the
            # pid file isn't deleted and the service doesn't restart.
        }
        log_to_file($logfile,"restart slon for $CLUSTER_NAME node $nodenum");
        start_slon($nodenum);
    }

    # Jittered pause: the interval plus or minus up to half of it.  sleep
    # works in whole seconds, so never let the pause truncate to zero.
    my $pause = int($sleep + (rand($sleep) - $sleep/2));
    $pause = 1 if $pause < 1;
    sleep $pause;
}
# Append one timestamped record to a log file.
#
# Parameters:
#   $logfile - path of the file to append to
#   $message - text to record; one trailing newline, if present, is removed
#
# The record is written as "<timestamp>|<message>\n", where the timestamp
# is the output of the date(1) command (or Perl's localtime string when
# that command produces nothing).
#
# Returns 1 when the record was written and the file closed cleanly, and 0
# otherwise.  Logging is best-effort: a failure produces a warning on
# STDERR rather than terminating the watchdog.
sub log_to_file {
    my ($logfile, $message) = @_;
    chomp $message;

    my $date = `date`;
    $date = scalar localtime unless defined $date and length $date;
    chomp $date;

    # Three-argument open with a lexical handle: the path can never be
    # misread as an open mode, and the handle cannot clash with a global.
    my $log_fh;
    unless (open $log_fh, '>>', $logfile) {
        warn "slon_watchdog2: cannot append to $logfile: $!\n";
        return 0;
    }

    print {$log_fh} $date, "|", $message, "\n";

    # Buffered write errors only surface when the handle is closed.
    unless (close $log_fh) {
        warn "slon_watchdog2: error writing $logfile: $!\n";
        return 0;
    }
    return 1;
}
|