#!/bin/sh
# This script pings a group of internal hosts, and if all fail to reply, it will reboot the server.
# This is a workaround due to eth1 lockups on this box.
#
# Copyright (c) 2003 by Paul Kreiner


# How it works: three rounds of pings are made over roughly 45 seconds.  The
# script exits 0 as soon as any one host replies; if every ping in all three
# rounds fails, it mails a notice to $MAILTO and reboots the machine.

# --- Configuration ----------------------------------------------------------

PING=/bin/ping
MAILTO=root

# Internal hosts to probe.  A reply from any single one of them is taken as
# proof that the LAN link is alive.
HOSTS="172.31.0.2 172.31.0.3 172.31.0.4"

# Seconds to sleep before each round of pings.  Each round pings every host
# once with a 1-second timeout (up to 3 sec. per round), which gives this
# schedule:
#   round 1 starts at  0 sec., finished by  3 sec.
#   round 2 starts at 20 sec., finished by 23 sec.
#   round 3 starts at 42 sec., finished by 45 sec.
PAUSES="0 17 19"

# sendmail and shutdown are invoked by name below.  They commonly live in
# sbin directories that a minimal PATH does not include, so append those
# directories; appending cannot shadow anything that was already found.
# NOTE(review): presumably this script is run from cron -- confirm.
PATH=${PATH:-/usr/bin:/bin}:/usr/sbin:/sbin
export PATH

# NOTE: `set -e` is deliberately NOT used.  A failed ping is the normal way
# to fall through to the next host, and a failure to send the notification
# mail must never prevent the reboot.

# --- Helpers ----------------------------------------------------------------

# any_host_alive
#   Pings each host in $HOSTS once, with a 1-second timeout.
#   Globals: PING (read), HOSTS (read)
#   Returns: 0 as soon as one host replies, 1 if none of them did.
any_host_alive() {
  # $HOSTS is intentionally unquoted: it is a whitespace-separated list.
  for host in $HOSTS; do
    if "$PING" -q -n -w1 -c1 "$host" > /dev/null; then
      return 0
    fi
  done
  return 1
}

# --- Connectivity checks ----------------------------------------------------

# Exit the script as soon as any ping succeeds.  After a round in which every
# host failed, something is wrong... so we wait a bit and try again, giving
# the LAN three chances in total before we reboot.
for pause in $PAUSES; do
  if [ "$pause" -gt 0 ]; then
    sleep "$pause"
  fi
  if any_host_alive; then
    exit 0
  fi
done

# --- Notify and reboot ------------------------------------------------------

# (45 sec.) If we fall through to here, then we've lost connectivity with the
# LAN for at least 45 seconds.  Time to reboot.
fqdn=$(hostname -f)

# The here-document is fed straight to sendmail; $fqdn, $MAILTO and $0 are
# expanded by the shell because the delimiter is unquoted.
sendmail "$MAILTO" << EOL
Subject: eth1 failure at $fqdn!
X-Priority: 1
Priority: Urgent
From: root@$fqdn
To: $MAILTO

$0: Failed (three times in 45 seconds) to get ping replies from any of three internal hosts.  Assuming eth1 NIC failure.

REBOOTING NOW!
EOL

# Short pause before going down (presumably so the mail can be handed off --
# confirm), then flush filesystem buffers and reboot.
sleep 5
sync
shutdown -r now

