Checking up on stuff

In doing some spring cleaning, I found this old article from years past. Enjoy!

If you work in networking, you probably want to be notified when something crashes, loses its connection or just generally goes T.U.

Enter, Condor NG! Condor NG is a simple shell script used by yours truly to monitor my work network. It’s light, easy and just plain works.

There are only two config files, the Site file (site.cfg) and the Mail file (mail.cfg). site.cfg should have a list of host names to check, one per line. As many as you want. mail.cfg should have the email addresses that you want to send reports to, one per line.

The rest assumes that we are installed into /belfry, with a /belfry/tmp and a /belfry/log directory – change as needed; you can do that under Defines in the script. If you want it to email you (or text your cell phone through your provider's email interface – check with your provider for details if you do not know how), you will need to set the MAILGATE define to the host name of your mail gateway. If not, set MAILGATE to localhost, so it will not think that the entire world has crashed.

Use cron to schedule it (I run it every five minutes).

Update 10/17/2010: Check out the updated code on GitHub!

The script:

#!/bin/sh
#set -x
# Code (c) Greg Nokes under GPL 2 or later
# greg (at) nokes (dot) name
#Pinger 2.0a GNokes 10/7/05. Took original code, cleaned up.
#
#
#3.0.1 10/18/05 added mail.cfg and re-wrote callhome() to use it; also cleaned up code a wee bit
#3.0.2 10/18/05 removed path from config files
#3.0.3 10/18/05 readded path to config files
#
#3.1.1 10/19/05 removed two call homes - program only calls home when host is down, comes up.
#3.2 10/31/05 Need to follow one email directive
#3.3 11/02/05 Need to have email sent only after host down on second cycle; added safety net
#3.3.1 11/03/05 Added MAILGATE, changed all paths to belfry. Moved execution to belfry.
###########
# Defines #
###########
# Binary directory (not referenced by the functions visible in this script).
BASE=/bin/
# Scratch file that the main program redirects each run's stdout into.
TEMP=/belfry/tmp/condor.tmp
# Activity log written by PING, MAILER and MCP.
LOG=/belfry/log/condor.log
# State-change log (dropped / down / up transitions and mail failures).
TRACK=/belfry/log/condordown.log
# Hosts to check: contents of site.cfg, split on whitespace when iterated.
SITE=$(cat /belfry/condorNG/site.cfg)
# Marker file left behind when a report could not be mailed.
TOAST=/belfry/tmp/condor.toast
# Report recipients: contents of mail.cfg, split on whitespace when iterated.
ML=$(cat /belfry/condorNG/mail.cfg)
# Pending report for the current cycle; it only exists when there is news.
RPT=/belfry/tmp/condor.rpt
# Host that must answer ping before mail is attempted.  An empty value would
# leave ping without a target and make every mail attempt fail, so fall back
# to localhost unless a gateway is supplied (here or via the environment).
MAILGATE=${MAILGATE:-localhost}
#############
# Functions #
#############
# All of my functions go here
PING ()
{
  # Ping every host in $SITE and advance a small per-host state machine that
  # is kept as marker files in /tmp:
  #
  #   <host>.ping.yes   host answered on the previous cycle
  #   <host>.ping.drop  host missed one cycle (not reported yet)
  #   <host>.ping.no    host missed two or more cycles (already reported Down)
  #
  # CALLHOME is invoked only on the drop -> no transition ("Down") and on the
  # no -> yes transition ("Up"), so a host that stays down is reported once.
  #
  # Globals read:    SITE, LOG, TRACK
  # Files written:   $LOG, $TRACK, /tmp/<host>.ping.*
  # Output (stdout): one or two "<host>:<status>" progress lines per host

  for host in $SITE
  do
    marker="/tmp/$host.ping"

    if ping -c 3 -w 5 "$host" >/dev/null
    then
      alive=yes
    else
      alive=no
    fi
    # Timestamp is taken after the probe so it reflects when the result was known.
    stamp=$(date)

    if [ "$alive" = no ]
    then
      printf '%s:noping\n' "$host"
      printf 'Host:\tDown\t%s\t%s\n' "$host" "$stamp" >>"$LOG"

      if [ -e "$marker.drop" ]
      then
        # Second consecutive miss: the host is officially down, report it.
        # (The marker tested here must be the same ".drop" name that the
        # first-miss branch below creates.)
        printf '%s:down\n' "$host"
        CALLHOME "$host" Down
        rm -f "$marker"*
        touch "$marker.no"
        printf 'Host:\tDown\t%s\t%s\n' "$host" "$stamp" >>"$TRACK"
      elif [ -e "$marker.no" ]
      then
        # Already reported as down; keep the state so we neither alert again
        # nor lose the "Up" notification when the host returns.
        printf '%s:down\n' "$host"
      else
        # First miss (previous state was "yes" or unknown): remember it, but
        # wait one more cycle before alerting anybody.
        printf '%s:dropped\n' "$host"
        rm -f "$marker"*
        touch "$marker.drop"
        printf 'Host:\tDropped\t%s\t%s\n' "$host" "$stamp" >>"$TRACK"
      fi
    else
      if [ -e "$marker.no" ]
      then
        # Host was reported down and has come back: report the recovery.
        printf '%s:back\n' "$host"
        CALLHOME "$host" Up
        printf 'Host:\tUp\t%s\t%s\n' "$host" "$stamp" >>"$TRACK"
        printf 'Host:\tUp\t%s\t%s\n' "$host" "$stamp" >>"$LOG"
      elif [ -e "$marker.drop" ]
      then
        # Host missed a single cycle only; log the recovery, nobody was alerted.
        printf '%s:back\n' "$host"
        printf 'Host:\tUp\t%s\t%s\n' "$host" "$stamp" >>"$LOG"
        printf 'Host:\tUp\t%s\t%s\n' "$host" "$stamp" >>"$TRACK"
      elif [ -e "$marker.yes" ]
      then
        # Steady state: log a line with an empty status column.
        printf 'Host:\t\t%s\t%s\n' "$host" "$stamp" >>"$LOG"
      fi
      printf '%s:pingcheck\n' "$host"
      # -f: on the very first run there is no marker to remove yet.
      rm -f "$marker"*
      touch "$marker.yes"
    fi
  done
}
 
CALLHOME ()
{
  # Append one "<host> <state>" line to the pending report file.
  #
  # Arguments: $1 - host name
  #            $2 - state word (PING passes "Down" or "Up")
  # Globals:   RPT (appended to; created if it does not exist)
  #
  # The arguments are passed as printf data, not as part of the format
  # string, so '%' or '\' characters in them are written out literally.
  printf '%s %s\n' "$1" "$2" >>"$RPT"
}
 
MAILER ()
{
  # Mail the pending report ($RPT) to every address in $ML, provided the
  # mail gateway answers ping.  If it does not, record the failure in both
  # logs and leave the $TOAST marker so the next cycle knows about it.
  #
  # Globals read:  MAILGATE, ML, RPT, LOG, TRACK, TOAST

  # With no gateway configured ping would be run without a target and fail
  # every time; fall back to localhost so mail is still attempted.
  gateway=${MAILGATE:-localhost}

  # Check to see if we can get to the gateway
  if ping -c 6 -w 10 "$gateway" >/dev/null
  then
    # Yup - proceed: one message per recipient, report on stdin.
    for m in $ML
    do
      mail -s "Condor Report" "$m" <"$RPT"
    done
  else
    # Uh-oh. We have a problem!
    # In a perfect world we would dial out with a modem
    stamp=$(date)
    printf 'ALERT ALERT ALERT ALERT ALERT %s CANNOT SEND MAIL\n' "$stamp" >>"$LOG"
    printf 'ALERT ALERT ALERT ALERT ALERT %s CANNOT SEND MAIL\n' "$stamp" >>"$TRACK"
    # Write a temp file so we can tell next cycle that we had a problem
    touch "$TOAST"
  fi
}
 
MCP ()
{
  # Master Control Program: run one complete monitoring cycle.
  #
  #   1. rotate the scratch file and discard any report left from last cycle
  #   2. ping all hosts (PING adds lines to $RPT through CALLHOME)
  #   3. if the previous cycle could not send mail, say so and clear the flag
  #   4. mail the report if there is anything in it
  #
  # Globals read: TEMP, RPT, LOG, TOAST

  # Initial housekeeping: keep the previous scratch file as /tmp/condor2.tmp.
  # NOTE(review): the main program redirects this function's stdout to $TEMP
  # before calling it, so the output of the current run presumably follows
  # the renamed file - confirm whether that is intended.
  if [ -e "$TEMP" ]
  then
    mv "$TEMP" /tmp/condor2.tmp
  fi
  touch "$TEMP"
  # Start the cycle without a report; -f keeps this quiet when none exists.
  rm -f "$RPT"

  # Write cool line to log
  echo " =========================================================================" >>"$LOG"

  # Run Ping Checks on all target hosts
  PING

  # Did we have a major problem last cycle?
  if [ -e "$TOAST" ]
  then
    printf 'ALERT ALERT ALERT ALERT ALERT %s Main Site BACK!!\n' "$(date)" >>"$RPT"
    # Clear the marker itself; if mail is still unreachable MAILER recreates
    # it.  Without this the BACK message would be repeated on every cycle.
    rm -f "$TOAST"
  fi

  # If we have something to report, then send it off
  if [ -e "$RPT" ]
  then
    MAILER
  fi
}
 
###########
# Program #
###########
 
# Run one monitoring cycle; whatever it prints on stdout is appended to $TEMP.
MCP >>"$TEMP"

Leave a Reply