#!/bin/sh

# Check for netapp snapvault problems
# High lag time, failed State, etc.
# done via ssh, assuming a key is in place, and same userid we're running as
# e.g.
# secureadmin ssh setup
# secureadmin enable ssh2
# useradmin role add monitor -a cli-snapvault,cli-snapmirror
# useradmin group add monitor -r monitor
# useradmin user add nagios -g monitor
# put the nagios key into /etc/sshd/nagios/.ssh/authorized_keys

# - John Sellens

# Use a fixed, minimal PATH so the plugin behaves the same no matter what
# environment the monitoring daemon hands us.
PATH=/usr/local/nagios/libexec:/usr/local/bin:/bin:/usr/bin
export PATH

myname=$(basename "$0")

# Scratch file for the raw `snapvault status` output.  It is created with
# mktemp rather than a predictable /tmp/<name>.<pid> path so that another
# local user cannot pre-create the file (or a symlink) and have us write
# through it.
tmpout=$(mktemp "${TMPDIR:-/tmp}/$myname.out.XXXXXX") || {
    echo "$myname: cannot create temporary file"
    exit 3
}

# Single quotes: the path is expanded (and quoted) when the trap runs, so a
# temp directory containing spaces is still removed correctly.
trap 'rm -f "$tmpout"' EXIT

# Exactly one argument is accepted: the filer to query.
usage="$myname hostname"

case $# in
    1)
        host="$1"
        ;;
    *)
        # Report on stdout only; also writing to stderr was too much noise.
        # echo 1>&2 "$msg"
        msg="$myname: missing hostname: Usage: $usage"
        echo "$msg"
        exit 3
        ;;
esac

# Fetch the short-form status listing from the filer into the scratch file.
# The -l (long) output would also carry error messages, but it would be
# somewhat harder to parse, so the short form should do for now.
# Caveat: when people put spaces in source volumes, that can mix up the
# columns quite a bit.
rm -f "$tmpout"
ssh "$host" snapvault status > "$tmpout" || {
    echo "$myname: 'ssh $host snapvault status' failed"
    exit 3
}


# parse_snapvault_status
#   Read `snapvault status` output on stdin and print exactly one
#   Nagios-style summary line on stdout.
#   Returns awk's exit code: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN.
#
# The sed stage glues multi-word Status values (for example
# "Transferring (12 MB done)") into a single token.  awk then addresses the
# columns from the right-hand end of the line -- $NF is Status, $(NF-1) is
# Lag, $(NF-2) is State, $(NF-3) is Destination -- which tolerates spaces
# in source volume names.  We assume that no local netapp volumes have
# funny characters.
parse_snapvault_status() {
    sed \
        -e 's/Pending with restart checkpoint.*/Pending_with_restart_checkpoint/' \
        -e 's/  *MB  *done) *$/_MB_done)/' \
        -e 's/  *GB  *done) *$/_GB_done)/' \
        -e 's/Quiescing  *(/Quiescing_(/' \
        -e 's/Transferring  *(/Transferring_(/' \
    | awk '
BEGIN { msg = ""; }

# Blank lines carry no data.  Skipping them first also keeps the rules
# below from evaluating $(NF-2) with NF == 0, a negative field index that
# gawk treats as a fatal error.
NF == 0 { next; }

# Remember that the filer said something at all (checked in END).
{ seen = 1; }

# Global banner: ON is the normal case ...
/Snapvault.* is ON/ { next; }
# ... any other banner (some other status like OFF) is critical.
/^Snapvault/ {
    msg = msg $0 "; ";
    critical = 1;
    next;
}

# Column header line.
$1=="Source" && $2=="Destination" { next; }

# The rules below index fields down to $(NF-3), so a relationship line
# needs at least four fields; report anything shorter instead of crashing.
NF < 4 {
    msg = msg "Unparseable line: " $0 "; ";
    unknown = 1;
    next;
}

$(NF-2) != "Snapvaulted" {
    msg = msg "Unexpected state " $(NF-2) " for " $1 " to " $(NF-3) "; ";
    unknown = 1;
}

$NF != "Idle" && $NF !~ /^Transferring.*/ && $NF !~ /^Quiescing.*/ {
    msg = msg "Unexpected status " $NF " for " $1 " to " $(NF-3) "; ";
    unknown = 1;
}

{
    # Lag is printed as HOURS:MM:SS; keep only the hours part.
    laghours = $(NF-1);
    sub( /:.*$/, "", laghours );
    # Force a number.  Left as a string, the comparisons below would be
    # done character by character, so "100" would sort below "72" and a
    # lag of 100 hours or more would go unreported.
    laghours = laghours + 0;
    if ( laghours > 72 ) {
        msg = msg "Lag > 72 hours " $(NF-1) " for " $1 " to " $(NF-3) "; ";
        critical = 1;
    } else if ( laghours > 36 ) {
        msg = msg "Lag > 36 hours " $(NF-1) " for " $1 " to " $(NF-3) "; ";
        warning = 1;
    }
}

END {
    # No non-blank input at all means we learned nothing about the filer;
    # that must not be reported as OK.
    if ( ! seen ) {
        msg = msg "No output from snapvault status";
        unknown = 1;
    }
    if ( msg == "" ) {
        msg = "Snapvault status OK";
    }
    # Severity precedence: critical, then warning, then unknown.
    if ( critical ) {
        msg = "CRITICAL: " msg;
        errcode = 2;
    } else if ( warning ) {
        msg = "WARNING: " msg;
        errcode = 1;
    } else if ( unknown ) {
        msg = "UNKNOWN: " msg;
        errcode = 3;
    } else {
        msg = "OK: " msg;
        errcode = 0;
    }
    print msg;
    exit errcode;
}
'
}

# We expect awk to print one line.
parse_snapvault_status < "$tmpout"

# Hand the status of the check above (0 OK, 1 WARNING, 2 CRITICAL,
# 3 UNKNOWN, as chosen in the awk END block) back to the caller.
exit $?
