monitoring-plugins/plugins/check_ide-smart.c
2003-03-11 22:22:12 +00:00

440 lines
10 KiB
C

/*
* check_ide-smart v.1 - hacked version of ide-smart for Nagios
* Copyright (C) 2000 Robert Dale <rdale@digital-mission.com>
*
* Net Saint - http://www.nagios.org
*
* Notes:
* ide-smart has the same functionality as before. Some return
* values were changed, otherwise the --net-saint option was added.
*
* Run with: check_ide-smart --net-saint [-d] <DRIVE>
* Where DRIVE is an IDE drive, e.g. /dev/hda, /dev/hdb, /dev/hdc
*
* - Returns 0 on no errors
* - Returns 1 on advisories
* - Returns 2 on prefailure
* - Returns -1 not too often
*
* ide-smart 1.3 - IDE S.M.A.R.T. checking tool
* Copyright (C) 1998-1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>
* 1998 Gadi Oxman <gadio@netvision.net.il>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <getopt.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/hdreg.h>
/* Number of attribute slots in the values and thresholds tables below. */
#define NR_ATTRIBUTES 30
#ifndef TRUE
#define TRUE 1
#endif /* TRUE */
/*
 * Drive health codes.  net_saint() returns PREFAILURE, ADVISORY or
 * OPERATIONAL; they match the "Returns 2 / 1 / 0" list in the file header.
 * UNKNOWN is not referenced by name in this file (net_saint() uses a
 * literal -1 for its fallback case).
 */
#define PREFAILURE 2
#define ADVISORY 1
#define OPERATIONAL 0
#define UNKNOWN -1
/*
 * One per-attribute entry of the threshold table.
 * Packed: 1 + 1 + 10 = 12 bytes.
 */
typedef struct threshold_s
{
__u8 id;		/* attribute id; 0 is treated as "slot unused" by the checks */
__u8 threshold;		/* an attribute value <= this counts as failed */
__u8 reserved[10];	/* not read anywhere in this file */
}
__attribute__ ((packed)) threshold_t;
/*
 * Complete threshold block as filled in by smart_read_thresholds().
 * Packed: 2 + 30 * 12 + 18 + 131 + 1 = 512 bytes, which is exactly the
 * amount that function copies into it.
 */
typedef struct thresholds_s
{
__u16 revision;				/* printed as "SmartRevision" */
threshold_t thresholds[NR_ATTRIBUTES];	/* slot i pairs with values_t.values[i] */
__u8 reserved[18];
__u8 vendor[131];
__u8 checksum;				/* printed only, never verified here */
}
__attribute__ ((packed)) thresholds_t;
/*
 * One per-attribute entry of the values table.
 * Packed: 1 + 2 + 1 + 8 = 12 bytes.
 */
typedef struct value_s
{
__u8 id;		/* attribute id; 0 is treated as "slot unused" by the checks */
__u16 status;		/* bit 0 set: pre-failure attribute (else advisory);
			   bit 1 set: reported as "OnLine" by print_value() */
__u8 value;		/* current value, compared against threshold_t.threshold */
__u8 vendor[8];		/* not read anywhere in this file */
}
__attribute__ ((packed)) value_t;
/*
 * Complete values block as filled in by smart_read_values().
 * Packed: 2 + 30 * 12 + 1 + 1 + 2 + 1 + 1 + 2 + 16 + 125 + 1 = 512 bytes,
 * which is exactly the amount that function copies into it.
 */
typedef struct values_s
{
__u16 revision;			/* printed as "SmartRevision" */
value_t values[NR_ATTRIBUTES];	/* slot i pairs with thresholds_t.thresholds[i] */
__u8 offline_status;		/* low 7 bits: code looked up by get_offline_text();
				   bit 7: printed as "AutoOffLine" yes/no */
__u8 vendor1;
__u16 offline_timeout;		/* print_values() divides by 60 and labels it minutes */
__u8 vendor2;
__u8 offline_capability;	/* bit 0 Immediate, bit 1 Auto,
				   bit 2 AbortOnCmd (clear: SuspendOnCmd) */
__u16 smart_capability;		/* bit 0 SaveOnStandBy, bit 1 AutoSave */
__u8 reserved[16];
__u8 vendor[125];
__u8 checksum;			/* printed only, never verified here */
}
__attribute__ ((packed)) values_t;
/*
 * Display names for the off-line status codes, searched linearly by
 * get_offline_text().  The final entry (null text) terminates the table.
 */
struct
{
  __u8 value;
  char *text;
}
offline_status_text[] = {
  { .value = 0x00, .text = "NeverStarted" },
  { .value = 0x02, .text = "Completed" },
  { .value = 0x04, .text = "Suspended" },
  { .value = 0x05, .text = "Aborted" },
  { .value = 0x06, .text = "Failed" },
  { .value = 0, .text = NULL }	/* end-of-table sentinel */
};
struct
{
__u8 value;
char *text;
}
smart_command[] =
{
{
SMART_ENABLE, "SMART_ENABLE"}
, {
SMART_DISABLE, "SMART_DISABLE"}
, {
SMART_IMMEDIATE_OFFLINE, "SMART_IMMEDIATE_OFFLINE"}
, {
SMART_AUTO_OFFLINE, "SMART_AUTO_OFFLINE"}
, };
/* Index to smart_command table, keep in order */
enum SmartCommand
{ SMART_CMD_ENABLE, SMART_CMD_DISABLE, SMART_CMD_IMMEDIATE_OFFLINE,
SMART_CMD_AUTO_OFFLINE
};
/*
 * Translate an off-line status code into its display name.
 *
 * status: code to look up in offline_status_text[].
 * Returns the matching table string, or "unknown" when no entry matches.
 * The returned pointer refers to static storage and must not be freed.
 */
char *
get_offline_text (int status)
{
  int idx = 0;

  /* Walk the table until the null-text sentinel entry. */
  while (offline_status_text[idx].text != NULL) {
    if (status == offline_status_text[idx].value)
      return offline_status_text[idx].text;
    idx++;
  }
  return "unknown";
}
/*
 * Read the drive's SMART values block through HDIO_DRIVE_CMD.
 *
 * fd:     open descriptor for the drive.
 * values: filled with 512 bytes of returned data on success.
 * Returns 0 on success; on failure prints a "Critical:" line and returns
 * the errno value left by ioctl().
 */
int
smart_read_values (int fd, values_t * values)
{
  /* Four command bytes followed by room for 512 bytes of returned data. */
  __u8 args[4 + 512] = { WIN_SMART, 0, SMART_READ_VALUES, 1, };
  int saved_errno;

  if (ioctl (fd, HDIO_DRIVE_CMD, &args) == 0) {
    memcpy (values, args + 4, 512);
    return 0;
  }

  saved_errno = errno;
  printf ("Critical: SMART_READ_VALUES: %s\n", strerror (saved_errno));
  return saved_errno;
}
/*
 * Count the attributes that are at or below their threshold.
 *
 * p: values block; t: thresholds block.  Slot i of one is compared with
 * slot i of the other, and only slots whose ids are non-zero and equal
 * take part.
 * Returns 2 when no attribute passed at all, otherwise the negated
 * number of failed attributes (0 when everything passed).
 */
int
values_not_passed (values_t * p, thresholds_t * t)
{
  int n_failed = 0;
  int n_passed = 0;
  int slot;

  for (slot = 0; slot < NR_ATTRIBUTES; slot++) {
    const value_t *v = &p->values[slot];
    const threshold_t *th = &t->thresholds[slot];

    if (v->id == 0 || th->id == 0 || v->id != th->id)
      continue;			/* unused or unpaired slot */

    if (v->value > th->threshold)
      n_passed++;
    else
      n_failed++;
  }

  if (n_passed == 0)
    return 2;
  return -n_failed;
}
/*
 * Evaluate every attribute against its threshold and print a one-line
 * summary for the monitoring system.
 *
 * p: values block; t: thresholds block.  Slot i of one is compared with
 * slot i of the other, and only slots whose ids are non-zero and equal
 * take part.  An attribute fails when its value is <= its threshold;
 * bit 0 of its status word marks it as pre-failure, otherwise advisory.
 *
 * Returns PREFAILURE if any pre-failure attribute failed, else ADVISORY
 * if any advisory attribute failed, else OPERATIONAL.
 */
int
net_saint (values_t * p, thresholds_t * t)
{
  int prefailure = 0;
  int advisory = 0;
  int failed = 0;
  int passed = 0;
  int total = 0;
  int i;

  for (i = 0; i < NR_ATTRIBUTES; i++) {
    const value_t *value = &p->values[i];
    const threshold_t *threshold = &t->thresholds[i];

    if (!value->id || !threshold->id || value->id != threshold->id)
      continue;			/* unused or unpaired slot */

    ++total;
    if (value->value > threshold->threshold) {
      ++passed;
      continue;
    }
    ++failed;
    if (value->status & 1)
      ++prefailure;
    else
      ++advisory;
  }

  /*
   * Derive the result from the counters once the scan is complete, so the
   * most severe condition always wins.  Setting it inside the loop let a
   * later advisory failure overwrite an earlier pre-failure.
   */
  if (prefailure > 0) {
    printf ("Critical: %d Harddrive PreFailure%s Detected! "
	    "%d/%d tests failed.\n", prefailure, prefailure > 1 ? "s" : "",
	    failed, total);
    return PREFAILURE;
  }
  if (advisory > 0) {
    printf ("Warning: %d Harddrive Advisor%s Detected. "
	    "%d/%d tests failed.\n", advisory, advisory > 1 ? "ies" : "y",
	    failed, total);
    return ADVISORY;
  }
  printf ("Status: Operational (%d/%d tests passed)\n", passed, total);
  return OPERATIONAL;
}
/*
 * Print one attribute together with its threshold.
 *
 * p: the attribute value entry; t: the threshold entry paired with it.
 * Bit 0 of the status word selects PreFailure/Advisory, bit 1 selects
 * OnLine/OffLine; the attribute is "Passed" only when its value is
 * strictly above the threshold.
 */
void
print_value (value_t * p, threshold_t * t)
{
  const char *kind = (p->status & 1) ? "PreFailure" : "Advisory ";
  const char *mode = (p->status & 2) ? "OnLine " : "OffLine";
  const char *verdict = (p->value > t->threshold) ? "Passed" : "Failed";

  printf ("Id=%3d, Status=%2d {%s , %s}, Value=%3d, Threshold=%3d, %s\n",
	  p->id, p->status, kind, mode, p->value, t->threshold, verdict);
}
/*
 * Print the full human-readable report: one line per attribute that has
 * a matching threshold, followed by the off-line status, off-line
 * capability and SMART capability summary lines.
 *
 * p: values block; t: thresholds block.  Slot i of one is compared with
 * slot i of the other, and only slots whose ids are non-zero and equal
 * are printed.
 */
void
print_values (values_t * p, thresholds_t * t)
{
  int i;

  /*
   * Index both tables by slot number.  Advancing the cursors only after a
   * match made the scan stall on the first empty or mismatched slot, so
   * no attribute after it was ever printed.
   */
  for (i = 0; i < NR_ATTRIBUTES; i++) {
    value_t *value = &p->values[i];
    threshold_t *threshold = &t->thresholds[i];

    if (value->id && threshold->id && value->id == threshold->id) {
      print_value (value, threshold);
    }
  }

  /* Low 7 bits hold the status code, bit 7 the auto off-line flag. */
  printf
    ("OffLineStatus=%d {%s}, AutoOffLine=%s, OffLineTimeout=%d minutes\n",
     p->offline_status, get_offline_text (p->offline_status & 0x7f),
     (p->offline_status & 0x80 ? "Yes" : "No"), p->offline_timeout / 60);
  printf ("OffLineCapability=%d {%s %s %s}\n", p->offline_capability,
	  p->offline_capability & 1 ? "Immediate" : "",
	  p->offline_capability & 2 ? "Auto" : "",
	  p->offline_capability & 4 ? "AbortOnCmd" : "SuspendOnCmd");
  printf ("SmartRevision=%d, CheckSum=%d, SmartCapability=%d {%s %s}\n",
	  p->revision, p->checksum, p->smart_capability,
	  p->smart_capability & 1 ? "SaveOnStandBy" : "",
	  p->smart_capability & 2 ? "AutoSave" : "");
}
/*
 * Print the threshold table: a blank line, the revision, one line for
 * every slot with a non-zero id, then the checksum byte.
 *
 * p: thresholds block to print.
 */
void
print_thresholds (thresholds_t * p)
{
  int slot;

  printf ("\n");
  printf ("SmartRevision=%d\n", p->revision);

  for (slot = 0; slot < NR_ATTRIBUTES; slot++) {
    const threshold_t *entry = &p->thresholds[slot];

    if (entry->id == 0)
      continue;			/* unused slot */
    printf ("Id=%3d, Threshold=%3d\n", entry->id, entry->threshold);
  }

  printf ("CheckSum=%d\n", p->checksum);
}
/*
 * Issue a SMART sub-command that returns no data block.
 *
 * fd:         open descriptor for the drive.
 * command:    index into smart_command[] choosing the sub-command.
 * val0:       value placed in the second command byte.
 * show_error: when non-zero, print a "Critical:" line on failure.
 * Returns 0 on success, otherwise the errno value left by ioctl().
 */
int
smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0,
		  char show_error)
{
  __u8 args[4] = { WIN_SMART, val0, smart_command[command].value, 0 };
  int err;

  if (ioctl (fd, HDIO_DRIVE_CMD, &args) == 0)
    return 0;

  err = errno;
  if (show_error)
    printf ("Critical: %s: %s\n", smart_command[command].text,
	    strerror (err));
  return err;
}
/*
 * Read the drive's SMART thresholds block through HDIO_DRIVE_CMD.
 *
 * fd:         open descriptor for the drive.
 * thresholds: filled with 512 bytes of returned data on success.
 * Returns 0 on success; on failure prints a "Critical:" line and returns
 * the errno value left by ioctl().
 */
int
smart_read_thresholds (int fd, thresholds_t * thresholds)
{
  /* Four command bytes followed by room for 512 bytes of returned data. */
  __u8 args[4 + 512] = { WIN_SMART, 0, SMART_READ_THRESHOLDS, 1, };
  int saved_errno;

  if (ioctl (fd, HDIO_DRIVE_CMD, &args) == 0) {
    memcpy (thresholds, args + 4, 512);
    return 0;
  }

  saved_errno = errno;
  printf ("Critical: SMART_READ_THRESHOLDS: %s\n", strerror (saved_errno));
  return saved_errno;
}
/* Write the three-line version and credits banner to standard output. */
void
show_version ()
{
  static const char *const banner[] = {
    "check_ide-smart v.1 - FREE Software with NO WARRANTY\n",
    "Nagios feature - Robert Dale <rdale@digital-mission.com>\n",
    "(C) 1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>\n"
  };
  size_t line;

  for (line = 0; line < sizeof banner / sizeof banner[0]; line++)
    fputs (banner[line], stdout);
}
/* Write the usage summary and the option list to standard output. */
void
show_help ()
{
  static const char usage[] =
    "Usage: check_ide-smart [DEVICE] [OPTION]\n"
    " -d, --device=DEVICE Select device DEVICE\n"
    " -i, --immediate Perform immediately offline tests\n"
    " -q, --quiet-check Returns the number of failed tests\n"
    " -1, --auto-on Turn on automatic offline tests\n"
    " -0, --auto-off Turn off automatic offline tests\n"
    " -n, --net-saint Output suitable for Net Saint\n"
    " -h, --help\n"
    " -V, --version\n";

  fputs (usage, stdout);
}
/*
 * Entry point: parse the command line, open the drive, enable SMART and
 * run the selected action.
 *
 * The device comes from -d/--device or from the first non-option
 * argument; the non-option argument wins when both are given.  Option
 * parsing stops at the first non-option argument (leading '+' in the
 * option string).
 *
 * Exit status:
 *   0   after --help / --version, or when the action reports no problem
 *   1   unrecognized option
 *   -1  no device given
 *   2   the device could not be opened, SMART could not be enabled, or
 *       the values/thresholds could not be read
 *   otherwise the value returned by the selected action:
 *   smart_cmd_simple(), values_not_passed() or net_saint().
 */
int
main (int argc, char *argv[])
{
  /* Actions selectable on the command line. */
  enum
  {
    ACT_REPORT = -1,		/* default: print the full report */
    ACT_AUTO_OFF = 0,
    ACT_AUTO_ON = 1,
    ACT_IMMEDIATE = 2,
    ACT_QUIET_CHECK = 3,
    ACT_NET_SAINT = 4
  };
  enum { RET_CRITICAL = 2 };

  const struct option longopts[] = {
    {"device", required_argument, 0, 'd'},
    {"immediate", no_argument, 0, 'i'},
    {"quiet-check", no_argument, 0, 'q'},
    {"auto-on", no_argument, 0, '1'},
    {"auto-off", no_argument, 0, '0'},
    {"net-saint", no_argument, 0, 'n'},
    {"help", no_argument, 0, 'h'},
    {"version", no_argument, 0, 'V'},
    {0, 0, 0, 0}
  };
  thresholds_t thresholds;
  values_t values;
  char *device = NULL;
  int command = ACT_REPORT;
  int retval = 0;
  int fd;
  int o;

  /*
   * Consume every option before doing any work, so the result does not
   * depend on option order and a bare "check_ide-smart DEVICE" still
   * reaches the report below.
   */
  while ((o = getopt_long (argc, argv, "+d:iq10nhV", longopts, NULL)) != -1) {
    switch (o) {
    case 'd':
      device = optarg;
      break;
    case 'q':
      command = ACT_QUIET_CHECK;
      break;
    case 'i':
      command = ACT_IMMEDIATE;
      break;
    case '1':
      command = ACT_AUTO_ON;
      break;
    case '0':
      command = ACT_AUTO_OFF;
      break;
    case 'n':
      command = ACT_NET_SAINT;
      break;
    case 'h':
      show_help ();
      return 0;
    case 'V':
      show_version ();
      return 0;
    default:
      printf ("Try `%s --help' for more information.\n", argv[0]);
      return 1;
    }
  }

  if (optind < argc) {
    device = argv[optind];
  }
  if (!device) {
    show_help ();
    show_version ();
    return -1;
  }

  fd = open (device, O_RDONLY);
  if (fd < 0) {
    printf ("Critical: Couldn't open device: %s\n", strerror (errno));
    return RET_CRITICAL;
  }

  if (smart_cmd_simple (fd, SMART_CMD_ENABLE, 0, TRUE)) {
    printf ("Critical: SMART_CMD_ENABLE\n");
    close (fd);			/* do not leak the descriptor on this path */
    return RET_CRITICAL;
  }

  switch (command) {
  case ACT_AUTO_OFF:
    retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0, TRUE);
    break;
  case ACT_AUTO_ON:
    retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0xF8, TRUE);
    break;
  case ACT_IMMEDIATE:
    retval = smart_cmd_simple (fd, SMART_CMD_IMMEDIATE_OFFLINE, 0, TRUE);
    break;
  default:
    /*
     * The remaining actions all need both tables.  Each reader prints its
     * own "Critical:" line on failure; stop there instead of evaluating
     * buffers that were never filled in.
     */
    if (smart_read_values (fd, &values)
	|| smart_read_thresholds (fd, &thresholds)) {
      retval = RET_CRITICAL;
      break;
    }
    if (command == ACT_QUIET_CHECK) {
      retval = values_not_passed (&values, &thresholds);
    }
    else if (command == ACT_NET_SAINT) {
      retval = net_saint (&values, &thresholds);
    }
    else {
      print_values (&values, &thresholds);
    }
    break;
  }

  close (fd);
  return retval;
}