#!/usr/bin/perl -w
#
# Wildcard plugin to monitor sensor by using ipmitool sensor program.
#
# Contributed by Jun Futagawa
# This script is based on sensors_ plugin.
#
# Usage:
#       ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_fan
#       ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_temp
#       ln -s /usr/share/munin/plugins/ipmitool_sensor_ /etc/munin/plugins/ipmitool_sensor_volt
#
# Requirements:
#       - OpenIPMI tool (ipmitool command)
#
# Note:
#       - Sensor names are read from the output of the ipmitool sensor program.
#
# Add the following to your /etc/munin/plugin-conf.d/munin-node:
#
#       [ipmitool_sensor*]
#       user root
#       timeout 20
#
# If you want to use "ipmitool sdr", add the following:
# Note: When you use this, the threshold provided by the sensor board is not used.
#
#       [ipmitool_sensor*]
#       user root
#       timeout 20
#       env.ipmitool_options sdr
#
# Parameters supported:
#
#       config
#       autoconf
#       suggest
#
# Configurable variables
#
#       ipmitool            - ipmitool command (default: ipmitool)
#       ipmitool_options    - ipmitool command options (default: sensor)
#                             sdr: you can use 'sdr' instead of sensor.
#       cache_file          - cache file
#                             (default: /var/lib/munin/plugin-state/plugin-ipmitool_sensor.cache)
#       cache_expires       - cache expires (default: 275)
#
#       fan_type_regex      - Regular expression for unit of fan (default: RPM)
#       temp_type_regex     - Regular expression for unit of temp (default: degrees C)
#       volt_type_regex     - Regular expression for unit of volt (default: (Volts|Watts|Amps))
#
#       fan_warn_percent    - Percentage over minimum for warning (default: 5)
#       fan_lower_critical  - Preferred lower critical value for fan
#       fan_upper_critical  - Preferred upper critical value for fan
#       temp_lower_critical - Preferred lower critical value for temp
#       temp_lower_warning  - Preferred lower warning value for temp
#       temp_upper_warning  - Preferred upper warning value for temp
#       temp_upper_critical - Preferred upper critical value for temp
#       volt_warn_percent   - Percentage over minimum/under maximum for warning
#                             Narrow the voltage bracket by this. (default: 20)
#
# $Log$
# Revision 1.6  2011/02/07 12:50:00  jfut
# Bug fix: Check temp_upper_warning and temp_upper_critical was not working again.
#
# Revision 1.5  2011/01/28 00:39:00  jfut
# Bug fix: Check temp_upper_warning and temp_upper_critical was not working.
#
# Revision 1.4  2009/02/08 23:51:00  jfut
# Support "ipmitool sdr".
# Add Watts and Amp as voltage unit.
# Add fan_type_regex/temp_type_regex/volt_type_regex as option of sensor type.
#
# Revision 1.3  2008/11/11 13:55:00  jfut
# Add infinity value check for HP ProLiant DL160.
# Add preferred value option for fan and temp.
#
# Revision 1.2  2008/10/28 19:21:22  jfut
# Add file check.
#
# Revision 1.1  2008/10/27 18:52:31  jfut
# Add cache mechanism.
#
# Revision 1.0  2008/10/27 14:25:12  jfut
# Initial release.
#
# Magic markers:
#%# family=manual
#%# capabilities=autoconf suggest

use strict;

# Force the C locale so that the output of the sensor program stays parseable.
$ENV{'LANG'} = "C";
$ENV{'LC_ALL'} = "C";

# Sensor program and its arguments; both can be overridden through the
# plugin environment (ipmitool / ipmitool_options).
my $IPMITOOL = $ENV{'ipmitool'} || 'ipmitool';
my @IPMITOOL_OPTS = ('sensor');
if (exists $ENV{'ipmitool_options'}) {
  @IPMITOOL_OPTS = split(/\s+/, $ENV{'ipmitool_options'});
}

# Location and lifetime (in seconds) of the cached sensor program output.
my $CACHE_DIR = "/var/lib/munin/plugin-state";
my $CACHE_FILE = $ENV{'cache_file'} || "$CACHE_DIR/plugin-ipmitool_sensor.cache";
my $CACHE_EXPIRES = $ENV{'cache_expires'} || 275;

# Per sensor type settings: graph header values and the callback that prints
# the warning/critical limits of a field. The "regex" entry is added below.
my %config = (
  fan => {
    title => 'IPMITool Sensor: Fans',
    vtitle => 'RPM',
    graph_args => '--base 1000 -l 0',
    print_threshold => \&fan_threshold,
  },
  temp => {
    title => 'IPMITool Sensor: Temperatures',
    vtitle => 'Celsius',
    graph_args => '--base 1000 -l 0',
    print_threshold => \&temp_threshold,
  },
  volt => {
    title => 'IPMITool Sensor: Voltages',
    # placeholder, replaced by the detected unit in config mode
    vtitle => '_AUTO_DETECT_FAILED_',
    graph_args => '--base 1000',
    print_threshold => \&volt_threshold,
  },
);

# Unit pattern that selects the lines of each sensor type. The environment
# variable <type>_type_regex replaces the default pattern when it is set.
my %default_type_regex = (
  fan => 'RPM',
  temp => 'degrees C',
  volt => '(Volts|Watts|Amps)',
);
foreach my $type (keys %default_type_regex) {
  my $env_name = "${type}_type_regex";
  my $pattern = exists $ENV{$env_name} ? $ENV{$env_name} : $default_type_regex{$type};
  $config{$type}->{regex} = qr/$pattern/im;
}

# autoconf: report whether the sensor program can be started at all.
if (defined $ARGV[0] and $ARGV[0] eq 'autoconf') {
  # Run the program without arguments while STDERR is closed, so whatever it
  # writes there does not end up in the autoconf answer.
  close(STDERR);
  my $status = system($IPMITOOL);
  open (STDERR, ">&STDOUT");

  # system() returns the raw wait status: 0 is a clean exit and 256 is
  # exit code 1. Anything else is reported as "program not found".
  unless ($status == 0 || $status == 256) {
    print "no (program $IPMITOOL not found)\n";
    exit 1;
  }

  print "yes\n";
  exit 0;
}

# suggest: print every sensor type whose unit pattern occurs in the output of
# the sensor program, one type per line.
if (defined $ARGV[0] and $ARGV[0] eq 'suggest') {
  my $text = get_sensor_data();
  # The lines still carry their trailing newline, so they are concatenated
  # as they are. An undefined result (no data) is treated as an empty list.
  my $alltext = join('', @{$text || []});
  # sorted so that the output order does not depend on the hash order
  foreach my $func (sort keys %config) {
    print $func, "\n" if $alltext =~ $config{$func}->{regex};
  }
  exit;
}

# The sensor type is the part of the plugin (symlink) file name that follows
# "ipmitool_sensor_". Slashes are excluded so that a directory name which
# happens to contain "ipmitool_sensor_" cannot be picked up instead.
my ($func) = $0 =~ m{ipmitool_sensor_([^/]+)$};
# Give up when there is no type or when it is not one of the known types;
# there is no regex and no threshold callback to work with in that case.
exit 2 unless (defined $func and exists $config{$func});

# Raw output lines of the sensor program and the running number that is
# appended to the sensor type to build the munin field names (fan1, fan2, ...).
my $text = get_sensor_data();
my $sensor = 1;

# config: print the graph header followed by label and limits of every field.
if (defined $ARGV[0] and $ARGV[0] eq 'config') {
  # detect the unit of volt: the unit of the first matching line becomes the
  # vertical title of the graph
  if ($func eq 'volt') {
    foreach my $line (@{$text}) {
      if ($line =~ /$config{$func}->{regex}/) {
        my ($label, $value, $unit) = &get_sensor_items($line, $config{$func}->{regex});
        $config{$func}->{vtitle} = $unit;
        last;
      }
    }
    # The loop above only reads $text, so the data fetched before is reused
    # instead of asking for it a second time.
  }

  # print header
  print "graph_title $config{$func}->{title}\n";
  print "graph_vtitle $config{$func}->{vtitle}\n";
  print "graph_args $config{$func}->{graph_args}\n";
  print "graph_category sensors\n";

  # print data: only sensors with a usable reading get a field, so the
  # numbering matches the one used when the values are printed
  foreach my $line (@{$text}) {
    if ($line =~ /$config{$func}->{regex}/) {
      my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr) = &get_sensor_items($line, $config{$func}->{regex});
      if (&is_valid_value($value)) {
        print "$func$sensor.label $label\n";
        $config{$func}->{print_threshold}->($func.$sensor, $lcr, $lnc, $unc, $ucr);
        # env.ignore_<field> (e.g. ignore_fan2) hides the field from the graph
        print "$func$sensor.graph no\n" if exists $ENV{"ignore_$func$sensor"};
        $sensor++;
      }
    }
  }
  exit 0;
}

# Default mode: print the current value of every sensor of the selected type.
# Sensors without a usable reading are skipped and do not consume a number.
my $type_regex = $config{$func}->{regex};
foreach my $line (@{$text}) {
  next unless $line =~ /$type_regex/;

  my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr) = &get_sensor_items($line, $type_regex);
  # for debug
  # print "$func$sensor.value [$label] [$value] [$lcr] [$lnc] [$unc] [$ucr]\n";
  next unless &is_valid_value($value);

  print "$func$sensor.value $value\n";
  $sensor++;
}

# Return the output of the sensor program as a reference to an array of lines
# (each line keeps its trailing newline).
#
# The lines are taken from $CACHE_FILE when that file exists and is not older
# than $CACHE_EXPIRES seconds ($CACHE_EXPIRES == -1 accepts a cache of any age).
# Otherwise $IPMITOOL is run with @IPMITOOL_OPTS and, when $CACHE_DIR is
# writable, its output is stored in $CACHE_FILE.
#
# Returns a reference to an empty array when no line could be read.
# Dies when the cache file cannot be opened, when the fork fails, or (in the
# child process) when the sensor program cannot be executed.
sub get_sensor_data {
  my $text = undef;

  if (-f $CACHE_FILE) {
    my $cache_timestamp = (stat($CACHE_FILE))[9];
    if ($CACHE_EXPIRES == -1 || time - $cache_timestamp <= $CACHE_EXPIRES) {
      open(my $in, "<", $CACHE_FILE) or die "Could not open \"$CACHE_FILE\" for reading\n";
      while (my $line = <$in>) {
        push (@{$text}, $line);
      }
      close($in);
    }
  }

  # No cache, an expired cache and an empty cache all end up here.
  if (! defined $text) {
    my $pid = open(my $exe, '-|');
    # The fork result has to be checked before it is compared with 0: an
    # undefined value is numerically equal to 0 and would send the parent
    # into the child branch.
    die "fork failed: $!" unless defined $pid;

    if ($pid == 0) {
      # Child: STDOUT is the pipe read by the parent. exec only returns on
      # failure; the child must stop then instead of carrying on with the
      # plugin code and writing into the pipe.
      exec($IPMITOOL, @IPMITOOL_OPTS) or die "Could not exec \"$IPMITOOL\": $!\n";
    }

    # Parent: collect everything the sensor program prints.
    while (my $line = <$exe>) {
      push (@{$text}, $line);
    }
    close($exe);

    $text = [] unless defined $text;

    if (-w $CACHE_DIR) {
      open(my $out, ">", $CACHE_FILE) or die "Could not open \"$CACHE_FILE\" for writing\n";
      foreach my $line (@{$text}) {
        print $out "$line";
      }
      close($out);
    }
  }
  return $text;
}

# Split one output line of the sensor program into its fields.
#
# Arguments:
#   $line  - one "|" separated line of "ipmitool sensor" or "ipmitool sdr"
#   $regex - unit pattern of the sensor type; used to separate value and
#            unit in the "sdr" format
#
# Returns the list ($label, $value, $unit, $lcr, $lnc, $unc, $ucr), i.e. the
# name, the reading, the unit and the lower critical, lower non-critical,
# upper non-critical and upper critical thresholds. A column that is missing
# from the line is returned as 'na'.
sub get_sensor_items {
  my ($line, $regex) = @_;
  my @items = split(/\s*\|\s*/, $line);
  my ($label, $value, $unit, $lcr, $lnc, $unc, $ucr)
    = (trim($items[0]), trim($items[1]), trim($items[2]), trim($items[5]), trim($items[6]), trim($items[7]), trim($items[8]));

  # "ipmitool sensor" prints ten columns and needs no further work.
  # "ipmitool sdr" prints three; value and unit share the second one.
  if ($#items == 2) {
     if ($value =~ /$regex/) {
        # Offsets of the whole match; taken before any other regex runs.
        my ($start, $end) = ($-[0], $+[0]);
        # The unit is the first capture group when the pattern has one and
        # the whole match otherwise (the pattern need not contain a group).
        my $matched_unit = defined $1 ? $1 : substr($value, $start, $end - $start);
        $unit = trim($matched_unit);
        # everything in front of the unit is the reading
        $value = trim(substr($value, 0, $start));
     }
  }

  # some boards show data in incorrect order.
  # - HP ProLiant ML110 G5
  # CPU FAN     | 1434.309 | RPM | ok | 5537.099 | 4960.317 | 4859.086 | na | 937.383 | na
  # SYSTEM FAN  | 1506.932 | RPM | ok | 5952.381 | 5668.934 | 5411.255 | na | 937.383 | na
  # - HP ProLiant DL160
  # FAN1 ROTOR1 | 7680.492 | RPM | ok | na | inf | na | na | 1000.400 | na
  #
  # Mirror the four thresholds when the lower critical one is above the upper
  # critical one, or is infinite ...
  if ((&is_valid_value($lcr) && &is_valid_value($ucr) && $lcr > $ucr) || $lcr eq 'inf') {
    ($lcr, $lnc, $unc, $ucr) = ($ucr, $unc, $lnc, $lcr);
  }
  # ... and likewise when the same holds for the non-critical pair.
  if ((&is_valid_value($lnc) && &is_valid_value($unc) && $lnc > $unc) || $lnc eq 'inf') {
    ($lcr, $lnc, $unc, $ucr) = ($ucr, $unc, $lnc, $lcr);
  }
  return ($label, $value, $unit, $lcr, $lnc, $unc, $ucr);
}

# Print the munin warning and critical ranges of one fan field.
#
# Arguments:
#   $name - munin field name the ranges are printed for
#   $lcr  - lower critical threshold reported by the sensor
#   $lnc  - lower non-critical (warning) threshold reported by the sensor
#   $unc  - upper non-critical (warning) threshold reported by the sensor
#   $ucr  - upper critical threshold reported by the sensor
#
# Environment:
#   fan_lower_critical / fan_upper_critical replace the critical thresholds;
#   fan_warn_percent (default 5) is the distance, in percent of the critical
#   threshold, of a warning threshold that the sensor does not provide.
#
# A threshold of 'inf' becomes an open (empty) bound. Any other unusable
# critical threshold falls back to 50 (lower) or 6000 (upper).
# Nothing is printed when all four bounds end up empty.
sub fan_threshold {
  my ($name, $lcr, $lnc, $unc, $ucr) = @_;
  my $warn_percent = exists $ENV{fan_warn_percent} ? $ENV{fan_warn_percent} : 5;

  # lcr: lower critical
  if (exists $ENV{fan_lower_critical}) {
    $lcr = $ENV{fan_lower_critical};
  } elsif (! &is_valid_value($lcr)) {
    if ($lcr eq 'inf') { $lcr = ''; }
    else { $lcr = '50'; }
  }
  # lnc: lower warning, derived from the lower critical value when missing
  if (! &is_valid_value($lnc)) {
    if ($lnc eq 'inf') { $lnc = ''; }
    else { $lnc = ($lcr eq '') ? '' : $lcr * (100 + $warn_percent) / 100; }
  }
  # ucr: upper critical
  if (exists $ENV{fan_upper_critical}) {
    $ucr = $ENV{fan_upper_critical};
  } elsif (! &is_valid_value($ucr)) {
    if ($ucr eq 'inf') { $ucr = ''; }
    else { $ucr = '6000'; }
  }
  # unc: upper warning, derived from the upper critical value when missing
  if (! &is_valid_value($unc)) {
    if ($unc eq 'inf') { $unc = ''; }
    else { $unc = ($ucr eq '') ? '' : $ucr * (100 - $warn_percent) / 100; }
  }

  return unless ($lcr ne '' || $lnc ne '' || $unc ne '' || $ucr ne '');

  # print, not printf: the text contains values taken from the environment
  # and must not be interpreted as a format string.
  print "$name.warning $lnc:$unc\n";
  print "$name.critical $lcr:$ucr\n";
}

# Print the munin warning and critical ranges of one temperature field.
#
# Arguments:
#   $name - munin field name the ranges are printed for
#   $lcr  - lower critical threshold reported by the sensor
#   $lnc  - lower non-critical (warning) threshold reported by the sensor
#   $unc  - upper non-critical (warning) threshold reported by the sensor
#   $ucr  - upper critical threshold reported by the sensor
#
# Environment:
#   temp_lower_critical, temp_lower_warning, temp_upper_warning and
#   temp_upper_critical replace the corresponding threshold.
#
# A threshold of 'inf' becomes an open (empty) bound. Any other unusable
# threshold falls back to 5 / 10 / 65 / 70 (lower critical, lower warning,
# upper warning, upper critical).
# Nothing is printed when all four bounds end up empty.
sub temp_threshold {
  my ($name, $lcr, $lnc, $unc, $ucr) = @_;

  # lcr: lower critical
  if (exists $ENV{temp_lower_critical}) {
    $lcr = $ENV{temp_lower_critical};
  } elsif (! &is_valid_value($lcr)) {
    if ($lcr eq 'inf') { $lcr = ''; }
    else { $lcr = 5; }
  }
  # lnc: lower warning
  if (exists $ENV{temp_lower_warning}) {
    $lnc = $ENV{temp_lower_warning};
  } elsif (! &is_valid_value($lnc)) {
    if ($lnc eq 'inf') { $lnc = ''; }
    else { $lnc = 10; }
  }
  # unc: upper warning
  if (exists $ENV{temp_upper_warning}) {
    $unc = $ENV{temp_upper_warning};
  } elsif (! &is_valid_value($unc)) {
    if ($unc eq 'inf') { $unc = ''; }
    else { $unc = '65'; }
  }
  # ucr: upper critical
  if (exists $ENV{temp_upper_critical}) {
    $ucr = $ENV{temp_upper_critical};
  } elsif (! &is_valid_value($ucr)) {
    if ($ucr eq 'inf') { $ucr = ''; }
    else { $ucr = '70'; }
  }

  return unless ($lcr ne '' || $lnc ne '' || $unc ne '' || $ucr ne '');

  # print, not printf: the text contains values taken from the environment
  # and must not be interpreted as a format string.
  print "$name.warning $lnc:$unc\n";
  print "$name.critical $lcr:$ucr\n";
}

# Print the munin warning and critical ranges of one voltage field.
#
# Arguments:
#   $name - munin field name the ranges are printed for
#   $lcr  - lower critical threshold reported by the sensor
#   $lnc  - lower non-critical (warning) threshold reported by the sensor
#   $unc  - upper non-critical (warning) threshold reported by the sensor
#   $ucr  - upper critical threshold reported by the sensor
#
# Environment:
#   volt_warn_percent (default 20) is the distance, in percent of the
#   critical threshold, of a warning threshold that the sensor does not
#   provide.
#
# An unusable critical threshold becomes an open (empty) bound; there are no
# built-in defaults. Nothing is printed when all four bounds end up empty.
sub volt_threshold {
  my ($name, $lcr, $lnc, $unc, $ucr) = @_;
  my $warn_percent = exists $ENV{volt_warn_percent} ? $ENV{volt_warn_percent} : 20;

  if (! &is_valid_value($lcr)) { $lcr = ''; }
  # a missing warning bound is derived from the critical bound on its side
  if (! &is_valid_value($lnc)) { $lnc = ($lcr eq '') ? '' : $lcr * (100 + $warn_percent) / 100; }
  if (! &is_valid_value($ucr)) { $ucr = ''; }
  if (! &is_valid_value($unc)) { $unc = ($ucr eq '') ? '' : $ucr * (100 - $warn_percent) / 100; }

  return unless ($lcr ne '' || $lnc ne '' || $unc ne '' || $ucr ne '');

  # print, not printf: the text is data and must not be interpreted as a
  # format string.
  print "$name.warning $lnc:$unc\n";
  print "$name.critical $lcr:$ucr\n";
}

# Return the argument without leading and trailing whitespace.
# An undefined argument (a column that does not exist) is returned as 'na'.
sub trim {
  my $value = shift;
  return 'na' unless defined $value;

  $value =~ s/^\s*(.*?)\s*$/$1/;
  return $value;
}

# Tell whether a sensor field holds a usable reading.
#
# Returns 0 for an undefined value and for the placeholders 'na', 'inf'
# and '' (empty string); returns 1 for everything else.
#
# The sub takes one argument, so it is declared without a prototype; an
# empty prototype would declare it as taking no arguments at all.
sub is_valid_value {
  my $value = shift;
  return 0 unless defined $value;
  return ($value eq 'na' || $value eq 'inf' || $value eq '') ? 0 : 1;
}

########################################

=head1 How to test

  cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt
  cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt config
  cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt suggest
  cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt autoconf
  fan_warn_percent=50 fan_lower_critical=100 fan_upper_critical=1000 cache_file=ipmitool_sensor_ \
    cache_expires=-1 ./ipmitool_sensor_fan config
  temp_lower_warning=1 temp_lower_critical=2 temp_upper_critical=71 temp_upper_warning=72 \
    cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_temp config
  volt_warn_percent=50 \
    cache_file=ipmitool_sensor_ cache_expires=-1 ./ipmitool_sensor_volt config

=head1 Test Data

  unr  Upper Non-Recoverable
  ucr  Upper Critical
  unc  Upper Non-Critical
  lnc  Lower Non-Critical
  lcr  Lower Critical
  lnr  Lower Non-Recoverable

=head2 ipmitool sensor

  # HP ProLiant ML110 G5
  CPU FAN          | 1434.309   | RPM        | ok    | 5537.099  | 4960.317  | 4859.086  | na        | 937.383   | na
  SYSTEM FAN       | 1497.454   | RPM        | ok    | 5952.381  | 5668.934  | 5411.255  | na        | 937.383   | na
  System 12V       | 12.152     | Volts      | ok    | na        | na        | na        | na        | na        | na
  System 5V        | 5.078      | Volts      | ok    | na        | na        | na        | na        | na        | na
  System 3.3V      | 3.271      | Volts      | ok    | na        | na        | na        | na        | na        | na
  CPU0 Vcore       | 1.127      | Volts      | ok    | na        | na        | na        | na        | na        | na
  System 1.25V     | 1.254      | Volts      | ok    | na        | na        | na        | na        | na        | na
  System 1.8V      | 1.842      | Volts      | ok    | na        | na        | na        | na        | na        | na
  System 1.2V      | 1.107      | Volts      | ok    | na        | na        | na        | na        | na        | na
  CPU0 Diode       | na         | degrees C  | na    | na        | 20.000    | 25.000    | 85.000    | 90.000    | 95.000
  CPU0 Dmn 0 Temp  | 24.500     | degrees C  | ok    | na        | 0.000     | 0.000     | 97.000    | 100.000   | 100.500
  CPU0 Dmn 1 Temp  | 29.000     | degrees C  | ok    | na        | 0.000     | 0.000     | 97.000    | 100.000   | 100.500
  # HP ProLiant DL160
  FAN1 ROTOR1      | 7680.492   | RPM        | ok    | na        | inf       | na        | na        | 1000.400  | na
  # HP ProLiant DL360 G5
  Fan Block 1      | 34.888     | unspecified | nc    | na        | na        | 75.264    | na        | na        | na
  Fan Block 2      | 29.792     | unspecified | nc    | na        | na        | 75.264    | na        | na        | na
  Fan Block 3      | 37.240     | unspecified | nc    | na        | na        | 75.264    | na        | na        | na
  Fan Blocks       | 0.000      | unspecified | nc    | na        | na        | 0.000     | na        | na        | na
  Temp 1           | 40.000     | degrees C  | ok    | na        | na        | -64.000   | na        | na        | na
  Temp 2           | 21.000     | degrees C  | ok    | na        | na        | -64.000   | na        | na        | na
  Temp 3           | 30.000     | degrees C  | ok    | na        | na        | -64.000   | na        | na        | na
  Temp 4           | 30.000     | degrees C  | ok    | na        | na        | -64.000   | na        | na        | na
  Temp 5           | 28.000     | degrees C  | ok    | na        | na        | -64.000   | na        | na        | na
  Temp 6           | na         | degrees C  | na    | na        | na        | 32.000    | na        | na        | na
  Temp 7           | na         | degrees C  | na    | na        | na        | 32.000    | na        | na        | na
  Power Meter      | 214.000    | Watts      | cr    | na        | na        | 384.000   | na        | na        | na
  Power Meter 2    | 220.000    | watts      | cr    | na        | na        | 384.000   | na        | na        | na

=head2 ipmitool sdr

  # HP ProLiant ML110 G5
  CPU FAN          | 1434.31 RPM       | ok
  SYSTEM FAN       | 1497.45 RPM       | ok
  System 12V       | 12.10 Volts       | ok
  System 5V        | 5.08 Volts        | ok
  System 3.3V      | 3.27 Volts        | ok
  CPU0 Vcore       | 1.14 Volts        | ok
  System 1.25V     | 1.25 Volts        | ok
  System 1.8V      | 1.84 Volts        | ok
  System 1.2V      | 1.11 Volts        | ok
  CPU0 Diode       | disabled          | ns
  CPU0 Dmn 0 Temp  | 23.50 degrees C   | ok
  CPU0 Dmn 1 Temp  | 29 degrees C      | ok
  # HP ProLiant DL360 G5
  Fan Block 1      | 34.89 unspecifi | nc
  Fan Block 2      | 29.79 unspecifi | nc
  Fan Block 3      | 37.24 unspecifi | nc
  Fan Blocks       | 0 unspecified     | nc
  Temp 1           | 41 degrees C      | ok
  Temp 2           | 19 degrees C      | ok
  Temp 3           | 30 degrees C      | ok
  Temp 4           | 30 degrees C      | ok
  Temp 5           | 26 degrees C      | ok
  Temp 6           | disabled          | ns
  Temp 7           | disabled          | ns
  Power Meter      | 208 Watts         | cr
  Power Meter 2    | 210 watts         | cr

=cut

# vim:syntax=perl
