blovekyo 发表于 2019-1-14 09:54:02

nagios CPU温度检测插件

  http://hi.baidu.com/icanwen/item/e6ed673c3f79e7302f20c403
  最近公司机房的空调经常自动关闭,导致机房温度过高并引起设备故障,因此想到用 nagios 检测 CPU 温度:当温度超过设定的上限时,自动发送短信(SMS)到手机。已在 Dell R200 上测试通过。本文参照了网上很多高手的资料,在此谢过!
  nagios 主程序的服务定制设置如下:
  define service{
  use                           generic-service         ; Name of service template to use
  host_name                     fileserver
  service_description             cpu temperature
  check_command                   check_nrpe!check_cpu_temp
  }
  被监控主机服务定制如下:
  vi /usr/local/nagios/etc/nrpe.cfg
  #解释:用sensors检测cpu的温度,当cpu温度超过35度时报警,超过40度时处于紧急状态
  command[check_cpu_temp]=/usr/local/nagios/libexec/check_cputemp.sh -m sensors -w 35 -c 40
  不可以上传附件 插件代码直接贴到这里了:)
  #!/bin/sh
  ######################check_cputemp#######################
  #version: 1.0
  #Author : xiaoyong wen
  #date : May 2011
  #Licence GPLv2
  #INSTALLATION
  #This script requires lm_sensors to be installed.
  #The output of `sensors` must match the format below:
  #########################################
  #coretemp-isa-0000   #
  #Adapter: ISA adapter   #
  #Core 0:      +27°C(high =   +85°C) #
  #
  #coretemp-isa-0001   #
  #Adapter: ISA adapter   #
  #Core 1:      +25°C(high =   +85°C) #
  #########################################
  #you can use NRPE to define service in nagios
  #check_nrpe!check_cputemp.sh
  ######my taobao shop http://ujjj.taobao.com######just a AD :)####
  # Exit statuses this plugin hands back to its caller:
  # 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.
  STATE_OK=0 STATE_WARNING=1
  STATE_CRITICAL=2 STATE_UNKNOWN=3
  # Print the one-line hint that points the user at the -h option.
  # Uses the global $Echo as the command that prints a line.
  print_help_msg(){
    $Echo "Usage: ${0} -h to get help."
  }
  # Print the complete usage text, one line per $Echo call.
  # The lines are loaded into the function's own positional parameters and
  # emitted in order, so no extra variable is created.
  print_full_help_msg(){
    set -- \
      "Usage:" \
      "$0 [ -v ] -m sensors -w cpuT -c cpuT" \
      "Sepicify the method to use the temperature data sensors." \
      "And the corresponding Critical value must greater than Warning value." \
      "Example:" \
      "${0} -m sensors -w 50 -c 55"
    while [ $# -gt 0 ]; do
      $Echo "$1"
      shift
    done
  }
  # Print a generic "Error." line and then the complete usage text.
  # The return status is that of print_full_help_msg.
  print_err_msg(){
    $Echo "Error."; print_full_help_msg
  }
  # Append all arguments (joined via "$*") as one line to the log file
  # /var/log/check_sys_temperature.log.<pid> when the global Debug equals
  # "true"; otherwise do nothing and return 0.
  to_debug(){
    [ "$Debug" = "true" ] || return 0
    $Echo "$*" >> /var/log/check_sys_temperature.log.$$ 2>&1
  }
  # ---- Environment setup and command-line parsing ----
  # Options: -v verbose (debug log), -h full help, -m METHOD (only "sensors"),
  #          -w WARNING threshold, -c CRITICAL threshold.
  # Every usage problem ends the plugin with exit status 3 (UNKNOWN).
  unset LANG
  # Command used by all message helpers to print one line with backslash
  # escapes expanded. "printf %b" replaces "echo -e", which is not POSIX:
  # a /bin/sh such as dash prints the "-e" literally in front of the message.
  Echo='printf %b\n'
  if [ $# -lt 1 ]; then
    print_help_msg
    exit 3
  fi
  while getopts :vhm:w:c: OPTION; do
    case $OPTION in
      v)
        Debug=true
        ;;
      m)
        method=$OPTARG
        ;;
      w)
        WARNING=$OPTARG
        ;;
      c)
        CRITICAL=$OPTARG
        ;;
      h)
        print_full_help_msg
        exit 3
        ;;
      :)
        # Leading ":" in the optstring makes getopts report a missing
        # option argument as ":" with the option letter in OPTARG.
        $Echo "Error: Option -$OPTARG requires an argument."
        print_help_msg
        exit 3
        ;;
      *)
        $Echo "Error: Illegal Option."
        print_help_msg
        exit 3
        ;;
    esac
  done
  if [ "$method" = "sensors" ]; then
    use_sensors="true"
    to_debug use_sensors
  else
    $Echo "Error. Must to sepcify the method to use sensors."
    print_full_help_msg
    exit 3
  fi
  # Both thresholds are later compared with "[ -gt ]". A missing or
  # non-numeric value would make that test fail and the plugin would report
  # OK, so reject it here. "0" stays valid: it disables the threshold.
  for threshold in "$WARNING" "$CRITICAL"; do
    case ${threshold#-} in
      ''|*[!0-9]*)
        $Echo "Error. Both -w and -c must be given as integers (0 disables a threshold)."
        print_full_help_msg
        exit 3
        ;;
    esac
  done
  to_debug All Valuesare \" Warning: "$WARNING" and Critical: "$CRITICAL" \".
  #########lm_sensors##################
  # Read `sensors` output on stdin and print the mean of every "Core N:"
  # temperature, truncated to an integer. Prints nothing when no core line
  # is present. Works for any number of cores and for decimal readings.
  average_core_temp(){
    LC_ALL=C awk '
      /^Core [0-9]+:/ {
        reading = substr($0, index($0, ":") + 1)
        # The first number after the colon is the current temperature;
        # any later numbers on the line are the configured limits.
        if (match(reading, /[+-]?[0-9]+(\.[0-9]+)?/)) {
          total += substr(reading, RSTART, RLENGTH) + 0
          cores++
        }
      }
      END { if (cores > 0) printf "%d\n", int(total / cores) }
    '
  }
  # Measure the CPU temperature into TEMP, or exit 3 (UNKNOWN) when the
  # sensors program is missing or yields no core reading.
  if [ "$use_sensors" = "true" ]; then
    # command -v is the POSIX way to locate a program; `which` may be absent.
    sensorsCheckOut=$(command -v sensors)
    if [ $? -ne 0 ]; then
      echo "sensors command not found. Maybe you need to check your sensors."
      exit 3
    fi
    to_debug "Use $sensorsCheckOut to check system temperature"
    TEMP=$(sensors | average_core_temp)
    if [ -z "$TEMP" ]; then
      $Echo "No Data been get here. Please confirm your ARGS and re-check it with Verbose mode, then to check the log."
      exit 3
    fi
    to_debug "temperature data is $TEMP"
  else
    $Echo "Error. Must to sepcify the method to use sensors"
    print_full_help_msg
    exit 3
  fi
  ######### Compare the temperature with the user's warning/critical thresholds ############
  # Start from OK and escalate. The critical threshold is tested first, then
  # the warning one; a threshold equal to "0" never triggers its level.
  CPU_TEMP=$TEMP
  STATE="$STATE_OK"
  STATE_MESSAGE="OK"
  if [ "$CPU_TEMP" -gt "$CRITICAL" ] && [ "$CRITICAL" != "0" ]; then
    STATE="$STATE_CRITICAL"
    STATE_MESSAGE="CRITICAL"
  elif [ "$CPU_TEMP" -gt "$WARNING" ] && [ "$WARNING" != "0" ]; then
    STATE="$STATE_WARNING"
    STATE_MESSAGE="WARNING"
  fi
  to_debug $STATE , Message is $STATE_MESSAGE
  # Single status line on stdout, then exit with the matching plugin status.
  echo "The TEMPERATURE $STATE_MESSAGE - The CPU's Temperature is $CPU_TEMP degree"
  exit $STATE

页: [1]
查看完整版本: nagios CPU温度检测插件