jxp2002 发表于 2019-1-14 09:41:29

centos6.4 x64 Nagios监控平台:监控linux主机的CPU温度

  http://ju.outofmemory.cn/entry/51075
  机房没有温度报警装置,我用此方法实现对机房温度的掌控,如果只有一台报警,则可认为单机故障,如果几台同时报警,则可认为机房空调出现了问题。
  具体实现方法如下:
  环境:被监控机:CentOS 6.4
  1、安装硬件传感器监控软件 sensors
  #yum install lm_sensors*
  2、运行sensors-detect进行传感器检测
  #sensors-detect ##一路回车即可,此步我在虚拟机下报错,但在物理机上没有问题
  3、运行sensors看是否能读取数据,输出类似下面这样则表示正常:
  # sensors
  coretemp-isa-0000
  Adapter: ISA adapter
  Core 0: +32.0°C (high = +76.0°C, crit = +100.0°C)
  Core 1: +32.0°C (high = +76.0°C, crit = +100.0°C)
  4、#vi /usr/local/nagios/libexec/check_cputemp ##粘贴如下#号之间的内容
  ##########################################################
  #!/bin/sh
  #########check_cputemp###########
  # Date   : May 2011
  # Licence: GPLv2
  #
  # INSTALLATION
  # This script requires lm_sensors to be installed.
  # The output of `sensors` needs to look like the format below:
  #########################################
  #coretemp-isa-0000#
  #Adapter: ISA adapter#
  #Core 0: +27°C (high = +85°C)#
  #
  #coretemp-isa-0001#
  #Adapter: ISA adapter#
  #Core 1: +25°C (high = +85°C) #
  #########################################
  # You can use NRPE to define the service in Nagios:
  #   check_nrpe!check_cputemp.sh
  #
  # Plugin return statements (exit codes reported back to the caller).
  # Marked readonly so a later typo cannot silently redefine a state.
  readonly STATE_OK=0
  readonly STATE_WARNING=1
  readonly STATE_CRITICAL=2
  readonly STATE_UNKNOWN=3
  # Print a one-line hint telling the user how to get the full help text.
  # Outputs: one line on stdout.
  # printf is used directly rather than the global $Echo ("echo -e"),
  # because the handling of -e by echo differs between /bin/sh
  # implementations.
  print_help_msg(){
    printf '%s\n' "Usage: $0 -h to get help."
  }
  # Print the complete usage text, including an example invocation.
  # Outputs: six lines on stdout.
  # The strings use plain ASCII double quotes; typographic quotes are not
  # shell quoting characters and would be printed literally.
  print_full_help_msg(){
    printf '%s\n' \
      "Usage:" \
      "$0 [ -v ] -m sensors -w cpuT -c cpuT" \
      "Specify the method to use the temperature data sensors." \
      "And the corresponding Critical value must be greater than Warning value." \
      "Example:" \
      "${0} -m sensors -w 40 -c 50"
  }
  # Print a generic error marker followed by the complete usage text.
  # Outputs: "Error." on stdout, then whatever print_full_help_msg prints.
  print_err_msg(){
    printf '%s\n' "Error."
    print_full_help_msg
  }
  # Append a debug message to the log file when verbose mode is on.
  # Globals:   Debug (read) - logging happens only when it equals "true".
  #            CHECK_CPUTEMP_DEBUG_LOG (read, optional) - log file to use;
  #            defaults to /var/log/check_sys_temperature.log.<pid>.
  # Arguments: the words of the message; they are joined with spaces.
  # The message is written with printf '%s' so that backslashes in the
  # logged data are recorded verbatim instead of being interpreted.
  to_debug(){
    if [ "$Debug" = "true" ]; then
      printf '%s\n' "$*" \
        >> "${CHECK_CPUTEMP_DEBUG_LOG:-/var/log/check_sys_temperature.log.$$}" 2>&1
    fi
  }
  # Remove LANG from the environment
  # (presumably so that tool output is not localized - TODO confirm).
  unset LANG
  # Command used for printing messages. The value must be wrapped in plain
  # ASCII quotes: with typographic quotes the shell would see an assignment
  # followed by a separate command word.
  Echo="echo -e"
  if [ $# -lt 1 ]; then
    # No arguments at all: show the short hint and report UNKNOWN (3).
    print_help_msg
    exit 3
  else
    # The leading ':' selects silent error reporting in getopts, so both
    # unknown options and missing option arguments reach the default arm.
    while getopts :vhm:w:c: OPTION
    do
      case $OPTION
      in
        v)
          # Verbose mode: enables logging in to_debug.
          Debug=true
          ;;
        m)
          method=$OPTARG
          ;;
        w)
          WARNING=$OPTARG
          ;;
        c)
          CRITICAL=$OPTARG
          ;;
        h)
          print_full_help_msg
          exit 3
          ;;
        *)
          $Echo "Error: Illegal Option."
          print_help_msg
          exit 3
          ;;
      esac
    done
    # "sensors" is the only supported data source.
    if [ "$method" = "sensors" ]; then
      use_sensors="true"
      to_debug use_sensors
    else
      $Echo "Error. Must to sepcify the method to use sensors."
      print_full_help_msg
      exit 3
    fi
    to_debug "All Values are \" Warning: $WARNING and Critical: $CRITICAL \"."
  fi
  #########lm_sensors##################
  # Read the CPU temperature through lm_sensors and store the integer
  # average over all "Core N:" lines in TEMP.
  # Globals: use_sensors (read), TEMP (written), sensorsCheckOut (written).
  # Exits with 3 (UNKNOWN) when sensors is missing or yields no core data.
  if [ "$use_sensors" = "true" ]; then
    # command -v is the POSIX way to locate a program; it prints nothing
    # on failure, so an explicit message is emitted instead.
    if ! sensorsCheckOut=$(command -v sensors 2>/dev/null); then
      echo "sensors: command not found."
      echo Maybe you need to check your sensors.
      exit 3
    fi
    to_debug "Use $sensorsCheckOut to check system temperature"
    # A single awk pass handles any number of cores instead of assuming
    # that the readings sit on output lines 3 and 4. The third field looks
    # like "+32.0°C"; everything except digits, '.' and '-' is stripped,
    # the value is truncated to an integer, and the integer mean is printed.
    # Nothing is printed when no "Core N:" line was found.
    TEMP=$(sensors 2>/dev/null | awk '
      /^Core [0-9]+:/ {
        reading = $3
        gsub(/[^0-9.-]/, "", reading)
        if (reading != "") {
          sum += int(reading)
          cores++
        }
      }
      END {
        if (cores > 0) {
          print int(sum / cores)
        }
      }
    ')
    if [ -z "$TEMP" ] ; then
      printf '%s\n' "No Data been get here. Please confirm your ARGS and re-check it with Verbose mode, then to check the log."
      exit 3
    fi
    to_debug "temperature data is $TEMP"
  else
    printf '%s\n' "Error. Must to sepcify the method to use sensors"
    print_full_help_msg
    exit 3
  fi
  ######### Comparison with the warning and critical thresholds given by the user ############
  # Globals: TEMP, WARNING, CRITICAL (read); CPU_TEMP, STATE, STATE_MESSAGE
  #          (written). Exits with the plugin state that was determined.
  CPU_TEMP=$TEMP
  # A threshold of 0 disables the corresponding check; an omitted threshold
  # is treated the same way.
  WARNING=${WARNING:-0}
  CRITICAL=${CRITICAL:-0}
  # Reject anything that is not an integer. Otherwise the numeric tests
  # below fail with an error and the plugin would wrongly report OK.
  for checked_value in "$CPU_TEMP" "$WARNING" "$CRITICAL"; do
    case $checked_value in
      ''|-|*[!0-9-]*|?*-*)
        printf '%s\n' "The TEMPERATURE UNKNOWN - invalid temperature or threshold value: '$checked_value'"
        exit 3
        ;;
    esac
  done
  #if [ "$WARNING" != "0" ] || [ "$CRITICAL" != "0" ]; then
  if [ "$CPU_TEMP" -gt "$CRITICAL" ] && [ "$CRITICAL" != "0" ]; then
    STATE="$STATE_CRITICAL"
    STATE_MESSAGE="CRITICAL"
    to_debug "$STATE , Message is $STATE_MESSAGE"
  elif [ "$CPU_TEMP" -gt "$WARNING" ] && [ "$WARNING" != "0" ]; then
    STATE="$STATE_WARNING"
    STATE_MESSAGE="WARNING"
    to_debug "$STATE , Message is $STATE_MESSAGE"
  else
    STATE="$STATE_OK"
    STATE_MESSAGE="OK"
    to_debug "$STATE , Message is $STATE_MESSAGE"
  fi
  # Single status line on stdout, then exit with the matching state code.
  printf '%s\n' "The TEMPERATURE $STATE_MESSAGE - The CPU's Temperature is $CPU_TEMP ℃ !"
  exit $STATE
  #######################################################

页: [1]
查看完整版本: centos6.4 x64 Nagios监控平台:监控linux主机的CPU温度