淑昊柠 发表于 2019-1-16 08:24:01

nagios监控服务器的搭建(借鉴总结)

  下载软件wget
  http://prdownloads.sourceforge.net/sourceforge/nagios/nagios-3.2.1.tar.gz
  http://prdownloads.sourceforge.net/sourceforge/nagiosplug/nagios-plugins-1.4.14.tar.gz
  http://prdownloads.sourceforge.net/sourceforge/nagios/nrpe-2.12.tar.gz
  安装环境搭建
  yum -y install gcc glibc glibc-common gd gd-devel
  安装apache+php rpm或者tar包都可以
  创建用户
  groupadd nagios
  useradd -g nagios nagios
  groupadd nagcmd
  usermod -a -G nagcmd nagios
  usermod -a -G nagcmd apache (tar包apache用户是daemon)
  安装nagios主程序
  tar zxvf nagios-3.2.1.tar.gz
  cd nagios-3.2.1
  ./configure --with-command-group=nagcmd \
  > --with-nagios-user=nagios \
  > --with-nagios-group=nagios
  make all
  make install
  make install-init #生成init 启动脚本
  make install-commandmode #设置相应的目录权限
  make install-config #生成模板配置文件
  make install-webconf #生成apache 配置文件(如果apache是tar包安装的,需要手动创建/etc/httpd/conf.d目录,
  在里面建nagios.conf,然后把里面产生的数据写到tar包apache的主配置文档httpd.conf最后边)
  htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin #为apache 创建一个登陆用户,注意用户名是nagiosadmin
  注意:第一次添加用户用-c 选项,以后再添加千万别再用这个选项了,会覆盖以前的所有用户。
  添加开机启动
  chkconfig --add nagios
  chkconfig nagios on
  chkconfig httpd on
  安装nagios插件nagios-plugins
  # cd /usr/local/src
  # tar zxvf nagios-plugins-1.4.14.tar.gz
  # cd nagios-plugins-1.4.14
  # ./configure --prefix=/usr/local/nagios \
  --with-nagios-user=nagios --with-nagios-group=nagios
  # make && make install
  启动nagios
  service httpd start
  service nagios start
  web查看
  ip/nagios/ 如果无显示后面加index.php
  添加需要的配置文件
  vi /usr/local/nagios/etc/nagios.cfg
  cfg_file=/usr/local/nagios/etc/objects/commands.cfg #命令配置文件
  cfg_file=/usr/local/nagios/etc/objects/contacts.cfg #联系人配置文件
  cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
  cfg_file=/usr/local/nagios/etc/objects/templates.cfg
  cfg_file=/usr/local/nagios/etc/objects/contactgroups.cfg #联系组定义文件
  cfg_file=/usr/local/nagios/etc/objects/hosts.cfg #增加主机配置文件
  cfg_file=/usr/local/nagios/etc/objects/hostgroups.cfg #增加主机组配置文件
  cfg_file=/usr/local/nagios/etc/objects/services.cfg #增加服务配置文件
  cfg_file=/usr/local/nagios/etc/objects/servicegroups.cfg #增加服务组配置文件
  # Definitions for monitoring the local (Linux) host
  #cfg_file=/usr/local/nagios/etc/objects/localhost.cfg #注释掉
  主机定义文件
  # vi /usr/local/nagios/etc/objects/hosts.cfg
  define host{
  host_name Nagios-Server #设置主机的名字,该名字会出现在hostgroups.cfg 和services.cfg 中。
  alias Nagios Server #别名
  address 192.168.140.128 #主机的IP 地址
  check_command check-host-alive #检查的命令
  check_interval 5 #检测的时间间隔
  retry_interval 1 #检测失败后重试的时间间隔
  max_check_attempts 5 #最大重试次数
  check_period 24x7 #检测的时段
  process_perf_data 0
  retain_nonstatus_information 0
  contact_groups sagroup #联系组
  notification_interval 30 #通知的时间间隔
  notification_period 24x7 #通知的时间段
  notification_options d,u,r #通知的选项
  #主机通知选项:d—宕机(down),u—不可达(unreachable),r—从异常情况恢复正常
  #服务通知选项:w—报警(warning),u—未知(unknown),c—严重(critical),r—从异常情况恢复正常
  }
  define host{
  host_name Nagios-Client
  alias Nagios Client
  address 192.168.140.129
  check_command check-host-alive
  check_interval 5
  retry_interval 1
  max_check_attempts 5
  check_period 24x7
  process_perf_data 0
  retain_nonstatus_information 0
  contact_groups sagroup
  notification_interval 30
  notification_period 24x7
  notification_options d,u,r
  }
  主机组文件
  # vi /usr/local/nagios/etc/objects/hostgroups.cfg
  define hostgroup {
  hostgroup_name Nagios-Example #主机组名字
  alias Nagios Example #主机组别名
  members Nagios-Server,Nagios-Client #主机组成员,用逗号隔开
  }
  服务定义文件
  vi /usr/local/nagios/etc/objects/services.cfg
  define service {
  host_name Nagios-Server #主机名
  service_description check-host-alive #服务描述
  check_period 24x7 #检测的时间段
  max_check_attempts 4
  normal_check_interval 3
  retry_check_interval 2
  contact_groups sagroup
  notification_interval 10
  notification_period 24x7
  notification_options w,u,c,r
  check_command check-host-alive #调用的命令
  }
  define service {
  host_name Nagios-Client
  service_description check-host-alive
  check_period 24x7
  max_check_attempts 4
  normal_check_interval 3
  retry_check_interval 2
  contact_groups sagroup
  notification_interval 10
  notification_period 24x7
  notification_options w,u,c,r
  check_command check-host-alive
  }
  服务组定义文件
  vi /usr/local/nagios/etc/objects/servicegroups.cfg
  define servicegroup{
  servicegroup_name Host-Alive
  alias Host Alive
  members Nagios-Server,check-host-alive,Nagios-Client,check-host-alive
  }
  联系人定义文件
  vi /usr/local/nagios/etc/objects/contacts.cfg
  define contact{
  contact_name nagiosadmin
  use generic-contact
  alias System Administrator
  email nagios@localhost
  }
  定义联系组
  # vi /usr/local/nagios/etc/objects/contactgroups.cfg
  define contactgroup{
  contactgroup_name sagroup
  alias Nagios Administrators
  members nagiosadmin
  }
  检查错误
  /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
  监控远程服务器
  服务器端
  安装NRPE
  # cd /usr/local/src/
  # tar zxvf nrpe-2.12.tar.gz
  # cd nrpe-2.12
  # ./configure && make all
  # make install-plugin
  # make install-daemon
  # make install-daemon-config
  # make install-xinetd
  配置NRPE
  # vi /etc/xinetd.d/nrpe
  # default: on
  # description: NRPE (Nagios Remote Plugin Executor)
  service nrpe
  {
  flags = REUSE
  socket_type = stream
  port = 5666
  wait = no
  user = nagios
  group = nagios
  server = /usr/local/nagios/bin/nrpe
  server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
  log_on_failure += USERID
  disable = no
  only_from = 127.0.0.1 192.168.140.129 #添加要监控主机的ip
  }
  vim /etc/services
  nrpe 5666/tcp #nrpe
  /etc/init.d/xinetd restart 重启
  netstat -na | grep 5666 检测是否启动
  修改命令定义文件
  vim /usr/local/nagios/etc/objects/commands.cfg
  define command{
  command_name check_nrpe #用于远程监控的命令
  command_line /usr/local/nagios/libexec/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
  }
  受监控服务器配置
  useradd -s /sbin/nologin nagios
  安装nagios-plugin 与nrpe
  安装步骤同上
  配置nrpe
  vi /etc/xinetd.d/nrpe
  only_from = 127.0.0.1 192.168.140.128 #加上nagios服务器ip
  添加服务端口
  vim /etc/services
  nrpe 5666/tcp #nrpe
  重启
  /etc/init.d/xinetd restart
  查看
  netstat -na | grep 5666
  chown -R nagios:nagios /usr/local/nagios/
  修改受控端nrpe配置文档
  # vi /usr/local/nagios/etc/nrpe.cfg
  command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
  command[check_/]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/mapper/VolGroup00-LogVol00
  #我的Client 端根分区是LVM 逻辑卷 如果是普通的就是-p /dev/sda1
  command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
  command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
  command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
  4.4 修改Nagios 服务器服务定义文件
  # vi /usr/local/nagios/etc/objects/services.cfg
  添加服务
  define service {
  host_name Nagios-Client
  service_description check-users
  check_period 24x7
  max_check_attempts 4
  normal_check_interval 3
  retry_check_interval 2
  contact_groups sagroup
  notification_interval 10
  notification_period 24x7
  notification_options w,u,c,r
  check_command check_nrpe!check_users
  }
  define service {
  host_name Nagios-Client
  service_description check-load
  check_period 24x7
  max_check_attempts 4
  normal_check_interval 3
  retry_check_interval 2
  contact_groups sagroup
  notification_interval 10
  notification_period 24x7
  notification_options w,u,c,r
  check_command check_nrpe!check_load
  }
  define service {
  host_name Nagios-Client
  service_description check-/
  check_period 24x7
  max_check_attempts 4
  normal_check_interval 3
  retry_check_interval 2
  contact_groups sagroup
  notification_interval 10
  notification_period 24x7
  notification_options w,u,c,r
  check_command check_nrpe!check_/
  }

  然后重启nagios: /etc/init.d/nagios restart
  监控内存
  /usr/local/nagios/libexec 添加插件
  check_mem.sh 插件
  #!/bin/bash
  # check_mem.sh - Nagios plugin that checks real memory usage.
  #
  # Reports the percentage of physical memory that is free (total - used, as
  # printed on the "Mem:" row of `free -m`) and maps it to a Nagios state.
  #
  # Usage:  check_mem.sh -w <warning %free> -c <critical %free>
  #   -w|--warning   WARNING when free memory is at or below this percentage
  #   -c|--critical  CRITICAL when free memory is at or below this percentage
  #   The warning threshold must be strictly greater than the critical one.
  #
  # Exit codes: 0 OK, 1 WARNING, 2 CRITICAL,
  #             3 UNKNOWN (bad arguments or unreadable memory statistics).
  #
  # L.Gill 02/05/06 - V.1.0
  # ------------------------------------------
  # ########Script Modifications##########
  # ------------------------------------------
  # Who         When         What
  # ---         ----         ----
  # LGill       17/05/06     "$percent" lt 1% fix - sed edits dc result beginning with "."
  #
  # The percentage is computed with integer shell arithmetic instead of dc;
  # the result is the same truncated whole number, without the fixed-name
  # scratch files in /tmp that the dc version needed.

  PROGNAME=$(basename "$0")
  USAGE="$PROGNAME [-w|--warning] [-c|--critical]"
  THRESHOLD_USAGE="WARNING threshold must be greater than CRITICAL: $PROGNAME $*"
  critical=""
  warning=""
  STATE_OK=0
  STATE_WARNING=1
  STATE_CRITICAL=2
  STATE_UNKNOWN=3

  # print usage (an argument error must not be reported to Nagios as OK)
  if [[ $# -lt 4 ]]
  then
      echo "Wrong Syntax: $PROGNAME $*"
      echo "Usage: $USAGE"
      exit $STATE_UNKNOWN
  fi

  # read input
  while [[ $# -gt 0 ]]
  do
      case "$1" in
          -w|--warning)
              shift
              warning=$1
              ;;
          -c|--critical)
              shift
              critical=$1
              ;;
      esac
      shift
  done

  # verify input: both thresholds must be whole numbers
  if [[ ! $warning =~ ^[0-9]+$ || ! $critical =~ ^[0-9]+$ ]]
  then
      echo "Wrong Syntax: thresholds must be whole numbers"
      echo "Usage: $USAGE"
      exit $STATE_UNKNOWN
  fi
  # force base 10 so a value such as "08" is not parsed as invalid octal
  warning=$((10#$warning))
  critical=$((10#$critical))
  if [[ $warning -le $critical ]]
  then
      echo "$THRESHOLD_USAGE"
      echo "Usage: $USAGE"
      exit $STATE_UNKNOWN
  fi

  # Total and used memory in MB, read in a single pass from the second
  # line of `free -m` (the "Mem:" row)
  read -r total used < <(free -m | awk 'NR==2 {print $2, $3}')
  if [[ ! $total =~ ^[0-9]+$ || ! $used =~ ^[0-9]+$ || $total -eq 0 ]]
  then
      echo "UNKNOWN - unable to read memory statistics from free"
      exit $STATE_UNKNOWN
  fi

  # Calc total minus used
  free=$(( total - used ))
  # percent free = (free mem * 100) / total mem, truncated to a whole number
  percent=$(( free * 100 / total ))

  # Most severe state first: critical, then warning, otherwise OK
  if [[ $percent -le $critical ]]
  then
      echo "CRITICAL - $free MB ($percent%) Free Memory"
      exit $STATE_CRITICAL
  fi
  if [[ $percent -le $warning ]]
  then
      echo "WARNING - $free MB ($percent%) Free Memory"
      exit $STATE_WARNING
  fi
  echo "OK - $free MB ($percent%) Free Memory"
  exit $STATE_OK
  给予执行权限
  nrpe里添加命令
  command[check_mem]=/usr/local/nagios/libexec/check_mem.sh -w 10 -c 5
  做短信与邮件报警
  修改contacts.cfg添加联系人邮箱
  define contact{
  contact_name nagiosadmin
  use             generic-contact
  alias         System Administrator
  email         15910613577@139.com 575926838@qq.com
  }
  用第三方邮件sendEmail报警
  修改 commands.cfg在原有报警机制上更改
  # 'notify-host-by-email' command definition
  define command{
  command_name    notify-host-by-email
  command_line    /usr/bin/printf "%b" "***** Nagios-BJ *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/local/bin/sendEmail -f fanhb***@sina.com -t $CONTACTEMAIL$ -s smtp.sina.com -u "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" -xu fanhb***@sina.com -xp ******* -l /var/log/sendEmail.log
  }
  # 'notify-service-by-email' command definition
  define command{
  command_name    notify-service-by-email
  command_line    /usr/bin/printf "%b" "***** Nagios-BJ *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTNAME$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /usr/local/bin/sendEmail -f fanhb***@sina.com -t $CONTACTEMAIL$ -s smtp.sina.com -u "** $NOTIFICATIONTYPE$ Service Alert: $HOSTNAME$/$SERVICEDESC$ is $SERVICESTATE$ **" -xu fanhb***@sina.com -xp ******* -l /var/log/sendEmail.log
  }
  安装sendEmail

  wget http://caspian.dotconf.net/menu/Software/SendEmail/sendEmail-v1.55.tar.gz
  tar -zxvf sendEmail-v1.55.tar.gz
  cd sendEmail-v1.55
  cp sendEmail /usr/local/bin
  chmod 0755 /usr/local/bin/sendEmail
  解释:
  -f 表示发送者的邮箱
  -t 表示接收者的邮箱
  -s 表示SMTP服务器的域名或者ip
  -u 表示邮件的主题
  -xu 表示SMTP验证的用户名
  -xp 表示SMTP验证的密码(注意,这个密码貌似有限制,例如我用d!5neyland就不能被正确识别)
  -m 表示邮件的内容
  剩下的就是139邮箱手机短信通知的设置了略
  先这么多,想起再写

页: [1]
查看完整版本: nagios监控服务器的搭建(借鉴总结)