CentOS 6.5+Nagios4.0.2+Msmtp+Mutt邮箱报警

xuanhao 发表于 2019-1-14 09:49:13

　　环境：

　　Nagios Server CentOS 6.5
　　IP：192.168.0.58
　　Nagios Client CentOS 6.5
　　IP：192.168.0.83
　　另注意：
　　Nagios需要PHP支持，安装Apache+PHP步骤略....
　　------------------------------------------------------
　　——安装Nagios
　　1）下载Nagios
# cd /Linux
# wget http://jaist.dl.sourceforge.net/project/nagios/nagios-4.x/nagios-4.0.2/nagios-4.0.2.tar.gz
　　2）编译安装
# tar fzvx nagios-4.0.2.tar.gz
# cd nagios-4.0.2
# useradd -M -s /sbin/nologin nagios
# mkdir /usr/local/nagios
# chown -R nagios:nagios /usr/local/nagios/
# ./configure --prefix=/usr/local/nagios/
# make all
# make install
# make install-init
# make install-commandmode
# make install-config
# chkconfig --add nagios
# chkconfig --level 35 nagios on
# chkconfig --list nagios
nagios          0:关闭  1:关闭  2:关闭  3:启用  4:关闭  5:启用  6:关闭
# service nagios restart　　3）服务端安装Nagios插件plugins
# tar zfvx nagios-plugins-1.5.tar.gz
# cd nagios-plugins-1.5
# ./configure --prefix=/usr/local/nagios/
# make && make install　　----------------------------------------------------------------------------
　　4）服务端安装nrpe
# tar zfvx nrpe-2.15.tar.gz
# cd nrpe-2.15
# ./configure
# make all
# make install-plugin　　——Apache配置修改
　　1）使apache支持nagios
# vi /usr/local/apache-2.4/conf/httpd.conf
ScriptAlias /nagios/cgi-bin "/usr/local/nagios/sbin"
<Directory "/usr/local/nagios/sbin">
AuthType Basic
Options ExecCGI
AllowOverride None
Order allow,deny
Allow from all
AuthName "Nagios Access"
AuthUserFile /usr/local/nagios/etc/htpasswd
Require valid-user
</Directory>
Alias /nagios "/usr/local/nagios/share"
<Directory "/usr/local/nagios/share">
AuthType Basic
Options None
AllowOverride None
Order allow,deny
Allow from all
AuthName "nagios Access"
AuthUserFile /usr/local/nagios/etc/htpasswd
Require valid-user
</Directory>
　　2）添加nagios登陆用户
# /usr/local/apache-2.4/bin/htpasswd -c /usr/local/nagios/etc/htpasswd nagios
New password:
Re-type new password:
Adding password for user nagios　　3）修改apache的用户和用户组
# vi /usr/local/apache-2.4/conf/httpd.conf
User nagios
Group nagios　　--------------------------------------------------------------------------
　　4）解决Nagios乱码问题
# vi /usr/local/apache-2.4/conf/httpd.conf
LoadModule cgid_module modules/mod_cgid.so
LoadModule actions_module modules/mod_actions.so
# service httpd restart　　5）测试访问
http://s3.运维网.com/wyfs02/M00/11/A2/wKioL1LXl87wpR_dAAJfuBut6kA717.jpg
　　——客户端配置
　　1）客户端安装nagios-plugins

# cd /Linux
# useradd -M -s /sbin/nologin nagios
# tar zfvx nagios-plugins-1.5.tar.gz
# cd nagios-plugins-1.5
# ./configure --prefix=/usr/local/nagios/
# make && make install　　2）客户端安装nrpe

# cd ..
# tar zfvx nrpe-2.15.tar.gz
# cd nrpe-2.15
# ./configure
# make all
# make install-plugin
# make install-daemon
# make install-daemon-config
# vi /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=127.0.0.1,192.168.0.58　　3）启动nrpe
# /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
# echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >> /etc/rc.local
　　4）检查是否启动
# netstat -antup |grep 5666
tcp    0    0 0.0.0.0:5666             0.0.0.0:*                LISTEN    48456/nrpe
tcp    0    0 :::5666                   :::*                      LISTEN    48456/nrpe
　　5）在服务端检查 [如果返回NRPE版本即成功，否则请检查配置]
# /usr/local/nagios/libexec/check_nrpe -H 192.168.0.83
NRPE v2.15　　=========================================================================================
　　——配置nagios
　　# cd /usr/local/nagios/etc/
　　1）编辑nagios.cfg
# vi nagios.cfg
注释下面的内容
#cfg_file=/usr/local/nagios/etc/objects/commands.cfg
#cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
#cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
#cfg_file=/usr/local/nagios/etc/objects/templates.cfg
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
#cfg_file=/usr/local/nagios/etc/objects/windows.cfg
#cfg_file=/usr/local/nagios/etc/objects/switch.cfg
#添加监控主机目录
cfg_dir=/usr/local/nagios/etc/objects/hosts
#添加模板目录
cfg_dir=/usr/local/nagios/etc/objects/templates　　2）创建相关目录
# mkdir objects/templates
# mkdir objects/hosts
　　3）复制所需文件
# cp objects/commands.cfg objects/templates/
# cp objects/timeperiods.cfg objects/templates/　　4）创建自定义模板
# vi objects/templates/templates.cfg
#联系人模板
define contact{
#联系人名称
name                         tao
#当服务出现异常时发送通知的时间段
service_notification_period 24x7
#当主机出现异常时发送通知的时间段
host_notification_period    24x7
#w 表示警告 u 表示不明 c 表示紧急 r 表示恢复
service_notification_options w,u,c,r
#d 表示宕机 u 表示不可达 r 表示重新恢复
host_notification_options    d,u,r
#服务故障时发送通知的方式[邮件]
service_notification_commands notify-service-by-email
#主机故障时发送通知的方式[邮件]
host_notification_commands    notify-host-by-email
register                      0
}
#主机模板
define host{
#定义主机名
name                         linux-host
#其值为1 Nagios将收集的数据写入某个文件中以备提取
process_perf_data             1
passive_checks_enabled       0
#主机与服务的刷新检测 0为关闭
check_freshness             0
#事件处理开启
event_handler_enabled       1
#打开抖动感知
flap_detection_enabled       1
active_checks_enabled       1
#发送通知的时间段
notification_period          24x7
#检查主机的时间段
check_period                24x7
#对主机的检查时间间隔/分钟
check_interval                5
#如果宕机重试检查时间/分钟
retry_interval                1
#宕机后对主机的最大检查次数
max_check_attempts                2
#检查主机状态指令[在commands.cfg中定义]
check_command                check-host-alive
#在主机出现异常后故障一直没有解决再次发送通知的时间/分钟
notification_interval       5
#d 表示宕机 u 表示不可达 r 表示重新恢复
notification_options          d,u,r
#1为开启报警信息
notifications_enabled       0
#指定联系人组[在contacts.cfg中定义]
contact_groups                admins
#处理性能数据
#一天刷新检测一次，以防止检测结果不是实时的
freshness_threshold          86400
#0为不注册，意思是这个作为模板
register                      0
}
#服务模板
define service{
#定义一个服务名称
name                         linux-service
#启用主动服务检查
active_checks_enabled       1
#启用被动服务检查
passive_checks_enabled       0
#主机与服务的刷新检测
check_freshness             0
#开启报警信息
notifications_enabled       1
#重新发送报警信息的间隔
notification_interval       5
#事件处理开启
event_handler_enabled       1
#启用抖动感知
flap_detection_enabled       1
#处理性能数据
process_perf_data             1
#发送通知的时间段
notification_period          24x7
#检查时间段
check_period                24x7
#如果宕机最大检查次数
max_check_attempts          5
#检查服务时间间隔/分钟
normal_check_interval       1
#如果宕机再次检查的间隔时间/分钟
retry_check_interval          1
#一天刷新检测一次，以防止检测结果不是实时的
freshness_threshold          86400
#0为不注册，意思是这个作为模板
register                      0
}
define service{
name                                  test
#1为开启报警信息
notifications_enabled                0
#重新发送报警信息的间隔
notification_interval                5
notification_options                w,u,c,
#事件处理开启
event_handler_enabled                1
#打开抖动感知
flap_detection_enabled                1
#发送通知的时间段
notification_period                   24x7
#检查时间段
check_period                         24x7
#每一次检测的间隔时间
check_interval                      1
#如果宕机再次检查的间隔时间
retry_interval                      1
#检测次数,如果2次都是宕机的话就报警
max_check_attempts                   2
active_checks_enabled                1
passive_checks_enabled                0
#处理性能数据
process_perf_data                   1
#主机与服务的刷新检测 0为关闭
check_freshness                      0
#一天刷新检测一次，以防止检测结果不是实时的
freshness_threshold                   86400
#0为不注册，意思是这个作为模板
register                            0
}
# vi objects/templates/contacts.cfg
#联系人
define contact{
#用户名称
contact_name                nagios
#引用模板
use                         tao
#别名
alias                         Nagios Admin
#发送邮箱
email                         xxxxxxxx@qq.com
}
#联系人组
define contactgroup{
#组名称
contactgroup_name    admins
#组别名
alias                Nagios Administrators
#组用户成员
members             nagios
}　　5）添加监控主机
　　最好使用被监控机IP为名称、比如:被监控服务器为192.168.0.83 [后期服务器多的时候、容易维护]
　　这里为了省事、一台服务器监控多种服务。大家测试的时候、可以添加多个服务器监控不同服务。
# vi objects/hosts/192.168.0.83.cfg
#监控主机
define host {
host_name                         192.168.0.83
address                         192.168.0.83
use                               linux-host
}
#监控SSH
define service{
service_description             check_ssh
check_command                   check_ssh!-p 22
host_name                         192.168.0.83
use                               linux-service
register                         1
}
#监控登陆用户
define service{
service_description             check_users
check_command                   check_nrpe!check_users
host_name                         192.168.0.83
use                               linux-service
register                         1
}
#监控mysql [需要在被监控配置、下面有配置说明]
define service{
service_description             check_mysql
check_command                   check_mysql
host_name                         192.168.0.83
use                               linux-service
register                         1
}
#监控交换空间
define service{
service_description             check_swap
check_command                   check_nrpe!check_swap
host_name                         192.168.0.83
use                               linux-service
register                         1
}
#监控磁盘
define service{
service_description             check_disk
check_command                   check_nrpe!check_disk
host_name                         192.168.0.83
use                               linux-service
register                         1
}
#监控HTTP
define service{
service_description             check_http
check_command                   check_http!-p 80
host_name                         192.168.0.83
use                               linux-service
register                         1
}
#监控负载
define service{
service_description             check_load
check_command                   check_nrpe!check_load
host_name                         192.168.0.83
use                               linux-service
register                         1
}　　6）重启启动nagios、登陆WEB查看
　　网上很多人说配置后，执行命令检查是否有错误。个人感觉完全没必要、因为启动的时候Nagios默认会自动检查配置、如果有错误Nagios会提示。
# service nagios restart　　——测试
　　1）在客户端登陆nagios
http://s3.运维网.com/wyfs02/M00/11/A4/wKiom1LXniaxEQwCAARhIBZMkp8864.jpg
　　注意：监控MySQL需要在被监控端配置如下：
　　1）客户端连接mysql添加nagios用户
mysql> create database nagios;
Query OK, 1 row affected (0.00 sec)
mysql> grant select on nagios.* to nagios@'192.168.0.58' identified by 'nagios';
Query OK, 0 rows affected (0.11 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
mysql> quit　　2）Nagios服务器端添加mysql监控
# vi /usr/local/nagios/etc/objects/templates/commands.cfg
# 'check_mysql' command definition
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -u nagios -d nagios -p nagios
}　　——安装msmtp
　　1）下载安装msmtp
# wget http://jaist.dl.sourceforge.net/project/msmtp/msmtp/1.4.31/msmtp-1.4.31.tar.bz2
# tar fvx msmtp-1.4.31.tar.bz2
# cd msmtp-1.4.31
# ./configure --prefix=/usr/local/msmtp
# make && make install
# cd /usr/local/msmtp/　　2）配置msmtp
# mkdir etc
# cd etc/
# vi msmtprc
account default
logfile /usr/local/msmtp/msmtp.log
host smtp.163.com
from xxxxxx@163.com
auth login
user xxxxxx@163.com
#邮箱密码
password xxxxxx　　3）安装mutt
# yum install mutt -y　　4）配置mutt
# vi /etc/Muttrc
set sendmail="/usr/local/msmtp/bin/msmtp"
set use_from=yes
set realname="YingTao"
set from=hypocriticals@163.com
set envelope_from=yes
set editor="vim"
set record="/usr/local/msmtp/etc/sent"　　5）Nagios默认使用sendmail发送邮件、只需要把 /bin/mail 改为 /usr/bin/mutt即可。
# vi objects/templates/commands.cfg
# 'notify-host-by-email' command definition
define command{
command_name notify-host-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/bin/mutt -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}
# 'notify-service-by-email' command definition
define command{
command_name notify-service-by-email
command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /usr/bin/mutt -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}　　——测试能否通过邮箱报警
　　1）在被监控端关闭mysql
# /etc/init.d/mysqld stop
Shutting down MySQL. SUCCESS!
# netstat -antup |grep 3306　　2）登陆WEB Nagios查看MySQL状态
http://s3.运维网.com/wyfs02/M01/11/A4/wKioL1LXqUHxkv1lAASFcXJRBDw945.jpg
　　3）在服务端查看日志 [默认日志路径为/usr/local/nagios/var/]
# cd /usr/local/nagios/var/
# tail -f nagios.log
#连续检查五次，如果第五次检查时服务仍未恢复正常，则发送邮件通知。
SERVICE ALERT: 192.168.0.83;check_mysql;CRITICAL;SOFT;1;Can't connect to MySQL server on '192.168.0.83' (111)
SERVICE ALERT: 192.168.0.83;check_mysql;CRITICAL;SOFT;2;Can't connect to MySQL server on '192.168.0.83' (111)
SERVICE ALERT: 192.168.0.83;check_mysql;CRITICAL;SOFT;3;Can't connect to MySQL server on '192.168.0.83' (111)
SERVICE ALERT: 192.168.0.83;check_mysql;CRITICAL;SOFT;4;Can't connect to MySQL server on '192.168.0.83' (111)
SERVICE ALERT: 192.168.0.83;check_mysql;CRITICAL;HARD;5;Can't connect to MySQL server on '192.168.0.83' (111)
SERVICE NOTIFICATION: nagios;192.168.0.83;check_mysql;CRITICAL;notify-service-by-email;Can't connect to MySQL server on '192.168.0.83' (111)　　4）登陆邮箱查看是否收到邮件 [由于是虚拟机时间没有调整、和现实时间有差别]
http://s3.运维网.com/wyfs02/M02/11/A5/wKiom1LXqxajK7J6AAIL-TGYd1o835.jpg
　　5）重新启动MySQL
# /etc/init.d/mysqld start
Starting MySQL.. SUCCESS!
# netstat -antup |grep 3306
tcp    0    0 0.0.0.0:3306       0.0.0.0:*          LISTEN    51040/mysqld　　6）检测服务恢复正常后，是否收到邮件通知
http://s3.运维网.com/wyfs02/M00/11/A5/wKiom1LXqpCCoyDjAAJW4PkeYC0802.jpg
　　

页: [1]

运维网's Archiver

CentOS 6.5+Nagios4.0.2+Msmtp+Mutt邮箱报警