|
nagios监控机IP:192.168.1.141,被监控机:192.168.1.168
监控CPU
前提:监控机上已安装好nagios和nrpe客户端(check_nrpe插件),被监控机上已安装nrpe服务端(nrpe-server)。
在被监控机,写个脚本放在/usr/local/nagios/libexec并赋予执行权限
[root@localhost libexec]# pwd
/usr/local/nagios/libexec
[root@localhost libexec]# cat check_cpu
#!/bin/bash
#
# check_cpu - Nagios/NRPE plugin that reports the 1-minute load average.
#
# The load is multiplied by 100 (e.g. 1.25 -> 125) so that it can be compared
# with integer arithmetic. Command-line arguments are not interpreted; the
# thresholds below are fixed.
#
# Exit status (Nagios plugin convention):
#   0  OK        load*100 <= 200
#   1  WARNING   200 < load*100 <= 300
#   2  CRITICAL  load*100 > 300
#   3  UNKNOWN   the load average could not be determined

# Find the value by its "load average:" label instead of a fixed field number:
# the number of fields in front of it depends on how long the machine has been
# up ("up 5 min", "up 2:03", "up 1 day, 2:03"), so a fixed $8 is only correct
# for some of those formats. LC_ALL=C keeps the decimal separator a dot.
load=$(LC_ALL=C uptime \
  | awk -F'load averages?: *' \
      'NF > 1 { split($2, v, /[, ]+/); printf "%.0f", v[1] * 100 }')

# Without a numeric value the comparisons below would be meaningless; report
# UNKNOWN rather than silently claiming the host is fine.
if ! [[ "$load" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN: cannot parse load average from uptime"
  exit 3
fi

if (( load > 300 )); then
  echo taigao
  exit 2
fi

echo "$load"

# Everything above 200 that did not already exit as critical is a warning.
# This includes exactly 300, which previously fell through to OK.
if (( load > 200 )); then
  echo haixing
  exit 1
fi

echo meiwent
exit 0
在被监控机的nrpe.cfg文件中添加如下命令定义(注意:上面的check_cpu脚本并不解析-w/-c参数,阈值是写死在脚本里的)
[root@localhost etc]# pwd
/usr/local/nagios/etc
[root@localhost etc]# vim nrpe.cfg
command[check_cpu]=/usr/local/nagios/libexec/check_cpu -w 150 -c 200
监控机
定义commands.cfg 文件
[root@localhost objects]# pwd
/usr/local/nagios/etc/objects
[root@localhost objects]# vim commands.cfg
# Generic NRPE wrapper command. A service calls it as check_nrpe!<name>, and
# <name> is passed to the remote NRPE daemon through -c; it must match a
# command[<name>] entry in the monitored host's nrpe.cfg.
# The plugin executed on the monitoring server is check_nrpe, not check_cpu:
# check_cpu only exists on the monitored host.
# Skip this definition if commands.cfg already defines check_nrpe.
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
定义localhost.cfg文件
# Host object for the monitored machine at 192.168.1.168.
define host {
host_name user1
alias test
address 192.168.1.168
# Host reachability is tested with the check-host-alive command.
check_command check-host-alive
# Host states that trigger notifications (presumably d=down, u=unreachable,
# r=recovery per the Nagios object documentation - confirm).
notification_options d,u,r
check_interval 1
# The host is checked at most 2 times before its state is treated as final.
max_check_attempts 2
# Notifications go to the "admins" contact group.
contact_groups admins
notification_interval 10
notification_period 24x7
}
# CPU load service attached to host user1.
define service {
host_name user1
service_description check_cpu
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 2
notification_period 24x7
# Service states that trigger notifications (presumably w=warning, u=unknown,
# c=critical, r=recovery per the Nagios object documentation - confirm).
notification_options w,u,c,r
# Runs the command named check_nrpe with "check_cpu" as its argument; a
# command called check_nrpe must therefore exist in the command configuration,
# and the monitored host's nrpe.cfg must define command[check_cpu].
check_command check_nrpe!check_cpu
}
重启nagios服务即可
[root@localhost objects]# service nagios restart
Running configuration check...done.
Stopping nagios: .done.
Starting nagios: done.
打开nagios监控机

监控内存
首先看下内存信息吧
[root@localhost ~]# free -m
total used free shared buffers cached
Mem: 249 235 14 0 3 96
-/+ buffers/cache: 134 114
Swap: 2047 43 2003
一句话思路,取可用内存值(free)
free -m | awk 'NR==3{print $4}' 简单的取第三行第四列的值
#!/bin/bash
#
# check_mem - Nagios plugin that reports usable memory in MiB.
#
# Exit status (Nagios plugin convention):
#   0  OK        usable memory >= 100 MiB
#   1  WARNING   50 MiB < usable memory < 100 MiB
#   2  CRITICAL  usable memory <= 50 MiB
#   3  UNKNOWN   the value could not be read from `free`

# Select the value by row label instead of by line number:
#  - older procps prints a "-/+ buffers/cache:" row whose 4th field is the
#    memory free once buffers and cache are discounted;
#  - newer procps has no such row (line 3 is "Swap:" there) and instead
#    provides an "available" column as the 7th field of the "Mem:" row.
# LC_ALL=C keeps the row labels in English regardless of the system locale.
mem_free=$(LC_ALL=C free -m | awk '
  /^-\/\+ buffers\/cache:/ { legacy = $4 }
  /^Mem:/                  { avail = $7 }
  END { if (legacy != "") print legacy; else print avail }')

# An empty or non-numeric value would make every comparison below fail and
# end in the critical branch; report UNKNOWN instead.
if ! [[ "$mem_free" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN: cannot parse output of free"
  exit 3
fi

if (( mem_free >= 100 )); then    # 100 MiB or more
  echo "$mem_free"
  exit 0
elif (( mem_free > 50 )); then    # more than 50 but below 100 MiB
  echo "$mem_free"
  exit 1
else                              # 50 MiB or less
  echo "$mem_free"
  exit 2
fi
根据情况取值比如mem_free + cached,也可写成根据百分比判断
保存自定义插件到nagios下的插件目录 /usr/local/nagios/libexec/check_mem
配置/usr/local/nagios/etc/objects/localhost.cfg
# Host object "shenmeh" (alias server1) at 192.168.1.129.
define host {
host_name shenmeh
alias server1
address 192.168.1.129
# Host reachability is tested with the check-host-alive command.
check_command check-host-alive
# Host states that trigger notifications (presumably d=down, r=recovery per
# the Nagios object documentation - confirm).
notification_options d,r
check_interval 1
# The host is checked at most 2 times before its state is treated as final.
max_check_attempts 2
# Notifications go to the "admins" contact group.
contact_groups admins
notification_interval 1
notification_period 24x7
}
# Memory service attached to host shenmeh.
define service {
host_name shenmeh
service_description server1_apache_mem
check_period 24x7
normal_check_interval 2
retry_check_interval 1
max_check_attempts 2
notification_period 24x7
# Service states that trigger notifications (presumably w=warning, u=unknown,
# c=critical, r=recovery per the Nagios object documentation - confirm).
notification_options w,u,c,r
# Uses the check_mem command directly, with no arguments.
# NOTE(review): the check_mem command line has no host argument and does not
# go through check_nrpe, so it appears to measure the memory of the machine
# running Nagios rather than 192.168.1.129 - confirm this is intended.
check_command check_mem
}
配置/usr/local/nagios/etc/objects/commands.cfg
# 'check_mem' command definition
# Runs the check_mem plugin found under $USER1$ (presumably the plugin
# directory /usr/local/nagios/libexec - confirm in resource.cfg).
# No host address or other argument is passed to the plugin.
define command{
command_name check_mem
command_line $USER1$/check_mem
}
|
|
|