kaiser_cn 发表于 2015-11-23 09:49:47

Nagios监控nginx服务详细过程

1在nginx 服务器上安装nrpe客户端:
Nginx的服务需要监控起来,不然万一down了而不及时修复,会影响web应用,如下web应用上面启动的nginx后台进程
# ps aux|grep nginx
nobody   15294  0.0  0.0  22432  3464 ?        S    Jul03   0:05 nginx: worker process
nobody   15295  0.0  0.0  22432  3480 ?        S    Jul03   0:05 nginx: worker process
......
nobody   15316  0.0  0.0  22432  3468 ?        S    Jul03   0:05 nginx: worker process
nobody   15317  0.0  0.0  22432  3480 ?        S    Jul03   0:05 nginx: worker process
root     16260  0.0  0.0  20584  1684 ?        Ss   Jun18   0:00 nginx: master process /usr/local/nginx/sbin/nginx
root     21211  0.0  0.0 103252   860 pts/1    S+   17:50   0:00 grep nginx


1.1,rpm方式安装nrpe客户端
下载地址:http://download.iyunv.com/detail/mchdba/7493875
# ll
总计 768
-rw-r--r-- 1 root root 713389 12-16 12:08 nagios-plugins-1.4.11-1.x86_64.rpm
-rw-r--r-- 1 root root  32706 12-16 12:09 nrpe-2.12-1.x86_64.rpm
-rw-r--r-- 1 root root  18997 12-16 12:08 nrpe-plugin-2.12-1.x86_64.rpm
# rpm -ivh *.rpm --nodeps --force

1.2 在配置文件最末尾,添加配置信息以及监控主机服务器ip地址
# vim /etc/nagios/nrpe.cfg
# add by tim on 2014-06-11
command[check_users]=/usr/local/nagios/libexec/check_users -w 8 -c 15
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/sda
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
#command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 50 -c 80
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 750 -c 800
command[check_ping]=/usr/local/nagios/libexec/check_ping -H 10.xx.3.29 -w 3000.0,80% -c 5000.0,100% -p 5
allowed_hosts = 127.0.0.1,10.xx.3.41
check下命令是否生效:
# /usr/local/nagios/libexec/check_users -w 8 -c 15
USERS OK - 2 users currently logged in |users=2;8;15;0
#
看到已经USERS OK -….命令已经生效。

1.3 启动nrpe报错如下:
# service nrpe restart
Shutting down nrpe:                                        [失败]
Starting nrpe: /usr/sbin/nrpe: error while loading shared libraries: libssl.so.6: cannot open shared object file: No such file or directory
                                                         [失败]
#
# service nrpe start
Starting nrpe: /usr/sbin/nrpe: error while loading shared libraries: libssl.so.6: cannot open shared object file: No such file or directory
                                                         [失败]
#
建立连接
# ln -s /usr/lib64/libssl.so /usr/lib64/libssl.so.6
(如果没有libssl.so,就采用别的libssl.so.10来做软连接,ln -s /usr/lib64/libssl.so.10 /usr/lib64/libssl.so.6)
#
再重新启动如下:
# service nrpe start
Starting nrpe: /usr/sbin/nrpe: error while loading shared libraries: libcrypto.so.6: cannot open shared object file: No such file or directory
                                                         [失败]
# ll /usr/lib64/libcrypto.so
lrwxrwxrwx. 1 root root 18 10月 13 2013 /usr/lib64/libcrypto.so -> libcrypto.so.1.0.0
#
再建链接:
# ln -s /usr/lib64/libcrypto.so /usr/lib64/libcrypto.so.6
(或者如果没有libcrypto.so,就采用libcrypto.so.10做软连接, ln -s /usr/lib64/libcrypto.so.10 /usr/lib64/libcrypto.so.6)
# service nrpe start
Starting nrpe:                                             [确定]
#

1.4 检测下nrpe是否正常运行:
去nagios服务器端check下
#/usr/local/nagios/libexec/check_nrpe -H 10.xx.3.xx
NRPE v2.12
#
看到返回NRPE v2.12表示已经连接成功,客户端的nrpe服务已经监控完成。

2,比较简单的通过check_http的方式监控
可以在/etc/nagios/nrpe.cfg里面采用check_http的方式来获取nginx是否运行:
(1)   编辑nrpe.cfg
# vim /etc/nagios/nrpe.cfg
command[check_nginx_status]=/usr/lib/nagios/plugins/check_http -I localhost -p 80 -u /nginx_status -e 200 -w 3 -c 10
(2)   重启nrpe服务
# service nrpe restart
Shutting down nrpe:                                        [确定]
Starting nrpe:                                             [确定]
#
(3)   在nagios服务器端check,成功。
# /usr/local/nagios/libexec/check_nrpe -H10.xx.1.22 -c check_nginx_status
HTTP OK HTTP/1.1 200 OK - 254 bytes in 0.002 seconds |time=0.002031s;3.000000;10.000000;0.000000 size=254B;;;0
(4)   在services.cfg里面添加check_nginx_status服务
define service{
      host_name               lb-net-2
      service_description   check_nginx_status
      check_command         check_nrpe!check_nginx_status
      max_check_attempts      5
      normal_check_interval   3
      retry_check_interval    2
      check_period            24x7
      notification_interval   10
      notification_period   24x7
      notification_options    w,u,c,r
      contact_groups          opsweb
      }
(5)   在command.cfg添加check_nginx_status服务
define command{
      command_name    check_nginx_status
      command_line    $USER1$/check_nginx_status -I $HOSTADDRESS$ -w $Warning$ -c $Cri$
      }
(6)   重新加载nagios
# service nagios reload
Running configuration check...
Reloading nagios configuration...
done
#
(7)   查看界面的nginx监控服务,如下所示:




3 编写脚本来监控nginx服务
3.1 调试详细经过
# find / -name nginx.pid
/usr/local/nginx/logs/nginx.pid
# /usr/lib/nagios/plugins/check_nginxstatus -H localhost -P 80 -p /usr/local/nginx/logs/ -n /usr/local/nginx/logs/nginx.pid -s nginx_status -o /tmp/ -w 1500 -c 2000
expr: 参数数目错误
expr: 语法错误
(standard_in) 1: syntax error
/usr/lib/nagios/plugins/check_nginxstatus: line 258: [: : integer expression expected
/usr/lib/nagios/plugins/check_nginxstatus: line 262: [: : integer expression expected
OK - nginx is running.requests per second,connections per second ( requests per connection) | 'reqpsec'= 'conpsec'= 'conpreq'= ]

去查看262行,将逻辑运算符"-a" 改成 "&&"
# vim /usr/lib/nagios/plugins/check_nginxstatus
# /usr/lib/nagios/plugins/check_nginxstatus -H localhost -P 80 -p /usr/local/nginx/logs/ -n /usr/local/nginx/logs/nginx.pid -s nginx_status -o /tmp/ -w 1500 -c 2000
expr: 参数数目错误
expr: 语法错误
(standard_in) 1: syntax error
/usr/lib/nagios/plugins/check_nginxstatus: line 258: [: missing `]'
/usr/lib/nagios/plugins/check_nginxstatus: line 262: [: : integer expression expected
OK - nginx is running.requests per second,connections per second ( requests per connection) | 'reqpsec'= 'conpsec'= 'conpreq'= ]
#
看到已经OK了,再修改文件。

# vim /usr/lib/nagios/plugins/check_nginxstatus
#
# /usr/lib/nagios/plugins/check_nginxstatus -H localhost -P 80 -p /usr/local/nginx/logs/ -n /usr/local/nginx/logs/nginx.pid -s nginx_status -o /tmp/ -w 1500 -c 2000
expr: 参数数目错误
expr: 语法错误
(standard_in) 1: syntax error
/usr/lib/nagios/plugins/check_nginxstatus: line 258: [: missing `]'
OK - nginx is running.requests per second,connections per second ( requests per connection) | 'reqpsec'= 'conpsec'= 'conpreq'= ]
#
将[]改成使用"[[]]", 即可!

# vim /usr/lib/nagios/plugins/check_nginxstatus
#
#
# /usr/lib/nagios/plugins/check_nginxstatus -H localhost -P 80 -p /usr/local/nginx/logs/ -n /usr/local/nginx/logs/nginx.pid -s nginx_status -o /tmp/ -w 1500 -c 2000
expr: 参数数目错误
expr: 语法错误
(standard_in) 1: syntax error
OK - nginx is running.requests per second,connections per second ( requests per connection) | 'reqpsec'= 'conpsec'= 'conpreq'= ]
#

注释掉#reqpcon=`echo "scale=2; $reqpsec / $conpsec" | bc -l`之后,就不会报(standard_in) 1: syntax error错误,如下所示:
# /usr/lib/nagios/plugins/check_nginxstatus -H localhost -P 80 -p /usr/local/nginx/logs/ -s nginx_status -n nginx.pid -w 15000 -c 20000
expr: 参数数目错误
expr: 语法错误
OK - nginx is running.requests per second,connections per second ( requests per connection) | 'reqpsec'= 'conpsec'= 'conpreq'= ]
#

注释掉# reqpsec=`expr $tmp2_reqpsec - $tmp1_reqpsec` 就不会再报 expr: 参数数目错误,如下所示:
报错:
# /usr/lib/nagios/plugins/check_nginxstatus -H localhost -P 80 -p /usr/local/nginx/logs/ -s nginx_status -n nginx.pid -w 15000 -c 20000
expr: 语法错误
OK - nginx is running.requests per second,connections per second ( requests per connection) | 'reqpsec'= 'conpsec'= 'conpreq'= ]

再注释掉 #conpsec=`expr $tmp2_conpsec - $tmp1_conpsec` 后,运行不会报expr: 语法错误,如下所示:
# /usr/lib/nagios/plugins/check_nginxstatus -H localhost -P 80 -p /usr/local/nginx/logs/ -s nginx_status -n nginx.pid -w 15000 -c 20000
OK - nginx is running.requests per second,connections per second ( requests per connection) | 'reqpsec'= 'conpsec'= 'conpreq'= ]
#

看到这里发现'reqpsec'= 'conpsec'= 'conpreq'=都没有值,但是nginx又是在启动运行着,问题出在哪里?经过排查,原来是nginx_status服务没有启动,需要在/usr/local/nginx/conf/nginx.conf配置文件里面添加如下配置:
# 添加pid参数
pid      logs/nginx.pid;
#charset koi8-r;
      access_log  logs/host.access.log  main;
      location /nginx_status {
                stub_status on;
                access_log   off;
                                     deny all;
         }

然后重新加载nginx,看到新的nginx-status文件是生成了,但是文件内容为空,如下所示:
# ll /tmp/nginx*
-rw-r--r--. 1 root root 0 7月   3 15:06 /tmp/nginx-status.1
#

去查看nginx后台日志
# cd /usr/local/nginx/logs/
# tail -n 300 error.log
……
2014/07/03 15:05:47 4285#0: *1851293 access forbidden by rule, client: 127.0.0.1, server: localhost, request: "GET /nginx_status HTTP/1.0", host: "localhost"
2014/07/03 15:05:48 4285#0: *1851294 access forbidden by rule, client: 127.0.0.1, server: localhost, request: "GET /nginx_status HTTP/1.0", host: "localhost"
2014/07/03 15:06:12 4282#0: *1851362 access forbidden by rule, client: 127.0.0.1, server: localhost, request: "GET /nginx_status HTTP/1.0", host: "localhost"
2014/07/03 15:06:13 4282#0: *1851363 access forbidden by rule, client: 127.0.0.1, server: localhost, request: "GET /nginx_status HTTP/1.0", host: "localhost"
2014/07/03 15:06:55 4285#0: *1851509 access forbidden by rule, client: 127.0.0.1, server: localhost, request: "GET /nginx_status HTTP/1.0", host: "localhost"
2014/07/03 15:06:56 4285#0: *1851519 access forbidden by rule, client: 127.0.0.1, server: localhost, request: "GET /nginx_status HTTP/1.0", host: "localhost"

查看nginx编译参数
# /usr/local/nginx/sbin/nginx -V
nginx version: nginx/1.4.2
built by gcc 4.4.7 20120313 (Red Hat 4.4.7-3) (GCC)
configure arguments: --prefix=/usr/local/nginx --with-http_stub_status_module --with-http_realip_module

证明确实是加载了stub_status插件,之后去修改配置文件,注释掉deny all;重新加载nginx。
# vim /usr/local/nginx/conf/nginx.conf
#deny all;
# service nginx reload
reload nginx
#
# ll /tmp/nginx*
ls: 无法访问/tmp/nginx*: 没有那个文件或目录
#
还是没有看到/tmp/nginx-status.1状态文件生成,因为nagios下监控nginx的脚本是从nginx-status.1获取数据,如果没有这个文件,没有办法获取数据。

继续google,”nginx stub_status没有生成nginx-status.1”文件,看到有人说只要配置好了这个状态文件有没有无所谓,我就试着直接运行脚本看看能否生效。
# ll /tmp/nginx*
ls: 无法访问/tmp/nginx*: 没有那个文件或目录
# /root/check_nginx2.sh -H localhost -P 80 -p /usr/local/nginx/logs/ -n nginx.pid -s nginx_status -w 15000 -c 20000
OK - nginx is running. 1 requests per second, 2 connections per second (.50 requests per connection) | 'reqpsec'=1 'conpsec'=2 'conpreq'=.50 ]
#
看到'reqpsec'=1 'conpsec'=2 'conpreq'=.50里面有数据了,再去check下文件有没有生成,如下所示:
# ll /tmp/nginx*
ls: 无法访问/tmp/nginx*: 没有那个文件或目录
#
还是没有文件生成,但是check已经有数据了,证明不一定要拘泥于是否在/tmp/目录下是否有nginx-status.1文件。通过脚本分析如下:
# vim /usr/lib/nagios/plugins/check_nginxstatus
180 get_status() {
181   if [ "$secure" = 1 ]
182   then
183         wget_opts="-O- -q -t 3 -T 3 --no-check-certificate"
184         out1=`wget ${wget_opts} http://${hostname}:${port}/${status_page}`
185         sleep 1
186         out2=`wget ${wget_opts} http://${hostname}:${port}/${status_page}`
187   else
188         wget_opts="-O- -q -t 3 -T 3"
189         out1=`wget ${wget_opts} http://${hostname}:${port}/${status_page}`
190         sleep 1
191         out2=`wget ${wget_opts} http://${hostname}:${port}/${status_page}`
192   fi
193
194   if [ -z "$out1" -o -z "$out2" ]
195   then
196         echo "UNKNOWN - Local copy/copies of $status_page is empty."
197         exit $ST_UK
198   fi
199 }

是通过访问`wget -O- -q -t 3 -T 3 --no-check-certificate http://10.xx.xx.xx:80/nginx_status`这个链接来获取status的数据记录的,而不是去加载/tmp/nginx-status.1文件来获取数据的。直接访问 http://10.xx.xx.xx:80/nginx_status 地址就能获取nginx运行数据,如下图所示:


在nagios服务器上check下,报错:
# /usr/local/nagios/libexec/check_nrpe -H10.xx.xx.xx -c check_nginx_status
UNKNOWN - Local copy/copies of nginx_status is empty.
#
检查监控脚本,搜索 ‘Local copy/copies of nginx_status is empty.’在第197行,有如下代码:
195   if [ -z "$out1" -o -z "$out2" ]
196   then
197         echo "UNKNOWN - Local copy/copies of $status_page is empty."
198         exit $ST_UK
199   fi
看出是由于if [ -z "$out1" -o -z "$out2" ]这个判断生效,导致监控脚本运行到这里就exit了。继续调试,发现用nagios服务器调用脚本的时候,执行到以下第190行到第192行
out1=`/usr/bin/wget ${wget_opts} http://${hostname}:${port}/${status_page}`
      sleep 1
      out2=`/usr/bin/wget ${wget_opts} http://${hostname}:${port}/${status_page}`
的时候,out1为空,out2也为空,所以在后面的if [ -z "$out1" -o -z "$out2" ]判断通过报出信息为:UNKNOWN - Local copy/copies of $status_page is empty. 然后直接exit。

说明:由于监控脚本是要调用wget命令来获取nginx_status状态的,而在本环境中wget命令需要以root用户运行才能取到数据, 所以需要将nagios用户设置成可以无需密码直接sudo成root,这样就能以nagios用户运行命令sudo /usr/lib/nagios/plugins/check_nginxstatus 。在centos系统中,无法直接调用sudo命令,需要修改/etc/sudoers, 找到 #Defaults requiretty 并取消注释,另外新增一行。表示nagios用户不需要登陆终端就可以调用命令,如下所示:
Defaults    requiretty
Defaults:nagios    !requiretty
# 允许nagios用户通过sudo执行命令(可跟参数)时不需要密码。注意:NOPASSWD: ALL 权限过大,生产环境建议把ALL限制为具体的插件路径(如 /usr/lib/nagios/plugins/check_nginxstatus)。
nagios ALL=(ALL) NOPASSWD: ALL

修改完后,再check,数据出来了:
# /usr/local/nagios/libexec/check_nrpe -H10.xx.xx.xx -c check_nginx_status
OK - nginx is running. 1 requests per second, 1 connections per second (1.00 requests per connection) | 'reqpsec'=1 'conpsec'=1 'conpreq'=1.00 ]
#

3.2 分享(share)check_nginxstatus脚本



#!/bin/bash
#
# check_nginxstatus - Nagios plugin that checks whether nginx is running and
# reports requests/connections per second read from nginx's status page.
#
# Run under bash: parts of this script may use bash-only constructs such
# as [[ ]], which are not valid under a strict POSIX /bin/sh.

PROGNAME=$(basename "$0")
VERSION="Version 1.1,"
AUTHOR="tim man"

# Nagios plugin exit codes: OK, WARNING, CRITICAL, UNKNOWN.
ST_OK=0
ST_WR=1
ST_CR=2
ST_UK=3

# Defaults; each one can be overridden by a command-line option.
hostname="localhost"
port=80
path_pid=/var/run
name_pid="nginx.pid"
status_page="nginx_status"
pid_check=1
secure=0

# Print the plugin version and author on one line.
# Globals: VERSION (read), AUTHOR (read). Any arguments are ignored.
print_version() {
    printf '%s %s\n' "$VERSION" "$AUTHOR"
}

# Print the version line followed by the usage text, then exit with the
# UNKNOWN status.
# Globals: PROGNAME, VERSION, ST_UK (read).
# The usage line lists only options the argument parser accepts: the
# status page is "nginx_status", there is no -o option, and the long form
# of -n is --name-pid.
print_help() {
    print_version "$PROGNAME" "$VERSION"
    cat <<EOF

$PROGNAME is a Nagios plugin to check whether nginx is running.
It also parses the nginx's status page to get requests and
connections per second as well as requests per connection. You
may have to alter your nginx configuration so that the plugin
can access the server's status page.
The plugin is highly configurable for this reason. See below for
available options.

$PROGNAME -H localhost -P 80 -p /var/run -n nginx.pid
 -s nginx_status [-w INT] [-c INT] [-S] [-N]

Options:
 -H/--hostname)
 Defines the hostname. Default is: localhost
 -P/--port)
 Defines the port. Default is: 80
 -p/--path-pid)
 Path where nginx's pid file is being stored. You might need
 to alter this path according to your distribution. Default
 is: /var/run
 -n/--name-pid)
 Name of the pid file. Default is: nginx.pid
 -N/--no-pid-check)
 Turn this on, if you don't want to check for a pid file
 whether nginx is running, e.g. when you're checking a
 remote server. Default is: off
 -s/--status-page)
 Name of the server's status page defined in the location
 directive of your nginx configuration. Default is:
 nginx_status
 -S/--secure)
 In case your server is only reachable via SSL, use this
 switch to use HTTPS instead of HTTP. Default is: off
 -w/--warning)
 Sets a warning level for requests per second. Default is: off
 -c/--critical)
 Sets a critical level for requests per second. Default is:
 off
EOF
    exit "$ST_UK"
}

# Parse the command-line options into the global settings.
# Globals written: hostname, port, path_pid, name_pid, status_page,
#                  pid_check, secure, warning, critical.
# Exits with ST_UK (after printing help) on --help, --version, an unknown
# argument, or an option that is missing its value.
# As before, parsing stops at the first empty argument.
parse_args() {
    while [ -n "$1" ]; do
        case "$1" in
            --help|-help|-h)
                print_help
                exit "$ST_UK"
                ;;
            --version|-v)
                print_version "$PROGNAME" "$VERSION"
                exit "$ST_UK"
                ;;
            --hostname|-H|--port|-P|--path-pid|-p|--name-pid|--name_pid|-n|--status-page|-s|--warning|-w|--critical|-c)
                # These options consume the next word; refuse to run when it
                # is absent instead of shifting past the end of the list.
                if [ "$#" -lt 2 ]; then
                    echo "Missing value for argument: $1"
                    print_help
                    exit "$ST_UK"
                fi
                case "$1" in
                    --hostname|-H)              hostname=$2 ;;
                    --port|-P)                  port=$2 ;;
                    --path-pid|-p)              path_pid=$2 ;;
                    --name-pid|--name_pid|-n)   name_pid=$2 ;;
                    --status-page|-s)           status_page=$2 ;;
                    --warning|-w)               warning=$2 ;;
                    --critical|-c)              critical=$2 ;;
                esac
                shift
                ;;
            --no-pid-check|-N)
                pid_check=0
                ;;
            --secure|-S)
                secure=1
                ;;
            *)
                echo "Unknown argument: $1"
                print_help
                exit "$ST_UK"
                ;;
        esac
        shift
    done
}

parse_args "$@"

# Classify the warning/critical threshold combination.
# Globals read:    warning, critical
# Globals written: wclvls - set to 1 when both thresholds are given
#                  wcdiff - 1: warning >= critical
#                           2: warning given without critical
#                           3: critical given without warning
#                           empty: combination is acceptable (or none given)
get_wcdiff() {
    # Start from a clean state so a stale or inherited value cannot leak in.
    wclvls=
    wcdiff=

    if [ -n "$warning" ] && [ -n "$critical" ]; then
        wclvls=1
        if [ "$warning" -ge "$critical" ]; then
            wcdiff=1
        fi
    elif [ -n "$warning" ]; then
        wcdiff=2
    elif [ -n "$critical" ]; then
        wcdiff=3
    fi
}

# Abort with ST_UK and an explanatory message when the threshold
# combination recorded in wcdiff is unusable; return silently otherwise.
# Globals read: wcdiff, ST_UK
val_wcdiff() {
    case "$wcdiff" in
        1)
            echo "Please adjust your warning/critical thresholds. The warning must be lower than the critical level!"
            exit "$ST_UK"
            ;;
        2)
            echo "Please also set a critical value when you want to use warning/critical thresholds!"
            exit "$ST_UK"
            ;;
        3)
            echo "Please also set a warning value when you want to use warning/critical thresholds!"
            exit "$ST_UK"
            ;;
    esac
}

# Record in retval whether nginx's pid file exists as a regular file.
# Globals read:    path_pid, name_pid
# Globals written: retval (0 = pid file present, 1 = missing)
# Only the file's existence is tested; the process itself is not probed.
check_pid() {
    if [ -f "$path_pid/$name_pid" ]; then
        retval=0
    else
        retval=1
    fi
}

# Fetch the nginx status page twice, one second apart, so that per-second
# rates can be derived from the difference between the two samples.
# Globals read:    secure, hostname, port, status_page, ST_UK
# Globals written: out1, out2 (raw page bodies), wget_opts
# Exits with ST_UK when either sample comes back empty.
get_status() {
    wget_opts="-O- -q -t 3 -T 3"
    status_scheme="http"
    if [ "$secure" = 1 ]; then
        # -S/--secure: talk HTTPS and accept self-signed certificates.
        wget_opts="$wget_opts --no-check-certificate"
        status_scheme="https"
    fi

    # Build the URL from the parsed options rather than a fixed
    # localhost:80 address, so -H, -P and -s take effect.
    status_url="${status_scheme}://${hostname}:${port}/${status_page}"

    # wget_opts is deliberately unquoted: it holds several options that
    # must be split into separate words.
    out1=$(/usr/bin/wget ${wget_opts} "$status_url")
    sleep 1
    out2=$(/usr/bin/wget ${wget_opts} "$status_url")

    if [ -z "$out1" ] || [ -z "$out2" ]; then
        echo "UNKNOWN - Local copy/copies of $status_page is empty."
        exit "$ST_UK"
    fi
}

# Print word number $2 (1-based) of the text in $1, counting
# whitespace-separated words across all of its lines.
_status_word() {
    printf '%s\n' "$1" | tr -s '[:space:]' ' ' | awk -v n="$2" '{ print $n }'
}

# Derive per-second figures from the two status samples.
# Globals read:    out1, out2, status_page, ST_UK
# Globals written: tmp1_reqpsec, tmp2_reqpsec, tmp1_conpsec, tmp2_conpsec,
#                  reqpsec (word 10 delta), conpsec (word 9 delta),
#                  reqpcon (reqpsec / conpsec with two decimals)
# Exits with ST_UK when a counter is missing or not a plain number, so an
# unparsable page can no longer be reported as OK with empty values.
# NOTE(review): words 9 and 10 are presumably the "handled" and "requests"
# counters of the stub_status page - confirm against your nginx output.
get_vals() {
    tmp1_reqpsec=$(_status_word "$out1" 10)
    tmp2_reqpsec=$(_status_word "$out2" 10)
    tmp1_conpsec=$(_status_word "$out1" 9)
    tmp2_conpsec=$(_status_word "$out2" 9)

    for status_counter in "$tmp1_reqpsec" "$tmp2_reqpsec" "$tmp1_conpsec" "$tmp2_conpsec"; do
        case "$status_counter" in
            ''|*[!0-9]*)
                echo "UNKNOWN - Unable to parse counters from $status_page."
                exit "$ST_UK"
                ;;
        esac
    done

    reqpsec=$(( $tmp2_reqpsec - $tmp1_reqpsec ))
    conpsec=$(( $tmp2_conpsec - $tmp1_conpsec ))

    # Guard the division: with no new connections the ratio is undefined,
    # so report zero instead of failing.
    if [ "$conpsec" -eq 0 ]; then
        reqpcon="0.00"
    else
        # awk replaces bc here; values are printed with a leading zero
        # (for example 0.50), rounded to two decimals.
        reqpcon=$(awk -v r="$reqpsec" -v c="$conpsec" 'BEGIN { printf "%.2f", r / c }')
    fi
}

# Build the human-readable part of the plugin result.
# Globals read:    reqpsec, conpsec, reqpcon
# Globals written: output
do_output() {
    output="nginx is running. $reqpsec requests per second, $conpsec connections per second ($reqpcon requests per connection)"
}

# Build the performance-data part of the plugin result.
# Globals read:    reqpsec, conpsec, reqpcon
# Globals written: perfdata
# The third label is spelled 'conpreq' although it carries reqpcon; it is
# kept unchanged so existing graphs keyed on that label keep working.
do_perfdata() {
    perfdata="'reqpsec'=$reqpsec 'conpsec'=$conpsec 'conpreq'=$reqpcon"
}

# Main flow: validate the thresholds, optionally require the pid file,
# sample the status page, then report a Nagios state based on requests
# per second.
get_wcdiff
val_wcdiff

if [ "${pid_check}" = 1 ]; then
    check_pid
    if [ "$retval" = 1 ]; then
        echo "There's no pid file for nginx. Is nginx running? Please also make sure whether your pid path and name is correct."
        exit "$ST_CR"
    fi
fi

get_status
get_vals
do_output
do_perfdata

# A missing or non-integer request rate means the status page could not be
# interpreted; report UNKNOWN rather than an OK line with empty values.
case "$reqpsec" in
    ''|-|*[!0-9-]*|?*-*)
        echo "UNKNOWN - Unable to determine requests per second from $status_page."
        exit "$ST_UK"
        ;;
esac

if [ -n "$warning" ] && [ -n "$critical" ]; then
    # Plain [ ] tests keep this section valid under /bin/sh as well as bash.
    if [ "$reqpsec" -ge "$critical" ]; then
        echo "CRITICAL - ${output} | ${perfdata}"
        exit "$ST_CR"
    elif [ "$reqpsec" -ge "$warning" ]; then
        echo "WARNING - ${output} | ${perfdata}"
        exit "$ST_WR"
    else
        # No trailing "]" here: it would become part of the perfdata.
        echo "OK - ${output} | ${perfdata}"
        exit "$ST_OK"
    fi
else
    echo "OK - ${output} | ${perfdata}"
    exit "$ST_OK"
fi
页: [1]
查看完整版本: Nagios监控nginx服务详细过程