eddik 发表于 2019-1-16 10:12:07

nagios监控mysql集群脚本

  1, Create a Python script that checks the MySQL replication status
  


[*]#!/usr/bin/python
[*]
[*]#this is nagios plugin for check mysql replication
[*]#
[*]#Author lxm
[*]#
[*]
[*]import getopt
[*]import sys
[*]import MySQLdb
[*]
def usage():
    """Print the plugin's command-line synopsis to stdout.

    The program name shown in the synopsis is taken from ``sys.argv[0]``.
    """
    # The original printed the literal "%s" because the placeholder was
    # never substituted with the program name.
    print("Usage %s [-?|--help] -h|--slavehost -u|--slaveuser -p|--slavepasswd -H|--masterhost -U|--masteruser -P|--masterpasswd [--masterport] [--slaveport]" % sys.argv[0])
[*]
[*]
def getMysqlConnect(host, user, passwd, port=3306):
    """Connect to a MySQL server and return a dictionary cursor on it.

    Args:
        host: server host name or address.
        user: account used to log in.
        passwd: password for that account.
        port: TCP port; an int or a numeric string (default 3306).

    Returns:
        A cursor created with ``DictCursor`` as its cursor class.

    On any failure prints an ``UNKNOWN`` status line and exits with
    status 3.
    """
    try:
        # Imported here so the DictCursor lookup does not depend on the
        # submodule having been loaded as a side effect of connect().
        from MySQLdb.cursors import DictCursor

        # The port is converted because command-line values arrive as
        # strings; a non-numeric value is reported as UNKNOWN below.
        conn = MySQLdb.connect(host=host, user=user, passwd=passwd, port=int(port))
        cursor = conn.cursor(cursorclass=DictCursor)
        # NOTE(review): some MySQLdb releases appear to hold the connection
        # from the cursor only weakly; keep a strong reference so the
        # connection outlives this function. Harmless otherwise.
        cursor._keepalive_conn = conn
        return cursor
    except Exception:
        print("UNKNOWN - Can't connect %s:%s" % (host, port))
        sys.exit(3)
[*]
def getMasterStatus(cursor):
    """Run ``show master status`` and return the first row of the result.

    Args:
        cursor: an open database cursor; it is closed before returning,
            whether or not the query succeeded.

    Returns:
        Whatever ``cursor.fetchone()`` yields for the query (``None`` when
        the result set is empty).

    On any failure prints an ``UNKNOWN`` status line and exits with
    status 3.
    """
    try:
        try:
            cursor.execute('show master status')
            return cursor.fetchone()
        finally:
            # Release the cursor on the error path as well.
            cursor.close()
    except Exception:
        print("UNKNOWN - Can't get master status")
        sys.exit(3)
[*]
def getSlaveStatus(cursor):
    """Run ``show slave status`` and return the first row of the result.

    Args:
        cursor: an open database cursor; it is closed before returning,
            whether or not the query succeeded.

    Returns:
        Whatever ``cursor.fetchone()`` yields for the query (``None`` when
        the result set is empty).

    On any failure prints an ``UNKNOWN`` status line and exits with
    status 3.
    """
    try:
        try:
            cursor.execute('show slave status')
            return cursor.fetchone()
        finally:
            # The original placed close() after the return statement, so
            # it never ran; finally guarantees the cursor is released.
            cursor.close()
    except Exception:
        print("UNKNOWN - Can't get slave status")
        sys.exit(3)
[*]
def do(masterhost, masteruser, masterpasswd, slavehost, slaveuser, slavepasswd, masterport=3306, slaveport=3306):
    """Compare master and slave replication state and exit with a Nagios status.

    Connects to both servers, reads the master status row and the slave
    status row, prints exactly one status line and terminates the process.
    This function never returns.

    Exit codes used:
        0 (OK)        all checks passed.
        1 (WARNING)   log file names or positions differ between what the
                      slave has read and what it has executed, or between
                      the master position and the slave's read position.
        2 (CRITICAL)  a replication thread is not running, the slave is
                      reading a different binlog file than the master is
                      writing, or the slave reports no status row.
        3 (UNKNOWN)   the master reports no status row.

    Args:
        masterhost, masteruser, masterpasswd: master connection settings.
        slavehost, slaveuser, slavepasswd: slave connection settings.
        masterport: master TCP port (default 3306).
        slaveport: slave TCP port (default 3306).
    """
    master = getMysqlConnect(masterhost, masteruser, masterpasswd, masterport)
    slave = getMysqlConnect(slavehost, slaveuser, slavepasswd, slaveport)
    masterrs = getMasterStatus(master)
    slavers = getSlaveStatus(slave)

    # An empty result would otherwise raise TypeError on the subscripts
    # below and the resulting traceback exits with status 1, which Nagios
    # would interpret as WARNING.
    if not slavers:
        # Presumably the host is not configured as a replication slave.
        print("CRITICAL - %s:%s returned no slave status" % (slavehost, slaveport))
        sys.exit(2)
    if not masterrs:
        # Presumably binary logging is disabled on the master.
        print("UNKNOWN - %s:%s returned no master status" % (masterhost, masterport))
        sys.exit(3)

    if slavers['Slave_IO_Running'] != 'Yes' or slavers['Slave_SQL_Running'] != 'Yes':
        print('CRITICAL - Slave_IO_Running: %s\t Slave_SQL_Running:%s' % (slavers['Slave_IO_Running'], slavers['Slave_SQL_Running']))
        sys.exit(2)

    if masterrs['File'] != slavers['Master_Log_File']:
        print('CRITICAL - Master binlog file is %s but slave read master log file is %s' % (masterrs['File'], slavers['Master_Log_File']))
        sys.exit(2)

    # The status prefix below was misspelled "WARING" in the original.
    if slavers['Master_Log_File'] != slavers['Relay_Master_Log_File']:
        print('WARNING - Master_Log_File is %s but Relay_Master_Log_File is %s' % (slavers['Master_Log_File'], slavers['Relay_Master_Log_File']))
        sys.exit(1)

    # NOTE(review): the two status rows are read one after the other, so on
    # a master that is actively writing this equality check can trip even
    # when replication is healthy.
    if masterrs['Position'] != slavers['Read_Master_Log_Pos']:
        print('WARNING - Master binlog position is %d but slave read master log position is %d The Offset is %d' % (masterrs['Position'], slavers['Read_Master_Log_Pos'], masterrs['Position'] - slavers['Read_Master_Log_Pos']))
        sys.exit(1)

    if slavers['Read_Master_Log_Pos'] != slavers['Exec_Master_Log_Pos']:
        print('WARNING - Read Master Log Position is %d but Exec Master Log Position is %d The Offset is %d' % (slavers['Read_Master_Log_Pos'], slavers['Exec_Master_Log_Pos'], slavers['Read_Master_Log_Pos'] - slavers['Exec_Master_Log_Pos']))
        sys.exit(1)

    print("OK - mysql replication is ok")
    sys.exit(0)
[*]
def main():
    """Parse the command line and run the replication check.

    Required options: -h/--slavehost, -u/--slaveuser, -p/--slavepasswd,
    -H/--masterhost, -U/--masteruser, -P/--masterpasswd.
    Optional: --slaveport and --masterport (both default to 3306).

    Prints the usage text and exits with status 3 when an option is
    unknown, a required option is missing or empty, or a port is not a
    number. -?/--help prints the usage text and exits with status 0.
    """
    slavehost = slaveuser = slavepasswd = slaveport = None
    masterhost = masteruser = masterpasswd = masterport = None

    try:
        # Skip argv[0]: getopt stops at the first non-option argument, so
        # passing the script name meant no option was ever parsed.
        opts, args = getopt.getopt(
            sys.argv[1:],
            '?h:u:p:H:U:P:',
            ["help", "slavehost=", "slaveuser=", "slavepasswd=", 'slaveport=',
             'masterhost=', 'masteruser=', 'masterpasswd=', 'masterport='])
    except getopt.GetoptError:
        usage()
        sys.exit(3)

    for o, a in opts:
        if o in ('-?', '--help'):
            usage()
            sys.exit()
        elif o in ('-h', '--slavehost'):
            slavehost = a
        elif o in ('-u', '--slaveuser'):
            slaveuser = a
        elif o in ('-p', '--slavepasswd'):
            slavepasswd = a
        elif o in ('-H', '--masterhost'):
            masterhost = a
        elif o in ('-U', '--masteruser'):
            masteruser = a
        elif o in ('-P', '--masterpasswd'):
            masterpasswd = a
        # Plain equality: ('--slaveport') is a string, not a tuple, so the
        # original "in" performed a substring test.
        elif o == '--slaveport':
            slaveport = a
        elif o == '--masterport':
            masterport = a

    if not (slavehost and slaveuser and slavepasswd and masterhost and masteruser and masterpasswd):
        usage()
        sys.exit(3)

    try:
        # Option values are strings; convert them and fall back to 3306
        # when a port was not given.
        slaveport = int(slaveport or 3306)
        masterport = int(masterport or 3306)
    except ValueError:
        usage()
        sys.exit(3)

    do(masterhost, masteruser, masterpasswd, slavehost, slaveuser, slavepasswd, masterport, slaveport)


if __name__ == '__main__':
    main()
  

  2, Deploy this file to $NAGIOS_HOME/libexec (make sure it is executable) and add a Nagios check command.
  Modify $NAGIOS_HOME/etc/objects/command.cfg and add the following lines:
  


[*]define command{
[*]    command_name check_mysql_replication
[*]    command_line /usr/local/nagios/libexec/check_mysql_replication.py -h $HOSTADDRESS$ -u **** -p ***** -U **** -P ***** -H $ARG1$
[*]}
  

  3, Define the services
  Modify $NAGIOS_HOME/etc/servers/servers.cfg and add the following lines:
  


[*]define service {
[*]      use                     generic-service
[*]      host_name               web-sh-unicom-01
[*]      service_description   shanghai-user-databases
[*]      check_command         check_mysql_replication!203.116.12.160
[*]}
[*]
[*]define service {
[*]      use                     generic-service
[*]      host_name               web-sg-01
[*]      service_description   Singapore-user-databases
[*]      check_command         check_mysql_replication!60.29.233.97
[*]}
  

  4, Restart Nagios
  First, verify the configuration:
  


[*]$NAGIOS_HOME/bin/nagios -v $NAGIOS_HOME/etc/nagios.cfg
  

  If there are no errors, restart Nagios.


页: [1]
查看完整版本: nagios监控mysql集群脚本