nagios监控mysql集群脚本
1. Write a Python script to check the MySQL replication status:
#!/usr/bin/python

# This is a Nagios plugin for checking MySQL replication.
#
# Author lxm
#

import getopt
import sys

import MySQLdb
import MySQLdb.cursors

def usage():
    """Print the command-line synopsis of this plugin to stdout."""
    # The original printed the literal "%s" because the format string was
    # never given an argument; substitute the invoked script name.
    print("Usage %s [-?|--help] -h|--slavehost -u|--slaveuser -p|--slavepasswd -H|--masterhost -U|--masteruser -P|--masterpasswd [--masterport] [--slaveport]" % sys.argv[0])
def getMysqlConnect(host, user, passwd, port=3306):
    """Connect to a MySQL server and return a dictionary cursor.

    Args:
        host: server host name or address.
        user: account name used to log in.
        passwd: password for ``user``.
        port: TCP port; an int or a numeric string (command-line values
            arrive as strings).

    Returns:
        A cursor created with ``MySQLdb.cursors.DictCursor``.

    On any failure (bad port, connection error) a Nagios UNKNOWN line is
    printed and the process exits with status 3.
    """
    try:
        # The keyword names were garbled in the original listing
        # (hosthost=, useruser=, ...), which made every call fail.
        # MySQLdb needs an integer port, so convert here.
        conn = MySQLdb.connect(host=host, user=user, passwd=passwd, port=int(port))
        return conn.cursor(cursorclass=MySQLdb.cursors.DictCursor)
    except Exception:
        print("UNKNOWN - Can't connect %s:%s" % (host, port))
        sys.exit(3)
def getMasterStatus(cursor):
    """Run ``show master status`` and return its first row.

    Args:
        cursor: an open database cursor; it is closed before returning,
            on both the success and the failure path.

    Returns:
        Whatever ``cursor.fetchone()`` yields (``None`` when the server
        returns no row).

    If the query, the fetch, or the close fails, a Nagios UNKNOWN line is
    printed and the process exits with status 3.
    """
    try:
        try:
            cursor.execute('show master status')
            return cursor.fetchone()
        finally:
            # Always release the cursor; the original leaked it on errors.
            cursor.close()
    except Exception:
        print("UNKNOWN - Can't get master status")
        sys.exit(3)
def getSlaveStatus(cursor):
    """Run ``show slave status`` and return its first row.

    Args:
        cursor: an open database cursor; it is closed before returning,
            on both the success and the failure path.

    Returns:
        Whatever ``cursor.fetchone()`` yields (``None`` when the server
        returns no row).

    If the query, the fetch, or the close fails, a Nagios UNKNOWN line is
    printed and the process exits with status 3.
    """
    try:
        try:
            cursor.execute('show slave status')
            return cursor.fetchone()
        finally:
            # The original called close() after `return`, so it never ran.
            cursor.close()
    except Exception:
        print("UNKNOWN - Can't get slave status")
        sys.exit(3)
def _checkReplication(masterrs, slavers):
    """Compare master and slave status rows; return (exit_code, message).

    Exit codes follow the Nagios convention: 0 OK, 1 WARNING, 2 CRITICAL,
    3 UNKNOWN. Checks run from most to least severe and the first failing
    check decides the result.
    """
    # An empty result means the host is not set up as a slave; indexing
    # None would otherwise crash with a TypeError.
    if not slavers:
        return 3, "UNKNOWN - Can't get slave status"

    if (slavers['Slave_IO_Running'] != 'Yes') or (slavers['Slave_SQL_Running'] != 'Yes'):
        return 2, 'CRITICAL - Slave_IO_Running: %s\t Slave_SQL_Running:%s' % (
            slavers['Slave_IO_Running'], slavers['Slave_SQL_Running'])

    # Likewise, no row from the master (e.g. binary logging disabled).
    if not masterrs:
        return 3, "UNKNOWN - Can't get master status"

    if masterrs['File'] != slavers['Master_Log_File']:
        return 2, 'CRITICAL - Master binlog file is %s but slave read master log file is %s' % (
            masterrs['File'], slavers['Master_Log_File'])

    if slavers['Master_Log_File'] != slavers['Relay_Master_Log_File']:
        return 1, 'WARNING - Master_Log_File is %s but Relay_Master_Log_File is %s' % (
            slavers['Master_Log_File'], slavers['Relay_Master_Log_File'])

    if masterrs['Position'] != slavers['Read_Master_Log_Pos']:
        return 1, 'WARNING - Master binlog position is %d but slave read master log position is %d The Offset is %d' % (
            masterrs['Position'], slavers['Read_Master_Log_Pos'],
            masterrs['Position'] - slavers['Read_Master_Log_Pos'])

    if slavers['Read_Master_Log_Pos'] != slavers['Exec_Master_Log_Pos']:
        return 1, 'WARNING - Read Master Log Position is %d but Exec Master Log Position is %d The Offset is %d' % (
            slavers['Read_Master_Log_Pos'], slavers['Exec_Master_Log_Pos'],
            slavers['Read_Master_Log_Pos'] - slavers['Exec_Master_Log_Pos'])

    return 0, "OK - mysql replication is ok"


def do(masterhost, masteruser, masterpasswd, slavehost, slaveuser, slavepasswd, masterport=3306, slaveport=3306):
    """Check replication between a master and a slave and exit.

    Connects to both servers, reads ``show master status`` and
    ``show slave status``, prints a single Nagios status line and exits
    with the matching code (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN).
    This function never returns.
    """
    master = getMysqlConnect(masterhost, masteruser, masterpasswd, masterport)
    slave = getMysqlConnect(slavehost, slaveuser, slavepasswd, slaveport)
    masterrs = getMasterStatus(master)
    slavers = getSlaveStatus(slave)

    code, message = _checkReplication(masterrs, slavers)
    print(message)
    sys.exit(code)
def main():
    """Parse command-line options and run the replication check.

    The slave and master host, user and password are required; the ports
    are optional and default to 3306. On a parse error, a missing required
    option, or a non-numeric port, the usage text is printed and the
    process exits with status 3 (Nagios UNKNOWN).
    """
    slavehost = slaveuser = slavepasswd = slaveport = None
    masterhost = masteruser = masterpasswd = masterport = None
    try:
        # Skip argv[0]: getopt stops at the first non-option argument, so
        # passing the script name made it ignore every option.
        opts, _ = getopt.getopt(
            sys.argv[1:], '?h:u:p:H:U:P:',
            ["help", "slavehost=", "slaveuser=", "slavepasswd=", 'slaveport=',
             'masterhost=', 'masteruser=', 'masterpasswd=', 'masterport='])
    except getopt.GetoptError:
        usage()
        sys.exit(3)

    for o, a in opts:
        if o in ('-?', '--help'):
            usage()
            sys.exit()
        elif o in ('-h', '--slavehost'):
            slavehost = a
        elif o in ('-u', '--slaveuser'):
            slaveuser = a
        elif o in ('-p', '--slavepasswd'):
            slavepasswd = a
        elif o in ('-H', '--masterhost'):
            masterhost = a
        elif o in ('-U', '--masteruser'):
            masteruser = a
        elif o in ('-P', '--masterpasswd'):
            masterpasswd = a
        # ('--slaveport') is a plain string, not a tuple, so `in` was a
        # substring test; compare for equality instead.
        elif o == '--slaveport':
            slaveport = a
        elif o == '--masterport':
            masterport = a

    if not (slavehost and slaveuser and slavepasswd and masterhost and masteruser and masterpasswd):
        usage()
        sys.exit(3)

    try:
        # Option values are strings; the connection needs integer ports.
        slaveport = int(slaveport) if slaveport else 3306
        masterport = int(masterport) if masterport else 3306
    except ValueError:
        usage()
        sys.exit(3)

    do(masterhost, masteruser, masterpasswd, slavehost, slaveuser, slavepasswd, masterport, slaveport)


if __name__ == '__main__':
    main()
2. Deploy this file to $NAGIOS_HOME/libexec and add a Nagios check command.
Modify $NAGIOS_HOME/etc/objects/command.cfg and add the following lines:
define command{
    command_name check_mysql_replication
    command_line /usr/local/nagios/libexec/check_mysql_replication.py -h $HOSTADDRESS$ -u **** -p ***** -U **** -P ***** -H $ARG1$
}
3. Define the services.
Modify $NAGIOS_HOME/etc/servers/servers.cfg and add the following lines:
define service {
    use generic-service
    host_name web-sh-unicom-01
    service_description shanghai-user-databases
    check_command check_mysql_replication!203.116.12.160
}

define service {
    use generic-service
    host_name web-sg-01
    service_description Singapore-user-databases
    check_command check_mysql_replication!60.29.233.97
}
4. Restart Nagios.
First, check the configuration file:
$NAGIOS_HOME/bin/nagios -v $NAGIOS_HOME/etc/nagios.cfg
If there are no errors, restart Nagios.
页:
[1]