24123r2 发表于 2016-12-30 10:39:53

zabbix监控docker

Centos6下安装easy_install
# yum install python-setuptools

安装python 的docker模块
# easy_install docker-py

sudo:sorry, you must have a tty to run sudo
使用不同账户执行脚本的时候,sudo 经常会碰到 "sudo: sorry, you must have a tty to run sudo" 这个情况,其实修改一下 sudo 的配置就好了
# vim /etc/sudoers (最好用visudo命令)
注释掉 Defaults requiretty 一行
#Defaults requiretty
意思就是sudo默认需要tty终端。注释掉就可以在后台执行了。

Zabbix客户端的部署:
#vim /opt/zabbix/etc/zabbix_agentd.conf

#docker
# 用来发现宿主机上存活的容器
UserParameter=docker_discovery[*],cat /opt/zabbix/script/docker_cons.txt
# 用来监控容器的各种指标,具体实现见后面的脚本,看不懂脚本的请路过。
UserParameter=docker_stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2
# 用来发现容器启动的tomcat服务
UserParameter=docker.tomcat.discovery,cat /opt/zabbix/script/docker_tomcat.txt
# 用来监控容器中tomcat的端口
UserParameter=docker.tomcat.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3
# 用来发现容器启动的nginx服务
UserParameter=docker.nginx.discovery,cat /opt/zabbix/script/docker_nginx.txt
# 用来监控容器中nginx的端口
UserParameter=docker.nginx.stats[*],/opt/zabbix/script/zabbix_monitor_docker.py $1 $2 $3

监控脚本1,用来监控容器的CPU 内存 网卡,服务端口
#cat /opt/zabbix/script/zabbix_monitor_docker.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# email:279379936@qq.com
"""Zabbix helper that prints one metric of one Docker container.

Usage:
    zabbix_monitor_docker.py <container> <item>
    zabbix_monitor_docker.py <container> <port> port
"""
from docker import Client
import json
import sys
import subprocess
import os
import time
import commands

# Shell pipeline that prints {"rx":<bytes>,"tx":<bytes>} for eth1 inside a
# container.  NOTE(review): assumes ifconfig prints
# "RX bytes:N (...)  TX bytes:M (...)" on a single line - confirm against the
# ifconfig shipped in the monitored images.
NETWORK_CHECK_COMMAND = """docker exec %s ifconfig eth1|grep bytes|awk -F':' '{print $2,$3}'|awk '{print $1,$6}'|awk '{print "{\\"rx\\":"$1",\\"tx\\":"$2"}"}'"""


def _read_network_counters(container_name):
    """Run the ifconfig pipeline once and return its {"rx": ..., "tx": ...} dict."""
    # NOTE(review): container_name is interpolated into a shell command
    # unquoted; it must come from a trusted source.
    command = NETWORK_CHECK_COMMAND % container_name
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    output = process.communicate()[0]
    return json.loads(output.strip())


def _network_delta(container_name, direction):
    """Return the change of the 'rx' or 'tx' counter across a one second sleep."""
    before = _read_network_counters(container_name)
    time.sleep(1)
    after = _read_network_counters(container_name)
    return int(after[direction]) - int(before[direction])


def check_container_stats(container_name, collect_item):
    """Return the metric named by collect_item for container_name.

    Supported items: cpu_total_usage, cpu_system_usage, cpu_percent,
    mem_usage, mem_limit, mem_percent, network_rx_bytes, network_tx_bytes.

    CPU and memory values are read from two consecutive samples of the
    stats stream of the module-level ``docker_client``; network values come
    from ifconfig run inside the container.

    Raises:
        ValueError: collect_item is not one of the supported items.
    """
    if collect_item == 'network_rx_bytes':
        return _network_delta(container_name, 'rx')
    if collect_item == 'network_tx_bytes':
        return _network_delta(container_name, 'tx')

    container_collect = docker_client.stats(container_name)
    try:
        # The first sample is read and discarded; the next two are compared.
        next(container_collect)
        old_result = json.loads(next(container_collect))
        new_result = json.loads(next(container_collect))
    finally:
        container_collect.close()

    old_cpu = old_result['cpu_stats']
    new_cpu = new_result['cpu_stats']
    cpu_total_usage = (new_cpu['cpu_usage']['total_usage'] -
                       old_cpu['cpu_usage']['total_usage'])
    cpu_system_usage = new_cpu['system_cpu_usage'] - old_cpu['system_cpu_usage']
    memory = new_result['memory_stats']

    if collect_item == 'cpu_total_usage':
        return cpu_total_usage
    if collect_item == 'cpu_system_usage':
        return cpu_system_usage
    if collect_item == 'cpu_percent':
        if cpu_system_usage == 0:
            # No system time elapsed between the samples: avoid dividing by zero.
            return 0.0
        cpu_num = len(old_cpu['cpu_usage']['percpu_usage'])
        return round(float(cpu_total_usage) / float(cpu_system_usage) * cpu_num * 100.0, 2)
    if collect_item == 'mem_usage':
        return memory['usage']
    if collect_item == 'mem_limit':
        return memory['limit']
    if collect_item == 'mem_percent':
        return round(float(memory['usage']) / float(memory['limit']) * 100.0, 2)
    raise ValueError('unsupported collect_item: %r' % (collect_item,))


if __name__ == "__main__":
    docker_client = Client(base_url='unix://var/run/docker.sock', version='1.19')
    if len(sys.argv) == 3:
        container_name = sys.argv[1]
        collect_item = sys.argv[2]
        print(check_container_stats(container_name, collect_item))
    elif len(sys.argv) == 4 and sys.argv[3] == 'port':
        container_name = sys.argv[1]
        collect_item = int(sys.argv[2])
        # Prints grep's exit status: 0 when netstat output mentions the port.
        check_stat = commands.getoutput("/usr/bin/docker exec %s netstat -ntpul|grep %s > /dev/null;echo $?" % (container_name, collect_item))
        print(check_stat)
    else:
        print('1')

# Note: the script above collects its data through the Python docker module.
# Some hosts cannot install that module for various reasons; on those hosts
# use the script below instead:
# cat /opt/zabbix/script/zabbix_monitor_docker.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# email:279379936@qq.com
import commands
import json
import re
import subprocess
import sys
import time

def get_memory_container_dir(memory_dir, container_name):
    """Return the memory cgroup directory of container_name.

    The short container id is taken from ``docker ps`` and matched against
    the entries of memory_dir to obtain the full-id directory name.

    Args:
        memory_dir: directory holding one sub-directory per container.
        container_name: text used to select the container's ``docker ps`` row.

    Returns:
        memory_dir joined with the matching directory name by '/'.
    """
    # NOTE(review): grep matches substrings, so a name that is a prefix of
    # another container's name selects several rows.
    con_id = commands.getoutput(
        "sudo /usr/bin/docker ps|grep %s|awk '{print $1}'" % container_name)
    con_full_id = commands.getoutput(
        "ls -al %s|grep '%s'|grep -v grep|awk '{print $NF}'" % (memory_dir, con_id))
    return memory_dir + '/' + con_full_id

def get_cpu_container_dir(cpu_dir, container_name):
    """Return the cpuacct cgroup directory of container_name.

    The short container id is taken from ``docker ps`` and matched against
    the entries of cpu_dir to obtain the full-id directory name.

    Args:
        cpu_dir: directory holding one sub-directory per container.
        container_name: text used to select the container's ``docker ps`` row.

    Returns:
        cpu_dir joined with the matching directory name by '/'.
    """
    con_id = commands.getoutput(
        "sudo /usr/bin/docker ps|grep %s|awk '{print $1}'" % container_name)
    con_full_id = commands.getoutput(
        "ls -al %s|grep '%s'|grep -v grep|awk '{print $NF}'" % (cpu_dir, con_id))
    return cpu_dir + '/' + con_full_id


def get_cpu_info(container_name):
    """Return one stats sample of container_name as a dict.

    The sample is requested from the Docker daemon over its unix socket with
    ``nc``; the response line containing "read" is parsed as JSON.
    """
    info = commands.getoutput(
        'echo -ne "GET /containers/%s/stats?stream=false HTTP/1.1\r\n\r\n"'
        '|sudo /usr/bin/nc -U /var/run/docker.sock|grep read' % container_name)
    # json.loads instead of eval: the body is JSON (true/false/null are not
    # Python literals) and external data must never be evaluated as code.
    return json.loads(info)

# Shell pipeline that prints {"rx":<bytes>,"tx":<bytes>} for eth1 inside a
# container.  NOTE(review): assumes ifconfig prints
# "RX bytes:N (...)  TX bytes:M (...)" on a single line - confirm.
NETWORK_CHECK_COMMAND = """sudo /usr/bin/docker exec %s ifconfig eth1|grep bytes|awk -F':' '{print $2,$3}'|awk '{print $1,$6}'|awk '{print "{\\"rx\\":"$1",\\"tx\\":"$2"}"}'"""


def sample_interval(old_time, new_time):
    """Return the seconds elapsed between two stats 'read' timestamps.

    Only the seconds field of each timestamp (third piece after splitting on
    ':' and '.') is compared.  When the newer field is not greater than the
    older one, 2 is returned.
    """
    old_s = int(re.split(r'\:|\.', old_time)[2])
    new_s = int(re.split(r'\:|\.', new_time)[2])
    if old_s >= new_s:
        return 2
    return new_s - old_s


def _read_network_counters(container_name):
    """Run the ifconfig pipeline once and return its {"rx": ..., "tx": ...} dict."""
    command = NETWORK_CHECK_COMMAND % container_name
    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
    output = process.communicate()[0]
    return json.loads(output.strip())


def _network_delta(container_name, direction):
    """Return the change of the 'rx' or 'tx' counter across a one second sleep."""
    before = _read_network_counters(container_name)
    time.sleep(1)
    after = _read_network_counters(container_name)
    return int(after[direction]) - int(before[direction])


def check_container_stats(container_name, collect_item):
    """Return the metric named by collect_item for container_name.

    Supported items: cpu_total_usage, cpu_system_usage, cpu_percent,
    mem_usage, mem_limit, mem_percent, network_rx_bytes, network_tx_bytes.

    CPU items compare two ``get_cpu_info`` samples, memory items read the
    container's directory under the module-level ``memory_dir``, network
    items run ifconfig inside the container twice, one second apart.

    Raises:
        ValueError: collect_item is not one of the supported items.
    """
    if collect_item in ('cpu_total_usage', 'cpu_system_usage', 'cpu_percent'):
        old_result = get_cpu_info(container_name)
        new_result = get_cpu_info(container_name)
        time_interval = sample_interval(old_result['read'], new_result['read'])
        old_cpu = old_result['cpu_stats']
        new_cpu = new_result['cpu_stats']
        # Floor division keeps the integer results Python 2's "/" produced.
        cpu_total_usage = (new_cpu['cpu_usage']['total_usage'] -
                           old_cpu['cpu_usage']['total_usage']) // time_interval
        cpu_system_usage = (new_cpu['system_cpu_usage'] -
                            old_cpu['system_cpu_usage']) // time_interval
        if collect_item == 'cpu_total_usage':
            return cpu_total_usage
        if collect_item == 'cpu_system_usage':
            return cpu_system_usage
        if cpu_system_usage == 0:
            # No system time elapsed between the samples: avoid dividing by zero.
            return 0.0
        cpu_num = len(old_cpu['cpu_usage']['percpu_usage'])
        return round(float(cpu_total_usage) / float(cpu_system_usage) * cpu_num * 100.0, 2)

    if collect_item in ('mem_usage', 'mem_limit', 'mem_percent'):
        memory_container_dir = get_memory_container_dir(memory_dir, container_name)
        if collect_item != 'mem_limit':
            # "^rss " (with the space) so that other keys starting with
            # "rss" are not matched as well.
            mem_usage = commands.getoutput(
                "cat %s/memory.stat|grep '^rss '|grep -v grep|awk '{print $NF}'"
                % memory_container_dir)
            if collect_item == 'mem_usage':
                return mem_usage
        mem_limit = commands.getoutput(
            "cat %s/memory.limit_in_bytes" % memory_container_dir)
        if collect_item == 'mem_limit':
            return mem_limit
        return round(float(mem_usage) / float(mem_limit) * 100.0, 2)

    if collect_item == 'network_rx_bytes':
        return _network_delta(container_name, 'rx')
    if collect_item == 'network_tx_bytes':
        return _network_delta(container_name, 'tx')
    raise ValueError('unsupported collect_item: %r' % (collect_item,))

if __name__ == "__main__":
    # Read as module globals by check_container_stats and its helpers.
    cpu_dir = "/cgroup/cpuacct/docker"
    memory_dir = "/cgroup/memory/docker"
    if len(sys.argv) == 3:
        # <container> <item>: print one CPU / memory / network metric.
        container_name = sys.argv[1]
        collect_item = sys.argv[2]
        print(check_container_stats(container_name, collect_item))
    elif len(sys.argv) == 4 and sys.argv[3] == 'port':
        # <container> <port> port: print grep's exit status, which is 0 when
        # netstat inside the container mentions the port.
        container_name = sys.argv[1]
        collect_item = int(sys.argv[2])
        check_stat = commands.getoutput("sudo /usr/bin/docker exec %s netstat -ntpul|grep %s > /dev/null;echo $?" % (container_name, collect_item))
        print(check_stat)
    else:
        print('1')
脚本2,用来发现容器名
# cat discovery_cons.py
#!/usr/bin/env python

# Felix Shang
#QQ: 279379936
import commands
import sys

def docker_s():
    """Return the names printed by ``docker ps``, or 0 when there are none.

    The name is the last column of every ``docker ps`` row except the header.
    The integer 0 (not an empty list) signals "no containers"; the caller
    compares against it.
    """
    cons = commands.getoutput("""sudo /usr/bin/docker ps|grep -v "CONTAINER ID"|awk '{print $NF}'|tr '\n' ' '""")
    names = cons.split()
    if names:
        return names
    return 0

def format_container_discovery(names):
    """Return the Zabbix low-level-discovery JSON text for container names.

    Each name becomes one {"{#CONTAINERNAME}": name} entry of the "data"
    list; entries are comma separated with no comma after the last one.
    """
    entries = ['\t\t{"{#CONTAINERNAME}":"%s"}' % name for name in names]
    lines = ['{', '\t"data":[']
    if entries:
        lines.append(',\n'.join(entries))
    lines.extend(['\t]', '}'])
    return '\n'.join(lines)


if __name__ == "__main__":
    # Expected call: discovery_cons.py docker
    if len(sys.argv) == 2 and sys.argv[1] == 'docker':
        infos = docker_s()
        # docker_s() returns 0 when no container is listed; print nothing then.
        if infos != 0:
            print(format_container_discovery(infos))
脚本3,用来发现容器的服务(tomcat nginx),之前脚本2和脚本3是一个脚本,发现容器时出现好多问题。
# cat /opt/zabbix/script/discovery_docker_service.py
#!/usr/bin/env python

# Felix Shang
#QQ: 279379936
import commands
import sys

def docker_s():
    """Return the container names listed in docker_cons.txt, or 0 when none.

    The names are the fourth double-quote-delimited field of every line of
    /opt/zabbix/script/docker_cons.txt that contains CONTAINERNAME.  The
    integer 0 (not an empty list) signals "nothing found"; callers compare
    against it.
    """
    cons = commands.getoutput("""cat /opt/zabbix/script/docker_cons.txt|grep 'CONTAINERNAME'|grep -v grep|awk -F'"' '{print $4}'|tr '\n' '\t'""")
    names = cons.split()
    if names:
        return names
    return 0
def tomcat_s():
    """Return a dict mapping each container name to its tomcat connector ports.

    A container with no running tomcat process maps to an empty list.  Ports
    are read from the Connector lines (shutdown and AJP lines excluded) of
    the server.xml in each configuration directory found on the tomcat
    command line.  Exits the process with status 2 when docker_s() reports
    no containers.
    """
    cons = docker_s()
    if cons == 0:
        sys.exit(2)
    cons_d = {}
    for con in cons:
        stat = commands.getoutput("sudo /usr/bin/docker exec %s ps -ef|grep java|grep tomcat|grep -v grep>/dev/null;echo $?" % con)
        port_list = []
        # getoutput also captures stderr, so only the last token (the output
        # of "echo $?") is taken as the exit status.
        if int(stat.split()[-1]) == 0:
            tomcat_config_dirs = commands.getoutput("sudo /usr/bin/docker exec %s ps -ef | grep tomcat |grep -v grep | awk -F\\= '{print $2}' | awk -F'logging' '{print $1}'" % con).split()
            for tomcat_config_dir in tomcat_config_dirs:
                tomcat_config_file = tomcat_config_dir + 'server.xml'
                ports = commands.getoutput("""sudo /usr/bin/docker exec %s grep "port=" %s|grep -v "shutdown"|grep -v "AJP"|grep "Connector"|awk -F\\= '{print $2}'|awk '{print $1}'""" % (con, tomcat_config_file))
                # One server.xml may declare several connectors: keep one
                # list element per port, without the surrounding quotes.
                for port in ports.split():
                    port_list.append(port.strip('"'))
        cons_d[con] = port_list
    return cons_d
def nginx_s():
    """Return a dict mapping each container name to its nginx listening ports.

    A container with no running nginx process maps to an empty list.  Ports
    are the last ':'-separated field of the local address on the nginx rows
    of ``netstat -ntpul``; rows containing 40080 are skipped.  Exits the
    process with status 2 when docker_s() reports no containers.
    """
    cons = docker_s()
    if cons == 0:
        sys.exit(2)
    cons_d = {}
    for con in cons:
        stat = commands.getoutput("sudo /usr/bin/docker exec %s ps -ef|grep nginx|grep -v grep>/dev/null;echo $?" % con)
        port_list = []
        # getoutput also captures stderr, so only the last token (the output
        # of "echo $?") is taken as the exit status.
        if int(stat.split()[-1]) == 0:
            port_list = commands.getoutput("sudo /usr/bin/docker exec %s netstat -ntpul|grep nginx|grep -v 40080|awk '{print $4}'|awk -F\\: '{print $NF}'|tr '\n' ' '" % con).split()
        cons_d[con] = port_list
    return cons_d


def format_port_discovery(infos, port_macro):
    """Return the Zabbix low-level-discovery JSON text for container ports.

    Args:
        infos: dict mapping a container name to a list of ports.
        port_macro: macro name used for the port, e.g. '{#CON_TOMCAT_PORT}'.

    Returns:
        JSON text with one "data" entry per (container, port) pair, in
        sorted container order.  Containers with no ports add no entry, and
        no comma follows the last entry.
    """
    entries = []
    for con_info in sorted(infos):
        for port in infos[con_info]:
            entries.append('\t\t{"{#CONTAINERNAME}":"%s","%s":"%s"}'
                           % (con_info, port_macro, port))
    lines = ['{', '\t"data":[']
    if entries:
        lines.append(',\n'.join(entries))
    lines.extend(['\t]', '}'])
    return '\n'.join(lines)


if __name__ == "__main__":
    # Expected call: discovery_docker_service.py tomcat|nginx
    if len(sys.argv) == 2 and sys.argv[1] == 'tomcat':
        print(format_port_discovery(tomcat_s(), '{#CON_TOMCAT_PORT}'))
    elif len(sys.argv) == 2 and sys.argv[1] == 'nginx':
        print(format_port_discovery(nginx_s(), '{#CON_NGINX_PORT}'))
    # Any other argument list prints nothing.
#vim /etc/sudoers   //zabbix_agent是通过zabbix用户执行,通过sudo提权让zabbix用户对脚本有执行权限。
zabbix    ALL=(root) NOPASSWD:/usr/bin/docker,/sbin/fdisk,/usr/sbin/dmidecode,/usr/bin/nc

Zabbix服务端的配置:
导入模板:Template docker, 宿主机关联此模板即可。


报错:
Server获取值报错:[ZBX_NOTSUPPORTED]
# vim zabbix_agentd.conf
# 设置超时时间
Timeout=30


页: [1]
查看完整版本: zabbix监控docker