haoman 发表于 2018-11-9 10:18:40

从Nginx的access日志统计PV、UV和热点资源

# cat count.py  
from __future__ import print_function
  
from collections import Counter
  

  
# Positions of the fields of interest after splitting a log line on
# whitespace. These match nginx's default "combined" format, e.g.
#   1.2.3.4 - - [09/Nov/2018:10:18:40 +0800] "GET /path HTTP/1.1" 200 ...
# NOTE(review): adjust RESOURCE_FIELD if the server uses a custom log_format.
IP_FIELD = 0
RESOURCE_FIELD = 6


def count_access_log(lines):
    """Compute PV, UV and per-resource hit counts from access-log lines.

    Args:
        lines: iterable of raw log lines (e.g. an open file object).

    Returns:
        A ``(pv, uv, hot_resources)`` tuple where ``pv`` is the number of
        non-blank lines, ``uv`` is the number of distinct client IPs and
        ``hot_resources`` is a ``Counter`` mapping resource -> hit count.
    """
    pv = 0
    unique_ips = set()           # only distinct IPs are needed for UV
    hot_resources = Counter()    # counter keyed by requested resource

    for line in lines:
        fields = line.split()
        if not fields:
            # Blank line: nothing to count.
            continue

        pv += 1
        unique_ips.add(fields[IP_FIELD])

        # Truncated/malformed lines may lack the request field; skip it
        # rather than crash on an IndexError.
        if len(fields) > RESOURCE_FIELD:
            hot_resources[fields[RESOURCE_FIELD]] += 1

    return pv, len(unique_ips), hot_resources


def main():
    """Read ``access.log`` and print PV, UV and the 10 hottest resources."""
    with open('access.log', 'r') as fin:
        pv, uv, hot_resources = count_access_log(fin)

    print("PV is: {0:d}".format(pv))
    print("UV is: {0:d}".format(uv))

    # Counter.most_common(10) yields the 10 most frequently hit resources.
    for key, val in hot_resources.most_common(10):
        print(val, key)


if __name__ == '__main__':
    main()
  

  
# python count.py
  
PV is: 1881955
  
UV is: 64953
  
92838 http://download.helloworld.com/hello/hello
  
88873 http://download.helloworld.com/world/hi/
  
57711 http://appy.helloworld.com/world/js/jquery-1.10.1.min.js
  
46980 http://download.helloworld.com/favicon.ico
  
38759 http://appy.helloworld.com/world/css/style.css?t=00001
  
38684 http://appy.helloworld.com/world/css/base.css
  
35404 http://appy.helloworld.com/favicon.ico
  
34907 http://download.helloworld.com/world/js/jquery-1.10.1.min.js
  
34882 http://appy.helloworld.com/world/img/hi.jpg
  
34445 http://download.helloworld.com/world/css/base.css


页: [1]
查看完整版本: 从Nginx的access日志统计PV、UV和热点资源