从Nginx的access日志统计PV、UV和热点资源
# cat count.py
from __future__ import print_function
from collections import Counter
# Positions of the fields of interest after splitting a log line on whitespace.
# NOTE(review): the subscripts were missing from the original listing. Index 0
# is the client address and index 6 is the request path in Nginx's default
# "combined" log format -- adjust RESOURCE_FIELD if a custom log_format is used.
IP_FIELD = 0
RESOURCE_FIELD = 6


def analyze_access_log(lines, ip_field=IP_FIELD, resource_field=RESOURCE_FIELD):
    """Compute PV, UV and per-resource hit counts from access-log lines.

    Args:
        lines: iterable of log lines (for example an open file object).
        ip_field: index of the client IP among the whitespace-separated fields.
        resource_field: index of the requested resource among those fields.

    Returns:
        A ``(pv, uv, hot_resources)`` tuple: ``pv`` is the number of lines
        that carry a client IP, ``uv`` is the number of distinct client IPs,
        and ``hot_resources`` is a ``Counter`` mapping resource -> hit count.
    """
    pv = 0
    unique_ips = set()  # only distinct IPs are kept, not one entry per request
    hot_resources = Counter()  # counter tracking how often each resource is hit
    for line in lines:
        fields = line.split()
        if len(fields) <= ip_field:
            # Blank or truncated line: no client IP to count.
            continue
        pv += 1
        unique_ips.add(fields[ip_field])
        if len(fields) > resource_field:
            hot_resources[fields[resource_field]] += 1
    return pv, len(unique_ips), hot_resources


def main(path='access.log', top_n=10):
    """Print PV, UV and the ``top_n`` most requested resources of ``path``."""
    with open(path, 'r') as fin:
        pv, uv, hot_resources = analyze_access_log(fin)
    print("PV is: {0:d}".format(pv))
    print("UV is: {0:d}".format(uv))
    # Counter.most_common returns the entries with the highest counts first.
    for key, val in hot_resources.most_common(top_n):
        # A single formatted string prints "count resource" on Python 2 and 3.
        print("{0} {1}".format(val, key))


if __name__ == '__main__':
    main()
# python count.py
PV is: 1881955
UV is: 64953
92838 http://download.helloworld.com/hello/hello
88873 http://download.helloworld.com/world/hi/
57711 http://appy.helloworld.com/world/js/jquery-1.10.1.min.js
46980 http://download.helloworld.com/favicon.ico
38759 http://appy.helloworld.com/world/css/style.css?t=00001
38684 http://appy.helloworld.com/world/css/base.css
35404 http://appy.helloworld.com/favicon.ico
34907 http://download.helloworld.com/world/js/jquery-1.10.1.min.js
34882 http://appy.helloworld.com/world/img/hi.jpg
34445 http://download.helloworld.com/world/css/base.css
页:
[1]