Python正则表达式,统计分析nginx访问日志
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
class CountPatt(object):
    """Count how often each regex match occurs across the lines of a file.

    Counts accumulate in ``self.result`` (matched text -> occurrences), so
    calling ``count_patt`` on several files adds their totals together.
    """

    def __init__(self, patt):
        """Compile ``patt`` and start with an empty tally.

        :param patt: regular expression (string) searched for in each line.
        """
        self.patt = re.compile(patt)
        self.result = {}

    def count_patt(self, fname):
        """Tally the first match of the pattern on every line of ``fname``.

        Lines without a match are skipped.

        :param fname: path of the text file to scan.
        :returns: ``self.result``, mapping matched text to its count.
        """
        with open(fname) as fobj:
            for line in fobj:
                match = self.patt.search(line)
                if match:
                    key = match.group()
                    # Update the entry for this key; assigning to
                    # self.result itself would replace the dict with an int.
                    self.result[key] = self.result.get(key, 0) + 1
        return self.result

    def sort(self):
        """Return ``(matched_text, count)`` pairs, highest count first.

        Pairs with equal counts keep the order in which ``self.result``
        yields them (``sorted`` is stable, also with ``reverse=True``).
        ``self.result`` itself is not modified.

        :returns: a new list of ``(matched_text, count)`` tuples.
        """
        return sorted(
            self.result.items(),
            key=lambda pair: pair[1],  # order by the count, not the text
            reverse=True,
        )
if __name__ == "__main__":
    # Demo: tally client IPs in an nginx/httpd access log and print the
    # raw counts followed by the sorted ranking.
    httpd_log = '/tmp/access.log'
    # An IPv4-looking address at the start of the line.
    ip_pattern = r'^(\d+\.){3}\d+'
    # Alternative pattern: pass this to CountPatt instead of ip_pattern to
    # tally by browser name rather than by client address.
    browser_pattern = r'Chrome|Safari|Firefox'
    a = CountPatt(ip_pattern)
    # Single-argument, parenthesised print is valid on Python 2 and 3 and
    # produces the same output on both.
    print(a.count_patt(httpd_log))
    print(a.sort())
页:
[1]