q29191 发表于 2017-12-23 16:52:40

Python正则表达式,统计分析nginx访问日志

#!/usr/bin/env python  
# -*- coding: utf-8 -*-

  
import re
  

  
class CountPatt(object):
    """Count how often a regular-expression match occurs in a text file.

    Each line of the file is searched once; the matched text is used as
    the key and the number of lines producing that text is the value.
    """

    def __init__(self, patt):
        """Compile the pattern and start with an empty tally.

        Args:
            patt: Regular-expression source string passed to ``re.compile``.
        """
        self.patt = re.compile(patt)
        # Maps matched text -> number of lines on which it was found.
        self.result = {}

    def count_patt(self, fname):
        """Scan ``fname`` line by line and tally the pattern matches.

        Only the first match on each line is counted (``search`` is used,
        not ``findall``). Counts are added to ``self.result``, so calling
        this method again accumulates on top of earlier calls.

        Args:
            fname: Path of the text file to read.

        Returns:
            The ``self.result`` dict mapping matched text to its count.
        """
        with open(fname) as fobj:
            for line in fobj:
                match = self.patt.search(line)
                if match:
                    key = match.group()
                    # Update the entry for this key; rebinding self.result
                    # itself would throw the whole tally away.
                    self.result[key] = self.result.get(key, 0) + 1
        return self.result

    def sort(self):
        """Return the tally as ``(text, count)`` pairs, highest count first.

        ``self.result`` is not modified. The sort is stable, so entries
        with equal counts keep the order in which the dict yields them.

        Returns:
            A new list of ``(matched_text, count)`` tuples.
        """
        return sorted(
            self.result.items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
  

  

  
if __name__ == "__main__":
    # Demo: tally the leading IPv4-looking address of every line of the
    # access log, then show the raw dict followed by the ranked list.
    httpd_log = '/tmp/access.log'
    ip_pattern = r'^(\d+\.){3}\d+'
    # Alternative pattern: pass this to CountPatt instead of ip_pattern
    # to count browser names rather than client addresses.
    browser_pattern = r'Chrome|Safari|Firefox'
    a = CountPatt(ip_pattern)
    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    print(a.count_patt(httpd_log))
    print(a.sort())
页: [1]
查看完整版本: Python正则表达式,统计分析nginx访问日志