Python: NNTP newsgroup servers
http://www.newzbot.com/serverlist.php?since=ALL&orderby=kps&sortorder=desc&show_maxgroup=on&show_post=on&show_kps=on&show_created=on
This is where you can find which NNTP servers are currently available.
The goal of this project is to collect news items and turn them into an HTML report (other report formats would work just as well). The complete code follows:
'''
Created on 2012-7-18

@author: mars
'''
from nntplib import NNTP
from time import time, strftime, localtime
from email import message_from_string
from urllib import urlopen
import textwrap
import re

day = 24*60*60

def wrap(string, max=70):
    # wrap the string so that no line is wider than max characters
    return '\n'.join(textwrap.wrap(string, max)) + '\n'

class NewsAgent:
    # gathers news items from news sources and distributes them to news destinations
    def __init__(self):
        self.sources = []
        self.destinations = []
    def addSource(self, source):
        self.sources.append(source)
    def addDestination(self, dest):
        self.destinations.append(dest)
    def distribute(self):
        # collect the items from every source, then hand them to every destination
        items = []
        for source in self.sources:
            items.extend(source.getItems())
        for dest in self.destinations:
            dest.receiveItems(items)

class NewsItem:
    # a simple news item consisting of a title and body text
    def __init__(self, title, body):
        self.title = title
        self.body = body

class NNTPSource:
    # a news source that fetches items from an NNTP group
    def __init__(self, servername, group, window):
        self.servername = servername
        self.group = group
        self.window = window
    def getItems(self):
        start = localtime(time() - self.window*day)
        date = strftime('%y%m%d', start)
        hour = strftime('%H%M%S', start)

        server = NNTP(self.servername)

        # newnews() returns (response, list of message ids) for articles
        # posted to the group since the given date and time
        resp, ids = server.newnews(self.group, date, hour)

        for id in ids:
            # article() returns (response, article number, message id, lines)
            resp, num, msgid, lines = server.article(id)
            message = message_from_string('\n'.join(lines))

            title = message['subject']
            body = message.get_payload()
            if message.is_multipart():
                # for multipart messages, use the payload of the first part
                body = body[0].get_payload()

            yield NewsItem(title, body)

        server.quit()

class SimpleWebSource:
    # a news source that extracts items from a web page with regular expressions
    def __init__(self, url, titlePattern, bodyPattern):
        self.url = url
        self.titlePattern = re.compile(titlePattern)
        self.bodyPattern = re.compile(bodyPattern)
    def getItems(self):
        text = urlopen(self.url).read()
        titles = self.titlePattern.findall(text)
        bodies = self.bodyPattern.findall(text)
        for title, body in zip(titles, bodies):
            yield NewsItem(title, wrap(body))

class PlainDestination:
    # formats all news items as plain text
    def receiveItems(self, items):
        for item in items:
            print item.title
            print '-'*len(item.title)
            print item.body

class HTMLDestination:
    # formats all news items as an HTML page
    def __init__(self, filename):
        self.filename = filename

    def receiveItems(self, items):
        out = open(self.filename, 'w')
        print >> out, """
        <html>
        <head>
        <title>Today's News</title>
        </head>
        <body>
        <h1>Today's News</h1>
        """

        print >> out, '<ul>'
        id = 0
        for item in items:
            id += 1
            print >> out, '<li><a href="#%i">%s</a></li>' % (id, item.title)
        print >> out, '</ul>'

        id = 0
        for item in items:
            id += 1
            print >> out, '<h2><a name="%i">%s</a></h2>' % (id, item.title)
            print >> out, '<pre>%s</pre>' % item.body

        print >> out, """
        </body>
        </html>
        """
        out.close()

def runDefaultSetup():
    # the sources and destinations below can be changed to suit your needs
    agent = NewsAgent()

    # a web source scraped with regular expressions: BBC News
    #bbc_url = 'http://www.chinanews.com/'
    bbc_url = 'http://www.bbc.co.uk/news/'
    #bbc_url = 'http://www.bbc.co.uk/text_only.stm'
    bbc_title = r'(?s)a href="[^"]*>\s*<b>\s*(.*?)\s*</b>'
    bbc_body = r'(?s)</a>\s*<br/>\s*(.*?)\s*<'
    bbc = SimpleWebSource(bbc_url, bbc_title, bbc_body)

    agent.addSource(bbc)

    # an NNTP source: gmane.comp.python.apple on news.gmane.org
    clpa_server = 'news.gmane.org'
    clpa_group = 'gmane.comp.python.apple'
    clpa_window = 1
    clpa = NNTPSource(clpa_server, clpa_group, clpa_window)
    agent.addSource(clpa)

    # add the plain-text and HTML destinations
    agent.addDestination(PlainDestination())
    agent.addDestination(HTMLDestination('news.html'))

    # distribute the items
    agent.distribute()

if __name__ == '__main__':
    runDefaultSetup()
This program actually comes from the second edition of the tutorial, but the news server given there no longer works, which is why I put the link for finding usable servers at the top of this post. For example, here I use:
clpa_server='news.gmane.org'
clpa_group='gmane.comp.python.apple'
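Before wiring a server into NNTPSource, it can be worth a quick interactive check that the host accepts connections and actually carries the group. A minimal sketch (Python 2, using the same nntplib calls as the program above; the server and group names are simply the ones used in this post):

from nntplib import NNTP

# connect and select the group; group() returns
# (response, article count, first article number, last article number, group name)
server = NNTP('news.gmane.org')
resp, count, first, last, name = server.group('gmane.comp.python.apple')
print 'group %s has %s articles (%s-%s)' % (name, count, first, last)
server.quit()

If group() raises an error, pick another server or group from the newzbot list above.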
A few words about the code. It defines, in order, NewsAgent, NewsItem, NNTPSource, SimpleWebSource, PlainDestination, HTMLDestination, and finally runDefaultSetup.
When the program starts, runDefaultSetup is what runs: it instantiates NewsAgent as agent, builds a SimpleWebSource from its three arguments (the URL, a title regex, and a body regex), and passes the resulting bbc object to agent.addSource(). The NNTPSource part is set up the same way.
Finally, agent.addDestination() is called for each destination, and HTMLDestination writes the generated report to news.html. Extending the setup with your own sources or destinations works the same way, as the sketch below shows.
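Because every source only needs a getItems() method that yields NewsItem objects, and every destination only needs a receiveItems() method, the agent is easy to extend. As a rough sketch, a hypothetical extra destination (the class name and the news.txt file are my own choices, not part of the original program) that writes the report as plain text to a file could look like this:

class TextFileDestination:
    # hypothetical destination: write all items to a plain-text file
    def __init__(self, filename):
        self.filename = filename
    def receiveItems(self, items):
        out = open(self.filename, 'w')
        for item in items:
            out.write('%s\n' % item.title)
            out.write('-'*len(item.title) + '\n')
            out.write('%s\n\n' % item.body)
        out.close()

It plugs into the setup exactly like the built-in destinations: agent.addDestination(TextFileDestination('news.txt')).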