python之nntp服务器组

saundy · 发表于 2018-8-6 13:11:06

http://www.newzbot.com/serverlist.php?since=ALL&orderby=kps&sortorder=desc&show_maxgroup=on&show_post=on&show_kps=on&show_created=on

在上面这个网址可以查到当前有哪些可用的NNTP服务器。

　　这个项目的目的就是收集信息，并且将其生成一个html的报告（当然也可以是其他形式的报告），完整代码如下：
　　

'''''
Created on 2012-7-18
@author: mars
'''
import nntplib
from nntplib import NNTP
from time import time,strftime,localtime
from email import message_from_string
from urllib import urlopen
import textwrap
import re
# Length of one day in seconds; used to turn a window given in days into seconds.
day = 60 * 60 * 24
def wrap(string, max=70):
    """Re-flow *string* so that no line is longer than *max* characters.

    The wrapped lines are joined with newlines and a trailing newline is
    appended, so an empty input yields a lone newline.
    """
    # Forward the width: previously ``max`` was accepted but never used, so
    # the text was always wrapped at textwrap's default width.
    return '\n'.join(textwrap.wrap(string, max)) + '\n'
class NewsAgent:
    """Collect news items from registered sources and pass them to destinations.

    A source is any object with a ``getItems()`` method returning an iterable
    of items; a destination is any object with a ``receiveItems(items)``
    method.
    """

    def __init__(self):
        """Start with no sources and no destinations."""
        self.sources = []
        self.destinations = []

    def addSource(self, source):
        """Register *source* as a provider of news items."""
        self.sources.append(source)

    def addDestination(self, dest):
        """Register *dest* as a receiver of news items."""
        self.destinations.append(dest)

    def distribute(self):
        """Fetch the items of every source and hand the full list to every destination."""
        # Materialise into a list so each destination sees all items, even
        # when a source returns a one-shot generator.
        items = []
        for source in self.sources:
            items.extend(source.getItems())
        for dest in self.destinations:
            dest.receiveItems(items)
class NewsItem:
    """A simple news item consisting of a title and a body."""

    def __init__(self, title, body):
        """Store *title* and *body* unchanged as attributes."""
        self.title = title
        self.body = body
class NNTPSource:
    """News source that reads recent articles from one NNTP newsgroup.

    Uses the tuple-returning nntplib calls the original code relied on:
    ``group()`` giving (response, count, first, last, name) and ``article()``
    giving a tuple whose fourth element is the list of article lines.
    """

    def __init__(self, servername, group, window):
        """Remember the server host, the group name and the window in days."""
        self.servername = servername
        self.group = group
        self.window = window

    def _postedBefore(self, message, cutoff):
        """Return True when the message's Date header parses to a time before *cutoff*.

        A missing or unparseable Date header counts as "not older", so such
        articles are kept rather than silently dropped.
        """
        from email.utils import mktime_tz, parsedate_tz

        header = message['date']
        if not header or not header.strip():
            return False
        try:
            parsed = parsedate_tz(header)
            return parsed is not None and mktime_tz(parsed) < cutoff
        except (ValueError, OverflowError, IndexError):
            return False

    def getItems(self):
        """Yield a NewsItem for each article posted within the last *window* days.

        Articles are visited from the newest number downwards and iteration
        stops at the first one whose Date header is older than the window.
        This assumes article numbers roughly follow posting order.
        """
        cutoff = time() - self.window * day
        server = NNTP(self.servername)
        try:
            # Elements 2 and 3 of the group() reply are the first and last
            # article numbers. The previous code iterated over element 2
            # itself, i.e. over the digits of a single number.
            first, last = server.group(self.group)[2:4]
            lowest = int(first)
            number = int(last) + 1
            while number > lowest:
                number -= 1
                try:
                    lines = server.article(str(number))[3]
                except nntplib.NNTPTemporaryError:
                    # Numbers inside first..last may no longer exist on the server.
                    continue
                message = message_from_string('\n'.join(lines))
                if self._postedBefore(message, cutoff):
                    break
                title = message['subject']
                body = message.get_payload()
                if message.is_multipart():
                    body = body[0]
                yield NewsItem(title, body)
        finally:
            # Always close the connection, even when an error occurs or the
            # consumer abandons the generator early.
            server.quit()
class SimpleWebSource:
    """News source that extracts titles and bodies from a web page with regular expressions."""

    def __init__(self, url, titlePattern, bodyPattern):
        """Store *url* and compile the two patterns used to find titles and bodies."""
        self.url = url
        self.titlePattern = re.compile(titlePattern)
        self.bodyPattern = re.compile(bodyPattern)

    def getItems(self):
        """Download the page and yield one NewsItem per (title, body) match pair.

        Titles and bodies are paired positionally; surplus matches on either
        side are dropped by ``zip``.
        """
        page = urlopen(self.url)
        try:
            text = page.read()
        finally:
            page.close()
        titles = self.titlePattern.findall(text)
        bodies = self.bodyPattern.findall(text)
        for title, body in zip(titles, bodies):
            # Was ``NewsItem(title.wrap(body))``: strings have no ``wrap``
            # method and NewsItem needs two arguments. The intent is the
            # title plus the body re-flowed by the module-level wrap().
            yield NewsItem(title, wrap(body))
class PlainDestination:
    """Destination that prints every item as plain text on standard output."""

    def receiveItems(self, items):
        """Print the title and then the body of each item, one per line."""
        for item in items:
            # A parenthesised single-argument print produces the same output
            # as a Python 2 print statement and as a Python 3 function call.
            print(item.title)
            print(item.body)
class HTMLDestination:
    """Destination that writes all items into a single HTML report file."""

    def __init__(self, filename):
        """Remember the path of the report file to (over)write."""
        self.filename = filename

    def receiveItems(self, items):
        """Write an index list followed by one titled section per item.

        Anchors are numbered from 1 in item order, so index entry ``#n``
        links to the n-th section.
        """
        # The items are walked twice (index, then sections); copy them so a
        # one-shot iterator does not leave the second pass empty.
        items = list(items)

        # The context manager closes the file even if formatting an item fails.
        with open(self.filename, 'w') as out:

            def emit(text):
                # Each chunk is followed by a newline, as before.
                out.write(text + '\n')

            emit("""
<html>
<head>
<title>Today's News</title>
</head>
<body>
<h1>Today's News</h1>
""")
            emit('<ul>')
            for index, item in enumerate(items, 1):
                emit('<li><a href="#%i">%s</a></li>' % (index, item.title))
            emit('</ul>')
            # NOTE(review): titles and bodies are inserted without HTML
            # escaping. Left unchanged because web-scraped fragments may
            # already be HTML; confirm per source before escaping here.
            for index, item in enumerate(items, 1):
                emit('<h2><a name="%i">%s</a></h2>' % (index, item.title))
                emit('<pre>%s</pre>' % item.body)
            emit("""
</body>
</html>
""")
def runDefaultSetup():
    """Build the default agent (one web source, one NNTP source) and run it.

    The sources and destinations below are only defaults; edit them to
    suit. Defined as a function so nothing is fetched until it is called.
    As a ``class`` the body ran as soon as the module was loaded, even on
    import.
    """
    agent = NewsAgent()

    # Web source: the BBC news page, scraped with a title and a body regex.
    bbc_url = 'http://www.bbc.co.uk/news/'
    bbc_title = r'(?s)a href="[^"]*>\s*<b>\s*(.*?)\s*</b>'
    bbc_body = r'(?s)</a>\s*<br/>\s*(.*?)\s*<'
    bbc = SimpleWebSource(bbc_url, bbc_title, bbc_body)
    agent.addSource(bbc)

    # NNTP source: the gmane.comp.python.apple group, one-day window.
    clpa_server = 'news.gmane.org'
    clpa_group = 'gmane.comp.python.apple'
    clpa_window = 1
    clpa = NNTPSource(clpa_server, clpa_group, clpa_window)
    agent.addSource(clpa)

    # Output both as plain text on stdout and as an HTML report file.
    agent.addDestination(PlainDestination())
    agent.addDestination(HTMLDestination('news.html'))

    # Fetch everything and publish it to all destinations.
    agent.distribute()
# Run the default configuration only when this file is executed as a script.
if __name__ == '__main__':
    runDefaultSetup()

　　

　　其实这个程序呢在第二版的教程上有，不过呢那个给出的服务器不能用，所以在文章的开始的时候我就给出了可以找到服务器地址的地方，比如我这里用的就是
　　clpa_server='news.gmane.org'
　　
clpa_group='gmane.comp.python.apple'
　　
这个！
　　当然这段代码我也稍微说下，最开始的类NewsAgent,接着是NewsItem,NNTPSource,SimpleWebSource,PlainDestination,HTMLDestination和runDefaultSetup

　　程序一运行，首先执行的是runDefaultSetup：这里先将NewsAgent实例化为agent；SimpleWebSource的3个参数分别是url、匹配标题的正则表达式和匹配正文的正则表达式，用它们创建bbc，随后将bbc作为参数调用agent的addSource。同样的道理完成了NNTPSource这一块。
　　最后就是调用agent.addDestination添加输出目标，其中HTMLDestination以news.html作为生成报告的HTML文件！

账号		自动登录	找回密码
密码			立即注册

Centos6.5×64安装配置openmeetings3.0.3详

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

[经验分享] python之nntp服务器组

浏览过的版块

扫码加入运维网微信交流群