python版本的curl工具pycurl学习
import os,sys,time
import threading
import Queue
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
import pycurl
# The curl handles below are created with pycurl.NOSIGNAL, so the process
# itself has to ignore SIGPIPE - see the libcurl tutorial for more info.
try:
    import signal
    # Importing the names explicitly turns "this platform has no SIGPIPE"
    # into an ImportError, which is the one failure that is safe to skip.
    from signal import SIGPIPE, SIG_IGN
    signal.signal(SIGPIPE, SIG_IGN)
except ImportError:
    pass
# The URL list is read from the file named by the first command-line
# argument, one URL per line; "-" means read the list from stdin instead.
try:
    if sys.argv[1] == "-":
        urls = sys.stdin.readlines()
    else:
        with open(sys.argv[1], 'rb') as url_file:
            urls = url_file.readlines()
except (IndexError, IOError):
    # IndexError: no argument was given; IOError: the input could not be read.
    print("Usage: %s check_urls.txt <file with urls to check>" % sys.argv[0])
    raise SystemExit
class Curl:
    """Fetch a single URL with pycurl and remember its HTTP status code.

    Attributes:
        url: the URL passed to the constructor.
        body: StringIO buffer that receives the response body.
        http_code: 0 until close() is called, then the status code read
            from the handle (or 0 if it could not be read).
    """

    def __init__(self, url):
        """Create a pycurl handle configured to fetch *url*."""
        self.url = url
        self.body = StringIO()
        self.http_code = 0
        self._curl = pycurl.Curl()
        self._curl.setopt(pycurl.URL, self.url)
        self._curl.setopt(pycurl.FOLLOWLOCATION, True)
        # Both limits are in seconds (libcurl option semantics).
        self._curl.setopt(pycurl.CONNECTTIMEOUT, 15)
        self._curl.setopt(pycurl.TIMEOUT, 15)
        self._curl.setopt(pycurl.WRITEFUNCTION, self.body.write)
        # Handles are used from worker threads, so libcurl must not rely on
        # signals for its timeouts.
        self._curl.setopt(pycurl.NOSIGNAL, 1)
        self._curl.debug = 0

    def perform(self):
        """Run the transfer; pycurl.error propagates to the caller."""
        self._curl.perform()

    def close(self):
        """Store the HTTP status in self.http_code and release the handle.

        If the status cannot be read, http_code falls back to 0. The handle
        is closed in every case.
        """
        try:
            self.http_code = self._curl.getinfo(pycurl.HTTP_CODE)
        except pycurl.error:
            self.http_code = 0
        finally:
            self._curl.close()
# Queue every usable URL: blank lines and "#" comment lines are skipped.
queue = Queue.Queue()
for url in urls:
    url = url.strip()
    if not url or url.startswith("#"):
        continue
    queue.put(url)
num_urls = queue.qsize()
# Explicit check instead of assert, so it still runs under "python -O".
if not num_urls:
    raise SystemExit("no urls are given")
# One worker thread per URL.
# NOTE(review): this is unbounded - a very long URL list starts just as many
# threads; consider capping num_conn.
num_conn = num_urls
class WorkerThread(threading.Thread):
    """Thread that checks URLs taken from a shared queue until it is empty."""

    def __init__(self, queue):
        """Remember *queue*, the Queue.Queue of URL strings to drain."""
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Fetch queued URLs one by one and print each URL's HTTP status.

        Returns once the queue is empty. A failed transfer is reported on
        stderr and still produces the regular result line on stdout.
        """
        while True:
            try:
                url = self.queue.get_nowait()
            except Queue.Empty:
                return
            c = Curl(url)
            try:
                c.perform()
            except pycurl.error as exc:
                # Timeouts, DNS failures etc. must not kill the worker:
                # report the error and move on to the next URL.
                sys.stderr.write(
                    "http_url:" + url + "\t" + "error:" + str(exc) + "\n")
            finally:
                # Always release the handle; close() also fills in http_code.
                c.close()
            print("http_url:" + url + "\t" + "http_code:" + str(c.http_code))
# Create num_conn workers that all drain the same queue, then launch them.
threads = [WorkerThread(queue) for _ in range(num_conn)]
for worker in threads:
    worker.start()
# Block until every worker has finished.
for worker in threads:
    worker.join()
页:
[1]