python版本的curl工具pycurl学习

julley 发表于 2018-8-4 08:19:11

　　
import os
import sys
import time
import threading
import Queue

# Prefer the C implementation of StringIO when it is available and fall
# back to the pure-Python module otherwise.
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

import pycurl

# We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
# the libcurl tutorial for more info.
try:
    import signal
    from signal import SIGPIPE, SIG_IGN
except ImportError:
    # The signal module or SIGPIPE is not available on this platform,
    # so there is nothing to ignore.
    pass
else:
    # Only reached when both names were imported successfully.
    signal.signal(SIGPIPE, SIG_IGN)

# The first command-line argument names a text file containing the URLs
# to check, one per line; "-" reads the list from standard input instead.
try:
    if sys.argv[1] == "-":
        urls = sys.stdin.readlines()
    else:
        # The context manager closes the file once the lines are read.
        with open(sys.argv[1], 'rb') as url_file:
            urls = url_file.readlines()
except (IndexError, IOError):
    # IndexError: no argument was given; IOError: the file could not be
    # opened or read.  Either way, show the usage line and stop.
    print("Usage: %s check_urls.txt <file with urls to check>" % sys.argv[0])
    raise SystemExit

class Curl:
    """Small wrapper around one pycurl.Curl handle for a single URL.

    The response body is written into ``self.body`` and the HTTP status
    code is stored in ``self.http_code`` when ``close()`` is called.
    """

    def __init__(self, url):
        """Create and configure the transfer handle for *url*."""
        self.url = url
        self.body = StringIO()
        # Stays 0 until close() has read the status from the handle.
        self.http_code = 0

        self._curl = pycurl.Curl()
        self._curl.setopt(pycurl.URL, self.url)
        self._curl.setopt(pycurl.FOLLOWLOCATION, True)
        self._curl.setopt(pycurl.CONNECTTIMEOUT, 15)
        self._curl.setopt(pycurl.TIMEOUT, 15)
        # Collect the response body in memory.
        self._curl.setopt(pycurl.WRITEFUNCTION, self.body.write)
        # Needed when transfers run in threads; see the SIGPIPE note near
        # the top of the script.
        self._curl.setopt(pycurl.NOSIGNAL, 1)
        self._curl.debug = 0

    def perform(self):
        """Run the transfer; pycurl.error propagates to the caller."""
        self._curl.perform()

    def close(self):
        """Store the HTTP status code in ``http_code`` and free the handle.

        If the status cannot be read, ``http_code`` is set to 0.  The
        handle is closed in every case.
        """
        try:
            self.http_code = self._curl.getinfo(pycurl.HTTP_CODE)
        except pycurl.error:
            self.http_code = 0
        finally:
            self._curl.close()


# Fill the work queue with every usable line from the URL list.
queue = Queue.Queue()
for url in urls:
    url = url.strip()
    # Skip blank lines and lines commented out with a leading "#".
    if not url or url.startswith("#"):
        continue
    queue.put(url)

# Nothing has consumed the queue yet, so qsize() is the number of URLs.
num_urls = queue.qsize()
if not num_urls:
    # Explicit check instead of assert, which is stripped under -O.
    raise SystemExit("no urls are given")

# One connection (worker thread) per URL.
# NOTE(review): there is no upper bound here; a very long URL list
# starts an equally large number of threads.
num_conn = num_urls

class WorkerThread(threading.Thread):
    """Thread that fetches URLs from a shared queue until it is empty."""

    def __init__(self, queue):
        """Remember the shared *queue* of URLs to fetch."""
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Fetch queued URLs one at a time and print each status code.

        The thread finishes as soon as the queue reports it is empty.
        """
        while True:
            try:
                url = self.queue.get_nowait()
            except Queue.Empty:
                # No work left: end this thread.
                return
            c = Curl(url)
            try:
                c.perform()
            except pycurl.error:
                # A failed transfer must not kill the worker; the URL is
                # still reported below with the status close() records.
                pass
            finally:
                # Always read the status and release the handle.
                c.close()
            print("http_url:" + url + "\t" + "http_code:" + str(c.http_code))

# Launch num_conn workers that all drain the same queue.
threads = []
for _ in range(num_conn):
    worker = WorkerThread(queue)
    worker.start()
    threads.append(worker)

# Block until every worker has finished.
for worker in threads:
    worker.join()

页: [1]

运维网's Archiver

python版本的curl工具pycurl学习