julley 发表于 2018-8-4 08:19:11

python版本的curl工具pycurl学习

  
import os,sys,time
  
import threading
  
import Queue
  

  
try:
  
   from cStringIO import StringIO
  
except ImportError:
  
   from StringIO import StringIO
  
import pycurl
  

  
# We should ignore SIGPIPE when using pycurl.NOSIGNAL - see
# the libcurl tutorial for more info.
try:
    import signal
    # Importing the two names also acts as a probe: if this platform's
    # signal module does not define SIGPIPE, the import raises ImportError
    # and the whole setup is skipped.
    from signal import SIGPIPE, SIG_IGN
    signal.signal(SIGPIPE, SIG_IGN)
except ImportError:
    # No SIGPIPE on this platform, so there is nothing to ignore.
    pass
  

  

  
# Load the list of URLs to check. The first command-line argument names a
# text file with one URL per line; "-" means read the list from stdin.
try:
    if sys.argv[1] == "-":
        urls = sys.stdin.readlines()
    else:
        # The context manager closes the file once the lines are read.
        with open(sys.argv[1], 'rb') as url_file:
            urls = url_file.readlines()
except (IndexError, IOError):
    # IndexError: no argument was given; IOError: the file could not be read.
    # Either way, show how to invoke the script and stop.
    print("Usage: %s check_urls.txt <file with urls to check>" % sys.argv[0])
    raise SystemExit
  

  
class Curl:
    """Wrap one pycurl handle that fetches a single URL.

    Attributes:
        url: the URL passed to the constructor.
        body: StringIO buffer that receives the response body.
        http_code: HTTP status code recorded by close(); 0 until then, and
            0 if the code could not be read from the handle.
    """

    def __init__(self, url):
        """Create and configure the underlying pycurl handle for *url*."""
        self.url = url
        self.body = StringIO()
        self.http_code = 0

        self._curl = pycurl.Curl()
        self._curl.setopt(pycurl.URL, self.url)
        self._curl.setopt(pycurl.FOLLOWLOCATION, True)
        # Same limit (15) for establishing the connection and for the
        # whole transfer.
        self._curl.setopt(pycurl.CONNECTTIMEOUT, 15)
        self._curl.setopt(pycurl.TIMEOUT, 15)
        # Collect the response body in memory instead of printing it.
        self._curl.setopt(pycurl.WRITEFUNCTION, self.body.write)
        # Handles are used from worker threads, so libcurl must not rely
        # on signals.
        self._curl.setopt(pycurl.NOSIGNAL, 1)
        self._curl.debug = 0

    def perform(self):
        """Run the transfer. Errors raised by pycurl propagate to the caller."""
        self._curl.perform()

    def close(self):
        """Record the HTTP status code, then release the pycurl handle.

        If the status code cannot be read, http_code is set to 0. The
        handle is closed in every case.
        """
        try:
            self.http_code = self._curl.getinfo(pycurl.HTTP_CODE)
        except pycurl.error:
            self.http_code = 0
        finally:
            self._curl.close()
  

  

  
# Fill the work queue with the URLs to check, skipping blank lines and
# comment lines (those whose first non-blank character is "#").
queue = Queue.Queue()
for url in urls:
    url = url.strip()
    if not url or url.startswith("#"):
        continue
    queue.put(url)

# Stop early with a clear message when there is nothing to check. An
# explicit exit is used because assert statements are stripped under -O.
if queue.empty():
    raise SystemExit("no urls are given")

num_urls = queue.qsize()
# One connection (worker thread) per URL.
num_conn = num_urls
  

  
class WorkerThread(threading.Thread):
    """Thread that takes URLs from a shared queue and reports their status.

    For every URL taken from the queue, the worker performs the request
    through a Curl object and prints one line with the URL and the HTTP
    status code. The thread ends when the queue is empty.
    """

    def __init__(self, queue):
        """Remember the shared *queue* this worker takes URLs from."""
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process URLs until the queue has no more entries."""
        while True:
            try:
                url = self.queue.get_nowait()
            except Queue.Empty:
                # Nothing left to do: end this thread.
                return
            c = Curl(url)
            try:
                c.perform()
            except pycurl.error:
                # A failed transfer must not kill the worker; the failure
                # is still reported by the status line printed below,
                # using whatever code close() records.
                pass
            finally:
                # Always record the status code and release the handle.
                c.close()
            print("http_url:" + url + "\t" + "http_code:" + str(c.http_code))
  
# Launch one worker per connection; every worker shares the same queue.
threads = []
for _ in range(num_conn):
    worker = WorkerThread(queue)
    threads.append(worker)
    worker.start()

# Block until every worker has finished.
for worker in threads:
    worker.join()
页: [1]
查看完整版本: python版本的curl工具pycurl学习