10477777 发表于 2018-8-10 08:51:12

Python 爬取糗事百科段子

  
#!/usr/bin/env python
  
# -*- coding: utf-8 -*-
  

  
import re
  
import urllib.request
  

  
def gettext(url, page):
    """Fetch one listing page and print every post found on it.

    The page is downloaded with a browser-like User-Agent header, decoded
    as UTF-8, and scanned with two regular expressions: the text of each
    ``<h2>`` element is treated as the author name and the body of each
    ``<div class="content">`` element as the post text. Authors and texts
    are paired by position and printed to standard output.

    Args:
        url: Address of the page to download.
        page: Page number; used only as a label in the printed output.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    headers = ("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36")
    opener = urllib.request.build_opener()
    # addheaders must be a list of (name, value) tuples.
    opener.addheaders = [headers]
    # Open through this opener directly instead of installing it globally,
    # and close the response as soon as the body has been read.
    with opener.open(url) as response:
        data = response.read().decode("utf-8")

    userpat = '<h2>(.*?)</h2>'
    textpat = '<div class="content">(.*?)</div>'
    # re.S lets "." span newlines, since both elements can be multi-line.
    userlist = re.compile(userpat, re.S).findall(data)
    textlist = re.compile(textpat, re.S).findall(data)

    # Pair by position rather than through a dict keyed on the author name:
    # a dict would silently drop posts whenever two authors share a name.
    for x, (key, value) in enumerate(zip(userlist, textlist), 1):
        # Strip raw newlines and <span> wrappers first, then turn <br/>
        # into real line breaks so only intended breaks remain.
        value = value.replace("\n", "")
        value = value.replace("<span>", "")
        value = value.replace("</span>", "")
        value = value.replace("<br/>", "\n")
        print("第" + str(page) + "页" + str(x) + "用户" + key)
        print("内容:" + value)
        print('\n')
        print("-----------------------------")
  

  
# Scrape and print listing pages 1 and 2 (range's upper bound is exclusive).
for i in range(1, 3):
    url = "https://www.qiushibaike.com/8hr/page/" + str(i)
    gettext(url, i)
页: [1]
查看完整版本: Python 爬取糗事百科段子