用python+selenium抓取微博24小时热门话题的前15个并保存到txt...

7564321 发表于 2016-12-19 13:23:48

抓取微博24小时热门话题的前15个（包括每个话题的阅读数），并将抓取到的内容保存至txt文件中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#coding=utf-8
from selenium import webdriver
import unittest
from time import sleep

class Weibo(unittest.TestCase):
    """Scrape the top Weibo 24-hour hot topics and save them to a text file.

    All of the work (login, scraping, writing the file) happens in ``setUp``;
    the single test method only reports that the run finished.
    """

    # NOTE(review): credentials are hard-coded in source. Prefer loading them
    # from the environment or a config file kept out of version control.
    USERNAME = '649004152@qq.com'
    PASSWORD = 'kemi_xxxx'

    HOME_URL = 'http://weibo.com/'
    HOT_TOPIC_URL = 'http://d.weibo.com/100803?refer=index_hot_new'
    TOPIC_LIMIT = 15      # how many hot topics to keep
    PAGE_LOAD_WAIT = 5    # seconds to sleep after each navigation

    def setUp(self):
        """Start Chrome, scrape the hot topics and write them to disk."""
        self.dr = webdriver.Chrome()
        try:
            self.hot_list = self.get_weibo_hot_topic()
            self.weibo_topic = self.get_top_rank_file()
        except Exception:
            # unittest does not call tearDown when setUp raises, so the
            # browser has to be closed here to avoid leaking the process.
            self.dr.quit()
            raise

    def get_weibo_hot_topic(self):
        """Log in, open the hot-topic page and collect the leading topics.

        Returns:
            A list of ``(rank_and_topic, read_count)`` string tuples holding
            at most ``TOPIC_LIMIT`` entries, in page order.
        """
        self.dr.get(self.HOME_URL)
        sleep(self.PAGE_LOAD_WAIT)
        self.login(self.USERNAME, self.PASSWORD)
        self.dr.get(self.HOT_TOPIC_URL)
        sleep(self.PAGE_LOAD_WAIT)

        # find_elements_* returns a list of elements; ``.text`` has to be read
        # from each element, not from the list itself.
        # NOTE(review): these find_*_by_* helpers are the Selenium 3 API this
        # file already uses; Selenium 4 replaces them with
        # find_elements(By.CSS_SELECTOR, ...) -- confirm the installed version.
        titles = self.dr.find_elements_by_css_selector('.title.W_autocut')
        numbers = self.dr.find_elements_by_css_selector('.number')

        # Assumes the i-th '.number' element belongs to the i-th
        # '.title.W_autocut' element -- TODO confirm against the page markup.
        limit = self.TOPIC_LIMIT
        return [
            (title.text, number.text)
            for title, number in zip(titles[:limit], numbers[:limit])
        ]

    def get_top_rank_file(self):
        """Write ``self.hot_list`` to ``<file_title>.txt`` encoded as UTF-8.

        Each entry is preceded by a separator line and written as the topic
        text followed by its read count.

        Returns:
            The name of the file that was written.
        """
        self.file_title = '微博24小时热门话题'
        file_name = self.file_title + '.txt'
        separate_line = '~~~~~~~~~~~~~~~~~~~~~~~~\n'

        chunks = []
        for topic, reads in self.hot_list:
            chunks.append(separate_line)
            chunks.append(topic + '阅读数：' + reads + '\n')

        # Binary mode plus a single explicit encode keeps the bytes on disk
        # identical across platforms; ``with`` closes the file on errors too.
        with open(file_name, 'wb') as out:
            out.write(''.join(chunks).encode('utf-8'))
        return file_name

    def login(self, username, password):
        """Fill in the login form on the current page and submit it."""
        user_box = self.dr.find_element_by_name('username')
        user_box.clear()
        user_box.send_keys(username)
        self.dr.find_element_by_name('password').send_keys(password)
        self.dr.find_element_by_css_selector('.info_list.login_btn').click()

    def test_weibo_topic(self):
        """Report completion; the scraping itself already ran in setUp."""
        print('抓取完毕')

    def tearDown(self):
        """Close the browser started in setUp."""
        self.dr.quit()

# Run the test case when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

网页如下：

生成txt文件如下：

页: [1]

运维网's Archiver

用python+selenium抓取微博24小时热门话题的前15个并保存到txt...