# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
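
# For reference, enabling both pipelines in settings.py might look like the
# sketch below; the "douban" project/module path is an assumption based on
# the class names in this file:
#
#   ITEM_PIPELINES = {
#       "douban.pipelines.DoubanPipeline": 300,
#       "douban.pipelines.MySQLPipeline": 400,
#   }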
import json
from twisted.enterprise import adbapi
from scrapy import log
import MySQLdb
import MySQLdb.cursors


class DoubanPipeline(object):
    """Writes each scraped book to ./books.json, one JSON object per line."""

    def __init__(self):
        self.file = open("./books.json", "wb")

    def process_item(self, item, spider):
        # Encode every field to UTF-8 (this code targets Python 2, where
        # Scrapy fields arrive as unicode strings)
        for k in item:
            item[k] = item[k].encode("utf8")
        line = json.dumps(dict(item), ensure_ascii=False) + "\n"
        self.file.write(line)
        return item
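
    # Not part of the original file: a minimal close_spider hook (a sketch,
    # assuming the standard Scrapy item-pipeline lifecycle) so the JSON file
    # is flushed and closed when the spider finishes.
    def close_spider(self, spider):
        self.file.close()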


class MySQLPipeline(object):
    """Inserts each item into MySQL asynchronously via a Twisted adbapi pool."""

    def __init__(self):
        self.dbpool = adbapi.ConnectionPool(
            "MySQLdb",
            db="scrapy",                              # database name
            user="root",                              # database user
            passwd="qmf123456",                       # password
            cursorclass=MySQLdb.cursors.DictCursor,
            charset="utf8",
            use_unicode=False,
        )

    def process_item(self, item, spider):
        # runInteraction runs the insert in a worker thread inside a
        # transaction, so the crawl is not blocked on the database;
        # failures are routed to handle_error instead of crashing the spider
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tb, item):
        # tb is the adbapi transaction, a cursor-like object
        tb.execute(
            "insert into douban (name, author, press, date, page, price, "
            "score, ISBN, author_profile, content_description, link) "
            "values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
            (item["name"], item["author"], item["press"], item["date"],
             item["page"], item["price"], item["score"], item["ISBN"],
             item["author_profile"], item["content_description"], item["link"]))
        log.msg("Item data in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)
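
# A table matching the INSERT above might be created with the sketch below;
# the column types are assumptions inferred from the field names, not taken
# from the original project:
#
#   CREATE TABLE douban (
#       name VARCHAR(255),
#       author VARCHAR(255),
#       press VARCHAR(255),
#       date VARCHAR(32),
#       page VARCHAR(32),
#       price VARCHAR(32),
#       score VARCHAR(32),
#       ISBN VARCHAR(32),
#       author_profile TEXT,
#       content_description TEXT,
#       link VARCHAR(255)
#   ) DEFAULT CHARSET=utf8;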