Basic steps: parsing and storing data with Python Scrapy

Related posts on this blog:
- Redis syntax, using Redis from Python
- The Python pymongo module
- Operating a MySQL database from Python
Current file: D:\python_test\scrapyProject\scrapyProject\settings.py

ITEM_PIPELINES = {
    # The number is the execution priority of the pipeline class;
    # a smaller number means a higher priority.
    'xiaoshuoPro.pipelines.MysqlPipeline': 300,
    'xiaoshuoPro.pipelines.RedisPipeLine': 301,
    'xiaoshuoPro.pipelines.MongoPipeline': 302,
}
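All three pipelines below read item['title'], so the project's items.py presumably declares a single title field. A minimal sketch of that item class, assuming the class name XiaoshuoItem (the field name comes from the pipeline code; the class name is hypothetical):

# Assumed file: D:\python_test\scrapyProject\scrapyProject\items.py
import scrapy

class XiaoshuoItem(scrapy.Item):
    # The only field the pipelines rely on.
    title = scrapy.Field()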
Current file: D:\python_test\scrapyProject\scrapyProject\pipelines.py

from itemadapter import ItemAdapter
import json  # used below to serialize items for Redis
import pymysql
import redis
import pymongo
# Store data in MySQL
class MysqlPipeline:
    def open_spider(self, spider):
        self.conn = pymysql.Connect(
            host='127.0.0.1',
            port=3306,
            user='root',
            password='root',
            db='test',
            charset='utf8'
        )
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        title = item['title']
        # Use a parameterized query so quotes in the title cannot break the SQL.
        sql = 'insert into xiaoshuo (title) values (%s)'
        self.cursor.execute(sql, (title,))
        self.conn.commit()
        print('Wrote one record to MySQL!')
        # The spider only submits the item to the pipeline class with the highest
        # priority. That class's process_item must return item, which passes the
        # item object on to the next pipeline class.
        return item

    def close_spider(self, spider):
        self.cursor.close()
        self.conn.close()
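MysqlPipeline assumes the test database already has a xiaoshuo table with a title column. A minimal sketch of how that table could be created with pymysql, assuming varchar(255) is wide enough for the titles:

import pymysql

conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root',
                       password='root', db='test', charset='utf8')
with conn.cursor() as cursor:
    # varchar(255) is an assumption; size the column for your data.
    cursor.execute('create table if not exists xiaoshuo (title varchar(255))')
conn.commit()
conn.close()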
# Store data in Redis
class RedisPipeLine:
    def open_spider(self, spider):
        self.conn = redis.Redis(host='127.0.0.1', port=6379)

    def process_item(self, item, spider):
        # redis-py can only store bytes, strings, and numbers, so serialize the
        # item to JSON instead of pushing the Item object directly.
        self.conn.lpush('xiaoshuo', json.dumps(ItemAdapter(item).asdict()))
        print('Stored data in Redis!')
        return item

    def close_spider(self, spider):
        self.conn.close()
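Because the items go onto the Redis list as JSON strings, reading them back means decoding each element. A small usage sketch:

import json
import redis

conn = redis.Redis(host='127.0.0.1', port=6379)
# lrange returns the raw JSON strings; decode each one back into a dict.
for raw in conn.lrange('xiaoshuo', 0, -1):
    print(json.loads(raw))
conn.close()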
# Store data in MongoDB
class MongoPipeline:
    def open_spider(self, spider):
        self.conn = pymongo.MongoClient(host='127.0.0.1', port=27017)
        self.db_test = self.conn['test']

    def process_item(self, item, spider):
        # The other pipelines read item['title'], so use the same field name here.
        self.db_test['xiaoshuo'].insert_one({'title': item['title']})
        print('Inserted into MongoDB!')
        return item

    def close_spider(self, spider):
        self.conn.close()
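The spider file itself is not shown in this post; for context, this is a minimal sketch of a spider that would feed these pipelines. The spider name, start URL, and CSS selector are all hypothetical:

# Hypothetical spider, for illustration only.
import scrapy
from xiaoshuoPro.items import XiaoshuoItem  # assumed item class, see the items.py sketch above

class XiaoshuoSpider(scrapy.Spider):
    name = 'xiaoshuo'
    start_urls = ['https://example.com/xiaoshuo']  # placeholder URL

    def parse(self, response):
        for node in response.css('a.title::text'):  # hypothetical selector
            item = XiaoshuoItem()
            item['title'] = node.get()
            # Yielding hands the item to the highest-priority pipeline (MysqlPipeline);
            # each return item then passes it on to RedisPipeLine and MongoPipeline.
            yield item

Run it with scrapy crawl xiaoshuo and each yielded item passes through all three pipelines in priority order.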