def saveToSqlite(spider_info, id):
    """Persist one crawled goods id together with the site id.

    Despite the function name, the record is written through
    ``mongodb_options`` rather than SQLite.

    Args:
        spider_info: mapping that must contain the key ``'goods_id'``.
        id: site identifier; echoed to stdout and stored with the goods id.

    Raises:
        KeyError: if ``spider_info`` has no ``'goods_id'`` entry (raised
            before any database handle is requested).
    """
    # Read the crawled field first so a malformed item fails before the
    # database handle is requested.
    goods_id = spider_info['goods_id']

    # Obtain the database handle, emit the debug trace, then insert.
    conn = mongodb_options.mongodb_init_uestc()
    print("-------------->")
    print(id)
    mongodb_options.insert_goods_id(conn, goods_id, id)
def saveToSqlite(spider_info, id):
    """Persist one crawled forum post together with the site id.

    Despite the function name, the record is written through
    ``mongodb_options`` rather than SQLite.

    Args:
        spider_info: mapping that must contain the keys ``'title'``,
            ``'context'``, ``'url'``, ``'auther'`` and ``'create_time'``.
            The ``'url'`` value is appended to the ``tieba.baidu.com`` host
            string, so it is presumably a site-relative path -- TODO confirm
            against the spider that builds this dict.
        id: site identifier; echoed to stdout and stored with the post.

    Raises:
        KeyError: if any of the required keys is missing (raised before any
            database handle is requested).
    """
    # Pull the crawled fields out of the item dict.
    title = spider_info['title']
    context = spider_info['context']
    url = "tieba.baidu.com" + spider_info['url']
    auther = spider_info['auther']
    create_time = spider_info['create_time']

    # Obtain the database handle, emit the debug trace, then insert.
    mongodb_conn = mongodb_options.mongodb_init_uestc()
    print("-------------->")
    print(id)
    # The insert result was never used by this function, so it is no longer
    # bound to a local name; the function still returns None.
    mongodb_options.insert_crawlinginfo(
        mongodb_conn, title, context, url, auther, create_time, id)
def saveToSqlite(spider_info, id):
    """Persist one crawled goods comment together with the site id.

    Despite the function name, the record is written through
    ``mongodb_options`` rather than SQLite.

    Args:
        spider_info: mapping that must contain the keys ``"usr_id"``,
            ``"goods_id"``, ``"referenceName"``, ``"content"``, ``"score"``
            and ``"creationTime"``.
        id: site identifier; echoed to stdout and stored with the comment.

    Raises:
        KeyError: if any of the required keys is missing (raised before any
            database handle is requested).
    """
    # Keys are listed in the positional order expected by
    # insert_goods_comments; they are looked up in this same order.
    field_names = (
        "usr_id",
        "goods_id",
        "referenceName",
        "content",
        "score",
        "creationTime",
    )
    record = [spider_info[name] for name in field_names]
    record.append(id)  # the site id is the final positional argument

    # Obtain the database handle, emit the debug trace, then insert.
    conn = mongodb_options.mongodb_init_uestc()
    print("-------------->")
    print(id)
    mongodb_options.insert_goods_comments(conn, *record)
def spider_server(site_data, id):
    """Start the spider selected by ``site_data[3]``.

    * ``'baidutieba'``: runs ``baidutieba_spider.startGrab`` with
      ``site_data[1]``.
    * ``'jingdongpinglun'``: runs ``jingdonggoods_spider.startGrab`` with
      ``site_data[2]``, then reads back the stored goods records for this
      site id and runs ``jingdongcomments_spider.startGrab`` once per
      record.
    * any other value: does nothing.

    Args:
        site_data: indexable record; only positions 1, 2 and 3 are read
            here. Their exact meaning is defined by the caller -- position
            3 is compared against the site-type strings above.
        id: site identifier forwarded to every spider and database call.

    Returns:
        None in every case.
    """
    # NOTE(review): this layout assumes the second marker print and the
    # final return sit outside the loops -- confirm against the original
    # indentation.
    site_kind = site_data[3]

    if site_kind == 'baidutieba':
        # Crawl Baidu Tieba.
        baidutieba_spider.startGrab(site_data[1], id)
        return

    if site_kind != 'jingdongpinglun':
        return

    # Crawl JD goods comments.
    # Step 1: crawl the goods list.
    jingdonggoods_spider.startGrab(site_data[2], id)

    # Step 2: crawl the comments for every goods record of this site.
    # Step 2.1: read the goods ids back from the database.
    conn = mongodb_options.mongodb_init_uestc()
    goods_records = mongodb_options.jingdonggoods_find_all(conn, id)

    # Debug trace: record count and every goods id, framed by marker lines.
    print("00000000000000000000000000000000")
    print(len(goods_records))
    for record in goods_records:
        print(record['goods_id'])
    print("1111111111111111111111111111111")

    # Step 2.2: crawl the comments of each goods id.
    for record in goods_records:
        jingdongcomments_spider.startGrab(record['goods_id'], id)
#! /usr/bin/env python # coding=utf-8 import os import uuid from django.shortcuts import HttpResponseRedirect from django.shortcuts import render from BigSpider_app.DataBase import mysql_options, redis_options, mongodb_options db = mongodb_options.mongodb_init_spider_ms() uestc = mongodb_options.mongodb_init_uestc() uestc_redis = redis_options.redis_init() # mysql_conn = mysql_options.mysql_init() # 在url.py中,网页中名称 view中名称 html网页中名称 def pre_index(request): return render(request, 'pre_index.html') # 用户页面 def index(request): flag = False if "username" in request.session: username = request.session['username'] flag = True if flag: # 从redis中读取当前爬虫队列长度(爬取的url) crawling_queue = redis_options.crawling_queue(uestc_redis)