Пример #1
0
# coding=utf-8
import datetime
import blog_spider
import mood_spider
import friend_spider
import information_spider
import public_methods
from multiprocessing.dummy import Pool

import my_log
# Module-level logger obtained from the project-local ``my_log`` helper.
# NOTE(review): the two arguments look like a log file name and a logger
# name -- confirm against my_log.getLogger.
log = my_log.getLogger("SpideController.log",
                       "spider_controller.SpideController")


class SpideController(object):
    """Control the crawling of blogs, moods and personal info, saving to MongoDB.

    Work is driven by the length of a queue read through
    ``my_messages.rconn.llen`` (presumably a Redis connection -- confirm):
    as long as the queue is non-empty, ``store_dairy`` is fanned out over a
    thread pool in bounded batches.
    """

    # Key of the list whose length tells how many QQ entries are still queued.
    QUEUE_KEY = 'QQSpider:QQForSpide'
    # Upper bound on the number of tasks handed to a single thread pool.
    BATCH_SIZE = 1000

    def __init__(self, my_messages=None):
        """Keep the shared settings object and build the account/cookie changer.

        Args:
            my_messages: shared settings object; ``beginer`` reads its
                ``rconn`` and ``thread_num_QQ`` attributes, so the ``None``
                default is only usable if it is replaced before crawling.
        """
        self.my_messages = my_messages
        # Helper object used to switch QQ accounts and cookies.
        self.changer = public_methods.Changing(self.my_messages)

    def beginer(self):
        """Run ``store_dairy`` in batches until the queue is reported empty.

        Each round starts a fresh pool of ``my_messages.thread_num_QQ``
        threads and maps ``store_dairy`` over ``range(batch)``, where
        ``batch`` is the current queue length capped at ``BATCH_SIZE``.
        The queue length is re-read after every round, so the loop only
        ends once ``llen`` returns 0 (``store_dairy`` is expected to
        consume queue entries -- it is defined outside this view).

        Raises:
            Whatever ``store_dairy`` raises; the pool is still closed and
            joined before the exception propagates.
        """
        remaining = self.my_messages.rconn.llen(self.QUEUE_KEY)
        while remaining > 0:
            batch = min(remaining, self.BATCH_SIZE)
            pool = Pool(self.my_messages.thread_num_QQ)
            try:
                pool.map(self.store_dairy, range(batch))
            finally:
                # Always release the worker threads, even when a task failed.
                pool.close()
                pool.join()
            remaining = self.my_messages.rconn.llen(self.QUEUE_KEY)
Пример #2
0
# coding=utf-8
import re
import datetime
import itertools
from bs4 import BeautifulSoup
from multiprocessing.dummy import Pool

import my_log

# Module-level logger; my_log.getLogger() is called with no arguments here,
# so whatever defaults that project-local helper defines apply.
log = my_log.getLogger()


class BlogSpider(object):
    """ 功能:爬取QQ日志 """
    def __init__(self, spiderMessage, changer):
        """Remember the two collaborators handed in by the caller.

        Args:
            spiderMessage: stored unchanged as ``self.message``.
            changer: stored unchanged as ``self.changer``; ``beginer`` reads
                ``changer.my_messages.thread_num_Blog`` from it.
        """
        self.message, self.changer = spiderMessage, changer

    def beginer(self):
        """Fetch every blog listed by ``get_blog_list`` using a thread pool.

        ``get_blog`` is called once per ``(key, value)`` pair of the mapping
        returned by ``get_blog_list``, on a pool of
        ``changer.my_messages.thread_num_Blog`` threads.

        Returns:
            The ``get_blog`` results in mapping order with failed fetches
            (marked by a ``-1`` result) dropped. When ``get_blog_list``
            returns an empty/falsy value nothing is fetched and the method
            falls through, returning ``None``.
        """
        blog_list = self.get_blog_list()  # mapping of blog entries to fetch
        if blog_list:
            pool = Pool(self.changer.my_messages.thread_num_Blog)
            try:
                # items() yields the same (key, value) pairs the former
                # izip(keys(), values()) did, and also exists on Python 3.
                results = pool.map(self.get_blog, blog_list.items())
            finally:
                # Release the worker threads even if a fetch raised.
                pool.close()
                pool.join()
            # Drop the -1 failure markers in a single pass.
            return [blog for blog in results if blog != -1]