Exemplo n.º 1
0
    def scan_list(self, target, exists):
        """Collect list entries for ``target`` and report whether any are new.

        The forward/attitude limits are copied from ``target`` onto ``self``
        before scanning.

        :param target: scraping target; must expose ``limited_forward_count``
            and ``limited_attitude_count``.
        :param exists: collection of already-known ids, queried through its
            ``count`` method.
        :return: ``(code, (target, entries, None, None))`` where ``code`` is
            ``0`` when ``self.loops`` collected nothing, ``1`` when at least
            one entry id is absent from ``exists`` (``entries`` is then the
            purified, reversed list), and ``-1`` otherwise; ``entries`` is
            ``None`` unless ``code`` is ``1``.
        """
        self.limited_forward_count = target.limited_forward_count
        self.limited_attitude_count = target.limited_attitude_count

        collected = []

        # Simulated login: the status is hard-coded, so the failure branch
        # below is never taken as the code stands.
        status = 'you got it'

        if status == '':
            LogGo.warning("Weibo: Loop scan faild!")
            return (-1, (target, None, None, None))

        # Login succeeded; self.loops is expected to fill `collected` in
        # place (it is not assigned anywhere else before the length check).
        self.loops(target, exists, collected)
        if len(collected) < 1:
            return (0, (target, None, None, None))

        collected = self.purify(collected)
        collected.reverse()

        unseen = [entry for entry in collected
                  if exists.count(entry['id']) < 1]
        LogGo.debug('newrank list length:' + str(len(unseen)))

        if len(unseen) < 1:
            return (-1, (target, None, None, None))

        # NOTE(review): the whole purified list is returned, not only the
        # unseen entries -- confirm that callers rely on this.
        return (1, (target, collected, None, None))
Exemplo n.º 2
0
    def send_to_queue(result):
        """Forward a list-page scan result to the detail-page queue.

        Result codes (as documented by the original author): ``0`` means the
        maximum access frequency was reached, ``1`` is a normal result. Any
        code other than ``1`` is logged and stored as an error on the target.

        :param result: ``(code, (target, detail_page_bundle_list,
            content_ruler, encode))``.
        :return: None
        """
        global target_mutex, target_count, target_transported_count, all_target_transported

        code, value = result
        target, detail_page_bundle_list, content_ruler, encode = value

        if code == 1:
            for detail_page_bundle in detail_page_bundle_list:
                target_producer.target_queue.queue.put(
                    (target, detail_page_bundle, content_ruler, encode))
                ScrabingTarget.set_last_access_date(target.id)
        else:
            LogGo.error("List Page Error:" + str(target.data_key) + " Code: " +
                        str(code))
            ScrabingTarget.set_elog(target.id, "error code: " + str(code))

        # Use the lock as a context manager so it is released even when the
        # bookkeeping below raises; the previous acquire()/release() pair
        # would have left the mutex held forever in that case.
        with target_mutex:
            # NOTE(review): the flag is only set on a call that arrives after
            # the counter already equals target_count -- confirm this is the
            # intended completion condition.
            if target_count == target_transported_count:
                all_target_transported = True
            else:
                target_transported_count += 1
                LogGo.debug('target_transported_count: ' +
                            str(target_transported_count))
Exemplo n.º 3
0
    def scan_list(self, target, exists):
        """Fetch the list page for ``target`` and report whether it has new entries.

        :param target: scraping target; ``extra0`` and ``wx_hao`` are
            substituted into ``self.url``.
        :param exists: collection of already-known titles, queried through
            its ``count`` method.
        :return: ``(1, (target, entries, None, None))`` when at least one
            entry title is absent from ``exists`` (``entries`` is the sorted,
            reversed list); otherwise ``(-1, (target, None, None, None))``,
            which is also returned when parsing raises.
        """
        collected = []

        container_key = 'data'
        field_map = ('author:author;title:title;date:posttime;img:picurl;link:url;top:top;'
                     'click:readnum_newest;vote_up:likenum_newest;subject:content')

        request_url = self.url.format(target.extra0, target.wx_hao)
        ajax_header = {'X-Requested-With': 'XMLHttpRequest'}
        raw = RequestHelper.get(request_url,
                                header=ajax_header,
                                file_cookie=Configs.gsdata_cookie_file)

        # looper_js is expected to fill `collected` in place; nothing else
        # in this method populates it before the length check.
        try:
            self.looper_js(collected, raw, exists, field_map, container_key)
        except Exception as e:
            E.out_err(e)
            return (-1, (target, None, None, None))

        if len(collected) < 1:
            return (-1, (target, None, None, None))

        collected = self.sort(collected)
        collected.reverse()

        unseen = [entry for entry in collected
                  if exists.count(entry['title']) < 1]
        LogGo.debug('newrank list length:' + str(len(unseen)))

        if len(unseen) < 1:
            return (-1, (target, None, None, None))

        # NOTE(review): the full list is returned, not only the unseen
        # entries -- confirm that callers rely on this.
        return (1, (target, collected, None, None))
Exemplo n.º 4
0
    def start_mormal_mission(self):
        """Create and start the producer/consumer workers, then poll forever.

        Builds the target producer, target consumer and upload consumer from
        ``self.config``, starts them (uploader first, producer last) and then
        logs ``target_producer.is_all_target_transported()`` every 5 seconds.
        """
        global all_target_transported

        targets = get_target_list()
        cfg = self.config

        self.target_producer = target_producer(
            targets, cfg.target_pool_size, cfg.target_queue_size)
        self.target_consumer = target_consumer()
        self.upload_consumer = upload_consumer(cfg.uploader_queue_size)

        # Start downstream workers before the producer that feeds them.
        for worker in (self.upload_consumer,
                       self.target_consumer,
                       self.target_producer):
            worker.start()

        # Progress poll; this loop has no exit condition.
        while True:
            time.sleep(5)
            transported = target_producer.is_all_target_transported()
            LogGo.debug("target_transported_over: " + str(transported))

        # NOTE(review): unreachable while the loop above never breaks --
        # confirm the intended termination condition before relying on the
        # completion log and the mission report.
        LogGo.info('Loop Done! task count: ' + str(len(targets)))
        SMTPServer.launch_mission_report()
Exemplo n.º 5
0
    def scan_list(self, target, exists):
        """Request the newrank list for ``target`` and report whether it has new entries.

        :param target: scraping target; ``extra0`` is sent as the ``uuid``
            request parameter.
        :param exists: collection of already-known titles, queried through
            its ``count`` method.
        :return: ``(1, (target, entries, None, None))`` when at least one
            entry title is absent from ``exists`` (``entries`` is the parsed
            list, reversed); otherwise ``(-1, (target, None, None, None))``.
            The failure tuple is also returned when the request or the
            parsing step raises, or when parsing yields nothing.
        """
        import traceback

        # Request parameters.
        par = (['flag', 'true'], ['uuid', target.extra0])
        # Keys to extract from each entry.
        # NOTE(review): 'publicTime' is listed twice; kept as-is because the
        # parser's handling of duplicates is not visible here.
        keys = [
            'title', 'author', 'publicTime', 'url', 'clicksCount', 'likeCount',
            'publicTime', 'summary'
        ]
        failure = (-1, (target, None, None, None))

        try:
            raw = RequestHelper.post(NewrankRuler.url,
                                     par,
                                     file_cookie=Configs.newrank_cookie_file)
        except Exception:
            LogGo.warning(traceback.format_exc())
            return failure

        try:
            articles = ExtraJSON.extra_newrank_wechat_list(raw, keys)
        except Exception:
            # Previously a bare ``except:`` that discarded the error; log it
            # and stop trapping KeyboardInterrupt/SystemExit.
            LogGo.warning(traceback.format_exc())
            return failure

        # Covers both an empty list and a None result from the parser.
        if not articles:
            return failure

        articles.reverse()

        unseen = [entry for entry in articles
                  if exists.count(entry['title']) < 1]
        LogGo.debug('newrank list length:' + str(len(unseen)))

        if not unseen:
            return failure

        # NOTE(review): the full list is returned, not only the unseen
        # entries -- confirm that callers rely on this.
        return (1, (target, articles, None, None))
Exemplo n.º 6
0
    def fetch_detail(self, value):
        """Scrape one detail page with the handler matching ``target.type``.

        :param value: ``(target, detail_page_dic, content_ruler, encode)``.
        :return: None. On handler code ``1`` the result dict is put on
            ``self.upload_queue``; any other code, or an unrecognised
            ``target.type``, is logged as an error.
        """
        LogGo.debug("in detail :" + str(value))
        target, detail_page_dic, content_ruler, encode = value

        if target.type in ('ulweb', 'jsweb'):
            delegate = self.web.scrape_detail
        elif target.type == 'newrank':
            delegate = self.wechat.newrank_detail
        elif target.type == 'gsdata':
            delegate = self.wechat.gs_detail
        elif target.type == 'weibo':
            delegate = self.weibo.weibo_detail
        else:
            # Previously fell through with delegate=None and failed with
            # "'NoneType' object is not callable".
            LogGo.error("Detail Error: unsupported target type: " +
                        str(target.type))
            return

        code, result_dic = delegate(target, detail_page_dic, content_ruler,
                                    encode)

        # Compare by value: ``is`` on an int literal tests object identity,
        # which only works by accident of CPython's small-int caching.
        if code == 1:
            self.upload_queue.queue.put(result_dic)
        else:
            LogGo.error("Detail Error: " + str(detail_page_dic))