Пример #1
0
    def check(self, list_data, offset, cnt):
        """Pass the request result through the safety check; retry until success.

        :param list_data: result returned by the request, or the sentinel
            string 'req_data_error' when the request failed
        :param offset: offset of this request, reused when retrying
        :param cnt: unused here; kept for caller compatibility
        :return: a successful request result

        A failed request fails the safety check; the check prompts the user
        to operate the phone again, after which the request is retried.
        Retries may fail too, so this method recurses until one succeeds.
        """
        if list_data != 'req_data_error':
            stop_and_start.check({'crawler': '历史文章列表', 'msg': 'success'})
        else:
            stop_and_start.check({
                'crawler': '历史文章列表',
                'msg': 'req_data_error'
            })
            self.wx_req_data_list = rd.tidy()
            # Block until request parameters reappear in the data source.
            while len(self.wx_req_data_list) == 0:
                self.wx_req_data_list = rd.tidy()
                from utils.front import notification
                notification('没有发现参数', '参数错误', _type='error')
                time.sleep(3)

            list_data = Crawler(offset, self.wx_req_data_list[0]).run()
            # BUG FIX: rebind from the recursive call. Previously its return
            # value was discarded, so when this retry failed but a deeper
            # retry succeeded, the caller still received 'req_data_error'.
            list_data = self.check(list_data, offset, cnt)
        return list_data
Пример #2
0
 def __init__(self):
     """Initialize crawler state from the request-parameter data source."""
     # Request parameter sets pulled from the data source.
     self.wx_req_data_list = rd.tidy()
     # WeChat nickname, taken from the first parameter set.
     self.nickname = self.wx_req_data_list[0]['nickname']
     # Interval (seconds) between two requests from the same WeChat account.
     self.every_delay = 3.0
     # Number of WeChat accounts participating in the crawl.
     self.wx_num = len(self.wx_req_data_list)
     # Effective inter-request delay when rotating over multiple accounts.
     self.delay = round(self.every_delay / self.wx_num, 3)
     # Articles collected so far.
     self.articles = []
     # Database operations bound to this nickname's collection.
     self.col_data = CollectionOperation(self.nickname)
     # Timestamp of the previous request.
     self.pre_crawl_time = time.time()
Пример #3
0
    def check(self, reading_data, item):
        """Pass the request result through the safety check; retry until success.

        :param reading_data: result returned by the request, or the sentinel
            string 'req_data_error' when the request failed
        :param item: request descriptor; item[1] and item[2] feed the Crawler
        :return: a successful request result

        A failed request fails the safety check; the check prompts the user
        to operate the phone again, after which the request is retried.
        Retries may fail too, so this method recurses until one succeeds.
        """
        if reading_data != 'req_data_error':
            stop_and_start.check({'crawler': '阅读数据', 'msg': 'success'})
        else:
            stop_and_start.check({'crawler': '阅读数据', 'msg': 'req_data_error'})
            self.wx_req_data_list = rd.tidy()
            # Block until request parameters reappear in the data source.
            while len(self.wx_req_data_list) == 0:
                self.wx_req_data_list = rd.tidy()
                from utils.front import notification
                notification('没有发现参数', '参数错误', _type='error')
                time.sleep(3)

            reading_data = Crawler(item[1], item[2],
                                   self.wx_req_data_list[0]).run()
            # BUG FIX: rebind from the recursive call. Previously its return
            # value was discarded, so when this retry failed but a deeper
            # retry succeeded, the caller still received 'req_data_error'.
            reading_data = self.check(reading_data, item)
        return reading_data
Пример #4
0
 def __init__(self):
     """Initialize crawler state from the request-parameter data source."""
     # Request parameters obtained from the data source
     self.wx_req_data_list = rd.tidy()
     # WeChat nickname
     self.nickname = self.wx_req_data_list[0]['nickname']
     # Interval between two requests from the same WeChat account
     self.every_delay = 3.0
     # Number of WeChat accounts participating in the crawl
     self.wx_num = len(self.wx_req_data_list)
     # Effective interval when multiple WeChat accounts are used
     self.delay = round(self.every_delay/self.wx_num, 3)
     # All articles that need to be collected
     self.articles = []
     # Database operations
     self.col_data = CollectionOperation(self.nickname)
     # Timestamp of the previous request
     self.pre_crawl_time = time.time()
Пример #5
0
def get_all_article(worker_num=5, process=None):
    """Crawl the content of all pending articles for the current nickname.

    :param worker_num: number of crawler workers to run
    :param process: optional front-end progress reporter, stored globally
    """
    global article_data_buffer
    global col_data
    global front_process
    global nickname
    front_process = process
    article_data_buffer = []
    from instance import rd
    nickname = rd.tidy()[0]['nickname']
    col_data = CollectionOperation(nickname)
    rc = RequestContent()
    # BUG FIX: honour the caller-supplied worker_num; it was previously
    # ignored in favour of a hard-coded worker_num=16.
    rc.prepare_articles(nickname,
                        worker_num=worker_num,
                        ip_num=1,
                        need_proxy=use_proxy_directly())
    rc.run_crawlers()
    # NOTE(review): 'join_cralwers' looks misspelled, but it must match the
    # RequestContent API, so it is left unchanged here.
    rc.join_cralwers()
    TaskRecoder.print_ts()
Пример #6
0
 def crawl(self):
     """Run one crawl pass: verify the licence and request parameters,
     then execute the crawler steps selected by the configured range."""
     from utils.front import message_box
     from cmp.protect import Passport
     # Guard clauses: a valid licence and at least one parameter set required.
     if not Passport.check_password():
         message_box('请先通过使用说明书中的方法获得授权有效授权证书', '授权无效 不可采集数据', 'error')
         return
     if len(rd.tidy()) == 0:
         return
     from app.api.process import Process
     crange = int(self.filter['range'])
     process = Process(crange)
     import builtins
     builtins.crawler_process = process
     # Flat dispatch on the configured range (was a nested if/else pyramid).
     if crange == 0:
         process.new_step()
         self.crawler_article_list(process)
     elif crange == 25:
         process.new_step()
         self.crawler_article_list(process)
         process.new_step()
         self.crawler_article(process)
     elif crange == 50:
         process.new_step()
         self.crawler_article_list(process)
         process.new_step()
         self.crawler_reading_data(process)
     elif crange == 75:
         process.new_step()
         self.crawler_article_list(process)
         process.new_step()
         self.crawler_article(process)
         process.new_step()
         self.crawler_reading_data(process)
     elif crange == 100:
         process.new_step()
         self.crawler_reading_data(process)
     process.send_finish()
     message_box('总共用时%d分钟' % int((time.time() - self.begin_time) / 60),
                 '采集完成', 'success')
Пример #7
0
 def __init__(self):
     self.wx_req_data_list = rd.tidy()