def check(self, list_data, offset, cnt):
    """
    Pass this request's parameters and result through the safety check.

    A failed request fails the check; the check prompts the operator to
    re-operate the phone, after which the request is retried with the same
    offset.  A retry may fail again, so the method recurses until a
    successful result is obtained.

    :param list_data: result returned by the request
                      ('req_data_error' marks a failed request)
    :param offset: offset used for the request, reused on retry
    :param cnt: counter carried through recursive calls (not read here)
    :return: a successful (non-error) request result
    """
    if list_data != 'req_data_error':
        stop_and_start.check({'crawler': '历史文章列表', 'msg': 'success'})
    else:
        stop_and_start.check({
            'crawler': '历史文章列表',
            'msg': 'req_data_error'
        })
        # Re-read the request parameters until the operator has supplied
        # some; nag via a notification every 3 seconds while waiting.
        self.wx_req_data_list = rd.tidy()
        while len(self.wx_req_data_list) == 0:
            self.wx_req_data_list = rd.tidy()
            from utils.front import notification
            notification('没有发现参数', '参数错误', _type='error')
            time.sleep(3)
        list_data = Crawler(offset, self.wx_req_data_list[0]).run()
        # BUG FIX: capture the recursive result.  Previously the return
        # value of self.check(...) was discarded, so when the retry failed
        # and a deeper recursion eventually succeeded, this frame still
        # returned its own (possibly error) list_data.
        list_data = self.check(list_data, offset, cnt)
    return list_data
def __init__(self):
    """Prepare per-session crawl state from captured WeChat request data."""
    params = rd.tidy()                    # captured request parameters
    self.wx_req_data_list = params
    self.nickname = params[0]['nickname']  # account whose data is crawled
    self.every_delay = 3.0                 # per-account gap between requests
    self.wx_num = len(params)              # participating WeChat accounts
    # More accounts -> requests can be interleaved -> shorter effective gap.
    self.delay = round(self.every_delay / self.wx_num, 3)
    self.articles = []                     # articles queued for collection
    self.col_data = CollectionOperation(self.nickname)  # DB accessor
    self.pre_crawl_time = time.time()      # timestamp of the previous request
def check(self, reading_data, item):
    """
    Pass this request's parameters and result through the safety check.

    A failed request fails the check; the check prompts the operator to
    re-operate the phone, after which the request is retried.  A retry may
    fail again, so the method recurses until a successful request result
    is obtained, and returns that successful result.

    :param reading_data: result returned by the request
                         ('req_data_error' marks a failed request)
    :param item: request descriptor; item[1] and item[2] are forwarded to
                 the Crawler on retry
    :return: a successful (non-error) request result
    """
    if reading_data != 'req_data_error':
        stop_and_start.check({'crawler': '阅读数据', 'msg': 'success'})
    else:
        stop_and_start.check({'crawler': '阅读数据', 'msg': 'req_data_error'})
        # Re-read the request parameters until the operator has supplied
        # some; nag via a notification every 3 seconds while waiting.
        self.wx_req_data_list = rd.tidy()
        while len(self.wx_req_data_list) == 0:
            self.wx_req_data_list = rd.tidy()
            from utils.front import notification
            notification('没有发现参数', '参数错误', _type='error')
            time.sleep(3)
        reading_data = Crawler(item[1], item[2], self.wx_req_data_list[0]).run()
        # BUG FIX: capture the recursive result.  Previously the return
        # value of self.check(...) was discarded, so a deeper successful
        # retry could be lost and a stale error value returned instead.
        reading_data = self.check(reading_data, item)
    return reading_data
def __init__(self):
    """Initialize crawl state: request params, pacing delays, and DB access."""
    req_list = rd.tidy()             # request parameters from the data source
    self.wx_req_data_list = req_list
    self.nickname = req_list[0]['nickname']  # WeChat nickname
    # Interval between two requests on the same WeChat account.
    self.every_delay = 3.0
    # Number of WeChat accounts taking part in the crawl.
    self.wx_num = len(req_list)
    # Effective interval when several accounts share the load.
    self.delay = round(self.every_delay / self.wx_num, 3)
    # All articles that need to be crawled.
    self.articles = []
    # Database accessor for this nickname's collection.
    self.col_data = CollectionOperation(self.nickname)
    # Time of the previous request.
    self.pre_crawl_time = time.time()
def get_all_article(worker_num=5, process=None):
    """
    Crawl every pending article for the current nickname.

    Sets up the module-level buffers and DB accessor, then drives a
    RequestContent instance through prepare/run/join.

    :param worker_num: number of crawler workers to use
    :param process: optional progress reporter, stored in the module-level
                    ``front_process``
    """
    global article_data_buffer
    global col_data
    global front_process
    global nickname
    front_process = process
    article_data_buffer = []
    from instance import rd
    nickname = rd.tidy()[0]['nickname']
    col_data = CollectionOperation(nickname)
    rc = RequestContent()
    # BUG FIX: honor the caller-supplied worker_num — it was accepted but
    # ignored in favor of a hard-coded worker_num=16.
    rc.prepare_articles(nickname, worker_num=worker_num, ip_num=1,
                        need_proxy=use_proxy_directly())
    rc.run_crawlers()
    # NOTE(review): 'join_cralwers' is a typo in the external API name;
    # kept as-is because the method is defined elsewhere.
    rc.join_cralwers()
    TaskRecoder.print_ts()
def crawl(self):
    """
    Entry point for a crawl session.

    Validates the licence and the presence of captured request parameters,
    then runs the crawler steps selected by ``self.filter['range']``:

        0   -> article list only
        25  -> article list + article content
        50  -> article list + reading data
        75  -> article list + article content + reading data
        100 -> reading data only

    Any other value runs no crawler step.  The progress reporter is exposed
    process-wide as ``builtins.crawler_process``; on completion a message
    box reports the elapsed time in minutes.
    """
    from utils.front import message_box
    from cmp.protect import Passport
    # Refuse to run without a valid licence certificate.
    if not Passport.check_password():
        message_box('请先通过使用说明书中的方法获得授权有效授权证书', '授权无效 不可采集数据', 'error')
        return
    # No captured request parameters -> nothing to crawl.
    if len(rd.tidy()) == 0:
        return
    from app.api.process import Process
    crange = int(self.filter['range'])
    process = Process(crange)
    import builtins
    builtins.crawler_process = process
    # Flattened from a deeply nested if/else pyramid: one branch per
    # supported range value, identical step sequences.
    if crange == 0:
        process.new_step()
        self.crawler_article_list(process)
    elif crange == 25:
        process.new_step()
        self.crawler_article_list(process)
        process.new_step()
        self.crawler_article(process)
    elif crange == 50:
        process.new_step()
        self.crawler_article_list(process)
        process.new_step()
        self.crawler_reading_data(process)
    elif crange == 75:
        process.new_step()
        self.crawler_article_list(process)
        process.new_step()
        self.crawler_article(process)
        process.new_step()
        self.crawler_reading_data(process)
    elif crange == 100:
        process.new_step()
        self.crawler_reading_data(process)
    process.send_finish()
    message_box('总共用时%d分钟' % int((time.time() - self.begin_time) / 60), '采集完成', 'success')
def __init__(self):
    # Captured WeChat request parameters, one entry per participating
    # account, pulled from the shared data source.  Assumed non-empty by
    # code reading [0] elsewhere — TODO confirm callers guard via rd.tidy().
    self.wx_req_data_list = rd.tidy()