def scan_list(self, target, exists):
    """Scan a Weibo target and return the posts that are not known yet.

    Args:
        target: scraping target; its ``limited_forward_count`` and
            ``limited_attitude_count`` are copied onto ``self`` before the
            scan starts.
        exists: sequence of post ids that were already collected.

    Returns:
        A ``(code, (target, items, None, None))`` tuple:

        * ``1``  -- ``items`` holds the posts whose ``'id'`` is not in
          ``exists``.
        * ``0``  -- the scan collected nothing; ``items`` is ``None``.
        * ``-1`` -- login failed or every collected post is already known;
          ``items`` is ``None``.
    """
    self.limited_forward_count = target.limited_forward_count
    self.limited_attitude_count = target.limited_attitude_count

    # Simulated login: the status is hard-coded, so the failure branch below
    # is never taken today. It is kept as the hook for a real login check.
    status = 'you got it'
    if status == '':
        LogGo.warning("Weibo: Loop scan faild!")
        return (-1, (target, None, None, None))

    items = []
    # ``loops`` is expected to append the scanned posts to ``items`` in place.
    self.loops(target, exists, items)
    if not items:
        return (0, (target, None, None, None))

    items = self.purify(items)
    items.reverse()

    # Keep only posts that have not been seen before.
    new_items = [item for item in items if item['id'] not in exists]
    LogGo.debug('weibo list length:' + str(len(new_items)))

    if new_items:
        # Return the filtered posts. The previous code returned the full,
        # unfiltered list here, which pushed known posts downstream again.
        return (1, (target, new_items, None, None))
    return (-1, (target, None, None, None))
def send_to_queue(result):
    """Forward one list-page scan result to the detail-page queue.

    Args:
        result: ``(code, (target, detail_page_bundle_list, content_ruler,
            encode))``. Codes named by the original documentation are
            ``0`` (maximum access frequency reached) and ``1`` (normal
            result). Only ``1`` is treated as success here; every other
            code is logged and recorded against the target as an error.

    Side effects:
        * On success, puts one ``(target, bundle, content_ruler, encode)``
          tuple per bundle on ``target_producer.target_queue.queue`` and
          updates the target's last access date.
        * On failure, logs the code and stores it via
          ``ScrabingTarget.set_elog``.
        * Always updates the module-level transport bookkeeping
          (``target_transported_count`` / ``all_target_transported``)
          while holding ``target_mutex``.
    """
    # Only the names that are rebound need a ``global`` declaration;
    # ``target_mutex`` and ``target_count`` are read-only here.
    global target_transported_count, all_target_transported

    code, value = result
    target, detail_page_bundle_list, content_ruler, encode = value

    if code == 1:
        for detail_page_bundle in detail_page_bundle_list:
            target_producer.target_queue.queue.put(
                (target, detail_page_bundle, content_ruler, encode))
        ScrabingTarget.set_last_access_date(target.id)
    else:
        LogGo.error("List Page Error:" + str(target.data_key) + " Code: " + str(code))
        ScrabingTarget.set_elog(target.id, "error code: " + str(code))

    # ``with`` guarantees the mutex is released even if the bookkeeping or
    # logging raises; the manual acquire()/release() pair did not.
    with target_mutex:
        # NOTE(review): the flag is only set by a call that arrives when the
        # counter already equals ``target_count``. If the counter starts at
        # 0 and ``target_count`` is the number of targets, the last target's
        # call increments instead of setting the flag -- confirm the initial
        # values where these globals are defined.
        if target_count == target_transported_count:
            all_target_transported = True
        else:
            target_transported_count += 1
        LogGo.debug('target_transported_count: ' + str(target_transported_count))
def scan_list(self, target, exists):
    """Scan a gsdata WeChat article list and return the unseen articles.

    The list URL is built from ``self.url`` with ``target.extra0`` and
    ``target.wx_hao``, and fetched with an ``X-Requested-With:
    XMLHttpRequest`` header plus the gsdata cookie file. The response is
    handed to ``self.looper_js`` together with the field-mapping ruler.

    Args:
        target: scraping target providing ``extra0`` and ``wx_hao``.
        exists: sequence of article titles that were already collected.

    Returns:
        A ``(code, (target, items, None, None))`` tuple:

        * ``1``  -- ``items`` holds the articles whose ``'title'`` is not
          in ``exists``.
        * ``-1`` -- fetching/parsing failed, nothing was found, or every
          article is already known; ``items`` is ``None``.
    """
    cap = 'data'
    ruler = 'author:author;title:title;date:posttime;img:picurl;link:url;top:top;click:readnum_newest;vote_up:likenum_newest;subject:content'
    header = {'X-Requested-With': 'XMLHttpRequest'}

    items = []
    try:
        # The request sits inside the ``try`` so a network failure is
        # reported as code -1, as the Newrank scanner does, instead of
        # escaping to the caller.
        url = self.url.format(target.extra0, target.wx_hao)
        raw = RequestHelper.get(url, header=header, file_cookie=Configs.gsdata_cookie_file)
        # ``looper_js`` is expected to append the parsed articles to
        # ``items`` in place.
        self.looper_js(items, raw, exists, ruler, cap)
    except Exception as e:
        E.out_err(e)
        return (-1, (target, None, None, None))

    if not items:
        return (-1, (target, None, None, None))

    items = self.sort(items)
    items.reverse()

    # Keep only articles that have not been seen before.
    new_items = [item for item in items if item['title'] not in exists]
    LogGo.debug('gsdata list length:' + str(len(new_items)))

    if new_items:
        # Return the filtered articles. The previous code returned the
        # full, unfiltered list here.
        return (1, (target, new_items, None, None))
    return (-1, (target, None, None, None))
def start_mormal_mission(self):
    """Run one normal scraping mission and send the mission report.

    Builds the target producer, target consumer and upload consumer from
    ``self.config``, starts them (consumers first, then the producer), and
    polls ``target_producer.is_all_target_transported()`` every five
    seconds. Once it reports completion, logs the task count and launches
    the SMTP mission report.

    NOTE(review): the original loop was ``while True`` with no exit, so the
    final log line and the report were unreachable. The loop now ends when
    the producer reports that all targets were transported. The consumer
    queues are not joined here, so detail pages may still be in flight when
    the report is sent -- confirm that this is acceptable.
    """
    target_list = get_target_list()

    self.target_producer = target_producer(
        target_list, self.config.target_pool_size, self.config.target_queue_size)
    self.target_consumer = target_consumer()
    self.upload_consumer = upload_consumer(self.config.uploader_queue_size)

    # Start the consumers before the producer so nothing is produced into
    # a queue that has no reader yet.
    self.upload_consumer.start()
    self.target_consumer.start()
    self.target_producer.start()

    while True:
        time.sleep(5)
        transported = target_producer.is_all_target_transported()
        LogGo.debug("target_transported_over: " + str(transported))
        if transported:
            break

    LogGo.info('Loop Done! task count: ' + str(len(target_list)))
    SMTPServer.launch_mission_report()
def scan_list(self, target, exists):
    """Fetch a Newrank WeChat article list and return the unseen articles.

    Posts ``flag=true`` and ``uuid=target.extra0`` to ``NewrankRuler.url``
    with the Newrank cookie file, then extracts the configured keys from
    the response via ``ExtraJSON.extra_newrank_wechat_list``.

    Args:
        target: scraping target providing ``extra0`` (sent as ``uuid``).
        exists: sequence of article titles that were already collected.

    Returns:
        A ``(code, (target, items, None, None))`` tuple:

        * ``1``  -- ``items`` holds the articles whose ``'title'`` is not
          in ``exists``.
        * ``-1`` -- the request or the extraction failed, nothing was
          found, or every article is already known; ``items`` is ``None``.
    """
    # Request parameters.
    par = (['flag', 'true'], ['uuid', target.extra0])

    # Keys to extract from each article entry.
    # NOTE(review): 'publicTime' appears twice; kept unchanged because the
    # extractor's handling of duplicates is not visible here.
    keys = [
        'title',
        'author',
        'publicTime',
        'url',
        'clicksCount',
        'likeCount',
        'publicTime',
        'summary',
    ]

    try:
        raw = RequestHelper.post(NewrankRuler.url, par, file_cookie=Configs.newrank_cookie_file)
        articles = ExtraJSON.extra_newrank_wechat_list(raw, keys)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt/SystemExit still propagate. Extraction failures
        # are now logged instead of being swallowed silently.
        import traceback
        LogGo.warning(traceback.format_exc())
        return (-1, (target, None, None, None))

    if not articles:
        return (-1, (target, None, None, None))

    articles.reverse()

    # Keep only articles that have not been seen before.
    new_articles = [item for item in articles if item['title'] not in exists]
    LogGo.debug('newrank list length:' + str(len(new_articles)))

    if new_articles:
        # Return the filtered articles. The previous code returned the
        # full, unfiltered list here.
        return (1, (target, new_articles, None, None))
    return (-1, (target, None, None, None))
def fetch_detail(self, value):
    """Scrape one detail page and queue the result for upload.

    Args:
        value: ``(target, detail_page_dic, content_ruler, encode)``.
            ``target.type`` selects the scraper:

            * ``'ulweb'`` / ``'jsweb'`` -> ``self.web.scrape_detail``
            * ``'newrank'``             -> ``self.wechat.newrank_detail``
            * ``'gsdata'``              -> ``self.wechat.gs_detail``
            * ``'weibo'``               -> ``self.weibo.weibo_detail``

    The selected scraper returns ``(code, result_dic)``. When ``code``
    equals ``1`` the result is put on ``self.upload_queue.queue``;
    otherwise an error is logged. An unrecognised ``target.type`` is logged
    and skipped instead of failing on a ``None`` delegate.
    """
    LogGo.debug("in detail :" + str(value))

    target, detail_page_dic, content_ruler, encode = value

    # Attributes are looked up lazily, so only the scraper that is actually
    # needed has to exist on ``self``.
    if target.type in ('ulweb', 'jsweb'):
        delegate = self.web.scrape_detail
    elif target.type == 'newrank':
        delegate = self.wechat.newrank_detail
    elif target.type == 'gsdata':
        delegate = self.wechat.gs_detail
    elif target.type == 'weibo':
        delegate = self.weibo.weibo_detail
    else:
        LogGo.error("Detail Error: unknown target type: " + str(target.type))
        return

    code, result_dic = delegate(target, detail_page_dic, content_ruler, encode)

    # Compare by value: ``is`` tests object identity and only works for
    # small ints as a CPython implementation detail.
    if code == 1:
        self.upload_queue.queue.put(result_dic)
    else:
        LogGo.error("Detail Error: " + str(detail_page_dic))