def dump(self, cookies, account, path=None, type_=PickleFileType.cookie):
    if not path:
        path = '../../data/'
    account = account['username']
    data = pickle.dumps(cookies)
    data_dir = Path(__file__).parent.joinpath(path)
    data_dir.mkdir(exist_ok=True)
    key = self.gen_by_account(account=account, type_=type_)
    data_file = data_dir.joinpath(key)
    data_file.write_bytes(data)
    logger.info('dump data success,account:[%s],type:[%s]', account, type_)
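# A minimal sketch of the pickle round-trip dump()/load() perform, assuming a
# per-account file name produced by gen_by_account (the key below is a
# hypothetical stand-in, not the real naming scheme):
import pickle
from pathlib import Path

data_dir = Path('/tmp/data')
data_dir.mkdir(exist_ok=True)
cookies = {'session': 'abc123'}               # any picklable object works
data_file = data_dir.joinpath('some_user_cookie')
data_file.write_bytes(pickle.dumps(cookies))            # what dump() does
assert pickle.loads(data_file.read_bytes()) == cookies  # what load() does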
def execute(self, num, date_str=None):
    # if not date_str:
    #     date_str = yesterday()
    tasks = self.execute_sycm_product_actions()
    logger.info("start to execute sycm tasks,tasks length:%s", len(tasks))
    i = 0
    for task in tasks:
        if i < num:
            self._execute_sycm_product_actions(task)
            # self._executor.submit(self._execute_sycm_product_actions, task)
            i += 1
            time.sleep(15)
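# The counter-guarded loop above only ever processes the first `num` tasks;
# itertools.islice expresses the same cap directly. A behavior-equivalent
# sketch, assuming the 15-second sleep is a deliberate rate limit:
import itertools
import time

def execute(self, num, date_str=None):
    tasks = self.execute_sycm_product_actions()
    logger.info("start to execute sycm tasks,tasks length:%s", len(tasks))
    for task in itertools.islice(tasks, num):
        self._execute_sycm_product_actions(task)
        time.sleep(15)  # pause between tasks to avoid tripping rate limits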
def get_sign_js():
    data_file = Path(__file__).parent.joinpath('../../../js/' + 'sign.js')
    if not data_file.exists():
        return None
    try:
        text = data_file.read_text()
        # ctx = execjs.compile(text)
        # print(ctx.call('p', 'aaa'))
        logger.info('load sign.js success')
        return text
    except Exception as e:
        logger.error('load sign.js error,exp:%s', e)
        raise e
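# The commented-out lines above hint at how the returned source is consumed:
# compiled with PyExecJS and a signing function invoked. A minimal sketch,
# assuming PyExecJS is installed and sign.js defines a function named 'p'
# (the name and argument come from the commented code, not a verified API):
import execjs

js_source = get_sign_js()
if js_source:
    ctx = execjs.compile(js_source)
    signature = ctx.call('p', 'aaa')  # call the signing function
    print(signature)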
def load(self, account, path=None, type_=PickleFileType.cookie):
    if not path:
        path = '../../data/'
    account = account['username']
    key = self.gen_by_account(account=account, type_=type_)
    if type_ == PickleFileType.cookie:
        # cookies = self.cookies_dict.get(key, None)
        # if cookies:
        #     logger.info('load mem cookies success,account:[%s]', account)
        #     return cookies
        data_file = Path(__file__).parent.joinpath(path + key)
        if not data_file.exists():
            return None
        try:
            raw = data_file.read_bytes()
            cookies = pickle.loads(raw)
            logger.info('load cookies success,account:[%s]', account)
            self.cookies_dict[key] = cookies
            return cookies
        except Exception as e:
            logger.error('load cookies error,account:[%s],exp:%s', account, e)
            raise e
    elif type_ == PickleFileType.origin_cookie:
        data_file = Path(__file__).parent.joinpath(path + key)
        if not data_file.exists():
            return None
        try:
            raw = data_file.read_bytes()
            cookies = pickle.loads(raw)
            logger.info('load origin cookies success,account:[%s]', account)
            return cookies
        except Exception as e:
            logger.error('load origin cookies error,account:[%s],exp:%s', account, e)
            raise e
    else:
        data_file = Path(__file__).parent.joinpath(path + key)
        if not data_file.exists():
            return None
        try:
            raw = data_file.read_bytes()
            legality_token = pickle.loads(raw)
            logger.info('load legality_token success,account:[%s]', account)
            return legality_token
        except Exception as e:
            logger.error('load legality_token error,account:[%s],exp:%s', account, e)
            return ''
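# The three branches above repeat the same read-and-unpickle step; a
# hypothetical helper (the name _load_pickle is illustrative, not part of
# the original class) could centralize it:
def _load_pickle(self, path, key):
    """Read and unpickle one data file; return None if it is missing."""
    data_file = Path(__file__).parent.joinpath(path + key)
    if not data_file.exists():
        return None
    return pickle.loads(data_file.read_bytes())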
def unmarshal(self, context, response):
    result = response.json()
    logger.info('proxy result:%s', result)
    return result['data']
            if i < num:
                future_tasks[self._executor.submit(
                    self._execut_taobao_detail_actions, task, proxy)] = task
                i += 1
        for future in as_completed(future_tasks):
            try:
                proxy = future.result()
                if proxy:
                    self._proxy.remove_proxy(url=proxy['https'])
            except Exception as e:
                logger.error(e)

    def init(self):
        super().init()

    def init_argparse(self, parser):
        super().init_argparse(parser)

    def process(self):
        # return super().process()
        self.execute(5)
        time.sleep(3)


if __name__ == "__main__":
    s = TaobaoDetailPageJob(40)
    logger.info("start to execute taobao_detail_page job")
    s.run()
    # s.process()
    logger.error("exit taobao_detail_page job")
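# The future_tasks dict above is the standard concurrent.futures idiom: map
# each submitted future back to its task so results can be matched up as they
# finish. A self-contained sketch of the pattern (worker logic is illustrative):
from concurrent.futures import ThreadPoolExecutor, as_completed

def work(task):
    # Stand-in for _execut_taobao_detail_actions: return a proxy dict or None.
    return {'https': 'https://127.0.0.1:8888'} if task % 2 else None

with ThreadPoolExecutor(max_workers=4) as executor:
    future_tasks = {executor.submit(work, t): t for t in range(5)}
    for future in as_completed(future_tasks):
        task = future_tasks[future]  # recover the originating task
        try:
            proxy = future.result()
            if proxy:
                print('would remove proxy', proxy['https'], 'for task', task)
        except Exception as e:
            print('task', task, 'failed:', e)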
# -*- coding: utf-8 -*-
from apscheduler.schedulers.blocking import BlockingScheduler

from config.config_loader import logger
from mall_spider.spiders.actions.action_service import ActionService


class SycmScheduleJob(ActionService, BlockingScheduler):
    def __init__(self):
        super().__init__()

    def handle(self):
        # self.execute_sycm_category_job_init_actions()
        self.add_job(self.execute_sycm_category_job_init_actions, 'cron',
                     day_of_week='0-6', hour=10, minute=30, second=0)

    def run(self):
        self.handle()
        self.start()


if __name__ == "__main__":
    s = SycmScheduleJob()
    logger.info("start to execute sycm_schedule job")
    s.run()
    # jobs = s.get_jobs()
    # print(jobs)
    logger.error("exit sycm_schedule job")
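# For reference, the 'cron' trigger above fires every day of the week at
# 10:30:00. A standalone sketch of the same schedule outside the job class:
from apscheduler.schedulers.blocking import BlockingScheduler

def tick():
    print('run sycm category init')

scheduler = BlockingScheduler()
scheduler.add_job(tick, 'cron', day_of_week='0-6', hour=10, minute=30, second=0)
scheduler.start()  # blocks until interrupted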
def test_log(self):
    # Verify the logger handles non-ASCII (Chinese) text without encoding errors.
    logger.info(u'中文')
import time

from config.config_loader import logger
from mall_spider.spiders.actions.action_service import ActionService
from mall_spider.spiders.actions.executor_service import ExecutorService
from mall_spider.utils.date_util import yesterday


class DirectSyncJob(ActionService, Smorf):
    def __init__(self, pool_size):
        super().__init__()
        self._executor = ExecutorService(pool_size)

    def execute(self, num, date_str=None):
        # date_str = yesterday().strftime("%Y-%m-%d")
        date_str = '2019-01-17'
        self.execute_direct_good_actions(date_str=date_str)

    def init(self):
        super().init()

    def init_argparse(self, parser):
        super().init_argparse(parser)

    def process(self):
        self.execute(10)
        time.sleep(10)


if __name__ == "__main__":
    s = DirectSyncJob(1)
    logger.info("start to execute direct sync job")
    s.process()
    logger.info("exit direct sync job")
    def _risk(self, stream_risk_dao, account):
        entity = stream_risk_dao.query_one(_filter=[
            CmmSysStreamRisk.type == int(RiskType.taobao_search),
            CmmSysStreamRisk.raw_data == account['username']
        ])
        if not entity:
            entity = CmmSysStreamRisk()
            entity.raw_data = account['username']
            entity.type = int(RiskType.taobao_search)
            stream_risk_dao.insert_entity(entity=entity)

    def init(self):
        super().init()

    def init_argparse(self, parser):
        super().init_argparse(parser)

    def process(self):
        # return super().process()
        self.execute(2)
        time.sleep(10)


if __name__ == "__main__":
    s = TaobaoListPageJob(10)
    logger.info("start to execute taobao_list_page job")
    s.run()
    # s.process()
    logger.error("exit taobao_list_page job")
        tasks = self.execute_sycm_product_actions()
        logger.info("start to execute sycm tasks,tasks length:%s", len(tasks))
        i = 0
        for task in tasks:
            if i < num:
                self._execute_sycm_product_actions(task)
                # self._executor.submit(self._execute_sycm_product_actions, task)
                i += 1
                time.sleep(15)

    def init(self):
        super().init()

    def init_argparse(self, parser):
        super().init_argparse(parser)

    def process(self):
        # return super().process()
        self.execute(10)
        time.sleep(10)

    # def run(self):
    #     self.execute(self, maxInt)


if __name__ == "__main__":
    s = SycmJob(1)
    logger.info("start to execute sycm job")
    s.run()
    logger.error("exit sycm job")
        self._proxy_service = get_proxy_service()

    def execute(self):
        with write_session_scope() as session:
            _stream_risk_dao = get_stream_risk_dao(session=session)
            rsts = _stream_risk_dao.base_query.limit(self.account_num).all()
            if rsts:
                for item in rsts:
                    username = item.raw_data
                    account = global_config.s_accounts_dict[username]
                    proxy = self._proxy_service.get_origin_static_proxy(account['username'])
                    self._login(account=account, force=True, risk=True, proxy=proxy)
                    _stream_risk_dao.delete(_filter=[CmmSysStreamRisk.id == item.id])
                    session.commit()

    def init(self):
        super().init()

    def init_argparse(self, parser):
        super().init_argparse(parser)

    def process(self):
        self.execute()


if __name__ == "__main__":
    s = LoginJob(1)
    logger.info("start to execute login job")
    s.process()
    logger.error("exit login job")
    def execute_in_retry(self, context, http_request, data=None):
        method = http_request.method
        is_update_cookies = context.get(Context.KEY_IS_UPDATE_COOKIES, False)
        headers = context.get(Context.KEY_HEADERS, '')
        cookies = context.get(Context.KEY_COOKIES, RequestsCookieJar())
        start_time = time.time()
        retry = int(default_retry)
        retry_interval = float(default_retry_interval)
        timeout = 25.0
        connect_time_out = int(default_connect_timeout)
        proxies = context.get(Context.KEY_CURRENT_PROXY, '')
        account = context.get(Context.KEY_CURRENT_TASK_ACCOUNT, {})
        while retry > 0:
            retry = retry - 1
            response = None
            try:
                if proxies:
                    logger.info('context key:[%s],proxy inject,[%s]->[%s]', context.context_key, account, proxies)
                if method == HttpMethod.GET:
                    response = get(url=http_request.url, params=None, headers=headers, cookies=cookies,
                                   proxies=proxies)
                elif method == HttpMethod.POST:
                    response = post(url=http_request.url, data=data, headers=headers, cookies=cookies,
                                    proxies=proxies, connect_timeout=connect_time_out, timeout=timeout)
                logger.debug(u'context key:[%s],action:[%s] execute result:%s', context.context_key,
                             self.__class__.__name__, response.text)
                if response.status_code != 200:
                    raise StatusCodeException(response.status_code)
                return response
            except ProxyError as e:
                # Proxy failures are not retried; surface them to the caller.
                logger.error('proxy error,[%s]->[%s],exp:%s', account, proxies, e)
                raise ProxyException(e)
            except ConnectTimeoutError as e:
                logger.error('proxy error,[%s]->[%s],exp:%s', account, proxies, e)
                raise ProxyException(e)
            except ReadTimeout as e:
                if time.time() - start_time > timeout or retry == 0:
                    raise e
                logger.error(u'context key:[%s],action:[%s] execute read time out,exception:%s',
                             context.context_key, self.__class__.__name__, traceback.format_exc())
            except ConnectTimeout as e:
                if time.time() - start_time > timeout or retry == 0:
                    raise e
                logger.error(u'context key:[%s],action:[%s] execute connect time out,exception:%s',
                             context.context_key, self.__class__.__name__, traceback.format_exc())
            except Exception as e:
                if time.time() - start_time > timeout or retry == 0:
                    raise e
                logger.error(u'context key:[%s],action:[%s] execute error,exception:%s',
                             context.context_key, self.__class__.__name__, traceback.format_exc())
            finally:
                if is_update_cookies and response:
                    cookies.update(response.cookies)
                time.sleep(retry_interval)
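# Stripped of the HTTP specifics, the control flow above is bounded retries
# with an overall deadline: each failure either re-raises (budget exhausted)
# or logs, sleeps, and retries. A minimal generic sketch (names illustrative):
import time

def retry_with_deadline(fn, retries=3, interval=1.0, deadline=25.0):
    """Call fn until it succeeds, retries run out, or the deadline passes."""
    start = time.time()
    while retries > 0:
        retries -= 1
        try:
            return fn()
        except Exception:
            if time.time() - start > deadline or retries == 0:
                raise  # budget exhausted: propagate the last error
            time.sleep(interval)  # back off before the next attempt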
def unmarshal(self, context, response):
    params_dict = self.__get_list_api_params(html=response.text)
    logger.info('list api params:%s', params_dict)
    return params_dict