Пример #1
0
 def dump(self, cookies, account, path=None, type_=PickleFileType.cookie):
     """Serialize *cookies* with pickle and persist them under the data directory.

     The file name is derived from the account username and *type_* via
     ``gen_by_account``; *path* defaults to '../../data/' relative to this file.
     """
     if not path:
         path = '../../data/'
     account = account['username']
     target_dir = Path(__file__).parent.joinpath(path)
     target_dir.mkdir(exist_ok=True)
     file_name = self.gen_by_account(account=account, type_=type_)
     payload = pickle.dumps(cookies)
     target_dir.joinpath(file_name).write_bytes(payload)
     logger.info('dump data success,account:[%s],type:[%s]', account, type_)
Пример #2
0
 def execute(self, num, date_str=None):
     """Run at most *num* pending sycm product tasks sequentially.

     :param num: upper bound on how many tasks to execute this round.
     :param date_str: currently unused; kept for interface compatibility.
     """
     # if not date_str:
     #     date_str = yesterday()
     tasks = self.execute_sycm_product_actions()
     logger.info("start to execute sycm tasks,tasks length:%s", len(tasks))
     executed = 0
     for task in tasks:
         if executed >= num:
             # quota reached -- stop instead of scanning the remaining tasks
             break
         self._execute_sycm_product_actions(task)
         # self._executor.submit(self._execute_sycm_product_actions, task)
         executed += 1
         # throttle between tasks to avoid tripping rate limits
         time.sleep(15)
Пример #3
0
def get_sign_js():
    """Load the contents of js/sign.js, or return None when the file is absent.

    :return: the file's text, or ``None`` if the file does not exist.
    :raises Exception: re-raises any read error after logging it.
    """
    data_file = Path(__file__).parent.joinpath('../../../js/' + 'sign.js')
    if not data_file.exists():
        return None
    try:
        text = data_file.read_text()
        # log before returning -- the original logged after `return`,
        # which made the success log unreachable
        logger.info('load sign.js success')
        return text
    except Exception as e:
        logger.error('load sign.js error,exp:%s', e)
        # bare raise preserves the original traceback
        raise
Пример #4
0
    def load(self, account, path=None, type_=PickleFileType.cookie):
        """Load a pickled artifact (cookies / origin cookies / legality token).

        :param account: account dict; only ``account['username']`` is used.
        :param path: directory (relative to this file) holding the pickle
            files; defaults to '../../data/'.
        :param type_: which artifact to load, see :class:`PickleFileType`.
        :return: the unpickled object; ``None`` when the backing file does not
            exist; ``''`` when a legality token exists but cannot be read.
        :raises Exception: re-raises read/unpickle errors for the cookie types.
        """
        if not path:
            path = '../../data/'
        account = account['username']
        key = self.gen_by_account(account=account, type_=type_)
        # NOTE: pickle.loads assumes these files are trusted local data.
        # Path construction and existence check were identical in all three
        # branches, so they are hoisted here.
        data_file = Path(__file__).parent.joinpath(path + key)
        if not data_file.exists():
            return None

        if type_ == PickleFileType.cookie:
            try:
                cookies = pickle.loads(data_file.read_bytes())
                logger.info('load cookies success,account:[%s]', account)
                # keep an in-memory copy so later lookups can skip the disk read
                self.cookies_dict[key] = cookies
                return cookies
            except Exception as e:
                logger.error('load cookies error,account:[%s],exp:%s', account, e)
                raise e
        elif type_ == PickleFileType.origin_cookie:
            try:
                cookies = pickle.loads(data_file.read_bytes())
                logger.info('load origin cookies success,account:[%s]', account)
                return cookies
            except Exception as e:
                # fixed typo: message previously read 'load lorigin cookies error'
                logger.error('load origin cookies error,account:[%s],exp:%s', account, e)
                raise e
        else:
            try:
                legality_token = pickle.loads(data_file.read_bytes())
                logger.info('load legality_token success,account:[%s]', account)
                return legality_token
            except Exception as e:
                logger.error('load legality_token error,account:[%s],exp:%s', account, e)
                # token load failures are non-fatal: fall back to an empty token
                return ''
Пример #5
0
 def unmarshal(self, context, response):
     """Decode the proxy JSON response and return its 'data' payload."""
     payload = response.json()
     logger.info('proxy result:%s', payload)
     return payload['data']
Пример #6
0
            if i < num:
                future_tasks[self._executor.submit(
                    self._execut_taobao_detail_actions, task, proxy)] = task
            i += 1
        for future in as_completed(future_tasks):
            try:
                proxy = future.result()
                if proxy:
                    self._proxy.remove_proxy(url=proxy['https'])
            except Exception as e:
                logger.error(e)

    def init(self):
        # No job-specific setup; delegate to the parent implementation.
        super().init()

    def init_argparse(self, parser):
        # No extra command-line options; use the parent's parser setup.
        super().init_argparse(parser)

    def process(self):
        # return super().process()
        # Run a batch of 5 tasks per cycle, then pause briefly between cycles.
        self.execute(5)
        time.sleep(3)


if __name__ == "__main__":
    s = TaobaoDetailPageJob(40)
    logger.info("start to execute taobao_detail_page job")
    s.run()
    # s.process()
    # NOTE(review): normal exit is logged at error level, presumably so it
    # stands out in production logs -- confirm this is intentional.
    logger.error("exit taobao_detail_page job")
Пример #7
0
# -*- coding: utf-8 -*-
from apscheduler.schedulers.blocking import BlockingScheduler

from config.config_loader import logger
from mall_spider.spiders.actions.action_service import ActionService


class SycmScheduleJob(ActionService, BlockingScheduler):
    """Blocking scheduler that runs the sycm category init job daily at 10:30."""

    def __init__(self):
        super().__init__()

    def handle(self):
        """Register the daily cron trigger (every day of the week, 10:30:00)."""
        # self.execute_sycm_category_job_init_actions()
        self.add_job(
            self.execute_sycm_category_job_init_actions,
            'cron',
            day_of_week='0-6',
            hour=10,
            minute=30,
            second=0,
        )

    def run(self):
        """Register the scheduled job, then hand control to the blocking scheduler."""
        self.handle()
        self.start()


if __name__ == "__main__":
    s = SycmScheduleJob()
    logger.info("start to execute sycm_schedule job")
    # run() blocks until the scheduler is shut down
    s.run()
    # jobs = s.get_jobs()
    # print(jobs)
    # NOTE(review): exit message uses error level, presumably to stand out
    # in logs -- confirm intentional.
    logger.error("exit sycm_schedule job")
Пример #8
0
 def test_log(self):
     # Smoke test: verify the logger handles non-ASCII (Chinese) text.
     logger.info(u'中文')
Пример #9
0
from mall_spider.spiders.actions.executor_service import ExecutorService
from mall_spider.utils.date_util import yesterday


class DirectSyncJob(ActionService, Smorf):
    """Job that synchronizes 'direct good' records for a given date."""

    def __init__(self, pool_size):
        super().__init__()
        self._executor = ExecutorService(pool_size)

    def execute(self, num, date_str=None):
        """Run the direct-good sync for *date_str*.

        :param num: unused here; kept for interface compatibility with
            sibling jobs.
        :param date_str: date to sync ('%Y-%m-%d').  Previously this argument
            was silently ignored in favor of the hard-coded '2019-01-17';
            it is now honored when provided, with the old constant kept as
            the default for backward compatibility.
        """
        if not date_str:
            # date_str = yesterday().strftime("%Y-%m-%d")
            # TODO: switch the default back to yesterday() once verified
            date_str = '2019-01-17'
        self.execute_direct_good_actions(date_str=date_str)

    def init(self):
        # No job-specific setup; delegate to the parent implementation.
        super().init()

    def init_argparse(self, parser):
        # No extra command-line options; use the parent's parser setup.
        super().init_argparse(parser)

    def process(self):
        # Run one sync batch, then pause before the next cycle.
        self.execute(10)
        time.sleep(10)


if __name__ == "__main__":
    # Single pool worker; process() runs one sync batch and exits.
    s = DirectSyncJob(1)
    logger.info("start to execute direct sync job")
    s.process()
    logger.info("exit direct sync job")
Пример #10
0
    def _risk(self, stream_risk_dao, account):
        """Record a taobao-search risk entry for *account* unless one already exists."""
        username = account['username']
        risk_type = int(RiskType.taobao_search)
        existing = stream_risk_dao.query_one(_filter=[
            CmmSysStreamRisk.type == risk_type,
            CmmSysStreamRisk.raw_data == username,
        ])
        if existing:
            return
        record = CmmSysStreamRisk()
        record.raw_data = username
        record.type = risk_type
        stream_risk_dao.insert_entity(entity=record)

    def init(self):
        # No job-specific setup; delegate to the parent implementation.
        super().init()

    def init_argparse(self, parser):
        # No extra command-line options; use the parent's parser setup.
        super().init_argparse(parser)

    def process(self):
        # return super().process()
        # Run a batch of 2 tasks, then pause before the next cycle.
        self.execute(2)
        time.sleep(10)


if __name__ == "__main__":
    s = TaobaoListPageJob(10)
    logger.info("start to execute taobao_list_page job")
    s.run()
    # s.process()
    # NOTE(review): normal exit is logged at error level, presumably so it
    # stands out in logs -- confirm intentional.
    logger.error("exit taobao_list_page job")
Пример #11
0
        tasks = self.execute_sycm_product_actions()
        logger.info("start to execute sycm tasks,tasks length:%s", len(tasks))
        i = 0
        for task in tasks:
            if i < num:
                self._execute_sycm_product_actions(task)
                # self._executor.submit(self._execute_sycm_product_actions, task)
                i += 1
                time.sleep(15)

    def init(self):
        # No job-specific setup; delegate to the parent implementation.
        super().init()

    def init_argparse(self, parser):
        # No extra command-line options; use the parent's parser setup.
        super().init_argparse(parser)

    def process(self):
        # return super().process()
        # Run up to 10 tasks per cycle, then pause before the next one.
        self.execute(10)
        time.sleep(10)

    # def run(self):
    #     self.execute(self, maxInt)


if __name__ == "__main__":
    s = SycmJob(1)
    logger.info("start to execute sycm job")
    s.run()
    # NOTE(review): normal exit is logged at error level -- confirm intentional.
    logger.error("exit sycm job")
Пример #12
0
        self._proxy_service = get_proxy_service()

    def execute(self):
        """Re-login every account currently flagged in the risk table, then clear its flag."""
        with write_session_scope() as session:
            risk_dao = get_stream_risk_dao(session=session)
            flagged = risk_dao.base_query.limit(self.account_num).all()
            if not flagged:
                return
            for risk in flagged:
                username = risk.raw_data
                account = global_config.s_accounts_dict[username]
                proxy = self._proxy_service.get_origin_static_proxy(account['username'])
                # force a fresh risk-mode login through the per-account proxy
                self._login(account=account, force=True, risk=True, proxy=proxy)
                # delete + commit per account so progress survives a later failure
                risk_dao.delete(_filter=[CmmSysStreamRisk.id == risk.id])
                session.commit()

    def init(self):
        # No job-specific setup; delegate to the parent implementation.
        super().init()

    def init_argparse(self, parser):
        # No extra command-line options; use the parent's parser setup.
        super().init_argparse(parser)

    def process(self):
        # Single pass over the currently flagged accounts.
        self.execute()


if __name__ == "__main__":
    s = LoginJob(1)
    logger.info("start to execute login job")
    s.process()
    # NOTE(review): normal exit is logged at error level -- confirm intentional.
    logger.error("exit login job")
Пример #13
0
    def execute_in_retry(self, context, http_request, data=None):
        """Execute *http_request* (GET or POST) with retry, proxy and cookie support.

        Retries up to ``default_retry`` times, sleeping ``default_retry_interval``
        seconds between attempts.  Timeouts and generic errors are retried until
        the overall 25s ``timeout`` budget or the retry count is exhausted, then
        re-raised.  Proxy failures are wrapped in ``ProxyException`` and raised
        immediately.  A non-200 status raises ``StatusCodeException``, which the
        generic handler below catches, so it is also retried.

        :param context: request context supplying headers, cookies, proxy and account.
        :param http_request: object exposing ``method`` and ``url``.
        :param data: optional POST body.
        :return: the successful ``Response``.
        """
        method = http_request.method
        is_update_cookies = context.get(Context.KEY_IS_UPDATE_COOKIES, False)
        headers = context.get(Context.KEY_HEADERS, '')
        cookies = context.get(Context.KEY_COOKIES, RequestsCookieJar())
        start_time = time.time()
        retry = int(default_retry)
        retry_interval = float(default_retry_interval)
        timeout = 25.0
        connect_time_out = int(default_connect_timeout)

        proxies = context.get(Context.KEY_CURRENT_PROXY, '')
        account = context.get(Context.KEY_CURRENT_TASK_ACCOUNT, {})
        while retry > 0:
            retry = retry - 1
            response = None
            try:
                if proxies:
                    logger.info('context key:[%s],proxy inject,[%s]->[%s]',
                                context.context_key, account, proxies)
                if method == HttpMethod.GET:
                    # NOTE(review): GET does not forward connect_timeout/timeout,
                    # unlike POST below -- confirm whether that is intentional.
                    response = get(url=http_request.url,
                                   params=None,
                                   headers=headers,
                                   cookies=cookies,
                                   proxies=proxies)
                elif method == HttpMethod.POST:
                    response = post(url=http_request.url,
                                    data=data,
                                    headers=headers,
                                    cookies=cookies,
                                    proxies=proxies,
                                    connect_timeout=connect_time_out,
                                    timeout=timeout)
                logger.debug(u'context key:[%s],action:[%s] execute result:%s',
                             context.context_key, self.__class__.__name__,
                             response.text)
                if response.status_code != 200:
                    raise StatusCodeException(response.status_code)
                return response
            except ProxyError as e:
                # proxy-level failures are not retried here; wrap and propagate
                logger.error('proxy error,[%s]->[%s],exp:%s', account, proxies,
                             e)
                raise ProxyException(e)
            except ConnectTimeoutError as e:
                logger.error('proxy error,[%s]->[%s],exp:%s', account, proxies,
                             e)
                raise ProxyException(e)
            except ReadTimeout as e:
                import sys
                exc_info = sys.exc_info()
                # give up once the overall time budget or retry count is spent
                if time.time() - start_time > timeout or retry == 0:
                    raise e
                    # raise exc_info[0], exc_info[1], exc_info[2]
                logger.error(
                    u'context key:[%s],action:[%s] execute read time out,exception:%s',
                    context.context_key, self.__class__.__name__,
                    traceback.format_exc())
            except ConnectTimeout as e:
                import sys
                exc_info = sys.exc_info()
                if time.time() - start_time > timeout or retry == 0:
                    raise e
                    # raise exc_info[0], exc_info[1], exc_info[2]
                logger.error(
                    u'context key:[%s],action:[%s] execute connect time out,exception:%s',
                    context.context_key, self.__class__.__name__,
                    traceback.format_exc())
            except Exception as e:
                import sys
                exc_info = sys.exc_info()
                if time.time() - start_time > timeout or retry == 0:
                    raise e
                    # raise exc_info[0], exc_info[1], exc_info[2]
                logger.error(
                    u'context key:[%s],action:[%s] execute error,exception:%s',
                    context.context_key, self.__class__.__name__,
                    traceback.format_exc())
            finally:
                # merge any cookies the server set, even on a failed attempt
                if is_update_cookies and response:
                    cookies.update(response.cookies)

            time.sleep(retry_interval)
Пример #14
0
 def unmarshal(self, context, response):
     """Extract the list-API parameters embedded in the response HTML."""
     extracted = self.__get_list_api_params(html=response.text)
     logger.info('list api params:%s', extracted)
     return extracted