def get_cookies(cls, host: str) -> typing.Optional[CacheRecord]:
    """Get cookies information.

    The method fetches cached cookies information from the Redis
    database. If caches expired, it calls
    :meth:`~MarketSite.cache_cookies` to update the cached information.

    Args:
        host: Hostname of the target market.

    Returns:
        Cached cookies information if any.

    """
    record: typing.Optional[CacheRecord] = _redis_command('get', f'cookies:{host}')
    if record is not None:
        return record
    # cache miss (or expired entry) - refresh from the remote database
    return cls.cache_cookies(host)
def main(argv: 'Optional[List[str]]' = None) -> int:
    """Entrypoint.

    Args:
        argv: Optional command line arguments.

    Returns:
        Exit code.

    """
    parser = get_parser()
    args = parser.parse_args(argv)

    # record the current PID so external scripts can signal the daemon
    pid = os.getpid()
    with open(PATH_ID, 'w') as file:
        print(pid, file=file)

    # wait for Redis
    if _WAIT_REDIS:
        if not FLAG_DB:
            _redis_command('set', 'darc', pid)

    if FLAG_DB:
        # retry until the task-queue tables are created successfully
        # NOTE(review): tight retry loop with no backoff - confirm intended
        while True:
            try:
                with DB:
                    _db_operation(DB.create_tables, [
                        HostnameQueueModel, RequestsQueueModel, SeleniumQueueModel,
                    ])
            except Exception:
                # fixed: message string previously lacked the closing ')'
                logger.pexc(LOG_WARNING, category=DatabaseOperaionFailed,
                            line='DB.create_tables([HostnameQueueModel, ...])')
                continue
            break

    if SAVE_DB:
        # retry until the web-content tables are created successfully
        while True:
            try:
                with DB_WEB:
                    _db_operation(DB_WEB.create_tables, [
                        HostnameModel, URLModel, URLThroughModel,
                        RobotsModel, SitemapModel, HostsModel,
                        RequestsModel, RequestsHistoryModel, SeleniumModel,
                    ])
            except Exception:
                # fixed: message string previously lacked the closing ')'
                logger.pexc(LOG_WARNING, category=DatabaseOperaionFailed,
                            line='DB.create_tables([HostnameModel, ...])')
                continue
            break

    logger.debug('-*- Initialisation -*-')
    if DEBUG and not FLAG_DB:
        # nuke the db
        _redis_command('delete', 'queue_hostname')
        _redis_command('delete', 'queue_requests')
        _redis_command('delete', 'queue_selenium')

    # collect seed links from the command line ...
    link_list = []
    for link in filter(
            None, map(lambda s: s.strip(), args.link)):  # type: ignore[name-defined,var-annotated]
        logger.pline(LOG_DEBUG, link)
        link_list.append(link)
    # ... and from any link files (skipping '#' comment lines)
    if args.file is not None:
        for path in args.file:
            with open(path) as file:
                for line in filter(None, map(lambda s: s.strip(), file)):
                    if line.startswith('#'):
                        continue
                    logger.pline(LOG_DEBUG, line)
                    link_list.append(line)

    # write to database
    link_pool = [parse_link(link, backref=None) for link in link_list]
    save_requests(link_pool, score=0, nx=True)
    logger.pline(LOG_DEBUG, logger.horizon)

    # init link file
    if not os.path.isfile(PATH_LN):
        with open(PATH_LN, 'w') as file:
            print('proxy,scheme,host,hash,link', file=file)

    try:
        process(args.type)
    except BaseException:
        # report the failure but still run the cleanup/exit path
        traceback.print_exc()
    _exit()

    return 0
def main(argv: typing.Optional[typing.List[str]] = None) -> int:
    """Entrypoint.

    Args:
        argv: Optional command line arguments.

    Returns:
        Exit code.

    """
    args = get_parser().parse_args(argv)

    # persist the current PID for the daemon scripts
    pid = os.getpid()
    with open(PATH_ID, 'w') as file:
        print(pid, file=file)

    # wait for Redis
    if _WAIT_REDIS:
        if not FLAG_DB:
            _redis_command('set', 'darc', pid)

    if FLAG_DB:
        # keep retrying until the task-queue tables exist
        while True:
            try:
                with DB:
                    _db_operation(DB.create_tables, [
                        HostnameQueueModel, RequestsQueueModel, SeleniumQueueModel,
                    ])
            except Exception as exc:
                msg = warnings.formatwarning(
                    exc, DatabaseOperaionFailed, __file__, 102,  # type: ignore[arg-type]
                    'DB.create_tables([HostnameQueueModel, ...])')
                print(render_error(msg, stem.util.term.Color.YELLOW),
                      end='', file=sys.stderr)  # pylint: disable=no-member
                continue
            break

    if SAVE_DB:
        # keep retrying until the web-content tables exist
        while True:
            try:
                with DB_WEB:
                    _db_operation(DB_WEB.create_tables, [
                        HostnameModel, URLModel,
                        RobotsModel, SitemapModel, HostsModel,
                        RequestsModel, RequestsHistoryModel, SeleniumModel,
                    ])
            except Exception as exc:
                msg = warnings.formatwarning(
                    exc, DatabaseOperaionFailed, __file__, 117,  # type: ignore[arg-type]
                    'DB.create_tables([HostnameModel, ...])')
                print(render_error(msg, stem.util.term.Color.YELLOW),
                      end='', file=sys.stderr)  # pylint: disable=no-member
                continue
            break

    if DEBUG:
        print(
            stem.util.term.format('-*- Initialisation -*-',
                                  stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        # nuke the db
        if not FLAG_DB:
            _redis_command('delete', 'queue_hostname')
            _redis_command('delete', 'queue_requests')
            _redis_command('delete', 'queue_selenium')

    # gather seed links from the command line ...
    collected = []
    for raw in args.link:  # type: ignore[name-defined,var-annotated]
        link = raw.strip()
        if not link:
            continue
        if DEBUG:
            print(stem.util.term.format(link, stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        collected.append(link)
    # ... and from any link files, skipping blanks and '#' comments
    if args.file is not None:
        for path in args.file:
            with open(path) as file:
                for raw in file:
                    line = raw.strip()
                    if not line or line.startswith('#'):
                        continue
                    if DEBUG:
                        print(
                            stem.util.term.format(
                                line, stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
                    collected.append(line)

    # write to database
    link_pool = [parse_link(link) for link in collected]
    save_requests(link_pool, score=0, nx=True)
    if DEBUG:
        print(
            stem.util.term.format('-' * shutil.get_terminal_size().columns,
                                  stem.util.term.Color.MAGENTA))  # pylint: disable=no-member

    # init link file
    if not os.path.isfile(PATH_LN):
        with open(PATH_LN, 'w') as file:
            print('proxy,scheme,host,hash,link', file=file)

    try:
        process(args.type)
    except BaseException:
        traceback.print_exc()
    _exit()

    return 0
def cache_cookies(host: str) -> typing.Optional[CacheRecord]:
    """Cache cookies information.

    The method sends a ``POST`` request to the `remote database`_ to
    obtain the latest cookies information, then parses and caches them
    in the Redis database.

    Notes:
        If :data:`CACHE_TIMEOUT` is set to :data:`None`, the caching
        will then be disabled.

    The parsed cookies information represents the actual target homepage
    of the market and the cookies needed for accessing it successfully.

    Args:
        host: Hostname of the target market.

    Returns:
        Cached cookies information if any.

    """
    cached = None
    # NOTE(review): hard-coded service endpoint - consider moving to config
    # fixed: added a timeout so a stalled endpoint cannot hang the caller
    response = requests.post('http://43.250.173.86:7899/cookie/all', timeout=60)
    data = response.json()['result']['cookies']
    for record in data:
        # domains are comma-separated; drop empty fragments
        domain_list = list(filter(
            None, record['domain'].split(',')))  # type: typing.List[str]
        cached_data = {
            'homepage': record['homeUrl'],
            'alive': record['isAlive'],
            'name': record['marketName'],
            'cookies': json.loads(record['cookie']),
            'domains': domain_list,
            'exit_node': record['exitNode'],
            'valid': record['isValid'],
            'last_update': datetime.strptime(record['updateTime'],
                                             r'%Y-%m-%d %H:%M:%S.%f'),
            'deleted': record['isDel'],
            'history_domains': list(filter(None, record['historyDomain'].split(','))),
        }  # type: CacheRecord
        if CACHE_TIMEOUT is None:
            # caching disabled - only look for the record matching *host*
            if any(domain == host for domain in domain_list):
                cached = cached_data.copy()
                break
        else:
            # cache every domain of every record; remember the one
            # matching *host* to return to the caller
            for domain in domain_list:
                if domain == host:
                    cached = cached_data.copy()
                _redis_command('setex', f'cookies:{domain}', CACHE_TIMEOUT, cached_data)
    return cached
def main():
    """Entrypoint."""
    parser = get_parser()
    args = parser.parse_args()

    # persist the current PID so external scripts can signal the daemon
    pid = os.getpid()
    with open(PATH_ID, 'w') as file:
        print(pid, file=file)

    # wait for Redis
    if _WAIT_REDIS:
        if not FLAG_DB:
            _redis_command('set', 'darc', pid)

    if FLAG_DB:
        # retry until the task-queue tables are created: on failure the
        # suppressed exception skips the ``break``, so the loop repeats
        # NOTE(review): no backoff between attempts - confirm intended
        while True:
            with contextlib.suppress(Exception):
                with DB:
                    DB.create_tables([
                        HostnameQueueModel, RequestsQueueModel, SeleniumQueueModel,
                    ])
                break

    if SAVE_DB:
        # same retry pattern for the web-content tables
        while True:
            with contextlib.suppress(Exception):
                with DB_WEB:
                    DB_WEB.create_tables([
                        HostnameModel, URLModel,
                        RobotsModel, SitemapModel, HostsModel,
                        RequestsModel, RequestsHistoryModel, SeleniumModel,
                    ])
                break

    if DEBUG:
        print(stem.util.term.format('-*- Initialisation -*-', stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        # nuke the db
        if not FLAG_DB:
            _redis_command('delete', 'queue_hostname')
            _redis_command('delete', 'queue_requests')
            _redis_command('delete', 'queue_selenium')

    # gather seed links from the command line (blank entries dropped) ...
    link_list = list()
    for link in filter(None, map(lambda s: s.strip(), args.link)):
        if DEBUG:
            print(stem.util.term.format(link, stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        link_list.append(link)
    # ... and from any link files, skipping '#' comment lines
    if args.file is not None:
        for path in args.file:
            with open(path) as file:
                for line in filter(None, map(lambda s: s.strip(), file)):
                    if line.startswith('#'):
                        continue
                    if DEBUG:
                        print(stem.util.term.format(line, stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
                    link_list.append(line)

    # write to database
    link_pool = [parse_link(link) for link in link_list]
    save_requests(link_pool, score=0, nx=True)
    if DEBUG:
        print(stem.util.term.format('-' * shutil.get_terminal_size().columns, stem.util.term.Color.MAGENTA))  # pylint: disable=no-member

    # init link file
    if not os.path.isfile(PATH_LN):
        with open(PATH_LN, 'w') as file:
            print('proxy,scheme,host,hash,link', file=file)

    try:
        process(args.type)
    except BaseException:
        # report the failure but still run the cleanup/exit path
        traceback.print_exc()
    _exit()