示例#1
0
    def get_cookies(cls, host: str) -> typing.Optional[CacheRecord]:
        """Look up the cookies record of a market host.

        The Redis key ``cookies:<host>`` is queried first. When that lookup
        yields :data:`None`, the work is delegated to ``cls.cache_cookies``
        and whatever it returns is handed back to the caller.

        Args:
            host: Hostname of the target market.

        Returns:
            The cookies record for ``host``, or :data:`None` when neither
            the cache lookup nor ``cache_cookies`` produced one.

        """
        record: typing.Optional[CacheRecord] = _redis_command('get', f'cookies:{host}')
        if record is not None:
            return record

        # nothing cached under this key -- let cache_cookies() supply the record
        return cls.cache_cookies(host)
示例#2
0
def main(argv: 'Optional[List[str]]' = None) -> int:
    """Run the command line entrypoint.

    The function parses the command line, records the process ID in
    ``PATH_ID``, creates the database tables that are enabled through
    ``FLAG_DB`` / ``SAVE_DB`` (retrying until creation succeeds), saves
    the links given on the command line and in the link files through
    ``save_requests``, and finally hands over to ``process``.

    Args:
        argv: Optional command line arguments.

    Returns:
        Exit code; ``0`` whenever the function returns.

    """
    args = get_parser().parse_args(argv)

    pid = os.getpid()
    with open(PATH_ID, 'w') as pid_file:
        print(pid, file=pid_file)

    # wait for Redis
    if _WAIT_REDIS and not FLAG_DB:
        _redis_command('set', 'darc', pid)

    if FLAG_DB:
        # keep trying until the queue tables could be created
        while True:
            try:
                with DB:
                    _db_operation(DB.create_tables, [
                        HostnameQueueModel,
                        RequestsQueueModel,
                        SeleniumQueueModel,
                    ])
            except Exception:
                logger.pexc(LOG_WARNING,
                            category=DatabaseOperaionFailed,
                            line='DB.create_tables([HostnameQueueModel, ...]')
            else:
                break

    if SAVE_DB:
        # same retry scheme for the tables of the web database
        while True:
            try:
                with DB_WEB:
                    _db_operation(DB_WEB.create_tables, [
                        HostnameModel,
                        URLModel,
                        URLThroughModel,
                        RobotsModel,
                        SitemapModel,
                        HostsModel,
                        RequestsModel,
                        RequestsHistoryModel,
                        SeleniumModel,
                    ])
            except Exception:
                logger.pexc(LOG_WARNING,
                            category=DatabaseOperaionFailed,
                            line='DB.create_tables([HostnameModel, ...]')
            else:
                break

    logger.debug('-*- Initialisation -*-')
    if DEBUG and not FLAG_DB:
        # nuke the db
        for queue_key in ('queue_hostname', 'queue_requests', 'queue_selenium'):
            _redis_command('delete', queue_key)

    link_list = []

    # links passed directly on the command line; blank entries are dropped
    for raw_link in args.link:
        link = raw_link.strip()
        if not link:
            continue
        logger.pline(LOG_DEBUG, link)
        link_list.append(link)

    # links listed in files; blank lines and ``#`` lines are dropped
    if args.file is not None:
        for path in args.file:
            with open(path) as link_file:
                for raw_line in link_file:
                    line = raw_line.strip()
                    if not line or line.startswith('#'):
                        continue
                    logger.pline(LOG_DEBUG, line)
                    link_list.append(line)

    # write to database
    link_pool = []
    for link in link_list:
        link_pool.append(parse_link(link, backref=None))
    save_requests(link_pool, score=0, nx=True)
    logger.pline(LOG_DEBUG, logger.horizon)

    # init link file
    if not os.path.isfile(PATH_LN):
        with open(PATH_LN, 'w') as link_index:
            print('proxy,scheme,host,hash,link', file=link_index)

    try:
        process(args.type)
    except BaseException:
        # anything raised by process() is only printed, never propagated
        traceback.print_exc()
    _exit()

    return 0
示例#3
0
def main(argv: typing.Optional[typing.List[str]] = None) -> int:
    """Run the command line entrypoint.

    The function parses the command line, records the process ID in
    ``PATH_ID``, creates the database tables that are enabled through
    ``FLAG_DB`` / ``SAVE_DB`` (retrying until creation succeeds), saves
    the links given on the command line and in the link files through
    ``save_requests``, and finally hands over to ``process``.

    Args:
        argv: Optional command line arguments.

    Returns:
        Exit code; ``0`` whenever the function returns.

    """

    def _magenta(text):
        """Format ``text`` with the magenta terminal colour."""
        return stem.util.term.format(text, stem.util.term.Color.MAGENTA)  # pylint: disable=no-member

    def _report_db_failure(error, lineno, source_line):
        """Print a yellow ``DatabaseOperaionFailed`` warning on stderr."""
        message = warnings.formatwarning(
            error,  # type: ignore[arg-type]
            DatabaseOperaionFailed,
            __file__,
            lineno,
            source_line)
        print(render_error(message, stem.util.term.Color.YELLOW),  # pylint: disable=no-member
              end='',
              file=sys.stderr)

    args = get_parser().parse_args(argv)

    pid = os.getpid()
    with open(PATH_ID, 'w') as pid_file:
        print(pid, file=pid_file)

    # wait for Redis
    if _WAIT_REDIS and not FLAG_DB:
        _redis_command('set', 'darc', pid)

    if FLAG_DB:
        # keep trying until the queue tables could be created
        while True:
            try:
                with DB:
                    _db_operation(DB.create_tables, [
                        HostnameQueueModel,
                        RequestsQueueModel,
                        SeleniumQueueModel,
                    ])
            except Exception as error:
                _report_db_failure(error, 102,
                                   'DB.create_tables([HostnameQueueModel, ...])')
            else:
                break

    if SAVE_DB:
        # same retry scheme for the tables of the web database
        while True:
            try:
                with DB_WEB:
                    _db_operation(DB_WEB.create_tables, [
                        HostnameModel,
                        URLModel,
                        RobotsModel,
                        SitemapModel,
                        HostsModel,
                        RequestsModel,
                        RequestsHistoryModel,
                        SeleniumModel,
                    ])
            except Exception as error:
                _report_db_failure(error, 117,
                                   'DB.create_tables([HostnameModel, ...])')
            else:
                break

    if DEBUG:
        print(_magenta('-*- Initialisation -*-'))

        # nuke the db
        if not FLAG_DB:
            for queue_key in ('queue_hostname', 'queue_requests', 'queue_selenium'):
                _redis_command('delete', queue_key)

    link_list = []

    # links passed directly on the command line; blank entries are dropped
    for raw_link in args.link:
        link = raw_link.strip()
        if not link:
            continue
        if DEBUG:
            print(_magenta(link))
        link_list.append(link)

    # links listed in files; blank lines and ``#`` lines are dropped
    if args.file is not None:
        for path in args.file:
            with open(path) as link_file:
                for raw_line in link_file:
                    line = raw_line.strip()
                    if not line or line.startswith('#'):
                        continue
                    if DEBUG:
                        print(_magenta(line))
                    link_list.append(line)

    # write to database
    link_pool = []
    for link in link_list:
        link_pool.append(parse_link(link))
    save_requests(link_pool, score=0, nx=True)

    if DEBUG:
        # horizontal rule as wide as the terminal
        print(_magenta('-' * shutil.get_terminal_size().columns))

    # init link file
    if not os.path.isfile(PATH_LN):
        with open(PATH_LN, 'w') as link_index:
            print('proxy,scheme,host,hash,link', file=link_index)

    try:
        process(args.type)
    except BaseException:
        # anything raised by process() is only printed, never propagated
        traceback.print_exc()
    _exit()

    return 0
示例#4
0
    def cache_cookies(host: str) -> typing.Optional[CacheRecord]:
        """Refresh the cookies records and return the one for ``host``.

        A ``POST`` request is sent to the remote cookies endpoint and every
        entry of ``result.cookies`` in the JSON reply is converted into a
        record (homepage, cookies, domains, validity flags, ...).

        Notes:
            * When :data:`CACHE_TIMEOUT` is :data:`None`, nothing is written
              to Redis; the scan stops at the first record whose domain list
              contains ``host``.
            * Otherwise every record is stored under ``cookies:<domain>``
              for each of its domains with :data:`CACHE_TIMEOUT` as expiry,
              and the last record matching ``host`` is the one returned.

        Args:
            host: Hostname of the target market.

        Returns:
            A copy of the record matching ``host``, or :data:`None` when no
            record lists that hostname.

        """
        matched: typing.Optional[CacheRecord] = None

        reply = requests.post('http://43.250.173.86:7899/cookie/all')
        for entry in reply.json()['result']['cookies']:
            # comma separated list; empty fragments are discarded
            domains: typing.List[str] = [name for name in entry['domain'].split(',') if name]
            info: CacheRecord = {
                'homepage': entry['homeUrl'],
                'alive': entry['isAlive'],
                'name': entry['marketName'],
                'cookies': json.loads(entry['cookie']),
                'domains': domains,
                'exit_node': entry['exitNode'],
                'valid': entry['isValid'],
                'last_update': datetime.strptime(entry['updateTime'], r'%Y-%m-%d %H:%M:%S.%f'),
                'deleted': entry['isDel'],
                'history_domains': [name for name in entry['historyDomain'].split(',') if name],
            }

            if CACHE_TIMEOUT is not None:
                # caching enabled: store the record for each of its domains
                for domain in domains:
                    if domain == host:
                        matched = info.copy()
                    _redis_command('setex', f'cookies:{domain}', CACHE_TIMEOUT, info)
                continue

            # caching disabled: only the requested host is of interest
            if host in domains:
                matched = info.copy()
                break
        return matched
示例#5
0
def main(argv=None):
    """Entrypoint.

    The function parses the command line, records the process ID in
    ``PATH_ID``, creates the database tables that are enabled through
    ``FLAG_DB`` / ``SAVE_DB``, saves the links given on the command line
    and in the link files through ``save_requests``, and finally hands
    over to ``process``.

    Table creation is retried until it succeeds; every failed attempt is
    reported through :func:`traceback.print_exc`.

    Args:
        argv: Optional command line arguments, passed straight to
            ``parser.parse_args``. Defaults to :data:`None`, which is what
            a bare ``parse_args()`` call uses.

    Returns:
        Exit code; ``0`` whenever the function returns.

    """
    parser = get_parser()
    args = parser.parse_args(argv)

    pid = os.getpid()
    with open(PATH_ID, 'w') as file:
        print(pid, file=file)

    # wait for Redis
    if _WAIT_REDIS and not FLAG_DB:
        _redis_command('set', 'darc', pid)

    if FLAG_DB:
        # Retry until the queue tables exist. Suppressing the exception and
        # leaving the loop unconditionally would let the process continue
        # without tables, so a failure is reported and the attempt repeated.
        while True:
            try:
                with DB:
                    DB.create_tables([
                        HostnameQueueModel, RequestsQueueModel, SeleniumQueueModel,
                    ])
            except Exception:
                traceback.print_exc()
                continue
            break

    if SAVE_DB:
        # same retry scheme for the tables of the web database
        while True:
            try:
                with DB_WEB:
                    DB_WEB.create_tables([
                        HostnameModel, URLModel,
                        RobotsModel, SitemapModel, HostsModel,
                        RequestsModel, RequestsHistoryModel, SeleniumModel,
                    ])
            except Exception:
                traceback.print_exc()
                continue
            break

    if DEBUG:
        print(stem.util.term.format('-*- Initialisation -*-', stem.util.term.Color.MAGENTA))  # pylint: disable=no-member

        # nuke the db
        if not FLAG_DB:
            _redis_command('delete', 'queue_hostname')
            _redis_command('delete', 'queue_requests')
            _redis_command('delete', 'queue_selenium')

    link_list = []

    # links passed directly on the command line; blank entries are dropped
    for link in filter(None, map(lambda s: s.strip(), args.link)):
        if DEBUG:
            print(stem.util.term.format(link, stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
        link_list.append(link)

    # links listed in files; blank lines and ``#`` lines are dropped
    if args.file is not None:
        for path in args.file:
            with open(path) as file:
                for line in filter(None, map(lambda s: s.strip(), file)):
                    if line.startswith('#'):
                        continue
                    if DEBUG:
                        print(stem.util.term.format(line, stem.util.term.Color.MAGENTA))  # pylint: disable=no-member
                    link_list.append(line)

    # write to database
    link_pool = [parse_link(link) for link in link_list]
    save_requests(link_pool, score=0, nx=True)

    if DEBUG:
        # horizontal rule as wide as the terminal
        print(stem.util.term.format('-' * shutil.get_terminal_size().columns, stem.util.term.Color.MAGENTA))  # pylint: disable=no-member

    # init link file
    if not os.path.isfile(PATH_LN):
        with open(PATH_LN, 'w') as file:
            print('proxy,scheme,host,hash,link', file=file)

    try:
        process(args.type)
    except BaseException:
        # anything raised by process() is only printed, never propagated
        traceback.print_exc()
    _exit()

    return 0