示例#1
0
文件: db.py 项目: lucmichalski/darc
def _save_selenium_redis(entries: typing.Union[Link, typing.List[Link]], single: bool = False,
                         score: typing.Optional[float] = None, nx: bool = False, xx: bool = False) -> None:
    """Save link to the :mod:`selenium` database.

    The function updates the ``queue_selenium`` database.

    Args:
        entries: Links to be added to the :mod:`selenium` database.
            It can be either an *iterable* of links, or a single
            link (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a single link rather
            than an *iterable* of links.
        score: Score for the Redis sorted set. Defaults to the
            current :func:`time.time` value.
        nx: Forces ``ZADD`` to only create new elements and not to
            update scores for elements that already exist.
        xx: Forces ``ZADD`` to only update scores of elements that
            already exist. New elements will not be added.

    When ``entries`` is a list of :class:`~darc.link.Link` instances,
    we try to perform *bulk* updates to ease the memory consumption.
    The *bulk* size is defined by :data:`~darc.db.BULK_SIZE`. Items
    that are not :class:`~darc.link.Link` instances are ignored, and
    a chunk without any valid link is skipped altogether.

    Notes:
        The ``entries`` will be dumped through :mod:`pickle` so that
        :mod:`darc` does not need to parse them again.

    """
    if not entries:
        return
    if score is None:
        score = time.time()

    if not single:
        if typing.TYPE_CHECKING:
            entries = typing.cast(typing.List[Link], entries)

        for chunk in peewee.chunked(entries, BULK_SIZE):
            pool = [link for link in chunk if isinstance(link, Link)]
            if not pool:
                # No valid link in this chunk: do not take the lock nor issue
                # a ZADD with an empty mapping (redis-py rejects such a call).
                continue

            # ``nx=True`` keeps an already stored pickle untouched.
            for link in pool:
                _redis_command('set', link.name, pickle.dumps(link), nx=True)
            mapping = {link.name: score for link in pool}
            with _redis_get_lock('lock_queue_selenium'):  # type: ignore
                _redis_command('zadd', 'queue_selenium', mapping, nx=nx, xx=xx)
        return

    if typing.TYPE_CHECKING:
        entries = typing.cast(Link, entries)

    _redis_command('set', entries.name, pickle.dumps(entries), nx=True)
    mapping = {
        entries.name: score,
    }
    with _redis_get_lock('lock_queue_selenium'):  # type: ignore
        _redis_command('zadd', 'queue_selenium', mapping, nx=nx, xx=xx)
示例#2
0
文件: db.py 项目: lucmichalski/darc
def _db_operation(operation: typing.Callable[..., typing.T], *args, **kwargs) -> typing.T:  # type: ignore
    """Retry operation on database.

    Args:
        operation: Callable / method to perform.
        *args: Arbitrary positional arguments.

    Keyword Args:
        **kwargs: Arbitrary keyword arguments.

    Returns:
        Any return value from a successful
        ``operation`` call.

    """
    _arg_msg = None

    while True:
        try:
            value = operation(*args, **kwargs)
        except Exception as error:
            if _arg_msg is None:
                _arg_msg = _gen_arg_msg(*args, **kwargs)

            model = typing.cast(typing.MethodType, operation).__self__.__class__.__name__
            warning = warnings.formatwarning(str(error), DatabaseOperaionFailed, __file__, 166,
                                             f'{model}.{operation.__name__}({_arg_msg})')
            print(render_error(warning, stem.util.term.Color.YELLOW), end='', file=sys.stderr)  # pylint: disable=no-member

            if RETRY_INTERVAL is not None:
                time.sleep(RETRY_INTERVAL)
            continue
        break
    return value
示例#3
0
文件: db.py 项目: lucmichalski/darc
def _save_requests_redis(entries: typing.Union[Link, typing.List[Link]], single: bool = False,
                         score: typing.Optional[float] = None, nx: bool = False, xx: bool = False) -> None:
    """Save link to the :mod:`requests` database.

    The function updates the ``queue_requests`` database.

    Args:
        entries: Links to be added to the :mod:`requests` database.
            It can be either a :obj:`list` of links, or a single
            link (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a single link rather
            than a :obj:`list` of links.
        score: Score for the Redis sorted set. Defaults to the
            current :func:`time.time` value.
        nx: Forces ``ZADD`` to only create new elements and not to
            update scores for elements that already exist.
        xx: Forces ``ZADD`` to only update scores of elements that
            already exist. New elements will not be added.

    When ``entries`` is a list, it is processed in chunks of
    ``BULK_SIZE``. Items that are not ``Link`` instances are ignored,
    and a chunk without any valid link is skipped altogether.

    Notes:
        Each link is stored pickled (see :mod:`pickle`) under its
        ``name`` before that name is added to the sorted set.

    """
    # Nothing to save for an empty list or a missing single entry.
    if not entries:
        return
    if score is None:
        score = time.time()

    if not single:
        if typing.TYPE_CHECKING:
            entries = typing.cast(typing.List[Link], entries)

        for chunk in peewee.chunked(entries, BULK_SIZE):
            pool = [link for link in chunk if isinstance(link, Link)]
            if not pool:
                # No valid link in this chunk: do not take the lock nor issue
                # a ZADD with an empty mapping (redis-py rejects such a call).
                continue

            # ``nx=True`` keeps an already stored pickle untouched.
            for link in pool:
                _redis_command('set', link.name, pickle.dumps(link), nx=True)
            mapping = {link.name: score for link in pool}
            with _redis_get_lock('lock_queue_requests'):  # type: ignore
                _redis_command('zadd', 'queue_requests', mapping, nx=nx, xx=xx)
        return

    if typing.TYPE_CHECKING:
        entries = typing.cast(Link, entries)

    _redis_command('set', entries.name, pickle.dumps(entries), nx=True)
    mapping = {
        entries.name: score,
    }
    with _redis_get_lock('lock_queue_requests'):  # type: ignore
        _redis_command('zadd', 'queue_requests', mapping, nx=nx, xx=xx)
示例#4
0
def _freenet_bootstrap() -> None:
    """Freenet bootstrap.

    The bootstrap arguments are defined as :data:`~darc.proxy.freenet._FREENET_ARGS`.

    The process is launched and given up to :data:`~darc.proxy.freenet.BS_WAIT`
    seconds to finish. On success :data:`~darc.proxy.freenet._FREENET_BS_FLAG`
    is set to :data:`True`.

    Raises:
        subprocess.CalledProcessError: If the return code of :data:`~darc.proxy.freenet._FREENET_PROC` is non-zero.
            This includes the case where the process has not exited within
            :data:`~darc.proxy.freenet.BS_WAIT` seconds (its return code is
            then :data:`None`). The exception carries the output captured so far.

    See Also:
        * :func:`darc.proxy.freenet.freenet_bootstrap`
        * :data:`darc.proxy.freenet.BS_WAIT`
        * :data:`darc.proxy.freenet._FREENET_BS_FLAG`
        * :data:`darc.proxy.freenet._FREENET_PROC`

    """
    global _FREENET_BS_FLAG, _FREENET_PROC  # pylint: disable=global-statement

    # launch Freenet process
    _FREENET_PROC = subprocess.Popen(  # nosec
        _FREENET_ARGS,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    try:
        stdout, stderr = _FREENET_PROC.communicate(timeout=BS_WAIT)
    except subprocess.TimeoutExpired as error:
        # keep whatever output was collected before the timeout
        stdout, stderr = error.stdout, error.stderr
    if VERBOSE:
        if stdout is not None:
            print(render_error(stdout, stem.util.term.Color.BLUE))  # pylint: disable=no-member
    # stderr is merged into stdout (``stderr=subprocess.STDOUT``), so this is
    # normally ``None``; the check is kept in case the launch options change.
    if stderr is not None:
        print(render_error(stderr, stem.util.term.Color.RED))  # pylint: disable=no-member

    returncode = _FREENET_PROC.returncode
    if returncode != 0:
        # Report the output already captured above. Reading the pipes again is
        # not possible: ``communicate()`` closes them once the process exits,
        # a read after a timeout would block until the process ends, and
        # ``_FREENET_PROC.stderr`` is ``None`` because stderr is redirected.
        raise subprocess.CalledProcessError(returncode, _FREENET_ARGS, stdout, stderr)

    # update flag
    _FREENET_BS_FLAG = True
示例#5
0
文件: db.py 项目: lucmichalski/darc
def _save_selenium_db(entries: typing.Union[Link, typing.List[Link]], single: bool = False,
                      score: typing.Optional[float] = None, nx: bool = False, xx: bool = False) -> None:
    """Save link to the :mod:`selenium` database.

    The function updates the :class:`~darc.model.tasks.selenium.SeleniumQueueModel` table.

    Args:
        entries: Links to be added to the :mod:`selenium` database.
            It can be either a :obj:`list` of links, or a single
            link (if ``single`` set as :data:`True`).
        single: Indicate if ``entries`` is a single link rather
            than a :obj:`list` of links.
        score: Value stored in the ``timestamp`` column of the
            records. Defaults to :meth:`datetime.datetime.now`.
        nx: Only create new elements and not to
            update scores for elements that already exist.
        xx: Only update scores of elements that
            already exist. New elements will not be added.

    When both ``nx`` and ``xx`` are set, ``nx`` takes precedence. With
    neither flag set, records are written with ``REPLACE`` semantics.
    Bulk writes are issued in batches of ``BULK_SIZE`` rows.

    """
    if not entries:
        return
    if score is None:
        score = datetime.datetime.now()  # type: ignore

    if not single:
        if typing.TYPE_CHECKING:
            entries = typing.cast(typing.List[Link], entries)

        if nx:
            with database.atomic():
                insert_many = [dict(
                    text=link.url,
                    hash=link.name,
                    link=link,
                    timestamp=score,
                ) for link in entries]
                for batch in peewee.chunked(insert_many, BULK_SIZE):
                    # insert only the current batch, not the whole list again
                    _db_operation(SeleniumQueueModel
                                  .insert_many(batch)
                                  .on_conflict_ignore()
                                  .execute)
            return

        if xx:
            entries_text = [link.url for link in entries]
            _db_operation(SeleniumQueueModel
                          .update(timestamp=score)
                          .where(typing.cast(peewee.TextField, SeleniumQueueModel.text).in_(entries_text))
                          .execute)
            return

        with database.atomic():
            replace_many = [dict(
                text=link.url,
                hash=link.name,
                link=link,
                timestamp=score
            ) for link in entries]
            for batch in peewee.chunked(replace_many, BULK_SIZE):
                _db_operation(SeleniumQueueModel.replace_many(batch).execute)
        return

    if typing.TYPE_CHECKING:
        entries = typing.cast(Link, entries)

    if nx:
        _db_operation(SeleniumQueueModel.get_or_create,
                      text=entries.url,
                      defaults=dict(
                          hash=entries.name,
                          link=entries,
                          timestamp=score,
                      ))
        return

    if xx:
        # Update in place with a single query, as the bulk ``xx`` path does.
        # A "get, then save" sequence cannot go through ``_db_operation``:
        # that helper retries on every exception, so a missing record would
        # be retried forever rather than being skipped.
        _db_operation(SeleniumQueueModel
                      .update(timestamp=score)
                      .where(SeleniumQueueModel.text == entries.url)
                      .execute)
        return

    _db_operation(SeleniumQueueModel.replace(
        text=entries.url,
        hash=entries.name,
        link=entries,
        timestamp=score
    ).execute)