Exemplo n.º 1
0
 def _http_retry_delay() -> Iterable[float]:
     """Return the sequence of delays to wait between HTTP retries.

     The Matrix retry interval, truncated to an integer, is passed as the
     last backoff argument; one fifth of it, also truncated, is passed as
     the second argument.
     """
     # These constants are defined in raiden.app.App.DEFAULT_CONFIG
     retries_before_backoff = DEFAULT_TRANSPORT_RETRIES_BEFORE_BACKOFF
     short_interval = int(DEFAULT_TRANSPORT_MATRIX_RETRY_INTERVAL / 5)
     full_interval = int(DEFAULT_TRANSPORT_MATRIX_RETRY_INTERVAL)
     return udp_utils.timeout_exponential_backoff(
         retries_before_backoff,
         short_interval,
         full_interval,
     )
Exemplo n.º 2
0
 def _http_retry_delay() -> Iterable[float]:
     """Return the sequence of delays to wait between HTTP retries.

     All values are read from the transport configuration held in
     `self._config`: the configured retry interval is passed as the last
     backoff argument and one fifth of it as the second argument.
     """
     # The values below are defined in raiden.app.App.DEFAULT_CONFIG
     settings = self._config
     retries_before_backoff = settings['retries_before_backoff']
     retry_interval = settings['retry_interval']
     return udp_utils.timeout_exponential_backoff(
         retries_before_backoff,
         retry_interval / 5,
         retry_interval,
     )
Exemplo n.º 3
0
 def retry():
     """Keep sending `data` to `receiver_address` until `async_result` is set.

     Between attempts the greenlet sleeps for the next delay produced by
     the backoff generator, which is configured from the 'protocol' section
     of the Raiden service configuration.
     """
     protocol_settings = self._raiden_service.config['protocol']
     retries_before_backoff = protocol_settings['retries_before_backoff']
     retry_interval = protocol_settings['retry_interval']
     delays = udp_utils.timeout_exponential_backoff(
         retries_before_backoff,
         retry_interval,
         retry_interval * 10,
     )
     # A non-None value on async_result ends the resend loop.
     while async_result.value is None:
         self._send_immediate(receiver_address, data)
         pause = next(delays)
         gevent.sleep(pause)
Exemplo n.º 4
0
 def retry():
     """Keep sending `data` to `receiver_address` until `async_result` is set.

     Does nothing when the transport is not running. Between attempts the
     greenlet sleeps for the next delay produced by the backoff generator,
     which is configured from `self._config`.
     """
     if not self._running:
         return
     settings = self._config
     retries_before_backoff = settings['retries_before_backoff']
     retry_interval = settings['retry_interval']
     delays = udp_utils.timeout_exponential_backoff(
         retries_before_backoff,
         retry_interval,
         retry_interval * 10,
     )
     # A non-None value on async_result ends the resend loop.
     while async_result.value is None:
         self._send_immediate(receiver_address, data)
         pause = next(delays)
         gevent.sleep(pause)
Exemplo n.º 5
0
 def retry():
     """Keep sending `data` to `receiver_address` until `async_result` is set.

     Does nothing when the transport is not running. Between attempts the
     greenlet sleeps for the next delay produced by the backoff generator,
     which is configured from the 'transport' section of the Raiden service
     configuration.
     """
     if not self._running:
         return
     transport_settings = self._raiden_service.config['transport']
     retries_before_backoff = transport_settings['retries_before_backoff']
     retry_interval = transport_settings['retry_interval']
     delays = udp_utils.timeout_exponential_backoff(
         retries_before_backoff,
         retry_interval,
         retry_interval * 10,
     )
     # A non-None value on async_result ends the resend loop.
     while async_result.value is None:
         self._send_immediate(receiver_address, data)
         pause = next(delays)
         gevent.sleep(pause)
Exemplo n.º 6
0
 def retry():
     """Resend `data` to `receiver_address` until there is a reason to stop.

     On every round the message is sent only if the receiver's presence is
     one of the `reachable` states; otherwise the send is skipped and
     logged. The loop ends when the stop event fires during the wait, when
     the queue is no longer registered, or when the message is no longer
     in its queue.
     """
     settings = self._config
     retries_before_backoff = settings['retries_before_backoff']
     retry_interval = settings['retry_interval']
     delays = udp_utils.timeout_exponential_backoff(
         retries_before_backoff,
         retry_interval,
         retry_interval * 10,
     )
     for pause in delays:
         status = self._address_to_presence.get(receiver_address)
         if status not in reachable:
             self.log.debug(
                 'Skipping SEND to unreachable node',
                 receiver=pex(receiver_address),
                 status=status,
                 message=message,
                 queue=queue_identifier,
             )
         else:
             self._send_raw(receiver_address, data)

         # Works like gevent.sleep, except it returns early on shutdown.
         stopping = self._stop_event.wait(pause)
         if stopping:
             return

         # Only keep retrying while the queue still exists.
         if queue_identifier not in self._queueids_to_queues:
             self.log.debug(
                 'Queue cleaned, stop retrying',
                 message=message,
                 queue=queue_identifier,
                 queueids_to_queues=self._queueids_to_queues,
             )
             return

         # Only keep retrying while the message is still queued; Delivered
         # and Processed messages should eventually remove it.
         queued_events = self._queueids_to_queues[queue_identifier]
         still_queued = any(
             message_id == queued.message_identifier
             for queued in queued_events
         )
         if not still_queued:
             return
Exemplo n.º 7
0
def single_queue_send(
        transport: 'UDPTransport',
        recipient: typing.Address,
        queue: Queue_T,
        event_stop: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        message_retries: int,
        message_retry_timeout: int,
        message_retry_max_timeout: int,
):
    """ Drains one message queue addressed to `recipient`.

    The message at the head of the queue is retried until it is
    acknowledged and is only then removed, so messages leave the queue in
    order.

    Notes:
    - No other task may consume from `queue`.
    - Killing this task at any time is allowed, although setting
      `event_stop` is the intended way to end it.
    - When several queues exist for the same recipient, the caller must
      avoid starting them together, otherwise they congest the recipient.
    - The endpoint is assumed to stay known once it has been registered. If
      that stops being true, this code has to learn to deal with unknown
      addresses.

    Raises:
        ValueError: If `queue` is not a NotifyingQueue.
    """

    # Cancelability depends on a NotifyingQueue: with any other queue the
    # greenlet could not be stopped while blocked waiting for a new element.
    if not isinstance(queue, NotifyingQueue):
        raise ValueError('queue must be a NotifyingQueue.')

    # This event is reused on every iteration, so it must be cleared with care.
    wakeup = event_first_of(queue, event_stop)

    # Block until the endpoint is registered or a stop is requested.
    healthy_or_stop = event_first_of(event_healthy, event_stop)
    healthy_or_stop.wait()

    while True:
        wakeup.wait()

        if event_stop.is_set():
            return

        # Being the sole consumer guarantees the queue still has an element
        # here, therefore the non-blocking peek cannot raise Empty.
        head = queue.peek(block=False)
        messagedata, message_id = head

        retry_delays = timeout_exponential_backoff(
            message_retries,
            message_retry_timeout,
            message_retry_max_timeout,
        )

        try:
            acknowledged = retry_with_recovery(
                transport,
                messagedata,
                message_id,
                recipient,
                event_stop,
                event_healthy,
                event_unhealthy,
                retry_delays,
            )
        except RaidenShuttingDown:  # Part of a clean shutdown
            return

        if not acknowledged:
            continue

        # The head was acknowledged, drop it from the queue.
        queue.get()

        # Inspecting the queue length never context-switches, so the length
        # cannot change between the check and the clear, and the event is
        # set again as soon as a new item is added.
        if queue:
            continue

        wakeup.clear()

        if event_stop.is_set():
            return
Exemplo n.º 8
0
def single_queue_send(
    transport: 'UDPTransport',
    recipient: typing.Address,
    queue: Queue_T,
    queue_identifier: QueueIdentifier,
    event_stop: Event,
    event_healthy: Event,
    event_unhealthy: Event,
    message_retries: int,
    message_retry_timeout: int,
    message_retry_max_timeout: int,
):
    """ Drains one message queue addressed to `recipient`.

    The message at the head of the queue is retried until it is
    acknowledged and is only then removed, so messages leave the queue in
    order. `queue_identifier` is only used to tag the debug log entries.

    Notes:
    - No other task may consume from `queue`.
    - Killing this task at any time is allowed, although setting
      `event_stop` is the intended way to end it.
    - When several queues exist for the same recipient, the caller must
      avoid starting them together, otherwise they congest the recipient.
    - The endpoint is assumed to stay known once it has been registered. If
      that stops being true, this code has to learn to deal with unknown
      addresses.

    Raises:
        ValueError: If `queue` is not a NotifyingQueue.
    """

    # Cancelability depends on a NotifyingQueue: with any other queue the
    # greenlet could not be stopped while blocked waiting for a new element.
    if not isinstance(queue, NotifyingQueue):
        raise ValueError('queue must be a NotifyingQueue.')

    # This event is reused on every iteration, so it must be cleared with care.
    wakeup = event_first_of(queue, event_stop)

    log.debug(
        'queue: waiting for node to become healthy',
        node=pex(transport.raiden.address),
        queue_identifier=queue_identifier,
        queue_size=len(queue),
    )

    # Block until the endpoint is registered or a stop is requested.
    healthy_or_stop = event_first_of(event_healthy, event_stop)
    healthy_or_stop.wait()

    log.debug(
        'queue: processing queue',
        node=pex(transport.raiden.address),
        queue_identifier=queue_identifier,
        queue_size=len(queue),
    )

    while True:
        wakeup.wait()

        if event_stop.is_set():
            log.debug(
                'queue: stopping',
                node=pex(transport.raiden.address),
                queue_identifier=queue_identifier,
                queue_size=len(queue),
            )
            return

        # Being the sole consumer guarantees the queue still has an element
        # here, therefore the non-blocking peek cannot raise Empty.
        head = queue.peek(block=False)
        messagedata, message_id = head

        log.debug(
            'queue: sending message',
            node=pex(transport.raiden.address),
            recipient=pex(recipient),
            msgid=message_id,
            queue_identifier=queue_identifier,
            queue_size=len(queue),
        )

        retry_delays = timeout_exponential_backoff(
            message_retries,
            message_retry_timeout,
            message_retry_max_timeout,
        )

        acknowledged = retry_with_recovery(
            transport,
            messagedata,
            message_id,
            recipient,
            event_stop,
            event_healthy,
            event_unhealthy,
            retry_delays,
        )

        if not acknowledged:
            continue

        # The head was acknowledged, drop it from the queue.
        queue.get()

        # Inspecting the queue length never context-switches, so the length
        # cannot change between the check and the clear, and the event is
        # set again as soon as a new item is added.
        if queue:
            continue

        wakeup.clear()

        if event_stop.is_set():
            return
Exemplo n.º 9
0
def healthcheck(
        transport: UDPTransport,
        recipient: typing.Address,
        stop_event: Event,
        event_healthy: Event,
        event_unhealthy: Event,
        nat_keepalive_retries: int,
        nat_keepalive_timeout: int,
        nat_invitation_timeout: int,
        ping_nonce: dict,
):
    """ Sends a periodical Ping to `recipient` to check its health.

    The task runs until `stop_event` is set and goes through these phases:

    1. The recipient's network state is recorded as NODE_NETWORK_UNKNOWN.
    2. If the recipient's host/port is not known yet, the node is flagged
       unhealthy and the lookup is repeated with a backoff until it
       succeeds or `stop_event` is set.
    3. The node is flagged healthy and a Ping is sent every
       `nat_keepalive_timeout`. A Ping that is not acknowledged within the
       keepalive retries marks the node NODE_NETWORK_UNREACHABLE and is
       resent every `nat_invitation_timeout` until it is answered. An
       acknowledged Ping marks the node NODE_NETWORK_REACHABLE if it was
       not already.

    Args:
        transport: Transport used to look up the endpoint, build and send
            the Pings and record the node network state.
        recipient: Address of the node being monitored.
        stop_event: Event that ends this task once set.
        event_healthy: Set while the node is considered healthy, cleared
            otherwise.
        event_unhealthy: Set while the node is considered unhealthy,
            cleared otherwise.
        nat_keepalive_retries: How many keepalive timeouts are handed to
            the first retry round of each Ping.
        nat_keepalive_timeout: Delay between Pings, also the timeout of each
            keepalive retry and the initial value given to the endpoint
            lookup backoff.
        nat_invitation_timeout: Timeout repeated indefinitely while the node
            is unreachable, also the last value given to the endpoint
            lookup backoff.
        ping_nonce: Mapping whose 'nonce' entry is incremented in place
            before every Ping and used as that Ping's nonce.
    """
    # pylint: disable=too-many-branches

    log.debug(
        'starting healthcheck for',
        node=pex(transport.address),
        to=pex(recipient),
    )

    # The state of the node is unknown, the events are set to allow the tasks
    # to do work.
    last_state = NODE_NETWORK_UNKNOWN
    transport.set_node_network_state(
        recipient,
        last_state,
    )

    # Always call `clear` before `set`, since only `set` does context-switches
    # it's easier to reason about tasks that are waiting on both events.

    # Wait for the end-point registration or for the node to quit
    try:
        transport.get_host_port(recipient)
    except UnknownAddress:
        log.debug(
            'waiting for endpoint registration',
            node=pex(transport.address),
            to=pex(recipient),
        )

        event_healthy.clear()
        event_unhealthy.set()

        backoff = udp_utils.timeout_exponential_backoff(
            nat_keepalive_retries,
            nat_keepalive_timeout,
            nat_invitation_timeout,
        )
        sleep = next(backoff)

        # `wait` doubles as the sleep between lookups; a true result means
        # a stop was requested and ends the polling.
        while not stop_event.wait(sleep):
            try:
                transport.get_host_port(recipient)
            except UnknownAddress:
                sleep = next(backoff)
            else:
                break

    # Don't wait to send the first Ping and to start sending messages if the
    # endpoint is known
    sleep = 0
    event_unhealthy.clear()
    event_healthy.set()

    while not stop_event.wait(sleep):
        # Only the first iteration runs without delay, every following Ping
        # waits for the keepalive timeout.
        sleep = nat_keepalive_timeout

        # The nonce lives in a mutable mapping, so the increment is visible
        # to whoever else holds a reference to `ping_nonce`.
        ping_nonce['nonce'] += 1
        messagedata = transport.get_ping(ping_nonce['nonce'])
        message_id = ('ping', ping_nonce['nonce'], recipient)

        # Send Ping a few times before setting the node as unreachable
        acknowledged = udp_utils.retry(
            transport,
            messagedata,
            message_id,
            recipient,
            stop_event,
            [nat_keepalive_timeout] * nat_keepalive_retries,
        )

        if stop_event.is_set():
            return

        if not acknowledged:
            log.debug(
                'node is unresponsive',
                node=pex(transport.address),
                to=pex(recipient),
                current_state=last_state,
                new_state=NODE_NETWORK_UNREACHABLE,
                retries=nat_keepalive_retries,
                timeout=nat_keepalive_timeout,
            )

            # The node is not healthy, clear the event to stop all queue
            # tasks
            last_state = NODE_NETWORK_UNREACHABLE
            transport.set_node_network_state(
                recipient,
                last_state,
            )
            event_healthy.clear()
            event_unhealthy.set()

            # Retry until recovery, used for:
            # - Checking node status.
            # - Nat punching.
            # The same Ping (same nonce and message_id) is resent with an
            # endless supply of invitation timeouts.
            acknowledged = udp_utils.retry(
                transport,
                messagedata,
                message_id,
                recipient,
                stop_event,
                repeat(nat_invitation_timeout),
            )

        if acknowledged:
            # Only used for the log entry below; the transition itself is
            # decided from `last_state`.
            current_state = views.get_node_network_status(
                views.state_from_raiden(transport.raiden),
                recipient,
            )

            if last_state != NODE_NETWORK_REACHABLE:
                log.debug(
                    'node answered',
                    node=pex(transport.raiden.address),
                    to=pex(recipient),
                    current_state=current_state,
                    new_state=NODE_NETWORK_REACHABLE,
                )

                last_state = NODE_NETWORK_REACHABLE
                transport.set_node_network_state(
                    recipient,
                    last_state,
                )
                event_unhealthy.clear()
                event_healthy.set()