async def wait_until(self, callback):
    """Implementation shamelessly adapted from:
    https://github.com/pytest-dev/pytest-qt/blob/16b989d700dfb91fe389999d8e2676437169ed44/src/pytestqt/qtbot.py#L459
    """
    __tracebackhide__ = True
    last_exc = None
    try:
        async with real_clock_timeout():
            while True:
                try:
                    result = callback()
                except AssertionError as exc:
                    last_exc = exc
                    result = False
                if result not in (None, True, False):
                    msg = f"waitUntil() callback must return None, True or False, returned {result!r}"
                    raise ValueError(msg)
                if result in (True, None):
                    return
                await trio.sleep(0.01)
    except trio.TooSlowError:
        if last_exc:
            raise trio.TooSlowError() from last_exc
        else:
            raise

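# Minimal usage sketch for the `wait_until` helper above. The `helper` object
# (an instance of whatever test-fixture class defines `wait_until`) and the
# `server` object with a `connection_count` attribute are assumptions made
# purely for illustration.
async def example_wait_until(helper, server):
    # The callback is polled roughly every 10 ms; if the real-clock timeout
    # expires first, trio.TooSlowError is raised, chained to the last
    # AssertionError (if any) so the failing assertion appears in the traceback.
    await helper.wait_until(lambda: server.connection_count >= 2)
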
async def main():
    start = time.time()
    try:
        async with trio.open_nursery() as nurse:
            async with tractor.open_nursery() as tn:
                p = await tn.start_actor(
                    'fast_boi',
                    enable_modules=[__name__],
                )

                async def delayed_kbi():
                    await trio.sleep(kbi_delay)
                    print(f'RAISING KBI after {kbi_delay} s')
                    raise KeyboardInterrupt

                # start task which raises a kbi **after**
                # the actor nursery ``__aexit__()`` has
                # been run.
                nurse.start_soon(delayed_kbi)

                await p.run(do_nuthin)
    finally:
        duration = time.time() - start
        if duration > timeout:
            raise trio.TooSlowError(
                'daemon cancel was slower than necessary..')

async def subscribe_request(
    self,
    request: AnyOutboundMessage,
    response_message_type: Type[TBaseMessage],
) -> AsyncIterator[trio.abc.ReceiveChannel[InboundMessage[TBaseMessage]]]:
    request_id = request.message.request_id

    self.logger.debug2(
        "Sending request: %s with request id %s",
        request,
        request_id.hex(),
    )

    send_channel, receive_channel = trio.open_memory_channel[TBaseMessage](256)
    async with trio.open_nursery() as nursery:
        nursery.start_soon(
            self._manage_request_response,
            request,
            response_message_type,
            send_channel,
        )
        async with receive_channel:
            try:
                yield receive_channel
            # Wrap EndOfChannel in TooSlowError to make the timeout obvious
            except trio.EndOfChannel as err:
                raise trio.TooSlowError(
                    f"Timeout waiting for response: request_id={request_id.hex()}"
                ) from err
        nursery.cancel_scope.cancel()

async def execf(self, tid, res, fn):
    with self.wg:
        self.resultq[tid], rq = trio.open_memory_channel(0)
        try:
            await self._c.send(
                dict(tid=tid, res=res, func=pickle_dumps(fn)))
            with optional_cm(trio.fail_after,
                             getattr(res, 'T', -180) + 180):  # 3min grace period
                ok, r = await rq.receive()
        except (trio.ClosedResourceError, trio.EndOfChannel):
            # TODO: dedicated error class?
            ok, r = False, (
                "",
                RuntimeError(
                    f"remote {self.name} closed connection unexpectedly"))
        except trio.TooSlowError:
            log_event("lost_or_late_response")
            ok, r = False, (
                "",
                trio.TooSlowError(
                    f"remote {self.name} lost track of job {tid}"))
        if ok:
            self.health = FULL_HEALTH
        del self.resultq[tid]
        return ok, r

async def subscribe_request(
    self,
    node_id: NodeID,
    endpoint: Endpoint,
    request: AlexandriaMessage[Any],
    response_message_type: Type[TAlexandriaMessage],
    request_id: Optional[bytes],
) -> AsyncIterator[trio.abc.ReceiveChannel[InboundMessage[TAlexandriaMessage]]]:
    send_channel, receive_channel = trio.open_memory_channel[
        InboundMessage[TAlexandriaMessage]](256)

    #
    # START
    #
    # There cannot be any `await/async` calls between here and the `END`
    # marker, otherwise we will be subject to a race condition where
    # another request could collide with this request id.
    #
    if request_id is None:
        request_id = self.network.client.request_tracker.get_free_request_id(
            node_id)

    self.logger.debug2(
        "Sending request: %s with request id %s",
        request,
        request_id.hex(),
    )

    with self.request_tracker.reserve_request_id(node_id, request_id):
        #
        # END
        #
        async with trio.open_nursery() as nursery:
            # The use of `functools.partial` below is due to an inadequacy
            # in the type hinting of `trio.Nursery.start_soon` which
            # doesn't support more than 4 positional arguments.
            nursery.start_soon(
                functools.partial(
                    self._manage_request_response,
                    node_id,
                    endpoint,
                    request,
                    response_message_type,
                    send_channel,
                    request_id,
                ))
            try:
                async with receive_channel:
                    try:
                        yield receive_channel
                    # Wrap EndOfChannel in TooSlowError to make the timeout obvious
                    except trio.EndOfChannel as err:
                        raise trio.TooSlowError(
                            f"Timeout: request={request} request_id={request_id.hex()}"
                        ) from err
            finally:
                nursery.cancel_scope.cancel()

async def sendSync(self, msg, read_only, seq_num=None, cid=None, pre_process=False,
                   m_of_n_quorum=None, reconfiguration=False, include_ro=False,
                   corrupt_params={}, no_retries=False, result=1):
    """
    Send a client request and wait for an m_of_n_quorum (if None, it will be
    set to a 2F+C+1 quorum) of replies.

    Return a single reply message if a quorum of replies matches.
    Otherwise, raise a trio.TooSlowError indicating the request timed out.

    Retry Strategy:
        If the request is a write and the primary is known, then send only to
        the primary on the first attempt. Otherwise, if the request is read
        only or the primary is unknown, then send to all replicas on the first
        attempt.

        After `config.retry_timeout_milli` elapses without receiving a quorum
        of identical replies, clear the replies and send to all replicas.
        Repeat this every `retry_timeout_milli` until `config.req_timeout_milli`
        elapses. If `config.req_timeout_milli` elapses, a trio.TooSlowError is
        raised.

    Note that this method also binds the socket to an appropriate port if it
    is not already bound.
    """
    # Call an abstract function to allow each client type to set up its
    # communication before starting.
    if not self.comm_prepared:
        await self._comm_prepare()

    if seq_num is None:
        seq_num = self.req_seq_num.next()
    if cid is None:
        cid = str(seq_num)

    signature = b''
    client_id = self.client_id
    if self.signing_key:
        h = SHA256.new(msg)
        signature = pkcs1_15.new(self.signing_key).sign(h)
        if corrupt_params:
            msg, signature, client_id = self._corrupt_signing_params(
                msg, signature, client_id, corrupt_params)

    data = bft_msgs.pack_request(
        client_id, seq_num, read_only, self.config.req_timeout_milli, cid, msg,
        result, pre_process, reconfiguration=reconfiguration, signature=signature)

    if m_of_n_quorum is None:
        m_of_n_quorum = MofNQuorum.LinearizableQuorum(
            self.config, [r.id for r in self.replicas])

    # Raise a trio.TooSlowError exception if a quorum of replies is not
    # received before the request timeout expires.
    try:
        with trio.fail_after(self.config.req_timeout_milli / 1000):
            self._reset_on_new_request([seq_num])
            replies = await self._send_receive_loop(
                data, read_only, m_of_n_quorum,
                include_ro=include_ro, no_retries=no_retries)
            return (next(iter(self.replies.values())).get_common_data_with_result()
                    if replies else None)
    except trio.TooSlowError:
        raise trio.TooSlowError(
            f"client_id: {self.client_id}, seq_num: {seq_num}")
    finally:
        pass

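# Hypothetical call-site sketch for `sendSync` above: issue a write request with
# the default linearizable quorum and surface the timeout to the caller.
# `client` and `request_bytes` are assumptions for illustration only.
import trio

async def example_send_sync(client, request_bytes):
    try:
        # Returns the common reply data once a quorum of matching replies
        # arrives, or None if no replies were collected.
        return await client.sendSync(request_bytes, read_only=False)
    except trio.TooSlowError:
        # The re-raised error embeds client_id and seq_num, so the timeout is
        # attributable to a specific request.
        raise
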
async def write_batch(self, msg_batch, batch_seq_nums=None, m_of_n_quorum=None,
                      corrupt_params=None, no_retries=False):
    if not self.comm_prepared:
        await self._comm_prepare()

    cid = str(self.req_seq_num.next())
    batch_size = len(msg_batch)

    if batch_seq_nums is None:
        batch_seq_nums = []
        for n in range(batch_size):
            batch_seq_nums.append(self.req_seq_num.next())

    msg_data = b''
    req_index_to_corrupt = random.randint(1, batch_size - 1)  # don't corrupt the 1st
    for n in range(batch_size):
        msg = msg_batch[n]
        msg_seq_num = batch_seq_nums[n]
        msg_cid = str(msg_seq_num)

        signature = b''
        client_id = self.client_id
        if self.signing_key:
            h = SHA256.new(msg)
            signature = pkcs1_15.new(self.signing_key).sign(h)
            if corrupt_params and (req_index_to_corrupt == n):
                msg, signature, client_id = self._corrupt_signing_params(
                    msg, signature, client_id, corrupt_params)

        msg_data = b''.join([msg_data, bft_msgs.pack_request(
            self.client_id, msg_seq_num, False, self.config.req_timeout_milli,
            msg_cid, msg, 0, True, reconfiguration=False, span_context=b'',
            signature=signature)])

    data = bft_msgs.pack_batch_request(self.client_id, batch_size, msg_data, cid)

    if m_of_n_quorum is None:
        m_of_n_quorum = MofNQuorum.LinearizableQuorum(
            self.config, [r.id for r in self.replicas])

    # Raise a trio.TooSlowError exception if a quorum of replies is not
    # received before the batch timeout expires.
    try:
        with trio.fail_after(batch_size * self.config.req_timeout_milli / 1000):
            self._reset_on_new_request(batch_seq_nums)
            return await self._send_receive_loop(
                data, False, m_of_n_quorum,
                batch_size * self.config.retry_timeout_milli / 1000,
                no_retries=no_retries)
    except trio.TooSlowError:
        raise trio.TooSlowError(
            f"client_id {self.client_id}, for batch msg {cid} {batch_seq_nums}")
    finally:
        pass

async def _stream_find_nodes_response(
    response_message_type: Type[BaseMessage],
    send_channel: trio.abc.SendChannel[InboundMessage[FoundNodesMessage]],
) -> None:
    with trio.move_on_after(REQUEST_RESPONSE_TIMEOUT) as scope:
        async with send_channel:
            with client.request_tracker.reserve_request_id(
                    node_id, request_id) as reserved_request_id:
                request = AnyOutboundMessage(
                    FindNodeMessage(reserved_request_id, tuple(distances)),
                    endpoint,
                    node_id,
                )
                async with client.dispatcher.subscribe_request(
                        request, response_message_type) as subscription:
                    head_response = await subscription.receive()
                    expected_total = head_response.message.total
                    validate_found_nodes_response(
                        head_response.message,
                        request,
                        expected_total,
                    )
                    await send_channel.send(head_response)
                    for _ in range(expected_total - 1):
                        response = await subscription.receive()
                        validate_found_nodes_response(
                            response.message, request, expected_total)
                        await send_channel.send(response)

    if scope.cancelled_caught:
        client.logger.debug(
            "Stream find nodes request disconnected: request=%s message_type=%s",
            request,
            reserved_request_id,
        )
        raise trio.TooSlowError("Timeout in stream_find_nodes")

async def _open(self, async_fn: Callable[..., TReturn],
                *args: Any) -> AsyncIterator[ProcessAPI[TReturn]]:
    if self._dead:
        raise Exception(f"Worker (pid={self.pid}) is no longer active")
    if self._busy:
        raise Exception(f"Worker (pid={self.pid}) is busy")
    self._busy = True
    proc: Process[TReturn] = Process(async_fn, args)
    proc.pid = self._trio_proc.pid
    async with trio.open_nursery() as nursery:
        # We write the execution data immediately without waiting for the
        # `WAIT_EXEC_DATA` state to ensure that the child process doesn't have
        # to wait for that data due to the round trip times between processes.
        logger.debug("Writing execution data for %s over stdin", proc)
        await self._to_child.send_all(proc.sub_proc_payload)

        startup_timeout = int(
            os.getenv(
                "TRIO_RUN_IN_PROCESS_STARTUP_TIMEOUT",
                constants.STARTUP_TIMEOUT_SECONDS,
            ))

        with trio.open_signal_receiver(*RELAY_SIGNALS) as signal_aiter:
            # Monitor the child stream for incoming updates to the state of
            # the child process.
            nursery.start_soon(_monitor_state, proc, self._from_child)

            # Relay any appropriate signals to the child process.
            nursery.start_soon(_relay_signals, proc, signal_aiter)

            try:
                with trio.fail_after(startup_timeout):
                    await proc.wait_pid()
            except trio.TooSlowError:
                proc.kill()
                raise trio.TooSlowError(
                    f"{proc} took more than {startup_timeout} seconds to start up"
                )

            # Wait until the child process has reached the EXECUTING
            # state before yielding the context. This ensures that any
            # calls to things like `terminate` or `kill` will be handled
            # properly in the child process.
            #
            # The timeout ensures that if something is fundamentally wrong
            # with the subprocess we don't hang indefinitely.
            try:
                with trio.fail_after(startup_timeout):
                    await proc.wait_for_state(State.EXECUTING)
            except trio.TooSlowError:
                proc.kill()
                raise trio.TooSlowError(
                    f"{proc} took more than {startup_timeout} seconds to start up"
                )

            try:
                try:
                    yield proc
                except KeyboardInterrupt as err:
                    # If a keyboard interrupt is encountered relay it to the
                    # child process and then give it a moment to cleanup before
                    # re-raising
                    logger.debug(
                        "Got KeyboardInterrupt, sending SIGINT to %s", proc)
                    try:
                        proc.send_signal(signal.SIGINT)
                        sigint_timeout = int(
                            os.getenv(
                                "TRIO_RUN_IN_PROCESS_SIGINT_TIMEOUT",
                                constants.SIGINT_TIMEOUT_SECONDS,
                            ))
                        try:
                            with trio.fail_after(sigint_timeout):
                                await proc.wait()
                        except trio.TooSlowError:
                            logger.debug(
                                "Timed out waiting for %s to exit after relaying SIGINT",
                                proc,
                            )
                    finally:
                        raise err
                else:
                    await proc.wait()
            finally:
                if not proc._has_returncode.is_set():
                    # If the process has not returned at this stage we need to hard
                    # kill it to prevent it from hanging.
                    logger.warning(
                        "%s failed to exit cleanly. Sending SIGKILL",
                        proc,
                        # The `any` call is to include a stacktrace if this
                        # happened due to an exception but to omit it if this is
                        # somehow happening outside of an exception context.
                        exc_info=any(sys.exc_info()),
                    )
                    proc.kill()
                else:
                    logger.debug("process %s finished: returncode=%d",
                                 proc, proc.returncode)
                self._busy = False
                nursery.cancel_scope.cancel()

async def run(self) -> None:
    while self.manager.is_running:
        async with self.node_queue.reserve() as node_id:
            try:
                proof = await self._network.get_content_proof(
                    node_id,
                    hash_tree_root=self.hash_tree_root,
                    content_key=self.content_key,
                    start_chunk_index=0,
                    max_chunks=10,
                )
            except trio.TooSlowError:
                continue
            else:
                break
    else:
        raise trio.TooSlowError(
            "Unable to retrieve initial proof from any of the provided nodes"
        )

    missing_segments = proof.get_missing_segments()
    bite_size_missing_segments = slice_segments_to_max_chunk_count(
        missing_segments,
        max_chunk_count=16,
    )
    self._segment_queue = ResourceQueue(bite_size_missing_segments)

    send_channel, receive_channel = trio.open_memory_channel[Proof](
        self._concurrency
    )

    if not proof.is_complete:
        content_length = proof.get_content_length()

        async with trio.open_nursery() as nursery:
            for worker_id in range(self._concurrency):
                nursery.start_soon(self._worker, worker_id, send_channel)

            async with receive_channel:
                async for partial_proof in receive_channel:
                    # TODO: computationally expensive
                    proof = proof.merge(partial_proof)

                    still_missing = sum(
                        segment.length
                        for segment in proof.get_missing_segments()
                    )
                    percent_complete = (
                        (content_length - still_missing) * 100 / content_length
                    )
                    self.logger.info(
                        "combined proof: content_key=%s proof=%s progress=%.2f%% nodes=%d",
                        self.content_key.hex(),
                        proof,
                        percent_complete,
                        len(self.node_queue),
                    )

                    if proof.is_complete:
                        break

            # shut the workers down
            nursery.cancel_scope.cancel()

    self._content_proof = proof
    self._content_ready.set()

async def common_client_stream_find_nodes(
    client: Client,
    node_id: NodeID,
    endpoint: Endpoint,
    distances: Collection[int],
    *,
    request_id: Optional[bytes] = None,
) -> AsyncIterator[trio.abc.ReceiveChannel[InboundMessage[FoundNodesMessage]]]:
    async def _stream_find_nodes_response(
        response_message_type: Type[BaseMessage],
        send_channel: trio.abc.SendChannel[InboundMessage[FoundNodesMessage]],
    ) -> None:
        with trio.move_on_after(REQUEST_RESPONSE_TIMEOUT) as scope:
            async with send_channel:
                with client.request_tracker.reserve_request_id(
                        node_id, request_id) as reserved_request_id:
                    request = AnyOutboundMessage(
                        FindNodeMessage(reserved_request_id, tuple(distances)),
                        endpoint,
                        node_id,
                    )
                    async with client.dispatcher.subscribe_request(
                            request, response_message_type) as subscription:
                        head_response = await subscription.receive()
                        expected_total = head_response.message.total
                        validate_found_nodes_response(
                            head_response.message,
                            request,
                            expected_total,
                        )
                        await send_channel.send(head_response)
                        for _ in range(expected_total - 1):
                            response = await subscription.receive()
                            validate_found_nodes_response(
                                response.message, request, expected_total)
                            await send_channel.send(response)

        if scope.cancelled_caught:
            client.logger.debug(
                "Stream find nodes request disconnected: request=%s message_type=%s",
                request,
                reserved_request_id,
            )
            raise trio.TooSlowError("Timeout in stream_find_nodes")

    async with trio.open_nursery() as nursery:
        send_channel, receive_channel = trio.open_memory_channel[
            InboundMessage[FoundNodesMessage]](4)
        nursery.start_soon(
            _stream_find_nodes_response,
            FoundNodesMessage,
            send_channel,
        )

        async with receive_channel:
            try:
                yield receive_channel
            except trio.EndOfChannel as err:
                raise trio.TooSlowError(
                    "Timeout in stream_find_nodes") from err
            except trio.ClosedResourceError:
                pass

        nursery.cancel_scope.cancel()

async def _watchdog():
    await trio.to_thread.run_sync(_run_until_timeout_or_event_occured)
    if not event_occured:
        raise trio.TooSlowError()

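# Sketch of how a watchdog like `_watchdog` above is typically wired into a
# trio program: it runs as a sibling task so that the trio.TooSlowError it
# raises cancels the rest of the nursery. `do_work` and the event the worker
# thread waits on are assumptions for illustration.
import trio

async def example_run_with_watchdog(do_work):
    async with trio.open_nursery() as nursery:
        # The watchdog blocks in a worker thread until either the timeout
        # expires or the "done" event is set by the main work.
        nursery.start_soon(_watchdog)
        await do_work()  # expected to set the event on success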