def get_all_entries(target_id):
    """Fetch every historical entry for target_id between first_seqno and last_seqno.

    Pages through the historical/range endpoint (following @nextLink), retrying
    while the service reports ACCEPTED, up to a fixed timeout. Returns the list
    of entries, or raises on unexpected status / timeout.
    """
    LOG.info(
        f"Getting historical entries from {first_seqno} to {last_seqno} for id {target_id}"
    )
    logs = []

    with primary.client("user0") as c:
        timeout = 5
        deadline = time.time() + timeout
        entries = []
        next_path = f"/app/log/private/historical/range?from_seqno={first_seqno}&to_seqno={last_seqno}&id={target_id}"

        while time.time() < deadline:
            r = c.get(next_path, log_capture=logs)

            if r.status_code == http.HTTPStatus.ACCEPTED:
                # Ignore retry-after header, retry soon
                time.sleep(0.1)
            elif r.status_code == http.HTTPStatus.OK:
                j_body = r.body.json()
                entries.extend(j_body["entries"])
                link = j_body.get("@nextLink")
                if link is None:
                    # No @nextLink means we've reached end of range
                    return entries
                next_path = link
            else:
                LOG.error("Printing historical/range logs on unexpected status")
                flush_info(logs, None)
                raise ValueError(
                    f"Unexpected status code from historical range query: {r.status_code}"
                )

        LOG.error("Printing historical/range logs on timeout")
        flush_info(logs, None)
        raise TimeoutError(f"Historical range not available after {timeout}s")
def wait_for_seqno_to_commit(seqno, view, nodes):
    """
    Wait for a specific seqno at a specific view to be committed on all nodes.
    """
    # Poll each node's /node/tx endpoint until every node reports Committed,
    # or until the replication-delay budget is exhausted.
    for _ in range(infra.network.Network.replication_delay * 10):
        up_to_date_f = []
        logs = {}
        for node in nodes:
            with node.client() as c:
                node_logs = []
                logs[node.node_id] = node_logs
                r = c.get(
                    f"/node/tx?view={view}&seqno={seqno}", log_capture=node_logs
                )
                assert (
                    r.status_code == http.HTTPStatus.OK
                ), f"tx request returned HTTP status {r.status_code}"
                status = TxStatus(r.body.json()["status"])
                if status == TxStatus.Invalid:
                    # Dump this node's captured logs before failing hard
                    flush_info(node_logs, None, 0)
                    raise RuntimeError(
                        f"Node {node.node_id} reports transaction ID {view}.{seqno} is invalid and will never be committed"
                    )
                if status == TxStatus.Committed:
                    up_to_date_f.append(node.node_id)
        if len(up_to_date_f) == len(nodes):
            break
        time.sleep(0.1)
    # Emit captured logs from the final polling round
    for lines in logs.values():
        flush_info(lines, None, 0)
    assert len(up_to_date_f) == len(
        nodes
    ), "Only {} out of {} nodes are up to date".format(len(up_to_date_f), len(nodes))
def _call(
    self,
    path: str,
    body: Optional[Union[str, dict, bytes]] = None,
    http_verb: str = "POST",
    headers: Optional[dict] = None,
    timeout: int = DEFAULT_REQUEST_TIMEOUT_SEC,
    log_capture: Optional[list] = None,
) -> Response:
    """Build a Request, log it, dispatch it via the underlying client
    implementation, and return (and log) the resulting Response."""
    request = Request(
        path, body, http_verb, headers if headers is not None else {}
    )
    flush_info([f"{self.description} {request}"], log_capture, 3)
    response = self.client_impl.request(request, timeout)
    flush_info([str(response)], log_capture, 3)
    return response
def wait_for_refresh(self, network, kid=TEST_JWT_KID):
    """Poll /gov/jwt_keys/all on the primary until the stored cert for kid
    matches self.cert_pem, or raise TimeoutError after 3 refresh intervals."""
    timeout = self.refresh_interval * 3
    LOG.info(f"Waiting {timeout}s for JWT key refresh")
    primary, _ = network.find_nodes()
    member_id = network.consortium.get_any_active_member().local_id
    deadline = time.time() + timeout
    with primary.client(member_id) as c:
        while time.time() < deadline:
            logs = []
            r = c.get("/gov/jwt_keys/all", log_capture=logs)
            assert r.status_code == 200, r
            if self.cert_pem == r.body.json()[kid]:
                flush_info(logs)
                return
            time.sleep(0.1)
        flush_info(logs)
        raise TimeoutError(
            f"JWT public signing keys were not refreshed after {timeout}s"
        )
def wait_for_commit(
    client, seqno: int, view: int, timeout: int = 3, log_capture: Optional[list] = None
) -> None:
    """
    Waits for a specific seqno/view pair to be committed by the network,
    as per the node to which client is connected to.

    :param client: Instance of :py:class:`ccf.clients.CCFClient`
    :param int seqno: Transaction sequence number.
    :param int view: Consensus view.
    :param str timeout: Maximum time to wait for this seqno/view pair to be committed before giving up.
    :param list log_capture: Rather than emit to default handler, capture log lines to list (optional).

    A TimeoutError exception is raised if the commit index is not committed within the given timeout.
    """
    if view is None or seqno is None:
        raise ValueError(f"{view}.{seqno} is not a valid transaction ID")

    logs: List[str] = []

    end_time = time.time() + timeout
    while time.time() < end_time:
        logs = []
        r = client.get(f"/node/tx?transaction_id={view}.{seqno}", log_capture=logs)
        assert (
            r.status_code == http.HTTPStatus.OK
        ), f"tx request returned HTTP status {r.status_code}"
        status = TxStatus(r.body.json()["status"])
        if status == TxStatus.Committed:
            flush_info(logs, log_capture, 1)
            return
        elif status == TxStatus.Invalid:
            # Fix: flush the captured request/response logs before raising so the
            # failing exchange is visible — consistent with the timeout path below
            # and with wait_for_seqno_to_commit's Invalid handling.
            flush_info(logs, log_capture, 1)
            raise RuntimeError(
                f"Transaction ID {view}.{seqno} is marked invalid and will never be committed"
            )
        else:
            # Still pending — poll again shortly
            time.sleep(0.1)

    flush_info(logs, log_capture, 1)
    raise TimeoutError(
        f'Timed out waiting for commit: {pprint.pformat(client.get("/node/consensus").body.json())}'
    )
def get_all_entries(client, target_id, from_seqno=None, to_seqno=None, timeout=5):
    """Fetch all historical entries for target_id, optionally bounded by
    from_seqno/to_seqno.

    Pages through the historical/range endpoint (following @nextLink),
    retrying while the service reports ACCEPTED, up to the given timeout.
    Returns (entries, duration_seconds) on success; raises on unexpected
    status or timeout.
    """
    LOG.info(
        f"Getting historical entries{f' from {from_seqno}' if from_seqno is not None else ''}{f' to {to_seqno}' if to_seqno is not None else ''} for id {target_id}"
    )
    logs = []

    start_time = time.time()
    deadline = start_time + timeout
    entries = []

    # Assemble query string; optional bounds are appended in the same order
    # the endpoint documents them
    params = [f"id={target_id}"]
    if from_seqno is not None:
        params.append(f"from_seqno={from_seqno}")
    if to_seqno is not None:
        params.append(f"to_seqno={to_seqno}")
    next_path = "/app/log/private/historical/range?" + "&".join(params)

    while time.time() < deadline:
        r = client.get(next_path, log_capture=logs)

        if r.status_code == http.HTTPStatus.ACCEPTED:
            # Ignore retry-after header, retry soon
            time.sleep(0.1)
        elif r.status_code == http.HTTPStatus.OK:
            j_body = r.body.json()
            entries.extend(j_body["entries"])
            link = j_body.get("@nextLink")
            if link is None:
                # No @nextLink means we've reached end of range
                duration = time.time() - start_time
                LOG.info(f"Done! Fetched {len(entries)} entries in {duration:0.2f}s")
                return entries, duration
            next_path = link
        else:
            LOG.error("Printing historical/range logs on unexpected status")
            flush_info(logs, None)
            raise ValueError(
                f"Unexpected status code from historical range query: {r.status_code}"
            )

    LOG.error("Printing historical/range logs on timeout")
    flush_info(logs, None)
    raise TimeoutError(f"Historical range not available after {timeout}s")
def call(
    self,
    path: str,
    body: Optional[Union[str, dict, bytes]] = None,
    http_verb: str = "POST",
    headers: Optional[dict] = None,
    timeout: int = DEFAULT_REQUEST_TIMEOUT_SEC,
    log_capture: Optional[list] = None,
    allow_redirects: bool = True,
) -> Response:
    """
    Issues one request, synchronously, and returns the response.

    :param str path: URI of the targeted resource. Must begin with '/'
    :param body: Request body (optional).
    :type body: str or dict or bytes
    :param str http_verb: HTTP verb (e.g. "POST" or "GET").
    :param dict headers: HTTP request headers (optional).
    :param int timeout: Maximum time to wait for a response before giving up.
    :param list log_capture: Rather than emit to default handler, capture log lines to list (optional).
    :param bool allow_redirects: Select whether redirects are followed.

    :return: :py:class:`ccf.clients.Response`
    """
    if not path.startswith("/"):
        raise ValueError(f"URL path '{path}' is invalid, must start with /")

    def issue(capture):
        # Single synchronous request through the concrete implementation
        return self._call(
            path, body, http_verb, headers, timeout, capture, allow_redirects
        )

    logs: List[str] = []

    # Fast path: once connected, no retry wrapper is needed
    if self.is_connected:
        r = issue(logs)
        flush_info(logs, log_capture, 2)
        return r

    deadline = time.time() + self.connection_timeout
    while True:
        try:
            logs = []
            response = issue(logs)
            # Only the first request gets this timeout logic - future calls
            # call _call
            self.is_connected = True
            flush_info(logs, log_capture, 2)
            return response
        except (CCFConnectionException, TimeoutError) as e:
            # If the initial connection fails (e.g. due to node certificate
            # not yet being endorsed by the network) sleep briefly and try again
            if time.time() > deadline:
                flush_info(logs, log_capture, 2)
                raise CCFConnectionException(
                    f"Connection still failing after {self.connection_timeout}s"
                ) from e
            time.sleep(0.1)
def create_connections_until_exhaustion(
    target, continue_to_hard_cap=False, client_fn=primary.client
):
    """Open client sessions against the node until the connection cap is hit.

    Creates up to `target` single-connection clients (kept alive by an
    ExitStack), posting one write from each to confirm it is usable. If the
    node refuses sessions (503) or connections fail repeatedly, creation
    stops; if all `target` clients are created without any refusal, an
    AllConnectionsCreatedException is raised since the test expects to
    exhaust the node's available connections. Afterwards, submits additional
    requests from the healthy clients, then drops all sessions and returns
    the node's open file-descriptor count.

    NOTE(review): relies on closure variables `primary`, `check`, `args`,
    `primary_pid` and `max_fds` from the enclosing scope.
    """
    with contextlib.ExitStack() as es:
        clients = []
        LOG.success(f"Creating {target} clients")
        consecutive_failures = 0
        i = 1
        healthy_clients = []
        while i <= target:
            logs = []
            try:
                # Each client is limited to exactly one connection so every
                # client costs the node one session/file descriptor
                clients.append(
                    es.enter_context(
                        client_fn(
                            identity="user0",
                            connection_timeout=1,
                            limits=httpx.Limits(
                                max_connections=1,
                                max_keepalive_connections=1,
                                keepalive_expiry=30,
                            ),
                        )
                    )
                )
                # Probe the new session with a simple write
                r = clients[-1].post(
                    "/app/log/private",
                    {"id": 42, "msg": "foo"},
                    log_capture=logs,
                )
                if r.status_code == http.HTTPStatus.OK:
                    check(
                        r,
                        result=True,
                    )
                    consecutive_failures = 0
                    i += 1
                    healthy_clients.append(clients[-1])
                elif r.status_code == http.HTTPStatus.SERVICE_UNAVAILABLE:
                    # Node is at its soft session cap: either keep pushing
                    # towards the hard cap, or treat it as the stop signal
                    if continue_to_hard_cap:
                        consecutive_failures = 0
                        i += 1
                        continue
                    raise RuntimeError(r.body.text())
                else:
                    flush_info(logs)
                    raise ValueError(
                        f"Unexpected response status code: {r.status_code}"
                    )
            except (CCFConnectionException, RuntimeError) as e:
                flush_info(logs)
                LOG.warning(f"Hit exception at client {i}/{target}: {e}")
                # Discard the client that failed to connect/probe
                clients.pop(-1)
                if consecutive_failures < 5:
                    # Maybe got unlucky and tried to create a session while many files were open - keep trying
                    consecutive_failures += 1
                    continue
                else:
                    # Ok you've really hit a wall, stop trying to create clients
                    break
        else:
            # while-else: loop ran to completion without break, i.e. all
            # `target` clients were created — the cap was never reached
            raise AllConnectionsCreatedException(
                f"Successfully created {target} clients without exception - expected this to exhaust available connections"
            )
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(
            f"{primary_pid} has {num_fds}/{max_fds} open file descriptors"
        )
        # Cross-check node-side session metrics against local bookkeeping
        r = clients[0].get("/node/metrics")
        assert r.status_code == http.HTTPStatus.OK, r.status_code
        peak_metrics = r.body.json()["sessions"]
        assert peak_metrics["active"] <= peak_metrics["peak"], peak_metrics
        assert peak_metrics["active"] == len(healthy_clients), (
            peak_metrics,
            len(healthy_clients),
        )
        # Submit many requests, and at least enough to trigger additional snapshots
        more_requests = max(len(clients) * 3, args.snapshot_tx_interval * 2)
        LOG.info(
            f"Submitting an additional {more_requests} requests from existing clients"
        )
        for _ in range(more_requests):
            client = random.choice(healthy_clients)
            logs = []
            try:
                client.post(
                    "/app/log/private",
                    {"id": 42, "msg": "foo"},
                    timeout=1,
                    log_capture=logs,
                )
            except Exception as e:
                flush_info(logs)
                LOG.error(e)
                raise e
        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(
            f"{primary_pid} has {num_fds}/{max_fds} open file descriptors"
        )
        LOG.info("Disconnecting clients")
        # Dropping the references; the ExitStack exit below actually closes
        # the sessions
        clients = []
    # Give the node a moment to reap the closed sessions before sampling FDs
    time.sleep(1)
    num_fds = psutil.Process(primary_pid).num_fds()
    LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
    return num_fds