def run(args):
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        cmd = [
            "python",
            args.client_tutorial,
            network.common_dir,
        ]
        rc = infra.proc.ccall(*cmd).returncode
        assert rc == 0, f"Failed to run tutorial script: {rc}"

        cmd = [
            "python",
            args.ledger_tutorial,
            primary.get_ledger()[1],
        ]
        rc = infra.proc.ccall(*cmd).returncode
        assert rc == 0, f"Failed to run tutorial script: {rc}"
def run(args):
    args.jwt_key_refresh_interval_s = 1

    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        network = test_jwt_without_key_policy(network, args)
        network = test_jwt_with_sgx_key_policy(network, args)
        network = test_jwt_with_sgx_key_filter(network, args)
        network = test_jwt_key_auto_refresh(network, args)

    args.jwt_key_refresh_interval_s = 100000

    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        network = test_jwt_key_initial_refresh(network, args)
def run_to_destruction(args):
    hosts = ["localhost", "localhost", "localhost"]

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

        LOG.warning("About to issue transactions until destruction")
        try:
            wsm = 5000
            while True:
                LOG.info(f"Trying with writes scaled by {wsm}")
                network = test(network, args, batch_size=10, write_size_multiplier=wsm)
                wsm += 50000  # Grow very quickly, expect to fail on the second iteration
        except Exception as e:
            timeout = 10

            LOG.info("Large write set caused an exception, as expected")
            LOG.info(f"Exception was: {e}")
            LOG.info(f"Polling for {timeout}s for node to terminate")

            end_time = time.time() + timeout
            while time.time() < end_time:
                time.sleep(0.1)
                exit_code = network.nodes[0].remote.remote.proc.poll()
                if exit_code is not None:
                    LOG.info(f"Node terminated with exit code {exit_code}")
                    assert exit_code != 0
                    break

            if time.time() > end_time:
                raise TimeoutError(
                    f"Node took longer than {timeout}s to terminate"
                ) from e

            network.ignore_errors_on_shutdown()
def run(args):
    hosts = ["localhost", "localhost", "localhost"]

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

        network = test(network, args, batch_size=1)
        network = test(network, args, batch_size=10)
        network = test(network, args, batch_size=100)
        network = test(network, args, batch_size=1000)

        network = test(network, args, batch_size=1000, write_key_divisor=10)
        network = test(network, args, batch_size=1000, write_size_multiplier=10)
        network = test(
            network,
            args,
            batch_size=1000,
            write_key_divisor=10,
            write_size_multiplier=10,
        )
def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_add_node_from_backup(network, args)
        test_add_node(network, args)
        test_add_node_untrusted_code(network, args)
        test_retire_backup(network, args)
        test_add_as_many_pending_nodes(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        if args.snapshot_tx_interval is not None:
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=True)
def run(args):
    hosts = ["localhost", "localhost", "localhost"]

    txs = app.LoggingTxs()
    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb, txs=txs
    ) as network:
        network.start_and_join(args)

        for i in range(args.recovery):
            # Alternate between recovery with primary change and stable primary-ship
            if i % 2 == 0:
                recovered_network = test_share_resilience(network, args)
            else:
                recovered_network = test(network, args)
            network.stop_all_nodes()
            network = recovered_network
            LOG.success("Recovery complete on all nodes")
def run(args):
    txs = app.LoggingTxs("user0")
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_version(network, args)

        if args.consensus != "bft":
            test_join_straddling_primary_replacement(network, args)
            test_node_replacement(network, args)
            test_add_node_from_backup(network, args)
            test_add_node(network, args)
            test_add_node_on_other_curve(network, args)
            test_retire_backup(network, args)
            test_add_as_many_pending_nodes(network, args)
            test_add_node(network, args)
            test_retire_primary(network, args)
            test_add_node_with_read_only_ledger(network, args)

            test_add_node_from_snapshot(network, args)
            test_add_node_from_snapshot(network, args, from_backup=True)
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=False)

            test_node_filter(network, args)
            test_retiring_nodes_emit_at_most_one_signature(network, args)

        if args.reconfiguration_type == "2tx":
            test_learner_catches_up(network, args)

        test_node_certificates_validity_period(network, args)
        test_add_node_invalid_validity_period(network, args)
def run(args):
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
    ) as network:
        network.start_and_join(args)

        txs = app.LoggingTxs()
        txs.issue(
            network=network,
            number_txs=3,
        )
        txs.verify()

        network = test(network, args)

        txs.issue(
            network=network,
            number_txs=3,
        )
        txs.verify()
def service_startups(args):
    LOG.info("Starting service with insufficient number of recovery members")
    args.initial_member_count = 2
    args.initial_recovery_member_count = 0
    args.initial_operator_count = 1
    with infra.network.network(args.nodes, args.binary_dir, pdb=args.pdb) as network:
        try:
            network.start_and_join(args)
            assert False, "Service cannot be opened with no recovery members"
        except AssertionError:
            primary, _ = network.find_primary()
            network.consortium.check_for_service(
                primary, infra.network.ServiceStatus.OPENING
            )
            LOG.success(
                "Service could not be opened with insufficient number of recovery members"
            )

    LOG.info(
        "Starting service with a recovery operator member, a non-recovery operator member and a non-recovery non-operator member"
    )
    args.initial_member_count = 3
    args.initial_recovery_member_count = 1
    args.initial_operator_count = 2
    with infra.network.network(args.nodes, args.binary_dir, pdb=args.pdb) as network:
        network.start_and_join(args)

    LOG.info(
        "Starting service with a recovery operator member, a recovery non-operator member and a non-recovery non-operator member"
    )
    args.initial_member_count = 3
    args.initial_recovery_member_count = 2
    args.initial_operator_count = 1
    with infra.network.network(args.nodes, args.binary_dir, pdb=args.pdb) as network:
        network.start_and_join(args)
def service_startups(args):
    LOG.info("Starting service with insufficient number of recovery members")
    args.initial_member_count = 2
    args.initial_recovery_member_count = 0
    args.initial_operator_count = 1
    with infra.network.network(args.nodes, args.binary_dir, pdb=args.pdb) as network:
        try:
            network.start_and_join(args)
            assert False, "Service cannot be opened with no recovery members"
        except infra.proposal.ProposalNotAccepted as e:
            assert (
                e.proposal.state == infra.proposal.ProposalState.OPEN
            ), e.proposal.state
            LOG.success(
                "Service could not be opened with insufficient number of recovery members"
            )

    LOG.info(
        "Starting service with a recovery operator member, a non-recovery operator member and a non-recovery non-operator member"
    )
    args.initial_member_count = 3
    args.initial_recovery_member_count = 1
    args.initial_operator_count = 2
    with infra.network.network(args.nodes, args.binary_dir, pdb=args.pdb) as network:
        network.start_and_join(args)

    LOG.info(
        "Starting service with a recovery operator member, a recovery non-operator member and a non-recovery non-operator member"
    )
    args.initial_member_count = 3
    args.initial_recovery_member_count = 2
    args.initial_operator_count = 1
    with infra.network.network(args.nodes, args.binary_dir, pdb=args.pdb) as network:
        network.start_and_join(args)
def recovery_shares_scenario(args):
    # Members 0 and 1 are recovery members, member 2 isn't
    args.initial_member_count = 3
    args.initial_recovery_member_count = 2
    non_recovery_member_id = "member2"

    # Recovery threshold is initially set to number of recovery members (2)
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

        # Membership changes trigger re-sharing and re-keying and are
        # only supported with CFT
        if args.consensus != "cft":
            LOG.warning("Skipping test recovery threshold as consensus is not CFT")
            return

        LOG.info("Update recovery shares")
        assert_recovery_shares_update(True, test_update_recovery_shares, network, args)

        LOG.info("Non-recovery member does not have a recovery share")
        primary, _ = network.find_primary()
        with primary.client(non_recovery_member_id) as mc:
            r = mc.get("/gov/recovery_share")
            assert r.status_code == http.HTTPStatus.NOT_FOUND.value
            assert (
                f"Recovery share not found for member {network.consortium.get_member_by_local_id(non_recovery_member_id).service_id}"
                in r.body.json()["error"]["message"]
            )

        # Removing a recovery member is not possible as the number of recovery
        # members would drop below the recovery threshold (2)
        LOG.info("Removing a recovery member should not be possible")
        try:
            test_remove_member(network, args, recovery_member=True)
            assert False, "Removing a recovery member should not be possible"
        except infra.proposal.ProposalNotAccepted as e:
            assert e.proposal.state == infra.proposal.ProposalState.FAILED

        # However, removing a non-recovery member is allowed
        LOG.info("Removing a non-recovery member is still possible")
        member_to_remove = network.consortium.get_member_by_local_id(
            non_recovery_member_id
        )
        test_remove_member(network, args, member_to_remove=member_to_remove)

        LOG.info("Removing an already-removed member succeeds with no effect")
        test_remove_member(network, args, member_to_remove=member_to_remove)

        LOG.info("Adding one non-recovery member")
        assert_recovery_shares_update(
            False, test_add_member, network, args, recovery_member=False
        )
        LOG.info("Adding one recovery member")
        assert_recovery_shares_update(
            True, test_add_member, network, args, recovery_member=True
        )
        LOG.info("Removing one non-recovery member")
        assert_recovery_shares_update(
            False, test_remove_member, network, args, recovery_member=False
        )
        LOG.info("Removing one recovery member")
        assert_recovery_shares_update(
            True, test_remove_member, network, args, recovery_member=True
        )

        LOG.info("Reduce recovery threshold")
        assert_recovery_shares_update(
            True,
            test_set_recovery_threshold,
            network,
            args,
            recovery_threshold=network.consortium.recovery_threshold - 1,
        )

        # Removing a recovery member now succeeds
        LOG.info("Removing one recovery member")
        assert_recovery_shares_update(
            True, test_remove_member, network, args, recovery_member=True
        )

        LOG.info("Set recovery threshold to 0 is impossible")
        exception = infra.proposal.ProposalNotCreated
        try:
            test_set_recovery_threshold(network, args, recovery_threshold=0)
            assert False, "Setting recovery threshold to 0 should not be possible"
        except exception as e:
            assert (
                e.response.status_code == 400
                and e.response.body.json()["error"]["code"]
                == "ProposalFailedToValidate"
            ), e.response.body.text()

        LOG.info(
            "Set recovery threshold to more than number of active recovery members is impossible"
        )
        try:
            test_set_recovery_threshold(
                network,
                args,
                recovery_threshold=len(
                    network.consortium.get_active_recovery_members()
                )
                + 1,
            )
            assert (
                False
            ), "Setting recovery threshold to more than number of active recovery members should not be possible"
        except infra.proposal.ProposalNotAccepted as e:
            assert e.proposal.state == infra.proposal.ProposalState.FAILED

        try:
            test_set_recovery_threshold(network, args, recovery_threshold=256)
            assert False, "Recovery threshold cannot be set to > 255"
        except exception as e:
            assert (
                e.response.status_code == 400
                and e.response.body.json()["error"]["code"]
                == "ProposalFailedToValidate"
            ), e.response.body.text()

        try:
            network.consortium.set_recovery_threshold(primary, recovery_threshold=None)
            assert False, "Recovery threshold value must be passed as proposal argument"
        except exception as e:
            assert (
                e.response.status_code == 400
                and e.response.body.json()["error"]["code"]
                == "ProposalFailedToValidate"
            ), e.response.body.text()

        LOG.info(
            "Setting recovery threshold to current threshold does not update shares"
        )
        assert_recovery_shares_update(
            False,
            test_set_recovery_threshold,
            network,
            args,
            recovery_threshold=network.consortium.recovery_threshold,
        )
def run(args):
    hosts = args.node or ["localhost"] * 3

    if not args.verbose:
        LOG.remove()
        LOG.add(
            sys.stdout,
            format="<green>[{time:YYYY-MM-DD HH:mm:ss.SSS}]</green> {message}",
        )
        LOG.disable("infra")
        LOG.disable("ccf")

    LOG.info(f"Starting {len(hosts)} CCF nodes...")
    if args.enclave_type == "virtual":
        LOG.warning("Virtual mode enabled")

    with infra.network.network(
        hosts=hosts, binary_directory=args.binary_dir, dbg_nodes=args.debug_nodes
    ) as network:
        if args.recover:
            args.label = args.label + "_recover"
            LOG.info("Recovering network from:")
            LOG.info(f" - Ledger: {args.ledger_dir}")
            LOG.info(
                f" - Defunct network public encryption key: {args.network_enc_pubk}"
            )
            LOG.info(f" - Common directory: {args.common_dir}")
            network.start_in_recovery(args, args.ledger_dir, args.common_dir)
            network.recover(args, args.network_enc_pubk)
        else:
            network.start_and_join(args)

        primary, backups = network.find_nodes()

        LOG.info("Started CCF network with the following nodes:")
        LOG.info(
            " Node [{:2d}] = {}:{}".format(
                primary.node_id, primary.pubhost, primary.rpc_port
            )
        )
        for b in backups:
            LOG.info(" Node [{:2d}] = {}:{}".format(b.node_id, b.pubhost, b.rpc_port))

        # Dump primary info to file for tutorial testing
        if args.network_info_file is not None:
            dump_network_info(args.network_info_file, network, primary)

        LOG.info(
            f"You can now issue business transactions to the {args.package} application."
        )
        LOG.info(
            f"Keys and certificates have been copied to the common folder: {network.common_dir}"
        )
        LOG.info(
            "See https://microsoft.github.io/CCF/users/issue_commands.html for more information."
        )
        LOG.warning("Press Ctrl+C to shutdown the network.")

        try:
            while True:
                time.sleep(60)
        except KeyboardInterrupt:
            LOG.info("Stopping all CCF nodes...")

    LOG.info("All CCF nodes stopped.")
def run(get_command, args):
    if args.fixed_seed:
        seed(getpass.getuser())

    hosts = args.nodes
    if not hosts:
        hosts = ["local://localhost"] * minimum_number_of_local_nodes(args)

    args.initial_user_count = 3
    args.sig_ms_interval = 1000  # Set to cchost default value

    LOG.info("Starting nodes on {}".format(hosts))
    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, backups = network.find_nodes()

        command_args = get_command_args(args, get_command)

        if args.use_jwt:
            jwt_issuer = infra.jwt_issuer.JwtIssuer("https://example.issuer")
            jwt_issuer.register(network)
            jwt = jwt_issuer.issue_jwt()
            command_args += ["--bearer-token", jwt]

        nodes_to_send_to = filter_nodes(primary, backups, args.send_tx_to)
        clients = []
        client_hosts = []
        if args.one_client_per_backup:
            if not backups:
                raise Exception(
                    "--one-client-per-backup was set but no backup was found"
                )
            client_hosts = ["localhost"] * len(backups)
        else:
            if args.client_nodes:
                client_hosts.extend(args.client_nodes)

        if args.num_localhost_clients:
            client_hosts.extend(["localhost"] * int(args.num_localhost_clients))

        if not client_hosts:
            client_hosts = ["localhost"]

        for client_id, client_host in enumerate(client_hosts):
            node = nodes_to_send_to[client_id % len(nodes_to_send_to)]
            remote_client = configure_remote_client(
                args, client_id, client_host, node, command_args
            )
            clients.append(remote_client)

        if args.network_only:
            for remote_client in clients:
                LOG.info(f"Client can be run with: {remote_client.remote.get_cmd()}")
            while True:
                time.sleep(60)
        else:
            for remote_client in clients:
                remote_client.start()

            hard_stop_timeout = 90

            try:
                with cimetrics.upload.metrics(complete=False) as metrics:
                    tx_rates = infra.rates.TxRates(primary)
                    start_time = time.time()
                    while True:
                        stop_waiting = True
                        for i, remote_client in enumerate(clients):
                            done = remote_client.check_done()
                            # all the clients need to be done
                            LOG.info(
                                f"Client {i} has {'completed' if done else 'not completed'} running ({time.time() - start_time:.2f}s / {hard_stop_timeout}s)"
                            )
                            stop_waiting = stop_waiting and done
                        if stop_waiting:
                            break
                        if time.time() > start_time + hard_stop_timeout:
                            raise TimeoutError(
                                f"Client still running after {hard_stop_timeout}s"
                            )

                        time.sleep(5)

                    tx_rates.get_metrics()

                    for remote_client in clients:
                        perf_result = remote_client.get_result()
                        LOG.success(f"{args.label}/{remote_client.name}: {perf_result}")

                        # TODO: Only results for first client are uploaded
                        # https://github.com/microsoft/CCF/issues/1046
                        if remote_client == clients[0]:
                            LOG.success(f"Uploading results for {remote_client.name}")
                            metrics.put(args.label, perf_result)
                        else:
                            LOG.warning(f"Skipping upload for {remote_client.name}")

                    primary, _ = network.find_primary()
                    with primary.client() as nc:
                        r = nc.get("/node/memory")
                        assert r.status_code == http.HTTPStatus.OK.value
                        results = r.body.json()
                        tx_rates.insert_metrics(**results)

                        # Construct name for heap metric, removing ^ suffix if present
                        heap_peak_metric = args.label
                        if heap_peak_metric.endswith("^"):
                            heap_peak_metric = heap_peak_metric[:-1]
                        heap_peak_metric += "_mem"

                        peak_value = results["peak_allocated_heap_size"]
                        metrics.put(heap_peak_metric, peak_value)

                    LOG.info(f"Rates:\n{tx_rates}")
                    tx_rates.save_results(args.metrics_file)

                    for remote_client in clients:
                        remote_client.stop()

            except Exception:
                LOG.error("Stopping clients due to exception")
                for remote_client in clients:
                    remote_client.stop()
                raise
def run(args):
    hosts = ["localhost", "localhost"]

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

        primary, _ = network.find_nodes()

        first_code_id = get_code_id(
            infra.path.build_lib_path(args.package, args.enclave_type)
        )

        with primary.client() as uc:
            r = uc.get("/node/code")
            assert r.body.json() == {
                "versions": [{"digest": first_code_id, "status": "ACCEPTED"}],
            }, r.body

        LOG.info("Adding a new node")
        new_node = network.create_and_trust_node(args.package, "localhost", args)
        assert new_node

        new_code_id = get_code_id(
            infra.path.build_lib_path(args.patched_file_name, args.enclave_type)
        )

        LOG.info(f"Adding a node with unsupported code id {new_code_id}")
        code_not_found_exception = None
        try:
            network.create_and_add_pending_node(
                args.patched_file_name, "localhost", args, timeout=3
            )
        except infra.network.CodeIdNotFound as err:
            code_not_found_exception = err

        assert (
            code_not_found_exception is not None
        ), f"Adding a node with unsupported code id {new_code_id} should fail"

        # Slow quote verification means that any attempt to add a node may cause
        # an election, so confirm primary after adding node
        primary, _ = network.find_primary()

        network.consortium.add_new_code(primary, new_code_id)

        with primary.client() as uc:
            r = uc.get("/node/code")
            versions = sorted(r.body.json()["versions"], key=lambda x: x["digest"])
            expected = sorted(
                [
                    {"digest": first_code_id, "status": "ACCEPTED"},
                    {"digest": new_code_id, "status": "ACCEPTED"},
                ],
                key=lambda x: x["digest"],
            )
            assert versions == expected, versions

        new_nodes = set()
        old_nodes_count = len(network.nodes)
        new_nodes_count = old_nodes_count + 1

        LOG.info(
            f"Adding more new nodes ({new_nodes_count}) than originally existed ({old_nodes_count})"
        )
        for _ in range(0, new_nodes_count):
            new_node = network.create_and_trust_node(
                args.patched_file_name, "localhost", args
            )
            assert new_node
            new_nodes.add(new_node)

        LOG.info("Stopping all original nodes")
        old_nodes = set(network.nodes).difference(new_nodes)
        for node in old_nodes:
            LOG.debug(f"Stopping old node {node.node_id}")
            node.stop()

        new_primary, _ = network.wait_for_new_primary(primary.node_id)
        LOG.info(f"New_primary is {new_primary.node_id}")

        LOG.info("Adding another node to the network")
        new_node = network.create_and_trust_node(
            args.patched_file_name, "localhost", args
        )
        assert new_node
        network.wait_for_node_commit_sync(args.consensus)

        LOG.info("Remove first code id")
        network.consortium.retire_code(new_node, first_code_id)

        with new_node.client() as uc:
            r = uc.get("/node/code")
            versions = sorted(r.body.json()["versions"], key=lambda x: x["digest"])
            expected = sorted(
                [
                    {"digest": first_code_id, "status": "RETIRED"},
                    {"digest": new_code_id, "status": "ACCEPTED"},
                ],
                key=lambda x: x["digest"],
            )
            assert versions == expected, versions

        LOG.info(f"Adding a node with retired code id {first_code_id}")
        code_not_found_exception = None
        try:
            network.create_and_add_pending_node(
                args.package, "localhost", args, timeout=3
            )
        except infra.network.CodeIdRetired as err:
            code_not_found_exception = err

        assert (
            code_not_found_exception is not None
        ), f"Adding a node with retired code id {first_code_id} should fail"

        LOG.info("Adding another node with the new code to the network")
        new_node = network.create_and_trust_node(
            args.patched_file_name, "localhost", args
        )
        assert new_node
        network.wait_for_node_commit_sync(args.consensus)
def run(args):
    # This is deliberately 5, because the rest of the test depends on this
    # to grow a prefix and allow just enough nodes to resume to reach the
    # desired election result. Conversion to a general f isn't trivial.
    hosts = ["local://localhost"] * 5

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, backups = network.find_nodes()

        # Suspend three of the backups to prevent commit
        backups[1].suspend()
        backups[2].suspend()
        backups[3].stop()

        committable_txs = []
        # Run some transactions that can't be committed now
        with primary.client("user0") as uc:
            for i in range(3):
                committable_txs.append(
                    uc.post("/app/log/private", {"id": 100 + i, "msg": "Hello world"})
                )

        last_tx = committable_txs[-1]
        sig_view, sig_seqno = last_tx.view, last_tx.seqno + 1
        with backups[0].client() as bc:
            wait_for_pending(bc, sig_view, sig_seqno)

        # Suspend the final backup and run some transactions which only the partitioned
        # primary hears, which should be discarded by the new primary
        # NB: We can't guarantee that these will be discarded. Since we can't control
        # what order the queued actions occur in after resuming, they may be appended
        # before an election is called. The key assertion is that this primary is able
        # to rejoin the network whatever happens, even when (in the usual case) they
        # hold a suffix which has been discarded.
        backups[0].suspend()
        post_partition_txs = []
        with primary.client("user0") as uc:
            for i in range(3):
                post_partition_txs.append(
                    uc.post("/app/log/private", {"id": 100 + i, "msg": "Hello world"})
                )

        # Sleep long enough that this primary should be instantly replaced when nodes wake
        sleep_time = 2 * args.raft_election_timeout_ms / 1000
        LOG.info(f"Sleeping {sleep_time}s")
        time.sleep(sleep_time)

        # Suspend the primary, resume other backups
        primary.suspend()
        backups[0].resume()
        backups[1].resume()
        backups[2].resume()

        new_primary, _ = network.wait_for_new_primary(primary, timeout_multiplier=6)

        with new_primary.client("user0") as uc:
            # Check that uncommitted but committable suffix is preserved
            check_commit = infra.checker.Checker(uc)
            for tx in committable_txs:
                check_commit(tx)

        # Check that new transactions can be committed
        with new_primary.client("user0") as uc:
            for i in range(3):
                r = uc.post("/app/log/private", {"id": 100 + i, "msg": "Hello world"})
                assert r.status_code == 200
                uc.wait_for_commit(r)

        # Resume original primary, check that they rejoin correctly, including new transactions
        primary.resume()
        network.wait_for_node_commit_sync()
def run(args):
    hosts = ["localhost"] * (4 if args.consensus == "bft" else 2)

    os.makedirs(args.schema_dir, exist_ok=True)

    changed_files = []
    methods_with_schema = set()
    methods_without_schema = set()
    old_schema = set(
        os.path.join(dir_path, filename)
        for dir_path, _, filenames in os.walk(args.schema_dir)
        for filename in filenames
    )

    documents_valid = True
    all_methods = []

    def fetch_schema(client, prefix):
        api_response = client.get(f"/{prefix}/api")
        check(
            api_response,
            error=lambda status, msg: status == http.HTTPStatus.OK.value,
        )

        response_body = api_response.body.json()
        paths = response_body["paths"]
        all_methods.extend(paths.keys())

        # Fetch the schema of each method
        for method, _ in paths.items():
            schema_found = False
            expected_method_prefix = "/"
            if method.startswith(expected_method_prefix):
                method = method[len(expected_method_prefix) :]
            schema_response = client.get(f'/{prefix}/api/schema?method="{method}"')
            check(
                schema_response,
                error=lambda status, msg: status == http.HTTPStatus.OK.value,
            )

            if schema_response.body:
                for verb, schema_element in schema_response.body.json().items():
                    for schema_type in ["params", "result"]:
                        element_name = "{}_schema".format(schema_type)
                        element = schema_element[element_name]
                        target_file = build_schema_file_path(
                            args.schema_dir, verb, method, schema_type
                        )
                        if element is not None and len(element) != 0:
                            try:
                                old_schema.remove(target_file)
                            except KeyError:
                                pass
                            schema_found = True
                            formatted_schema = json.dumps(element, indent=2)
                            os.makedirs(os.path.dirname(target_file), exist_ok=True)
                            with open(target_file, "a+") as f:
                                f.seek(0)
                                previous = f.read()
                                if previous != formatted_schema:
                                    LOG.debug(
                                        "Writing schema to {}".format(target_file)
                                    )
                                    f.truncate(0)
                                    f.seek(0)
                                    f.write(formatted_schema)
                                    changed_files.append(target_file)
                                else:
                                    LOG.debug(
                                        "Schema matches in {}".format(target_file)
                                    )

            if schema_found:
                methods_with_schema.add(method)
            else:
                methods_without_schema.add(method)

        formatted_schema = json.dumps(response_body, indent=2)
        openapi_target_file = os.path.join(args.schema_dir, f"{prefix}_openapi.json")

        try:
            old_schema.remove(openapi_target_file)
        except KeyError:
            pass

        with open(openapi_target_file, "a+") as f:
            f.seek(0)
            previous = f.read()
            if previous != formatted_schema:
                LOG.debug("Writing schema to {}".format(openapi_target_file))
                f.truncate(0)
                f.seek(0)
                f.write(formatted_schema)
                changed_files.append(openapi_target_file)
            else:
                LOG.debug("Schema matches in {}".format(openapi_target_file))

        try:
            openapi_spec_validator.validate_spec(response_body)
        except Exception as e:
            LOG.error(f"Validation of {prefix} schema failed")
            LOG.error(e)
            return False

        return True

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes
    ) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        check = infra.checker.Checker()

        with primary.client("user0") as user_client:
            LOG.info("user frontend")
            if not fetch_schema(user_client, "app"):
                documents_valid = False

        with primary.client() as node_client:
            LOG.info("node frontend")
            if not fetch_schema(node_client, "node"):
                documents_valid = False

        with primary.client("member0") as member_client:
            LOG.info("member frontend")
            if not fetch_schema(member_client, "gov"):
                documents_valid = False

    made_changes = False

    if len(old_schema) > 0:
        LOG.error("Removing old files which are no longer reported by the service:")
        for f in old_schema:
            LOG.error(" " + f)
            os.remove(f)
            f_dir = os.path.dirname(f)
            # Remove empty directories too
            while not os.listdir(f_dir):
                os.rmdir(f_dir)
                f_dir = os.path.dirname(f_dir)
        made_changes = True

    if len(changed_files) > 0:
        LOG.error("Made changes to the following schema files:")
        for f in changed_files:
            LOG.error(" " + f)
        made_changes = True

    if args.list_all:
        LOG.info("Discovered methods:")
        for method in sorted(set(all_methods)):
            LOG.info(f" {method}")

    if made_changes or not documents_valid:
        sys.exit(1)
def run(args):
    hosts = ["localhost"]

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, others = network.find_nodes()

        regulators = [AppUser(network, "FCA", "GB"), AppUser(network, "SEC", "FR")]
        banks = [
            AppUser(network, f"bank{country}", country)
            for country in ("US", "GB", "GR", "FR")
        ]

        # Give regulators permissions to register regulators and banks
        for regulator in regulators:
            proposal_body, _ = ccf.proposal_generator.set_user_data(
                regulator.ccf_id,
                {"proposals": {"REGISTER_REGULATORS": True, "REGISTER_BANKS": True}},
            )
            (
                proposal_result,
                error,
            ) = network.consortium.get_any_active_member().propose(
                primary, proposal_body
            )
            network.consortium.vote_using_majority(primary, proposal_result["id"])

        if args.run_poll:
            with open("revealed.log", "a+") as stdout:
                subprocess.Popen(
                    [
                        "python3",
                        f"{os.path.realpath(os.path.dirname(__file__))}/poll.py",
                        f"--host={primary.host}",
                        f"--port={primary.rpc_port}",
                        f"--regulator-name={regulators[0].name}",
                        f"--bank-name={banks[0].name}",
                    ],
                    stdout=stdout,
                )
        else:
            LOG.warning("")
            LOG.warning(
                "================= Network setup complete, you can run the below command to poll the service. "
                + "Press enter to continue ================="
            )
            LOG.warning("")
            LOG.warning(
                f"python3 {os.path.realpath(os.path.dirname(__file__))}/poll.py --host={primary.host} --port={primary.rpc_port}"
            )
            LOG.warning("")
            input("")

        data = []
        with open(args.lua_script, "r") as f:
            data = f.readlines()

        scripts = {}
        scripts["FCA"] = "".join(data)
        scripts[
            "SEC"
        ] = "if tonumber(amt) > 15000 then return true else return false end"

        for regulator in regulators:
            with primary.user_client(format="msgpack", user_id=regulator.name) as c:
                check = infra.checker.Checker()

                check(
                    c.post(
                        "REG_register",
                        {
                            "regulator_id": regulator.ccf_id,
                            "country": regulator.country,
                            "script": scripts[regulator.name],
                            "name": regulator.name,
                        },
                    ),
                    result=regulator.ccf_id,
                )
                check(
                    c.post("REG_get", {"id": regulator.ccf_id}),
                    result=[
                        regulator.country,
                        scripts[regulator.name],
                        regulator.name,
                    ],
                )

            LOG.debug(f"User {regulator} successfully registered as regulator")

        with primary.user_client(format="msgpack", user_id=regulators[0].name) as c:
            for bank in banks:
                check = infra.checker.Checker()

                check(
                    c.post(
                        "BK_register",
                        {"bank_id": bank.ccf_id, "country": bank.country},
                    ),
                    result=bank.ccf_id,
                )
                check(c.post("BK_get", {"id": bank.ccf_id}), result=bank.country)
                LOG.debug(f"User {bank} successfully registered as bank")

        LOG.success(
            f"{len(regulators)} regulator and {len(banks)} bank(s) successfully setup"
        )

        tx_id = 0  # Tracks how many transactions have been issued
        LOG.info(f"Loading scenario file as bank {banks[0].ccf_id} ({banks[0].name})")

        with primary.user_client(
            format="msgpack", user_id=regulators[0].name
        ) as reg_c:
            with primary.user_client(format="msgpack", user_id=banks[0].name) as c:
                with open(args.datafile, newline="") as f:
                    start_time = perf_counter()
                    datafile = csv.DictReader(f)
                    for row in datafile:
                        json_tx = {
                            "src": row["origin"],
                            "dst": row["destination"],
                            "amt": row["amount"],
                            "type": row["type"],
                            "timestamp": strftime(
                                "%a, %d %b %Y %H:%M:%S +0000", gmtime()
                            ),
                            "src_country": row["src_country"],
                            "dst_country": row["dst_country"],
                        }
                        check(c.post("TX_record", json_tx), result=tx_id)
                        print(json.dumps(json_tx))
                        tx_id += 1

                        if tx_id % 1000 == 0:
                            elapsed_time = perf_counter() - start_time
                            LOG.info(
                                f"1000 transactions took {elapsed_time}: tx_id: {tx_id}"
                            )
                            start_time = perf_counter()
                LOG.success("Scenario file successfully loaded")

        LOG.warning("Data loading completed, press Enter to shutdown...")
        input()
def run(get_command, args):
    if args.fixed_seed:
        seed(getpass.getuser())

    hosts = args.nodes
    if not hosts:
        hosts = ["localhost"] * minimum_number_of_local_nodes(args)

    LOG.info("Starting nodes on {}".format(hosts))
    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, backups = network.find_nodes()

        command_args = get_command_args(args, get_command)

        nodes_to_send_to = filter_nodes(primary, backups, args.send_tx_to)
        clients = []
        client_hosts = []
        if args.one_client_per_backup:
            if not backups:
                raise Exception(
                    "--one-client-per-backup was set but no backup was found"
                )
            client_hosts = ["localhost"] * len(backups)
        else:
            if args.client_nodes:
                client_hosts.extend(args.client_nodes)

        if args.num_localhost_clients:
            client_hosts.extend(["localhost"] * int(args.num_localhost_clients))

        if not client_hosts:
            client_hosts = ["localhost"]

        for client_id, client_host in enumerate(client_hosts):
            node = nodes_to_send_to[client_id % len(nodes_to_send_to)]
            remote_client = configure_remote_client(
                args, client_id, client_host, node, command_args
            )
            clients.append(remote_client)

        if args.network_only:
            for remote_client in clients:
                LOG.info(f"Client can be run with: {remote_client.remote.get_cmd()}")
            while True:
                time.sleep(60)
        else:
            for remote_client in clients:
                remote_client.start()

            hard_stop_timeout = 90

            try:
                with cimetrics.upload.metrics(complete=False) as metrics:
                    tx_rates = infra.rates.TxRates(primary)
                    start_time = time.time()
                    while True:
                        stop_waiting = True
                        for i, remote_client in enumerate(clients):
                            done = remote_client.check_done()
                            # all the clients need to be done
                            LOG.info(
                                f"Client {i} has {'completed' if done else 'not completed'} running ({time.time() - start_time:.2f}s / {hard_stop_timeout}s)"
                            )
                            stop_waiting = stop_waiting and done
                        if stop_waiting:
                            break
                        if time.time() > start_time + hard_stop_timeout:
                            raise TimeoutError(
                                f"Client still running after {hard_stop_timeout}s"
                            )

                        time.sleep(5)

                    tx_rates.get_metrics()

                    for remote_client in clients:
                        perf_result = remote_client.get_result()
                        LOG.success(f"{args.label}/{remote_client.name}: {perf_result}")

                        # TODO: Only results for first client are uploaded
                        # https://github.com/microsoft/CCF/issues/1046
                        if remote_client == clients[0]:
                            LOG.success(f"Uploading results for {remote_client.name}")
                            metrics.put(args.label, perf_result)
                        else:
                            LOG.warning(f"Skipping upload for {remote_client.name}")

                    LOG.info(f"Rates:\n{tx_rates}")
                    tx_rates.save_results(args.metrics_file)

                    for remote_client in clients:
                        remote_client.stop()

            except Exception:
                LOG.error("Stopping clients due to exception")
                for remote_client in clients:
                    remote_client.stop()
                raise
def run(args):
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        network = test_missing_signature_header(network, args)
        network = test_corrupted_signature(network, args)

        LOG.info("Original members can ACK")
        network.consortium.get_any_active_member().ack(primary)

        LOG.info("Network cannot be opened twice")
        try:
            network.consortium.open_network(primary)
        except infra.proposal.ProposalNotAccepted as e:
            assert e.proposal.state == infra.proposal.ProposalState.Failed

        LOG.info("Proposal to add a new member (with different curve)")
        (
            new_member_proposal,
            new_member,
            careful_vote,
        ) = network.consortium.generate_and_propose_new_member(
            remote_node=primary,
            curve=infra.network.ParticipantsCurve(args.participants_curve).next(),
        )

        LOG.info("Check proposal has been recorded in open state")
        proposals = network.consortium.get_proposals(primary)
        proposal_entry = next(
            (p for p in proposals if p.proposal_id == new_member_proposal.proposal_id),
            None,
        )
        assert proposal_entry
        assert proposal_entry.state == ProposalState.Open

        LOG.info("Rest of consortium accept the proposal")
        network.consortium.vote_using_majority(
            primary, new_member_proposal, careful_vote
        )
        assert new_member_proposal.state == ProposalState.Accepted

        # Manually add new member to consortium
        network.consortium.members.append(new_member)

        LOG.debug(
            "Further vote requests fail as the proposal has already been accepted"
        )
        params_error = http.HTTPStatus.BAD_REQUEST.value
        assert (
            network.consortium.get_member_by_id(0)
            .vote(primary, new_member_proposal, careful_vote)
            .status_code
            == params_error
        )
        assert (
            network.consortium.get_member_by_id(1)
            .vote(primary, new_member_proposal, careful_vote)
            .status_code
            == params_error
        )

        LOG.debug("Accepted proposal cannot be withdrawn")
        response = network.consortium.get_member_by_id(
            new_member_proposal.proposer_id
        ).withdraw(primary, new_member_proposal)
        assert response.status_code == params_error

        LOG.info("New non-active member should get insufficient rights response")
        try:
            proposal_trust_0, careful_vote = ccf.proposal_generator.trust_node(0)
            new_member.propose(primary, proposal_trust_0)
            assert (
                False
            ), "New non-active member should get insufficient rights response"
        except infra.proposal.ProposalNotCreated as e:
            assert e.response.status_code == http.HTTPStatus.FORBIDDEN.value

        LOG.debug("New member ACK")
        new_member.ack(primary)

        LOG.info("New member is now active and sends an accept node proposal")
        trust_node_proposal_0 = new_member.propose(primary, proposal_trust_0)
        trust_node_proposal_0.vote_for = careful_vote

        LOG.debug("Members vote to accept the accept node proposal")
        network.consortium.vote_using_majority(
            primary, trust_node_proposal_0, careful_vote
        )
        assert trust_node_proposal_0.state == infra.proposal.ProposalState.Accepted

        LOG.info("New member makes a new proposal")
        proposal_trust_1, careful_vote = ccf.proposal_generator.trust_node(1)
        trust_node_proposal = new_member.propose(primary, proposal_trust_1)

        LOG.debug("Other members (non proposer) are unable to withdraw new proposal")
        response = network.consortium.get_member_by_id(1).withdraw(
            primary, trust_node_proposal
        )
        assert response.status_code == http.HTTPStatus.FORBIDDEN.value

        LOG.debug("Proposer withdraws their proposal")
        response = new_member.withdraw(primary, trust_node_proposal)
        assert response.status_code == http.HTTPStatus.OK.value
        assert trust_node_proposal.state == infra.proposal.ProposalState.Withdrawn

        proposals = network.consortium.get_proposals(primary)
        proposal_entry = next(
            (p for p in proposals if p.proposal_id == trust_node_proposal.proposal_id),
            None,
        )
        assert proposal_entry
        assert proposal_entry.state == ProposalState.Withdrawn

        LOG.debug("Further withdraw proposals fail")
        response = new_member.withdraw(primary, trust_node_proposal)
        assert response.status_code == params_error

        LOG.debug("Further votes fail")
        response = new_member.vote(primary, trust_node_proposal, careful_vote)
        assert response.status_code == params_error
def run(args):
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        network = test_cert_store(network, args)
def run(args):
    hosts = ["localhost", "localhost", "localhost"]

    LOG.info(f"setting seed to {args.seed}")
    random.seed(args.seed)
    txs = app.LoggingTxs()

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb, txs=txs
    ) as network:
        network.start_and_join(args)
        original_nodes = network.get_joined_nodes()
        view_info = {}

        suspend.update_view_info(network, view_info)
        app.test_run_txs(network=network, args=args, num_txs=TOTAL_REQUESTS)
        suspend.update_view_info(network, view_info)

        nodes_to_kill = [network.find_any_backup()]
        nodes_to_keep = [n for n in original_nodes if n not in nodes_to_kill]

        # check that a new node can catch up after all the requests
        late_joiner = network.create_and_trust_node(args.package, "localhost", args)
        nodes_to_keep.append(late_joiner)

        # some requests to be processed while the late joiner catches up
        # (no strict checking that these requests are actually being processed
        # simultaneously with the node catchup)
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=int(TOTAL_REQUESTS / 2),
            nodes=original_nodes,  # doesn't contain late joiner
            verify=False,  # will try to verify for late joiner and it might not be ready yet
        )

        suspend.wait_for_late_joiner(original_nodes[0], late_joiner)

        # kill the old node(s) and ensure we are still making progress
        for backup_to_retire in nodes_to_kill:
            LOG.success(f"Stopping node {backup_to_retire.node_id}")
            backup_to_retire.stop()

        # check nodes are ok after we killed one off
        app.test_run_txs(
            network=network,
            args=args,
            nodes=nodes_to_keep,
            num_txs=len(nodes_to_keep),
            timeout=30,
            ignore_failures=True,
            # in the event of an early view change due to the late joiner this might
            # take longer than usual to complete and we don't want the test to break here
        )

        suspend.test_suspend_nodes(network, args, nodes_to_keep)

        # run txs while nodes get suspended
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=4 * TOTAL_REQUESTS,
            timeout=30,
            ignore_failures=True,
            # in the event of an early view change due to the late joiner this might
            # take longer than usual to complete and we don't want the test to break here
        )

        suspend.update_view_info(network, view_info)

        # check nodes have resumed normal execution before shutting down
        app.test_run_txs(network=network, args=args, num_txs=len(nodes_to_keep))

        # we have asserted that all nodes are caught up

        # assert that view changes actually did occur
        assert len(view_info) > 1

        LOG.success("----------- views and primaries recorded -----------")
        for view, primary in view_info.items():
            LOG.success(f"view {view} - primary {primary}")
def run(get_command, args):
    if args.fixed_seed:
        seed(getpass.getuser())

    hosts = args.nodes
    if not hosts:
        hosts = ["local://localhost"] * minimum_number_of_local_nodes(args)

    args.initial_user_count = 3

    LOG.info("Starting nodes on {}".format(hosts))
    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, backups = network.find_nodes()

        command_args = get_command_args(args, get_command)

        if args.use_jwt:
            jwt_key_priv_pem, _ = infra.crypto.generate_rsa_keypair(2048)
            jwt_cert_pem = infra.crypto.generate_cert(jwt_key_priv_pem)
            jwt_kid = "my_key_id"
            jwt_issuer = "https://example.issuer"
            # Add JWT issuer
            with tempfile.NamedTemporaryFile(prefix="ccf", mode="w+") as metadata_fp:
                jwt_cert_der = infra.crypto.cert_pem_to_der(jwt_cert_pem)
                der_b64 = base64.b64encode(jwt_cert_der).decode("ascii")
                data = {
                    "issuer": jwt_issuer,
                    "jwks": {
                        "keys": [{"kty": "RSA", "kid": jwt_kid, "x5c": [der_b64]}]
                    },
                }
                json.dump(data, metadata_fp)
                metadata_fp.flush()
                network.consortium.set_jwt_issuer(primary, metadata_fp.name)

            jwt = infra.crypto.create_jwt({}, jwt_key_priv_pem, jwt_kid)
            command_args += ["--bearer-token", jwt]

        nodes_to_send_to = filter_nodes(primary, backups, args.send_tx_to)
        clients = []
        client_hosts = []
        if args.one_client_per_backup:
            if not backups:
                raise Exception(
                    "--one-client-per-backup was set but no backup was found"
                )
            client_hosts = ["localhost"] * len(backups)
        else:
            if args.client_nodes:
                client_hosts.extend(args.client_nodes)

        if args.num_localhost_clients:
            client_hosts.extend(["localhost"] * int(args.num_localhost_clients))

        if not client_hosts:
            client_hosts = ["localhost"]

        for client_id, client_host in enumerate(client_hosts):
            node = nodes_to_send_to[client_id % len(nodes_to_send_to)]
            remote_client = configure_remote_client(
                args, client_id, client_host, node, command_args
            )
            clients.append(remote_client)

        if args.network_only:
            for remote_client in clients:
                LOG.info(f"Client can be run with: {remote_client.remote.get_cmd()}")
            while True:
                time.sleep(60)
        else:
            for remote_client in clients:
                remote_client.start()

            hard_stop_timeout = 90

            try:
                with cimetrics.upload.metrics(complete=False) as metrics:
                    tx_rates = infra.rates.TxRates(primary)
                    start_time = time.time()
                    while True:
                        stop_waiting = True
                        for i, remote_client in enumerate(clients):
                            done = remote_client.check_done()
                            # all the clients need to be done
                            LOG.info(
                                f"Client {i} has {'completed' if done else 'not completed'} running ({time.time() - start_time:.2f}s / {hard_stop_timeout}s)"
                            )
                            stop_waiting = stop_waiting and done
                        if stop_waiting:
                            break
                        if time.time() > start_time + hard_stop_timeout:
                            raise TimeoutError(
                                f"Client still running after {hard_stop_timeout}s"
                            )

                        time.sleep(5)

                    tx_rates.get_metrics()

                    for remote_client in clients:
                        perf_result = remote_client.get_result()
                        LOG.success(f"{args.label}/{remote_client.name}: {perf_result}")

                        # TODO: Only results for first client are uploaded
                        # https://github.com/microsoft/CCF/issues/1046
                        if remote_client == clients[0]:
                            LOG.success(f"Uploading results for {remote_client.name}")
                            metrics.put(args.label, perf_result)
                        else:
                            LOG.warning(f"Skipping upload for {remote_client.name}")

                    primary, _ = network.find_primary()
                    with primary.client() as nc:
                        r = nc.get("/node/memory")
                        assert r.status_code == http.HTTPStatus.OK.value
                        results = r.body.json()
                        tx_rates.insert_metrics(**results)

                        # Construct name for heap metric, removing ^ suffix if present
                        heap_peak_metric = args.label
                        if heap_peak_metric.endswith("^"):
                            heap_peak_metric = heap_peak_metric[:-1]
                        heap_peak_metric += "_mem"

                        peak_value = results["peak_allocated_heap_size"]
                        metrics.put(heap_peak_metric, peak_value)

                    LOG.info(f"Rates:\n{tx_rates}")
                    tx_rates.save_results(args.metrics_file)

                    for remote_client in clients:
                        remote_client.stop()

            except Exception:
                LOG.error("Stopping clients due to exception")
                for remote_client in clients:
                    remote_client.stop()
                raise
def run(args):
    hosts = ["localhost"] * (4 if args.consensus == "pbft" else 1)

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, _ = network.find_nodes()

        primary_pid = primary.remote.remote.proc.pid
        num_fds = psutil.Process(primary_pid).num_fds()
        max_fds = num_fds + 150
        LOG.success(f"{primary_pid} has {num_fds} open file descriptors")

        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE, (max_fds, max_fds))
        LOG.success(f"set max fds to {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        clients = []

        with contextlib.ExitStack() as es:
            LOG.success(f"Creating {nb_conn} clients")
            for i in range(nb_conn):
                try:
                    clients.append(es.enter_context(primary.client("user0")))
                    LOG.info(f"Created client {i}")
                except OSError:
                    LOG.error(f"Failed to create client {i}")

            # Creating clients may not actually create connections/fds.
            # Send messages until we run out of fds
            for i, c in enumerate(clients):
                if psutil.Process(primary_pid).num_fds() >= max_fds:
                    LOG.warning(f"Reached fd limit at client {i}")
                    break
                LOG.info(f"Sending as client {i}")
                check(c.post("/app/log/private", {"id": 42, "msg": "foo"}), result=True)

            try:
                clients[-1].post("/app/log/private", {"id": 42, "msg": "foo"})
            except Exception:
                pass
            else:
                assert False, "Expected error due to fd limit"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")

            LOG.info("Disconnecting clients")
            clients = []

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")

        with contextlib.ExitStack() as es:
            to_create = max_fds - num_fds + 1
            LOG.success(f"Creating {to_create} clients")
            for i in range(to_create):
                clients.append(es.enter_context(primary.client("user0")))
                LOG.info(f"Created client {i}")

            for i, c in enumerate(clients):
                if psutil.Process(primary_pid).num_fds() >= max_fds:
                    LOG.warning(f"Reached fd limit at client {i}")
                    break
                LOG.info(f"Sending as client {i}")
                check(c.post("/app/log/private", {"id": 42, "msg": "foo"}), result=True)

            try:
                clients[-1].post("/app/log/private", {"id": 42, "msg": "foo"})
            except Exception:
                pass
            else:
                assert False, "Expected error due to fd limit"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")

            LOG.info("Disconnecting clients")
            clients = []

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
def run(args):
    # SNIPPET_START: parsing
    with open(args.scenario) as f:
        scenario = json.load(f)

    hosts = scenario.get("hosts", ["localhost", "localhost"])
    if args.consensus == "pbft":
        hosts = ["localhost"] * 3
    args.package = scenario["package"]
    # SNIPPET_END: parsing

    scenario_dir = os.path.dirname(args.scenario)

    # SNIPPET_START: create_network
    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes
    ) as network:
        network.start_and_join(args)
        # SNIPPET_END: create_network

        primary, backups = network.find_nodes()

        with primary.client() as mc:
            check = infra.checker.Checker()
            check_commit = infra.checker.Checker(mc)

            for connection in scenario["connections"]:
                with (
                    primary.client("user0")
                    if not connection.get("on_backup")
                    else random.choice(backups).client("user0")
                ) as client:
                    txs = connection.get("transactions", [])

                    for include_file in connection.get("include", []):
                        with open(os.path.join(scenario_dir, include_file)) as f:
                            txs += json.load(f)

                    for tx in txs:
                        r = client.call(
                            tx["method"],
                            body=tx["body"],
                            http_verb=tx.get("verb", "POST"),
                        )

                        if tx.get("expected_error") is not None:
                            check(
                                r,
                                error=lambda status, msg, transaction=tx: status
                                # pylint: disable=no-member
                                == http.HTTPStatus(
                                    transaction.get("expected_error")
                                ).value,
                            )
                        elif tx.get("expected_result") is not None:
                            check_commit(r, result=tx.get("expected_result"))
                        else:
                            check_commit(r, result=lambda res: res is not None)

                network.wait_for_node_commit_sync(args.consensus)

        if args.network_only:
            LOG.info("Keeping network alive with the following nodes:")
            LOG.info(" Primary = {}:{}".format(primary.pubhost, primary.rpc_port))
            for i, f in enumerate(backups):
                LOG.info(" Backup[{}] = {}:{}".format(i, f.pubhost, f.rpc_port))

            input("Press Enter to shutdown...")
def run(args, additional_attack_args):
    # Test that vegeta is available
    subprocess.run([VEGETA_BIN, "-version"], capture_output=True, check=True)

    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
    ) as network:
        network.start_and_join(args)

        primary, _ = network.find_primary()
        primary_hostname = f"{primary.pubhost}:{primary.pubport}"

        vegeta_targets = "vegeta_targets"
        with open(vegeta_targets, "w") as f:
            for i in range(10):
                TargetGenerator.write_vegeta_target_line(
                    f,
                    primary_hostname,
                    "/app/log/private",
                    body={"id": i, "msg": f"Private message: {i}"},
                )

            for i in range(10):
                TargetGenerator.write_vegeta_target_line(
                    f, primary_hostname, f"/app/log/private?id={i}", method="GET"
                )

            for i in range(10):
                TargetGenerator.write_vegeta_target_line(
                    f,
                    primary_hostname,
                    "/app/log/public",
                    body={"id": i, "msg": f"Public message: {i}"},
                )

            for i in range(10):
                TargetGenerator.write_vegeta_target_line(
                    f, primary_hostname, f"/app/log/public?id={i}", method="GET"
                )

        attack_cmd = [VEGETA_BIN, "attack"]
        attack_cmd += ["--targets", vegeta_targets]
        attack_cmd += ["--format", "json"]
        attack_cmd += ["--duration", "10s"]
        sa = primary.session_auth("user0")
        attack_cmd += ["--cert", sa["session_auth"].cert]
        attack_cmd += ["--key", sa["session_auth"].key]
        attack_cmd += ["--root-certs", sa["ca"]]
        attack_cmd += additional_attack_args

        attack_cmd_s = " ".join(attack_cmd)
        LOG.warning(f"Starting: {attack_cmd_s}")
        vegeta_run = subprocess.Popen(attack_cmd, stdout=subprocess.PIPE)

        tee_split = subprocess.Popen(
            ["tee", "vegeta_results.bin"],
            stdin=vegeta_run.stdout,
            stdout=subprocess.PIPE,
        )

        report_cmd = [VEGETA_BIN, "report", "--every", "5s"]
        vegeta_report = subprocess.Popen(report_cmd, stdin=tee_split.stdout)

        # Start a second thread which will print the primary's memory stats at regular intervals
        shutdown_event = threading.Event()
        memory_thread = threading.Thread(
            target=print_memory_stats, args=(primary, shutdown_event)
        )
        memory_thread.start()

        LOG.info("Waiting for completion...")
        vegeta_report.communicate()

        LOG.info("Shutting down...")
        shutdown_event.set()
        memory_thread.join()

        LOG.success("Done!")
def run(args):
    # Three nodes minimum to make sure that the raft network can still make progress
    # if one node stops
    hosts = ["localhost"] * (4 if args.consensus == "pbft" else 3)

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        check = infra.checker.Checker()

        network.start_and_join(args)
        current_view = None

        # Number of nodes F to stop until network cannot make progress
        nodes_to_stop = math.ceil(len(hosts) / 2)
        if args.consensus == "pbft":
            nodes_to_stop = math.ceil(len(hosts) / 3)

        for _ in range(nodes_to_stop):
            # Note that for the first iteration, the primary is known in advance anyway
            LOG.debug("Find freshly elected primary")
            # After a view change in pbft, finding the new primary takes longer
            primary, current_view = network.find_primary(
                timeout=(30 if args.consensus == "pbft" else 3)
            )

            LOG.debug(
                "Commit new transactions, primary:{}, current_view:{}".format(
                    primary.node_id, current_view
                )
            )
            with primary.client("user0") as c:
                res = c.post(
                    "/app/log/private",
                    {
                        "id": current_view,
                        "msg": "This log is committed in view {}".format(current_view),
                    },
                )
                check(res, result=True)
                seqno = res.seqno

            LOG.debug("Waiting for transaction to be committed by all nodes")
            wait_for_seqno_to_commit(seqno, current_view, network.get_joined_nodes())

            test_kill_primary(network, args, find_new_primary=False)

        # More than F nodes have been stopped, trying to commit any message
        LOG.debug(
            "No progress can be made as more than {} nodes have stopped".format(
                nodes_to_stop
            )
        )
        try:
            primary, _ = network.find_primary()
            assert False, "Primary should not be found"
        except infra.network.PrimaryNotFound:
            pass

        LOG.success(
            f"As expected, primary could not be found after election duration ({network.election_duration}s)."
        )
        LOG.success("Test ended successfully.")
def run(args):
    os.makedirs(args.schema_dir, exist_ok=True)

    changed_files = []
    old_schema = set(
        os.path.join(dir_path, filename)
        for dir_path, _, filenames in os.walk(args.schema_dir)
        for filename in filenames
    )

    documents_valid = True
    all_methods = []

    def fetch_schema(client, prefix):
        api_response = client.get(f"/{prefix}/api")
        check(
            api_response,
            error=lambda status, msg: status == http.HTTPStatus.OK.value,
        )

        response_body = api_response.body.json()
        paths = response_body["paths"]
        all_methods.extend(paths.keys())

        formatted_schema = json.dumps(response_body, indent=2)
        openapi_target_file = os.path.join(args.schema_dir, f"{prefix}_openapi.json")

        try:
            old_schema.remove(openapi_target_file)
        except KeyError:
            pass

        with open(openapi_target_file, "a+") as f:
            f.seek(0)
            previous = f.read()
            if previous != formatted_schema:
                LOG.debug("Writing schema to {}".format(openapi_target_file))
                f.truncate(0)
                f.seek(0)
                f.write(formatted_schema)
                changed_files.append(openapi_target_file)
            else:
                LOG.debug("Schema matches in {}".format(openapi_target_file))

        try:
            openapi_spec_validator.validate_spec(response_body)
        except Exception as e:
            LOG.error(f"Validation of {prefix} schema failed")
            LOG.error(e)
            return False

        return True

    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes
    ) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        check = infra.checker.Checker()

        with primary.client("user0") as user_client:
            LOG.info("user frontend")
            if not fetch_schema(user_client, "app"):
                documents_valid = False

        with primary.client() as node_client:
            LOG.info("node frontend")
            if not fetch_schema(node_client, "node"):
                documents_valid = False

        with primary.client("member0") as member_client:
            LOG.info("member frontend")
            if not fetch_schema(member_client, "gov"):
                documents_valid = False

    made_changes = False

    if len(old_schema) > 0:
        LOG.error("Removing old files which are no longer reported by the service:")
        for f in old_schema:
            LOG.error(" " + f)
            os.remove(f)
            f_dir = os.path.dirname(f)
            # Remove empty directories too
            while not os.listdir(f_dir):
                os.rmdir(f_dir)
                f_dir = os.path.dirname(f_dir)
        made_changes = True

    if len(changed_files) > 0:
        LOG.error("Made changes to the following schema files:")
        for f in changed_files:
            LOG.error(" " + f)
        made_changes = True

    if args.list_all:
        LOG.info("Discovered methods:")
        for method in sorted(set(all_methods)):
            LOG.info(f" {method}")

    if made_changes or not documents_valid:
        sys.exit(1)
def run(args):
    hosts = args.node or DEFAULT_NODES

    if not args.verbose:
        LOG.remove()
        LOG.add(
            sys.stdout,
            format="<green>[{time:HH:mm:ss.SSS}]</green> {message}",
        )
        LOG.disable("infra")
        LOG.disable("ccf")

    LOG.info(f"Starting {len(hosts)} CCF node{'s' if len(hosts) > 1 else ''}...")
    if args.enclave_type == "virtual":
        LOG.warning("Virtual mode enabled")

    with infra.network.network(
        hosts=hosts,
        binary_directory=args.binary_dir,
        library_directory=args.library_dir,
        dbg_nodes=args.debug_nodes,
    ) as network:
        if args.recover:
            args.label = args.label + "_recover"
            LOG.info("Recovering network from:")
            LOG.info(f" - Common directory: {args.common_dir}")
            LOG.info(f" - Ledger: {args.ledger_dir}")
            if args.snapshot_dir:
                LOG.info(f" - Snapshots: {args.snapshot_dir}")
            else:
                LOG.warning(
                    "No available snapshot to recover from. Entire transaction history will be replayed."
                )
            network.start_in_recovery(
                args,
                args.ledger_dir,
                snapshot_dir=args.snapshot_dir,
                common_dir=args.common_dir,
            )
            network.recover(args)
        else:
            network.start_and_join(args)

        primary, backups = network.find_nodes()
        max_len = len(str(len(backups)))

        # To be sure, confirm that the app frontend is open on each node
        for node in [primary, *backups]:
            with node.client("user0") as c:
                if args.verbose:
                    r = c.get("/app/commit")
                else:
                    r = c.get("/app/commit", log_capture=[])
                assert r.status_code == http.HTTPStatus.OK, r.status_code

        def pad_node_id(nid):
            return (f"{{:{max_len}d}}").format(nid)

        LOG.info("Started CCF network with the following nodes:")
        LOG.info(
            " Node [{}] = https://{}:{}".format(
                pad_node_id(primary.node_id), primary.pubhost, primary.rpc_port
            )
        )

        for b in backups:
            LOG.info(
                " Node [{}] = https://{}:{}".format(
                    pad_node_id(b.node_id), b.pubhost, b.rpc_port
                )
            )

        LOG.info(
            f"You can now issue business transactions to the {args.package} application."
        )
        LOG.info(
            f"Keys and certificates have been copied to the common folder: {network.common_dir}"
        )
        LOG.info(
            "See https://microsoft.github.io/CCF/master/users/issue_commands.html for more information."
        )
        LOG.warning("Press Ctrl+C to shutdown the network.")

        try:
            while True:
                time.sleep(60)
        except KeyboardInterrupt:
            LOG.info("Stopping all CCF nodes...")

    LOG.info("All CCF nodes stopped.")
def run(args):
    chosen_suite = []

    if not args.test_suite:
        args.test_suite = ["all"]

    for choice in args.test_suite:
        try:
            chosen_suite.extend(s.suites[choice])
        except KeyError as e:
            raise ValueError(f"Unhandled choice: {choice}") from e

    seed = None
    if os.getenv("SHUFFLE_SUITE"):
        seed = os.getenv("SHUFFLE_SUITE_SEED")
        if seed is None:
            seed = time.time()
        seed = int(seed)
        LOG.success(f"Shuffling full suite with seed {seed}")
        random.seed(seed)
        random.shuffle(chosen_suite)
    s.validate_tests_signature(chosen_suite)

    if args.enforce_reqs is False:
        LOG.warning("Test requirements will be ignored")

    txs = app.LoggingTxs()
    network = infra.network.Network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, txs=txs
    )
    network.start_and_join(args)

    LOG.info(f"Running {len(chosen_suite)} tests for {args.test_duration} seconds")

    run_tests = {}
    success = True
    elapsed = args.test_duration

    if args.filter is not None:
        filter_re = re.compile(args.filter)

        def filter_fun(x):
            return filter_re is None or filter_re.match(x[1].__name__)

        tests_to_run = filter(filter_fun, enumerate(chosen_suite))
    else:
        tests_to_run = enumerate(chosen_suite)

    for i, test in tests_to_run:
        status = None
        reason = None

        if elapsed <= 0:
            LOG.warning(f"Test duration time ({args.test_duration} seconds) is up!")
            break

        try:
            LOG.debug(f"Running {s.test_name(test)}...")
            test_time_before = time.time()

            # Actually run the test
            new_network = test(network, args)
            status = TestStatus.success

        except reqs.TestRequirementsNotMet as ce:
            LOG.warning(f"Test requirements for {s.test_name(test)} not met")
            status = TestStatus.skipped
            reason = str(ce)
            new_network = network

        except Exception:
            LOG.exception(f"Test {s.test_name(test)} failed")
            status = TestStatus.failure
            new_network = network

        test_elapsed = time.time() - test_time_before

        # Construct test report
        run_tests[i] = {
            "name": s.test_name(test),
            "status": status.name,
            "elapsed (s)": round(test_elapsed, 2),
            "memory": mem_stats(new_network),
        }

        if reason is not None:
            run_tests[i]["reason"] = reason

        # If the test function did not return a network, it is not possible to continue
        if new_network is None:
            raise ValueError(f"Network returned by {s.test_name(test)} is None")

        # If the network was changed (e.g. recovery test), use the new network from now on
        if new_network != network:
            network = new_network

        LOG.debug(f"Test {s.test_name(test)} took {test_elapsed:.2f} secs")

        # For now, if a test fails, the entire test suite is stopped
        if status is TestStatus.failure:
            success = False
            break

        elapsed -= test_elapsed

    network.stop_all_nodes()

    if success:
        LOG.success(f"Full suite passed. Ran {len(run_tests)}/{len(chosen_suite)}")
    else:
        LOG.error(f"Suite failed. Ran {len(run_tests)}/{len(chosen_suite)}")

    if seed:
        LOG.info(f"Full suite was shuffled with seed: {seed}")

    for idx, test in run_tests.items():
        status = test["status"]
        if status == TestStatus.success.name:
            log_fn = LOG.success
        elif status == TestStatus.skipped.name:
            log_fn = LOG.warning
        else:
            log_fn = LOG.error
        log_fn(f"Test #{idx}:\n{json.dumps(test, indent=4)}")

    if not success:
        sys.exit(1)
def run(args):
    # Keep track of how many propose, vote and withdraw are issued in this test
    proposals_issued = 0
    votes_issued = 0
    withdrawals_issued = 0

    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        ledger_directory = network.find_primary()[0].remote.ledger_path()
        ledger = ccf.ledger.Ledger(ledger_directory)
        (
            original_proposals,
            original_votes,
            original_withdrawals,
        ) = count_governance_operations(ledger)

        LOG.info("Add new member proposal (implicit vote)")
        (
            new_member_proposal,
            _,
            careful_vote,
        ) = network.consortium.generate_and_propose_new_member(
            primary, curve=infra.network.ParticipantsCurve.secp256k1
        )
        proposals_issued += 1

        LOG.info("2/3 members accept the proposal")
        p = network.consortium.vote_using_majority(
            primary, new_member_proposal, careful_vote
        )
        votes_issued += p.votes_for
        assert new_member_proposal.state == infra.proposal.ProposalState.Accepted

        LOG.info("Create new proposal but withdraw it before it is accepted")
        new_member_proposal, _, _ = network.consortium.generate_and_propose_new_member(
            primary, curve=infra.network.ParticipantsCurve.secp256k1
        )
        proposals_issued += 1

        with primary.client() as c:
            response = network.consortium.get_member_by_id(
                new_member_proposal.proposer_id
            ).withdraw(primary, new_member_proposal)
            infra.checker.Checker(c)(response)
            assert response.status_code == http.HTTPStatus.OK.value
            assert response.body.json()["state"] == ProposalState.Withdrawn.value
        withdrawals_issued += 1

        # Refresh ledger to beginning
        ledger = ccf.ledger.Ledger(ledger_directory)

        (
            final_proposals,
            final_votes,
            final_withdrawals,
        ) = count_governance_operations(ledger)

        assert (
            final_proposals == original_proposals + proposals_issued
        ), f"Unexpected number of propose operations recorded in the ledger (expected {original_proposals + proposals_issued}, found {final_proposals})"
        assert (
            final_votes == original_votes + votes_issued
        ), f"Unexpected number of vote operations recorded in the ledger (expected {original_votes + votes_issued}, found {final_votes})"
        assert (
            final_withdrawals == original_withdrawals + withdrawals_issued
        ), f"Unexpected number of withdraw operations recorded in the ledger (expected {original_withdrawals + withdrawals_issued}, found {final_withdrawals})"