Example #1
def run(args):
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

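        # Run the client tutorial script against the running network, passing
        # the network's common directory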
        cmd = [
            "python",
            args.client_tutorial,
            network.common_dir,
        ]
        rc = infra.proc.ccall(*cmd).returncode
        assert rc == 0, f"Failed to run tutorial script: {rc}"

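    # Once the network has shut down, run the ledger tutorial against the
    # primary's ledger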
    cmd = [
        "python",
        args.ledger_tutorial,
        primary.get_ledger()[1],
    ]
    rc = infra.proc.ccall(*cmd).returncode
    assert rc == 0, f"Failed to run tutorial script: {rc}"
Example #2
def run(args):
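    # Use a short refresh interval so the JWT key auto-refresh tests complete quickly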
    args.jwt_key_refresh_interval_s = 1

    with infra.network.network(args.nodes,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
        network = test_jwt_without_key_policy(network, args)
        network = test_jwt_with_sgx_key_policy(network, args)
        network = test_jwt_with_sgx_key_filter(network, args)
        network = test_jwt_key_auto_refresh(network, args)

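    # Use a very long refresh interval so that only the initial key refresh is exercised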
    args.jwt_key_refresh_interval_s = 100000
    with infra.network.network(args.nodes,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
        network = test_jwt_key_initial_refresh(network, args)
Example #3
def run_to_destruction(args):
    hosts = ["localhost", "localhost", "localhost"]

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

        LOG.warning("About to issue transactions until destruction")
        try:
            wsm = 5000
            while True:
                LOG.info(f"Trying with writes scaled by {wsm}")
                network = test(network, args, batch_size=10, write_size_multiplier=wsm)
                wsm += (
                    50000  # Grow very quickly, expect to fail on the second iteration
                )
        except Exception as e:
            timeout = 10

            LOG.info("Large write set caused an exception, as expected")
            LOG.info(f"Exception was: {e}")
            LOG.info(f"Polling for {timeout}s for node to terminate")

            end_time = time.time() + timeout
            while time.time() < end_time:
                time.sleep(0.1)
                exit_code = network.nodes[0].remote.remote.proc.poll()
                if exit_code is not None:
                    LOG.info(f"Node terminated with exit code {exit_code}")
                    assert exit_code != 0
                    break

            if time.time() > end_time:
                raise TimeoutError(
                    f"Node took longer than {timeout}s to terminate"
                ) from e

            network.ignore_errors_on_shutdown()
Example #4
def run(args):
    hosts = ["localhost", "localhost", "localhost"]

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)

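        # Exercise increasing batch sizes, then vary write key and value sizes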
        network = test(network, args, batch_size=1)
        network = test(network, args, batch_size=10)
        network = test(network, args, batch_size=100)
        network = test(network, args, batch_size=1000)

        network = test(network, args, batch_size=1000, write_key_divisor=10)
        network = test(network, args, batch_size=1000, write_size_multiplier=10)
        network = test(
            network,
            args,
            batch_size=1000,
            write_key_divisor=10,
            write_size_multiplier=10,
        )
Example #5
def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_add_node_from_backup(network, args)
        test_add_node(network, args)
        test_add_node_untrusted_code(network, args)
        test_retire_backup(network, args)
        test_add_as_many_pending_nodes(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        if args.snapshot_tx_interval is not None:
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=True)
Example #6
def run(args):
    hosts = ["localhost", "localhost", "localhost"]

    txs = app.LoggingTxs()

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb,
                               txs=txs) as network:
        network.start_and_join(args)

        for i in range(args.recovery):
            # Alternate between recovery with primary change and stable primary-ship
            if i % 2 == 0:
                recovered_network = test_share_resilience(network, args)
            else:
                recovered_network = test(network, args)
            network.stop_all_nodes()
            network = recovered_network
            LOG.success("Recovery complete on all nodes")
Example #7
def run(args):
    txs = app.LoggingTxs("user0")
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_version(network, args)

        if args.consensus != "bft":
            test_join_straddling_primary_replacement(network, args)
            test_node_replacement(network, args)
            test_add_node_from_backup(network, args)
            test_add_node(network, args)
            test_add_node_on_other_curve(network, args)
            test_retire_backup(network, args)
            test_add_as_many_pending_nodes(network, args)
            test_add_node(network, args)
            test_retire_primary(network, args)
            test_add_node_with_read_only_ledger(network, args)

            test_add_node_from_snapshot(network, args)
            test_add_node_from_snapshot(network, args, from_backup=True)
            test_add_node_from_snapshot(network, args, copy_ledger_read_only=False)

            test_node_filter(network, args)
            test_retiring_nodes_emit_at_most_one_signature(network, args)

        if args.reconfiguration_type == "2tx":
            test_learner_catches_up(network, args)

        test_node_certificates_validity_period(network, args)
        test_add_node_invalid_validity_period(network, args)
Example #8
def run(args):
    with infra.network.network(
            args.nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
    ) as network:
        network.start_and_join(args)

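        # Issue and verify a few transactions before and after running the test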
        txs = app.LoggingTxs()
        txs.issue(
            network=network,
            number_txs=3,
        )
        txs.verify()

        network = test(network, args)

        txs.issue(
            network=network,
            number_txs=3,
        )
        txs.verify()
Example #9
def service_startups(args):
    LOG.info("Starting service with insufficient number of recovery members")
    args.initial_member_count = 2
    args.initial_recovery_member_count = 0
    args.initial_operator_count = 1
    with infra.network.network(args.nodes, args.binary_dir,
                               pdb=args.pdb) as network:
        try:
            network.start_and_join(args)
            assert False, "Service cannot be opened with no recovery members"
        except AssertionError:
            primary, _ = network.find_primary()
            network.consortium.check_for_service(
                primary, infra.network.ServiceStatus.OPENING)
            LOG.success(
                "Service could not be opened with insufficient number of recovery mmebers"
            )

    LOG.info(
        "Starting service with a recovery operator member, a non-recovery operator member and a non-recovery non-operator member"
    )
    args.initial_member_count = 3
    args.initial_recovery_member_count = 1
    args.initial_operator_count = 2
    with infra.network.network(args.nodes, args.binary_dir,
                               pdb=args.pdb) as network:
        network.start_and_join(args)

    LOG.info(
        "Starting service with a recovery operator member, a recovery non-operator member and a non-recovery non-operator member"
    )
    args.initial_member_count = 3
    args.initial_recovery_member_count = 2
    args.initial_operator_count = 1
    with infra.network.network(args.nodes, args.binary_dir,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
Example #10
def service_startups(args):
    LOG.info("Starting service with insufficient number of recovery members")
    args.initial_member_count = 2
    args.initial_recovery_member_count = 0
    args.initial_operator_count = 1
    with infra.network.network(args.nodes, args.binary_dir,
                               pdb=args.pdb) as network:
        try:
            network.start_and_join(args)
            assert False, "Service cannot be opened with no recovery members"
        except infra.proposal.ProposalNotAccepted as e:
            assert (e.proposal.state == infra.proposal.ProposalState.OPEN
                    ), e.proposal.state
            LOG.success(
                "Service could not be opened with insufficient number of recovery mmebers"
            )

    LOG.info(
        "Starting service with a recovery operator member, a non-recovery operator member and a non-recovery non-operator member"
    )
    args.initial_member_count = 3
    args.initial_recovery_member_count = 1
    args.initial_operator_count = 2
    with infra.network.network(args.nodes, args.binary_dir,
                               pdb=args.pdb) as network:
        network.start_and_join(args)

    LOG.info(
        "Starting service with a recovery operator member, a recovery non-operator member and a non-recovery non-operator member"
    )
    args.initial_member_count = 3
    args.initial_recovery_member_count = 2
    args.initial_operator_count = 1
    with infra.network.network(args.nodes, args.binary_dir,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
Example #11
def recovery_shares_scenario(args):
    # Members 0 and 1 are recovery members, member 2 isn't
    args.initial_member_count = 3
    args.initial_recovery_member_count = 2
    non_recovery_member_id = "member2"

    # Recovery threshold is initially set to number of recovery members (2)
    with infra.network.network(args.nodes,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)

        # Membership changes trigger re-sharing and re-keying and are
        # only supported with CFT
        if args.consensus != "cft":
            LOG.warning(
                "Skipping test recovery threshold as consensus is not CFT")
            return

        LOG.info("Update recovery shares")
        assert_recovery_shares_update(True, test_update_recovery_shares,
                                      network, args)

        LOG.info("Non-recovery member does not have a recovery share")
        primary, _ = network.find_primary()
        with primary.client(non_recovery_member_id) as mc:
            r = mc.get("/gov/recovery_share")
            assert r.status_code == http.HTTPStatus.NOT_FOUND.value
            assert (
                f"Recovery share not found for member {network.consortium.get_member_by_local_id(non_recovery_member_id).service_id}"
                in r.body.json()["error"]["message"])

        # Removing a recovery member is not possible as the number of recovery
        # members would fall below the recovery threshold (2)
        LOG.info("Removing a recovery member should not be possible")
        try:
            test_remove_member(network, args, recovery_member=True)
            assert False, "Removing a recovery member should not be possible"
        except infra.proposal.ProposalNotAccepted as e:
            assert e.proposal.state == infra.proposal.ProposalState.FAILED

        # However, removing a non-recovery member is allowed
        LOG.info("Removing a non-recovery member is still possible")
        member_to_remove = network.consortium.get_member_by_local_id(
            non_recovery_member_id)
        test_remove_member(network, args, member_to_remove=member_to_remove)

        LOG.info("Removing an already-removed member succeeds with no effect")
        test_remove_member(network, args, member_to_remove=member_to_remove)

        LOG.info("Adding one non-recovery member")
        assert_recovery_shares_update(False,
                                      test_add_member,
                                      network,
                                      args,
                                      recovery_member=False)
        LOG.info("Adding one recovery member")
        assert_recovery_shares_update(True,
                                      test_add_member,
                                      network,
                                      args,
                                      recovery_member=True)
        LOG.info("Removing one non-recovery member")
        assert_recovery_shares_update(False,
                                      test_remove_member,
                                      network,
                                      args,
                                      recovery_member=False)
        LOG.info("Removing one recovery member")
        assert_recovery_shares_update(True,
                                      test_remove_member,
                                      network,
                                      args,
                                      recovery_member=True)

        LOG.info("Reduce recovery threshold")
        assert_recovery_shares_update(
            True,
            test_set_recovery_threshold,
            network,
            args,
            recovery_threshold=network.consortium.recovery_threshold - 1,
        )

        # Removing a recovery member now succeeds
        LOG.info("Removing one recovery member")
        assert_recovery_shares_update(True,
                                      test_remove_member,
                                      network,
                                      args,
                                      recovery_member=True)

        LOG.info("Set recovery threshold to 0 is impossible")
        exception = infra.proposal.ProposalNotCreated
        try:
            test_set_recovery_threshold(network, args, recovery_threshold=0)
            assert False, "Setting recovery threshold to 0 should not be possible"
        except exception as e:
            assert (e.response.status_code == 400
                    and e.response.body.json()["error"]["code"]
                    == "ProposalFailedToValidate"), e.response.body.text()

        LOG.info(
            "Set recovery threshold to more that number of active recovery members is impossible"
        )
        try:
            test_set_recovery_threshold(
                network,
                args,
                recovery_threshold=len(
                    network.consortium.get_active_recovery_members()) + 1,
            )
            assert (
                False
            ), "Setting recovery threshold to more than number of active recovery members should not be possible"
        except infra.proposal.ProposalNotAccepted as e:
            assert e.proposal.state == infra.proposal.ProposalState.FAILED

        try:
            test_set_recovery_threshold(network, args, recovery_threshold=256)
            assert False, "Recovery threshold cannot be set to > 255"
        except exception as e:
            assert (e.response.status_code == 400
                    and e.response.body.json()["error"]["code"]
                    == "ProposalFailedToValidate"), e.response.body.text()

        try:
            network.consortium.set_recovery_threshold(primary,
                                                      recovery_threshold=None)
            assert False, "Recovery threshold value must be passed as proposal argument"
        except exception as e:
            assert (e.response.status_code == 400
                    and e.response.body.json()["error"]["code"]
                    == "ProposalFailedToValidate"), e.response.body.text()

        LOG.info(
            "Setting recovery threshold to current threshold does not update shares"
        )
        assert_recovery_shares_update(
            False,
            test_set_recovery_threshold,
            network,
            args,
            recovery_threshold=network.consortium.recovery_threshold,
        )
Example #12
def run(args):
    hosts = args.node or ["localhost"] * 3

    if not args.verbose:
        LOG.remove()
        LOG.add(
            sys.stdout,
            format="<green>[{time:YYYY-MM-DD HH:mm:ss.SSS}]</green> {message}",
        )
        LOG.disable("infra")
        LOG.disable("ccf")

    LOG.info(f"Starting {len(hosts)} CCF nodes...")
    if args.enclave_type == "virtual":
        LOG.warning("Virtual mode enabled")

    with infra.network.network(hosts=hosts,
                               binary_directory=args.binary_dir,
                               dbg_nodes=args.debug_nodes) as network:
        if args.recover:
            args.label = args.label + "_recover"
            LOG.info("Recovering network from:")
            LOG.info(f" - Ledger: {args.ledger_dir}")
            LOG.info(
                f" - Defunct network public encryption key: {args.network_enc_pubk}"
            )
            LOG.info(f" - Common directory: {args.common_dir}")
            network.start_in_recovery(args, args.ledger_dir, args.common_dir)
            network.recover(args, args.network_enc_pubk)
        else:
            network.start_and_join(args)

        primary, backups = network.find_nodes()
        LOG.info("Started CCF network with the following nodes:")
        LOG.info("  Node [{:2d}] = {}:{}".format(primary.node_id,
                                                 primary.pubhost,
                                                 primary.rpc_port))
        for b in backups:
            LOG.info("  Node [{:2d}] = {}:{}".format(b.node_id, b.pubhost,
                                                     b.rpc_port))

        # Dump primary info to file for tutorial testing
        if args.network_info_file is not None:
            dump_network_info(args.network_info_file, network, primary)

        LOG.info(
            f"You can now issue business transactions to the {args.package} application."
        )
        LOG.info(
            f"Keys and certificates have been copied to the common folder: {network.common_dir}"
        )
        LOG.info(
            "See https://microsoft.github.io/CCF/users/issue_commands.html for more information."
        )
        LOG.warning("Press Ctrl+C to shutdown the network.")

        try:
            while True:
                time.sleep(60)

        except KeyboardInterrupt:
            LOG.info("Stopping all CCF nodes...")

    LOG.info("All CCF nodes stopped.")
Example #13
def run(get_command, args):
    if args.fixed_seed:
        seed(getpass.getuser())

    hosts = args.nodes
    if not hosts:
        hosts = ["local://localhost"] * minimum_number_of_local_nodes(args)

    args.initial_user_count = 3
    args.sig_ms_interval = 1000  # Set to cchost default value

    LOG.info("Starting nodes on {}".format(hosts))

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
        primary, backups = network.find_nodes()

        command_args = get_command_args(args, get_command)

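        # Optionally register a JWT issuer and pass a bearer token to the client command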
        if args.use_jwt:
            jwt_issuer = infra.jwt_issuer.JwtIssuer("https://example.issuer")
            jwt_issuer.register(network)
            jwt = jwt_issuer.issue_jwt()
            command_args += ["--bearer-token", jwt]

        nodes_to_send_to = filter_nodes(primary, backups, args.send_tx_to)
        clients = []
        client_hosts = []
        if args.one_client_per_backup:
            if not backups:
                raise Exception(
                    "--one-client-per-backup was set but no backup was found")
            client_hosts = ["localhost"] * len(backups)
        else:
            if args.client_nodes:
                client_hosts.extend(args.client_nodes)

        if args.num_localhost_clients:
            client_hosts.extend(["localhost"] *
                                int(args.num_localhost_clients))

        if not client_hosts:
            client_hosts = ["localhost"]

        for client_id, client_host in enumerate(client_hosts):
            node = nodes_to_send_to[client_id % len(nodes_to_send_to)]
            remote_client = configure_remote_client(args, client_id,
                                                    client_host, node,
                                                    command_args)
            clients.append(remote_client)

        if args.network_only:
            for remote_client in clients:
                LOG.info(
                    f"Client can be run with: {remote_client.remote.get_cmd()}"
                )
            while True:
                time.sleep(60)
        else:
            for remote_client in clients:
                remote_client.start()

            hard_stop_timeout = 90

            try:
                with cimetrics.upload.metrics(complete=False) as metrics:
                    tx_rates = infra.rates.TxRates(primary)
                    start_time = time.time()
                    while True:
                        stop_waiting = True
                        for i, remote_client in enumerate(clients):
                            done = remote_client.check_done()
                            # all the clients need to be done
                            LOG.info(
                                f"Client {i} has {'completed' if done else 'not completed'} running ({time.time() - start_time:.2f}s / {hard_stop_timeout}s)"
                            )
                            stop_waiting = stop_waiting and done
                        if stop_waiting:
                            break
                        if time.time() > start_time + hard_stop_timeout:
                            raise TimeoutError(
                                f"Client still running after {hard_stop_timeout}s"
                            )

                        time.sleep(5)

                    tx_rates.get_metrics()

                    for remote_client in clients:
                        perf_result = remote_client.get_result()
                        LOG.success(
                            f"{args.label}/{remote_client.name}: {perf_result}"
                        )

                        # TODO: Only results for first client are uploaded
                        # https://github.com/microsoft/CCF/issues/1046
                        if remote_client == clients[0]:
                            LOG.success(
                                f"Uploading results for {remote_client.name}")
                            metrics.put(args.label, perf_result)
                        else:
                            LOG.warning(
                                f"Skipping upload for {remote_client.name}")

                    primary, _ = network.find_primary()
                    with primary.client() as nc:
                        r = nc.get("/node/memory")
                        assert r.status_code == http.HTTPStatus.OK.value

                        results = r.body.json()
                        tx_rates.insert_metrics(**results)

                        # Construct name for heap metric, removing ^ suffix if present
                        heap_peak_metric = args.label
                        if heap_peak_metric.endswith("^"):
                            heap_peak_metric = heap_peak_metric[:-1]
                        heap_peak_metric += "_mem"

                        peak_value = results["peak_allocated_heap_size"]
                        metrics.put(heap_peak_metric, peak_value)

                    LOG.info(f"Rates:\n{tx_rates}")
                    tx_rates.save_results(args.metrics_file)

                    for remote_client in clients:
                        remote_client.stop()

            except Exception:
                LOG.error("Stopping clients due to exception")
                for remote_client in clients:
                    remote_client.stop()
                raise
Example #14
def run(args):
    hosts = ["localhost", "localhost"]

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
        primary, _ = network.find_nodes()

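        # Compute the code id of the package currently running on the network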
        first_code_id = get_code_id(
            infra.path.build_lib_path(args.package, args.enclave_type))

        with primary.client() as uc:
            r = uc.get("/node/code")
            assert r.body.json() == {
                "versions": [{
                    "digest": first_code_id,
                    "status": "ACCEPTED"
                }],
            }, r.body

        LOG.info("Adding a new node")
        new_node = network.create_and_trust_node(args.package, "localhost",
                                                 args)
        assert new_node

        new_code_id = get_code_id(
            infra.path.build_lib_path(args.patched_file_name,
                                      args.enclave_type))

        LOG.info(f"Adding a node with unsupported code id {new_code_id}")
        code_not_found_exception = None
        try:
            network.create_and_add_pending_node(args.patched_file_name,
                                                "localhost",
                                                args,
                                                timeout=3)
        except infra.network.CodeIdNotFound as err:
            code_not_found_exception = err

        assert (
            code_not_found_exception is not None
        ), f"Adding a node with unsupported code id {new_code_id} should fail"

        # Slow quote verification means that any attempt to add a node may cause an election, so confirm primary after adding node
        primary, _ = network.find_primary()

        network.consortium.add_new_code(primary, new_code_id)

        with primary.client() as uc:
            r = uc.get("/node/code")
            versions = sorted(r.body.json()["versions"],
                              key=lambda x: x["digest"])
            expected = sorted(
                [
                    {
                        "digest": first_code_id,
                        "status": "ACCEPTED"
                    },
                    {
                        "digest": new_code_id,
                        "status": "ACCEPTED"
                    },
                ],
                key=lambda x: x["digest"],
            )
            assert versions == expected, versions

        new_nodes = set()
        old_nodes_count = len(network.nodes)
        new_nodes_count = old_nodes_count + 1

        LOG.info(
            f"Adding more new nodes ({new_nodes_count}) than originally existed ({old_nodes_count})"
        )
        for _ in range(0, new_nodes_count):
            new_node = network.create_and_trust_node(args.patched_file_name,
                                                     "localhost", args)
            assert new_node
            new_nodes.add(new_node)

        LOG.info("Stopping all original nodes")
        old_nodes = set(network.nodes).difference(new_nodes)
        for node in old_nodes:
            LOG.debug(f"Stopping old node {node.node_id}")
            node.stop()

        new_primary, _ = network.wait_for_new_primary(primary.node_id)
        LOG.info(f"New_primary is {new_primary.node_id}")

        LOG.info("Adding another node to the network")
        new_node = network.create_and_trust_node(args.patched_file_name,
                                                 "localhost", args)
        assert new_node
        network.wait_for_node_commit_sync(args.consensus)

        LOG.info("Remove first code id")
        network.consortium.retire_code(new_node, first_code_id)

        with new_node.client() as uc:
            r = uc.get("/node/code")
            versions = sorted(r.body.json()["versions"],
                              key=lambda x: x["digest"])
            expected = sorted(
                [
                    {
                        "digest": first_code_id,
                        "status": "RETIRED"
                    },
                    {
                        "digest": new_code_id,
                        "status": "ACCEPTED"
                    },
                ],
                key=lambda x: x["digest"],
            )
            assert versions == expected, versions

        LOG.info(f"Adding a node with retired code id {first_code_id}")
        code_not_found_exception = None
        try:
            network.create_and_add_pending_node(args.package,
                                                "localhost",
                                                args,
                                                timeout=3)
        except infra.network.CodeIdRetired as err:
            code_not_found_exception = err

        assert (
            code_not_found_exception is not None
        ), f"Adding a node with unsupported code id {new_code_id} should fail"

        LOG.info("Adding another node with the new code to the network")
        new_node = network.create_and_trust_node(args.patched_file_name,
                                                 "localhost", args)
        assert new_node
        network.wait_for_node_commit_sync(args.consensus)
Example #15
def run(args):
    # This is deliberately 5, because the rest of the test depends on this
    # to grow a prefix and allow just enough nodes to resume to reach the
    # desired election result. Conversion to a general f isn't trivial.
    hosts = ["local://localhost"] * 5

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
        primary, backups = network.find_nodes()

        # Suspend three of the backups to prevent commit
        backups[1].suspend()
        backups[2].suspend()
        backups[3].stop()

        committable_txs = []
        # Run some transactions that can't be committed now
        with primary.client("user0") as uc:
            for i in range(3):
                committable_txs.append(
                    uc.post("/app/log/private", {
                        "id": 100 + i,
                        "msg": "Hello world"
                    }))

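        # Wait for the entry following the last transaction (expected to be a
        # signature) to become pending on the remaining backup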
        last_tx = committable_txs[-1]
        sig_view, sig_seqno = last_tx.view, last_tx.seqno + 1
        with backups[0].client() as bc:
            wait_for_pending(bc, sig_view, sig_seqno)

        # Suspend the final backup and run some transactions which only the partitioned
        # primary hears, which should be discarded by the new primary
        # NB: We can't guarantee that these will be discarded. Since we can't control
        # what order the queued actions occur in after resuming, they may be appended
        # before an election is called. The key assertion is that this primary is able
        # to rejoin the network whatever happens, even when (in the usual case) they
        # hold a suffix which has been discarded.
        backups[0].suspend()
        post_partition_txs = []
        with primary.client("user0") as uc:
            for i in range(3):
                post_partition_txs.append(
                    uc.post("/app/log/private", {
                        "id": 100 + i,
                        "msg": "Hello world"
                    }))

        # Sleep long enough that this primary should be instantly replaced when nodes wake
        sleep_time = 2 * args.raft_election_timeout_ms / 1000
        LOG.info(f"Sleeping {sleep_time}s")
        time.sleep(sleep_time)

        # Suspend the primary, resume other backups
        primary.suspend()
        backups[0].resume()
        backups[1].resume()
        backups[2].resume()
        new_primary, _ = network.wait_for_new_primary(primary,
                                                      timeout_multiplier=6)

        with new_primary.client("user0") as uc:
            # Check that uncommitted but committable suffix is preserved
            check_commit = infra.checker.Checker(uc)
            for tx in committable_txs:
                check_commit(tx)

        # Check that new transactions can be committed
        with new_primary.client("user0") as uc:
            for i in range(3):
                r = uc.post("/app/log/private", {
                    "id": 100 + i,
                    "msg": "Hello world"
                })
                assert r.status_code == 200
                uc.wait_for_commit(r)

        # Resume original primary, check that they rejoin correctly, including new transactions
        primary.resume()
        network.wait_for_node_commit_sync()
Example #16
def run(args):
    hosts = ["localhost"] * (4 if args.consensus == "bft" else 2)
    os.makedirs(args.schema_dir, exist_ok=True)

    changed_files = []
    methods_with_schema = set()
    methods_without_schema = set()
    old_schema = set(
        os.path.join(dir_path, filename)
        for dir_path, _, filenames in os.walk(args.schema_dir)
        for filename in filenames)

    documents_valid = True

    all_methods = []

    def fetch_schema(client, prefix):
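        # Fetch the OpenAPI document for this frontend and dump each method's
        # schema to disk, recording any files whose contents change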
        api_response = client.get(f"/{prefix}/api")
        check(api_response,
              error=lambda status, msg: status == http.HTTPStatus.OK.value)

        response_body = api_response.body.json()
        paths = response_body["paths"]
        all_methods.extend(paths.keys())

        # Fetch the schema of each method
        for method, _ in paths.items():
            schema_found = False
            expected_method_prefix = "/"
            if method.startswith(expected_method_prefix):
                method = method[len(expected_method_prefix):]
            schema_response = client.get(
                f'/{prefix}/api/schema?method="{method}"')
            check(
                schema_response,
                error=lambda status, msg: status == http.HTTPStatus.OK.value,
            )

            if schema_response.body:
                for verb, schema_element in schema_response.body.json().items():
                    for schema_type in ["params", "result"]:
                        element_name = "{}_schema".format(schema_type)
                        element = schema_element[element_name]
                        target_file = build_schema_file_path(
                            args.schema_dir, verb, method, schema_type)
                        if element is not None and len(element) != 0:
                            try:
                                old_schema.remove(target_file)
                            except KeyError:
                                pass
                            schema_found = True
                            formatted_schema = json.dumps(element, indent=2)
                            os.makedirs(os.path.dirname(target_file),
                                        exist_ok=True)
                            with open(target_file, "a+") as f:
                                f.seek(0)
                                previous = f.read()
                                if previous != formatted_schema:
                                    LOG.debug("Writing schema to {}".format(
                                        target_file))
                                    f.truncate(0)
                                    f.seek(0)
                                    f.write(formatted_schema)
                                    changed_files.append(target_file)
                                else:
                                    LOG.debug("Schema matches in {}".format(
                                        target_file))

            if schema_found:
                methods_with_schema.add(method)
            else:
                methods_without_schema.add(method)

        formatted_schema = json.dumps(response_body, indent=2)
        openapi_target_file = os.path.join(args.schema_dir,
                                           f"{prefix}_openapi.json")

        try:
            old_schema.remove(openapi_target_file)
        except KeyError:
            pass

        with open(openapi_target_file, "a+") as f:
            f.seek(0)
            previous = f.read()
            if previous != formatted_schema:
                LOG.debug("Writing schema to {}".format(openapi_target_file))
                f.truncate(0)
                f.seek(0)
                f.write(formatted_schema)
                changed_files.append(openapi_target_file)
            else:
                LOG.debug("Schema matches in {}".format(openapi_target_file))

        try:
            openapi_spec_validator.validate_spec(response_body)
        except Exception as e:
            LOG.error(f"Validation of {prefix} schema failed")
            LOG.error(e)
            return False

        return True

    with infra.network.network(hosts, args.binary_dir, args.debug_nodes,
                               args.perf_nodes) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        check = infra.checker.Checker()

        with primary.client("user0") as user_client:
            LOG.info("user frontend")
            if not fetch_schema(user_client, "app"):
                documents_valid = False

        with primary.client() as node_client:
            LOG.info("node frontend")
            if not fetch_schema(node_client, "node"):
                documents_valid = False

        with primary.client("member0") as member_client:
            LOG.info("member frontend")
            if not fetch_schema(member_client, "gov"):
                documents_valid = False

    made_changes = False

    if len(old_schema) > 0:
        LOG.error(
            "Removing old files which are no longer reported by the service:")
        for f in old_schema:
            LOG.error(" " + f)
            os.remove(f)
            f_dir = os.path.dirname(f)
            # Remove empty directories too
            while not os.listdir(f_dir):
                os.rmdir(f_dir)
                f_dir = os.path.dirname(f_dir)
        made_changes = True

    if len(changed_files) > 0:
        LOG.error("Made changes to the following schema files:")
        for f in changed_files:
            LOG.error(" " + f)
        made_changes = True

    if args.list_all:
        LOG.info("Discovered methods:")
        for method in sorted(set(all_methods)):
            LOG.info(f"  {method}")

    if made_changes or not documents_valid:
        sys.exit(1)
Example #17
def run(args):
    hosts = ["localhost"]

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
        primary, others = network.find_nodes()

        regulators = [
            AppUser(network, "FCA", "GB"),
            AppUser(network, "SEC", "FR")
        ]
        banks = [
            AppUser(network, f"bank{country}", country)
            for country in ("US", "GB", "GR", "FR")
        ]

        # Give regulators permissions to register regulators and banks
        for regulator in regulators:
            proposal_body, _ = ccf.proposal_generator.set_user_data(
                regulator.ccf_id,
                {
                    "proposals": {
                        "REGISTER_REGULATORS": True,
                        "REGISTER_BANKS": True
                    }
                },
            )
            (
                proposal_result,
                error,
            ) = network.consortium.get_any_active_member().propose(
                primary, proposal_body)
            network.consortium.vote_using_majority(primary,
                                                   proposal_result["id"])

        if args.run_poll:
            with open("revealed.log", "a+") as stdout:
                subprocess.Popen(
                    [
                        "python3",
                        f"{os.path.realpath(os.path.dirname(__file__))}/poll.py",
                        f"--host={primary.host}",
                        f"--port={primary.rpc_port}",
                        f"--regulator-name={regulators[0].name}",
                        f"--bank-name={banks[0].name}",
                    ],
                    stdout=stdout,
                )
        else:
            LOG.warning("")
            LOG.warning(
                "================= Network setup complete, you can run the below command to poll the service. "
                + "Press enter to continue =================")
            LOG.warning("")
            LOG.warning(
                f"python3 {os.path.realpath(os.path.dirname(__file__))}/poll.py --host={primary.host} --port={primary.rpc_port}"
            )
            LOG.warning("")
            input("")

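        # Load the regulator scripts: FCA from the Lua script file, SEC from an inline string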
        data = []
        with open(args.lua_script, "r") as f:
            data = f.readlines()

        scripts = {}
        scripts["FCA"] = "".join(data)
        scripts["SEC"] = (
            "if tonumber(amt) > 15000 then return true else return false end")

        for regulator in regulators:
            with primary.user_client(format="msgpack",
                                     user_id=regulator.name) as c:
                check = infra.checker.Checker()

                check(
                    c.post(
                        "REG_register",
                        {
                            "regulator_id": regulator.ccf_id,
                            "country": regulator.country,
                            "script": scripts[regulator.name],
                            "name": regulator.name,
                        },
                    ),
                    result=regulator.ccf_id,
                )
                check(
                    c.post("REG_get", {"id": regulator.ccf_id}),
                    result=[
                        regulator.country,
                        scripts[regulator.name],
                        regulator.name,
                    ],
                )

            LOG.debug(f"User {regulator} successfully registered as regulator")

        with primary.user_client(format="msgpack",
                                 user_id=regulators[0].name) as c:
            for bank in banks:
                check = infra.checker.Checker()

                check(
                    c.post("BK_register", {
                        "bank_id": bank.ccf_id,
                        "country": bank.country
                    }),
                    result=bank.ccf_id,
                )
                check(c.post("BK_get", {"id": bank.ccf_id}),
                      result=bank.country)
                LOG.debug(f"User {bank} successfully registered as bank")

        LOG.success(
            f"{len(regulators)} regulator and {len(banks)} bank(s) successfully setup"
        )

        tx_id = 0  # Tracks how many transactions have been issued
        LOG.info(
            f"Loading scenario file as bank {banks[0].ccf_id} ({banks[0].name})"
        )

        with primary.user_client(format="msgpack",
                                 user_id=regulators[0].name) as reg_c:
            with primary.user_client(format="msgpack",
                                     user_id=banks[0].name) as c:
                with open(args.datafile, newline="") as f:
                    start_time = perf_counter()
                    datafile = csv.DictReader(f)
                    for row in datafile:
                        json_tx = {
                            "src": row["origin"],
                            "dst": row["destination"],
                            "amt": row["amount"],
                            "type": row["type"],
                            "timestamp": strftime(
                                "%a, %d %b %Y %H:%M:%S +0000", gmtime()),
                            "src_country": row["src_country"],
                            "dst_country": row["dst_country"],
                        }

                        check(c.post("TX_record", json_tx), result=tx_id)
                        print(json.dumps(json_tx))
                        tx_id += 1

                        if tx_id % 1000 == 0:
                            elapsed_time = perf_counter() - start_time
                            LOG.info(
                                f"1000 transactions took {elapsed_time}: tx_id: {tx_id}"
                            )
                            start_time = perf_counter()
                LOG.success("Scenario file successfully loaded")

        LOG.warning("Data loading completed, press Enter to shutdown...")
        input()
Example #18
def run(get_command, args):
    if args.fixed_seed:
        seed(getpass.getuser())

    hosts = args.nodes
    if not hosts:
        hosts = ["localhost"] * minimum_number_of_local_nodes(args)

    LOG.info("Starting nodes on {}".format(hosts))

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
        primary, backups = network.find_nodes()

        command_args = get_command_args(args, get_command)

        nodes_to_send_to = filter_nodes(primary, backups, args.send_tx_to)
        clients = []
        client_hosts = []
        if args.one_client_per_backup:
            if not backups:
                raise Exception(
                    "--one-client-per-backup was set but no backup was found")
            client_hosts = ["localhost"] * len(backups)
        else:
            if args.client_nodes:
                client_hosts.extend(args.client_nodes)

        if args.num_localhost_clients:
            client_hosts.extend(["localhost"] *
                                int(args.num_localhost_clients))

        if not client_hosts:
            client_hosts = ["localhost"]

        for client_id, client_host in enumerate(client_hosts):
            node = nodes_to_send_to[client_id % len(nodes_to_send_to)]
            remote_client = configure_remote_client(args, client_id,
                                                    client_host, node,
                                                    command_args)
            clients.append(remote_client)

        if args.network_only:
            for remote_client in clients:
                LOG.info(
                    f"Client can be run with: {remote_client.remote.get_cmd()}"
                )
            while True:
                time.sleep(60)
        else:
            for remote_client in clients:
                remote_client.start()

            hard_stop_timeout = 90

            try:
                with cimetrics.upload.metrics(complete=False) as metrics:
                    tx_rates = infra.rates.TxRates(primary)
                    start_time = time.time()
                    while True:
                        stop_waiting = True
                        for i, remote_client in enumerate(clients):
                            done = remote_client.check_done()
                            # all the clients need to be done
                            LOG.info(
                                f"Client {i} has {'completed' if done else 'not completed'} running ({time.time() - start_time:.2f}s / {hard_stop_timeout}s)"
                            )
                            stop_waiting = stop_waiting and done
                        if stop_waiting:
                            break
                        if time.time() > start_time + hard_stop_timeout:
                            raise TimeoutError(
                                f"Client still running after {hard_stop_timeout}s"
                            )

                        time.sleep(5)

                    tx_rates.get_metrics()

                    for remote_client in clients:
                        perf_result = remote_client.get_result()
                        LOG.success(
                            f"{args.label}/{remote_client.name}: {perf_result}"
                        )

                        # TODO: Only results for first client are uploaded
                        # https://github.com/microsoft/CCF/issues/1046
                        if remote_client == clients[0]:
                            LOG.success(
                                f"Uploading results for {remote_client.name}")
                            metrics.put(args.label, perf_result)
                        else:
                            LOG.warning(
                                f"Skipping upload for {remote_client.name}")

                    LOG.info(f"Rates:\n{tx_rates}")
                    tx_rates.save_results(args.metrics_file)

                    for remote_client in clients:
                        remote_client.stop()

            except Exception:
                LOG.error("Stopping clients due to exception")
                for remote_client in clients:
                    remote_client.stop()
                raise
Example #19
def run(args):
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        network = test_missing_signature_header(network, args)
        network = test_corrupted_signature(network, args)

        LOG.info("Original members can ACK")
        network.consortium.get_any_active_member().ack(primary)

        LOG.info("Network cannot be opened twice")
        try:
            network.consortium.open_network(primary)
        except infra.proposal.ProposalNotAccepted as e:
            assert e.proposal.state == infra.proposal.ProposalState.Failed

        LOG.info("Proposal to add a new member (with different curve)")
        (
            new_member_proposal,
            new_member,
            careful_vote,
        ) = network.consortium.generate_and_propose_new_member(
            remote_node=primary,
            curve=infra.network.ParticipantsCurve(args.participants_curve).next(),
        )

        LOG.info("Check proposal has been recorded in open state")
        proposals = network.consortium.get_proposals(primary)
        proposal_entry = next(
            (p for p in proposals if p.proposal_id == new_member_proposal.proposal_id),
            None,
        )
        assert proposal_entry
        assert proposal_entry.state == ProposalState.Open

        LOG.info("Rest of consortium accept the proposal")
        network.consortium.vote_using_majority(
            primary, new_member_proposal, careful_vote
        )
        assert new_member_proposal.state == ProposalState.Accepted

        # Manually add new member to consortium
        network.consortium.members.append(new_member)

        LOG.debug(
            "Further vote requests fail as the proposal has already been accepted"
        )
        params_error = http.HTTPStatus.BAD_REQUEST.value
        assert (
            network.consortium.get_member_by_id(0)
            .vote(primary, new_member_proposal, careful_vote)
            .status_code
            == params_error
        )
        assert (
            network.consortium.get_member_by_id(1)
            .vote(primary, new_member_proposal, careful_vote)
            .status_code
            == params_error
        )

        LOG.debug("Accepted proposal cannot be withdrawn")
        response = network.consortium.get_member_by_id(
            new_member_proposal.proposer_id
        ).withdraw(primary, new_member_proposal)
        assert response.status_code == params_error

        LOG.info("New non-active member should get insufficient rights response")
        try:
            proposal_trust_0, careful_vote = ccf.proposal_generator.trust_node(0)
            new_member.propose(primary, proposal_trust_0)
            assert (
                False
            ), "New non-active member should get insufficient rights response"
        except infra.proposal.ProposalNotCreated as e:
            assert e.response.status_code == http.HTTPStatus.FORBIDDEN.value

        LOG.debug("New member ACK")
        new_member.ack(primary)

        LOG.info("New member is now active and send an accept node proposal")
        trust_node_proposal_0 = new_member.propose(primary, proposal_trust_0)
        trust_node_proposal_0.vote_for = careful_vote

        LOG.debug("Members vote to accept the accept node proposal")
        network.consortium.vote_using_majority(
            primary, trust_node_proposal_0, careful_vote
        )
        assert trust_node_proposal_0.state == infra.proposal.ProposalState.Accepted

        LOG.info("New member makes a new proposal")
        proposal_trust_1, careful_vote = ccf.proposal_generator.trust_node(1)
        trust_node_proposal = new_member.propose(primary, proposal_trust_1)

        LOG.debug("Other members (non proposer) are unable to withdraw new proposal")
        response = network.consortium.get_member_by_id(1).withdraw(
            primary, trust_node_proposal
        )
        assert response.status_code == http.HTTPStatus.FORBIDDEN.value

        LOG.debug("Proposer withdraws their proposal")
        response = new_member.withdraw(primary, trust_node_proposal)
        assert response.status_code == http.HTTPStatus.OK.value
        assert trust_node_proposal.state == infra.proposal.ProposalState.Withdrawn

        proposals = network.consortium.get_proposals(primary)
        proposal_entry = next(
            (p for p in proposals if p.proposal_id == trust_node_proposal.proposal_id),
            None,
        )
        assert proposal_entry
        assert proposal_entry.state == ProposalState.Withdrawn

        LOG.debug("Further withdraw proposals fail")
        response = new_member.withdraw(primary, trust_node_proposal)
        assert response.status_code == params_error

        LOG.debug("Further votes fail")
        response = new_member.vote(primary, trust_node_proposal, careful_vote)
        assert response.status_code == params_error
Example #20
def run(args):
    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        network = test_cert_store(network, args)
Example #21
def run(args):
    hosts = ["localhost", "localhost", "localhost"]

    LOG.info(f"setting seed to {args.seed}")
    random.seed(args.seed)
    txs = app.LoggingTxs()

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb, txs=txs
    ) as network:
        network.start_and_join(args)
        original_nodes = network.get_joined_nodes()
        view_info = {}

        suspend.update_view_info(network, view_info)
        app.test_run_txs(network=network, args=args, num_txs=TOTAL_REQUESTS)
        suspend.update_view_info(network, view_info)

        nodes_to_kill = [network.find_any_backup()]
        nodes_to_keep = [n for n in original_nodes if n not in nodes_to_kill]

        # check that a new node can catch up after all the requests
        late_joiner = network.create_and_trust_node(args.package, "localhost", args)
        nodes_to_keep.append(late_joiner)

        # some requests to be processed while the late joiner catches up
        # (no strict checking that these requests are actually being processed simultaneously with the node catchup)
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=int(TOTAL_REQUESTS / 2),
            nodes=original_nodes,  # doesn't contain late joiner
            verify=False,  # will try to verify for late joiner and it might not be ready yet
        )

        suspend.wait_for_late_joiner(original_nodes[0], late_joiner)

        # kill the old node(s) and ensure we are still making progress
        for backup_to_retire in nodes_to_kill:
            LOG.success(f"Stopping node {backup_to_retire.node_id}")
            backup_to_retire.stop()

        # check nodes are ok after we killed one off
        app.test_run_txs(
            network=network,
            args=args,
            nodes=nodes_to_keep,
            num_txs=len(nodes_to_keep),
            timeout=30,
            ignore_failures=True,
            # in the event of an early view change due to the late joiner this might
            # take longer than usual to complete and we don't want the test to break here
        )

        suspend.test_suspend_nodes(network, args, nodes_to_keep)

        # run txs while nodes get suspended
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=4 * TOTAL_REQUESTS,
            timeout=30,
            ignore_failures=True,
            # in the event of an early view change due to the late joiner this might
            # take longer than usual to complete and we don't want the test to break here
        )

        suspend.update_view_info(network, view_info)

        # check nodes have resumed normal execution before shutting down
        app.test_run_txs(network=network, args=args, num_txs=len(nodes_to_keep))

        # we have asserted that all nodes are caught up
        # assert that view changes actually did occur
        assert len(view_info) > 1

        LOG.success("----------- views and primaries recorded -----------")
        for view, primary in view_info.items():
            LOG.success(f"view {view} - primary {primary}")
Exemplo n.º 22
0
def run(get_command, args):
    if args.fixed_seed:
        seed(getpass.getuser())

    hosts = args.nodes
    if not hosts:
        hosts = ["local://localhost"] * minimum_number_of_local_nodes(args)

    args.initial_user_count = 3

    LOG.info("Starting nodes on {}".format(hosts))

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        network.start_and_join(args)
        primary, backups = network.find_nodes()

        command_args = get_command_args(args, get_command)

        if args.use_jwt:
            jwt_key_priv_pem, _ = infra.crypto.generate_rsa_keypair(2048)
            jwt_cert_pem = infra.crypto.generate_cert(jwt_key_priv_pem)
            jwt_kid = "my_key_id"
            jwt_issuer = "https://example.issuer"
            # Add JWT issuer
            with tempfile.NamedTemporaryFile(prefix="ccf",
                                             mode="w+") as metadata_fp:
                jwt_cert_der = infra.crypto.cert_pem_to_der(jwt_cert_pem)
                der_b64 = base64.b64encode(jwt_cert_der).decode("ascii")
                data = {
                    "issuer": jwt_issuer,
                    "jwks": {
                        "keys": [{
                            "kty": "RSA",
                            "kid": jwt_kid,
                            "x5c": [der_b64]
                        }]
                    },
                }
                json.dump(data, metadata_fp)
                metadata_fp.flush()
                network.consortium.set_jwt_issuer(primary, metadata_fp.name)
            jwt = infra.crypto.create_jwt({}, jwt_key_priv_pem, jwt_kid)

            command_args += ["--bearer-token", jwt]

        nodes_to_send_to = filter_nodes(primary, backups, args.send_tx_to)
        clients = []
        client_hosts = []
        if args.one_client_per_backup:
            if not backups:
                raise Exception(
                    "--one-client-per-backup was set but no backup was found")
            client_hosts = ["localhost"] * len(backups)
        else:
            if args.client_nodes:
                client_hosts.extend(args.client_nodes)

        if args.num_localhost_clients:
            client_hosts.extend(["localhost"] *
                                int(args.num_localhost_clients))

        if not client_hosts:
            client_hosts = ["localhost"]

        for client_id, client_host in enumerate(client_hosts):
            node = nodes_to_send_to[client_id % len(nodes_to_send_to)]
            remote_client = configure_remote_client(args, client_id,
                                                    client_host, node,
                                                    command_args)
            clients.append(remote_client)

        if args.network_only:
            for remote_client in clients:
                LOG.info(
                    f"Client can be run with: {remote_client.remote.get_cmd()}"
                )
            while True:
                time.sleep(60)
        else:
            for remote_client in clients:
                remote_client.start()

            hard_stop_timeout = 90

            try:
                with cimetrics.upload.metrics(complete=False) as metrics:
                    tx_rates = infra.rates.TxRates(primary)
                    start_time = time.time()
                    while True:
                        stop_waiting = True
                        for i, remote_client in enumerate(clients):
                            done = remote_client.check_done()
                            # all the clients need to be done
                            LOG.info(
                                f"Client {i} has {'completed' if done else 'not completed'} running ({time.time() - start_time:.2f}s / {hard_stop_timeout}s)"
                            )
                            stop_waiting = stop_waiting and done
                        if stop_waiting:
                            break
                        if time.time() > start_time + hard_stop_timeout:
                            raise TimeoutError(
                                f"Client still running after {hard_stop_timeout}s"
                            )

                        time.sleep(5)

                    tx_rates.get_metrics()

                    for remote_client in clients:
                        perf_result = remote_client.get_result()
                        LOG.success(
                            f"{args.label}/{remote_client.name}: {perf_result}"
                        )

                        # TODO: Only results for first client are uploaded
                        # https://github.com/microsoft/CCF/issues/1046
                        if remote_client == clients[0]:
                            LOG.success(
                                f"Uploading results for {remote_client.name}")
                            metrics.put(args.label, perf_result)
                        else:
                            LOG.warning(
                                f"Skipping upload for {remote_client.name}")

                    primary, _ = network.find_primary()
                    with primary.client() as nc:
                        r = nc.get("/node/memory")
                        assert r.status_code == http.HTTPStatus.OK.value

                        results = r.body.json()
                        tx_rates.insert_metrics(**results)

                        # Construct name for heap metric, removing ^ suffix if present
                        heap_peak_metric = args.label
                        if heap_peak_metric.endswith("^"):
                            heap_peak_metric = heap_peak_metric[:-1]
                        heap_peak_metric += "_mem"

                        peak_value = results["peak_allocated_heap_size"]
                        metrics.put(heap_peak_metric, peak_value)

                    LOG.info(f"Rates:\n{tx_rates}")
                    tx_rates.save_results(args.metrics_file)

                    for remote_client in clients:
                        remote_client.stop()

            except Exception:
                LOG.error("Stopping clients due to exception")
                for remote_client in clients:
                    remote_client.stop()
                raise
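
The filter_nodes helper that selects which nodes the clients target is not shown in this excerpt. A plausible sketch, assuming args.send_tx_to takes one of "primary", "backups" or "all" (the option values are an assumption):

def filter_nodes(primary, backups, send_tx_to):
    # Hypothetical implementation: map the --send-tx-to option onto a node list.
    if send_tx_to == "primary":
        return [primary]
    elif send_tx_to == "backups":
        if not backups:
            raise Exception("--send-tx-to backups was set but no backup was found")
        return backups
    else:
        return [primary] + backups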
Exemplo n.º 23
0
def run(args):
    hosts = ["localhost"] * (4 if args.consensus == "pbft" else 1)

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        check = infra.checker.Checker()
        network.start_and_join(args)
        primary, _ = network.find_nodes()

        primary_pid = primary.remote.remote.proc.pid
        num_fds = psutil.Process(primary_pid).num_fds()
        max_fds = num_fds + 150
        LOG.success(f"{primary_pid} has {num_fds} open file descriptors")

        resource.prlimit(primary_pid, resource.RLIMIT_NOFILE,
                         (max_fds, max_fds))
        LOG.success(f"set max fds to {max_fds} on {primary_pid}")

        nb_conn = (max_fds - num_fds) * 2
        clients = []

        with contextlib.ExitStack() as es:
            LOG.success(f"Creating {nb_conn} clients")
            for i in range(nb_conn):
                try:
                    clients.append(es.enter_context(primary.client("user0")))
                    LOG.info(f"Created client {i}")
                except OSError:
                    LOG.error(f"Failed to create client {i}")

            # Creating clients may not actually create connections/fds. Send messages until we run out of fds
            for i, c in enumerate(clients):
                if psutil.Process(primary_pid).num_fds() >= max_fds:
                    LOG.warning(f"Reached fd limit at client {i}")
                    break
                LOG.info(f"Sending as client {i}")
                check(c.post("/app/log/private", {
                    "id": 42,
                    "msg": "foo"
                }),
                      result=True)

            try:
                clients[-1].post("/app/log/private", {"id": 42, "msg": "foo"})
            except Exception:
                pass
            else:
                assert False, "Expected error due to fd limit"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(
                f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            LOG.info("Disconnecting clients")
            clients = []

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(
            f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")

        with contextlib.ExitStack() as es:
            to_create = max_fds - num_fds + 1
            LOG.success(f"Creating {to_create} clients")
            for i in range(to_create):
                clients.append(es.enter_context(primary.client("user0")))
                LOG.info(f"Created client {i}")

            for i, c in enumerate(clients):
                if psutil.Process(primary_pid).num_fds() >= max_fds:
                    LOG.warning(f"Reached fd limit at client {i}")
                    break
                LOG.info(f"Sending as client {i}")
                check(c.post("/app/log/private", {
                    "id": 42,
                    "msg": "foo"
                }),
                      result=True)

            try:
                clients[-1].post("/app/log/private", {"id": 42, "msg": "foo"})
            except Exception:
                pass
            else:
                assert False, "Expected error due to fd limit"

            num_fds = psutil.Process(primary_pid).num_fds()
            LOG.success(
                f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
            LOG.info("Disconnecting clients")
            clients = []

        time.sleep(1)
        num_fds = psutil.Process(primary_pid).num_fds()
        LOG.success(
            f"{primary_pid} has {num_fds}/{max_fds} open file descriptors")
Exemplo n.º 24
0
def run(args):
    # SNIPPET_START: parsing
    with open(args.scenario) as f:
        scenario = json.load(f)

    hosts = scenario.get("hosts", ["localhost", "localhost"])
    if args.consensus == "pbft":
        hosts = ["localhost"] * 3
    args.package = scenario["package"]
    # SNIPPET_END: parsing

    scenario_dir = os.path.dirname(args.scenario)

    # SNIPPET_START: create_network
    with infra.network.network(hosts, args.binary_dir, args.debug_nodes,
                               args.perf_nodes) as network:
        network.start_and_join(args)
        # SNIPPET_END: create_network

        primary, backups = network.find_nodes()

        with primary.client() as mc:

            check = infra.checker.Checker()
            check_commit = infra.checker.Checker(mc)

            for connection in scenario["connections"]:
                with (primary.client("user0")
                      if not connection.get("on_backup") else
                      random.choice(backups).client("user0")) as client:
                    txs = connection.get("transactions", [])

                    for include_file in connection.get("include", []):
                        with open(os.path.join(scenario_dir,
                                               include_file)) as f:
                            txs += json.load(f)

                    for tx in txs:
                        r = client.call(
                            tx["method"],
                            body=tx["body"],
                            http_verb=tx.get("verb", "POST"),
                        )

                        if tx.get("expected_error") is not None:
                            check(
                                r,
                                error=lambda status, msg, transaction=tx:
                                status
                                # pylint: disable=no-member
                                == http.HTTPStatus(
                                    transaction.get("expected_error")).value,
                            )

                        elif tx.get("expected_result") is not None:
                            check_commit(r, result=tx.get("expected_result"))

                        else:
                            check_commit(r, result=lambda res: res is not None)

                network.wait_for_node_commit_sync(args.consensus)

    if args.network_only:
        LOG.info("Keeping network alive with the following nodes:")
        LOG.info("  Primary = {}:{}".format(primary.pubhost, primary.rpc_port))
        for i, f in enumerate(backups):
            LOG.info("  Backup[{}] = {}:{}".format(i, f.pubhost, f.rpc_port))

        input("Press Enter to shutdown...")
Exemplo n.º 25
0
def run(args, additional_attack_args):
    # Test that vegeta is available
    subprocess.run([VEGETA_BIN, "-version"], capture_output=True, check=True)

    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
    ) as network:
        network.start_and_join(args)

        primary, _ = network.find_primary()
        primary_hostname = f"{primary.pubhost}:{primary.pubport}"

        vegeta_targets = "vegeta_targets"
        with open(vegeta_targets, "w") as f:
            for i in range(10):
                TargetGenerator.write_vegeta_target_line(
                    f,
                    primary_hostname,
                    "/app/log/private",
                    body={"id": i, "msg": f"Private message: {i}"},
                )

            for i in range(10):
                TargetGenerator.write_vegeta_target_line(
                    f, primary_hostname, f"/app/log/private?id={i}", method="GET"
                )

            for i in range(10):
                TargetGenerator.write_vegeta_target_line(
                    f,
                    primary_hostname,
                    "/app/log/public",
                    body={"id": i, "msg": f"Public message: {i}"},
                )

            for i in range(10):
                TargetGenerator.write_vegeta_target_line(
                    f, primary_hostname, f"/app/log/public?id={i}", method="GET"
                )

        attack_cmd = [VEGETA_BIN, "attack"]
        attack_cmd += ["--targets", vegeta_targets]
        attack_cmd += ["--format", "json"]
        attack_cmd += ["--duration", "10s"]
        sa = primary.session_auth("user0")
        attack_cmd += ["--cert", sa["session_auth"].cert]
        attack_cmd += ["--key", sa["session_auth"].key]
        attack_cmd += ["--root-certs", sa["ca"]]
        attack_cmd += additional_attack_args

        attack_cmd_s = " ".join(attack_cmd)
        LOG.warning(f"Starting: {attack_cmd_s}")
        vegeta_run = subprocess.Popen(attack_cmd, stdout=subprocess.PIPE)

        tee_split = subprocess.Popen(
            ["tee", "vegeta_results.bin"],
            stdin=vegeta_run.stdout,
            stdout=subprocess.PIPE,
        )

        report_cmd = [VEGETA_BIN, "report", "--every", "5s"]
        vegeta_report = subprocess.Popen(report_cmd, stdin=tee_split.stdout)

        # Start a second thread which will print the primary's memory stats at regular intervals
        shutdown_event = threading.Event()
        memory_thread = threading.Thread(
            target=print_memory_stats, args=(primary, shutdown_event)
        )
        memory_thread.start()

        LOG.info("Waiting for completion...")
        vegeta_report.communicate()

        LOG.info("Shutting down...")
        shutdown_event.set()
        memory_thread.join()

        LOG.success("Done!")
Exemplo n.º 26
0
def run(args):
    # Three nodes minimum to make sure that the raft network can still make progress
    # if one node stops
    hosts = ["localhost"] * (4 if args.consensus == "pbft" else 3)

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        check = infra.checker.Checker()

        network.start_and_join(args)
        current_view = None

        # Number of nodes to stop before the network can no longer make progress
        nodes_to_stop = math.ceil(len(hosts) / 2)
        if args.consensus == "pbft":
            nodes_to_stop = math.ceil(len(hosts) / 3)

        for _ in range(nodes_to_stop):
            # Note that for the first iteration, the primary is known in advance anyway
            LOG.debug("Find freshly elected primary")
            # After a view change in pbft, finding the new primary takes longer
            primary, current_view = network.find_primary(
                timeout=(30 if args.consensus == "pbft" else 3))

            LOG.debug(
                "Commit new transactions, primary:{}, current_view:{}".format(
                    primary.node_id, current_view))
            with primary.client("user0") as c:
                res = c.post(
                    "/app/log/private",
                    {
                        "id":
                        current_view,
                        "msg":
                        "This log is committed in view {}".format(
                            current_view),
                    },
                )
                check(res, result=True)
                seqno = res.seqno

            LOG.debug("Waiting for transaction to be committed by all nodes")
            wait_for_seqno_to_commit(seqno, current_view,
                                     network.get_joined_nodes())

            test_kill_primary(network, args, find_new_primary=False)

        # More than f nodes have now been stopped, so trying to commit any further message should fail
        LOG.debug("No progress can be made as more than {} nodes have stopped".
                  format(nodes_to_stop))
        try:
            primary, _ = network.find_primary()
            assert False, "Primary should not be found"
        except infra.network.PrimaryNotFound:
            pass

        LOG.success(
            f"As expected, primary could not be found after election duration ({network.election_duration}s)."
        )
        LOG.success("Test ended successfully.")
Exemplo n.º 27
0
def run(args):
    os.makedirs(args.schema_dir, exist_ok=True)

    changed_files = []
    old_schema = set(
        os.path.join(dir_path, filename)
        for dir_path, _, filenames in os.walk(args.schema_dir)
        for filename in filenames)

    documents_valid = True
    all_methods = []

    def fetch_schema(client, prefix):
        api_response = client.get(f"/{prefix}/api")
        check(api_response,
              error=lambda status, msg: status == http.HTTPStatus.OK.value)

        response_body = api_response.body.json()
        paths = response_body["paths"]
        all_methods.extend(paths.keys())

        formatted_schema = json.dumps(response_body, indent=2)
        openapi_target_file = os.path.join(args.schema_dir,
                                           f"{prefix}_openapi.json")

        try:
            old_schema.remove(openapi_target_file)
        except KeyError:
            pass

        with open(openapi_target_file, "a+") as f:
            f.seek(0)
            previous = f.read()
            if previous != formatted_schema:
                LOG.debug("Writing schema to {}".format(openapi_target_file))
                f.truncate(0)
                f.seek(0)
                f.write(formatted_schema)
                changed_files.append(openapi_target_file)
            else:
                LOG.debug("Schema matches in {}".format(openapi_target_file))

        try:
            openapi_spec_validator.validate_spec(response_body)
        except Exception as e:
            LOG.error(f"Validation of {prefix} schema failed")
            LOG.error(e)
            return False

        return True

    with infra.network.network(args.nodes, args.binary_dir, args.debug_nodes,
                               args.perf_nodes) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        check = infra.checker.Checker()

        with primary.client("user0") as user_client:
            LOG.info("user frontend")
            if not fetch_schema(user_client, "app"):
                documents_valid = False

        with primary.client() as node_client:
            LOG.info("node frontend")
            if not fetch_schema(node_client, "node"):
                documents_valid = False

        with primary.client("member0") as member_client:
            LOG.info("member frontend")
            if not fetch_schema(member_client, "gov"):
                documents_valid = False

    made_changes = False

    if len(old_schema) > 0:
        LOG.error(
            "Removing old files which are no longer reported by the service:")
        for f in old_schema:
            LOG.error(" " + f)
            os.remove(f)
            f_dir = os.path.dirname(f)
            # Remove empty directories too
            while not os.listdir(f_dir):
                os.rmdir(f_dir)
                f_dir = os.path.dirname(f_dir)
        made_changes = True

    if len(changed_files) > 0:
        LOG.error("Made changes to the following schema files:")
        for f in changed_files:
            LOG.error(" " + f)
        made_changes = True

    if args.list_all:
        LOG.info("Discovered methods:")
        for method in sorted(set(all_methods)):
            LOG.info(f"  {method}")

    if made_changes or not documents_valid:
        sys.exit(1)
Exemplo n.º 28
0
def run(args):
    hosts = args.node or DEFAULT_NODES

    if not args.verbose:
        LOG.remove()
        LOG.add(
            sys.stdout,
            format="<green>[{time:HH:mm:ss.SSS}]</green> {message}",
        )
        LOG.disable("infra")
        LOG.disable("ccf")

    LOG.info(
        f"Starting {len(hosts)} CCF node{'s' if len(hosts) > 1 else ''}...")
    if args.enclave_type == "virtual":
        LOG.warning("Virtual mode enabled")

    with infra.network.network(
            hosts=hosts,
            binary_directory=args.binary_dir,
            library_directory=args.library_dir,
            dbg_nodes=args.debug_nodes,
    ) as network:
        if args.recover:
            args.label = args.label + "_recover"
            LOG.info("Recovering network from:")
            LOG.info(f" - Common directory: {args.common_dir}")
            LOG.info(f" - Ledger: {args.ledger_dir}")
            if args.snapshot_dir:
                LOG.info(f" - Snapshots: {args.snapshot_dir}")
            else:
                LOG.warning(
                    "No available snapshot to recover from. Entire transaction history will be replayed."
                )
            network.start_in_recovery(
                args,
                args.ledger_dir,
                snapshot_dir=args.snapshot_dir,
                common_dir=args.common_dir,
            )
            network.recover(args)
        else:
            network.start_and_join(args)

        primary, backups = network.find_nodes()
        max_len = len(str(len(backups)))

        # To be sure, confirm that the app frontend is open on each node
        for node in [primary, *backups]:
            with node.client("user0") as c:
                if args.verbose:
                    r = c.get("/app/commit")
                else:
                    r = c.get("/app/commit", log_capture=[])
                assert r.status_code == http.HTTPStatus.OK, r.status_code

        def pad_node_id(nid):
            return (f"{{:{max_len}d}}").format(nid)

        LOG.info("Started CCF network with the following nodes:")
        LOG.info("  Node [{}] = https://{}:{}".format(
            pad_node_id(primary.node_id), primary.pubhost, primary.rpc_port))

        for b in backups:
            LOG.info("  Node [{}] = https://{}:{}".format(
                pad_node_id(b.node_id), b.pubhost, b.rpc_port))

        LOG.info(
            f"You can now issue business transactions to the {args.package} application."
        )
        LOG.info(
            f"Keys and certificates have been copied to the common folder: {network.common_dir}"
        )
        LOG.info(
            "See https://microsoft.github.io/CCF/master/users/issue_commands.html for more information."
        )
        LOG.warning("Press Ctrl+C to shutdown the network.")

        try:
            while True:
                time.sleep(60)

        except KeyboardInterrupt:
            LOG.info("Stopping all CCF nodes...")

    LOG.info("All CCF nodes stopped.")
Exemplo n.º 29
0
def run(args):
    chosen_suite = []

    if not args.test_suite:
        args.test_suite = ["all"]

    for choice in args.test_suite:
        try:
            chosen_suite.extend(s.suites[choice])
        except KeyError as e:
            raise ValueError(f"Unhandled choice: {choice}") from e

    seed = None
    if os.getenv("SHUFFLE_SUITE"):
        seed = os.getenv("SHUFFLE_SUITE_SEED")
        if seed is None:
            seed = time.time()
        seed = int(seed)
        LOG.success(f"Shuffling full suite with seed {seed}")
        random.seed(seed)
        random.shuffle(chosen_suite)
    s.validate_tests_signature(chosen_suite)

    if args.enforce_reqs is False:
        LOG.warning("Test requirements will be ignored")

    txs = app.LoggingTxs()
    network = infra.network.Network(args.nodes,
                                    args.binary_dir,
                                    args.debug_nodes,
                                    args.perf_nodes,
                                    txs=txs)
    network.start_and_join(args)

    LOG.info(
        f"Running {len(chosen_suite)} tests for {args.test_duration} seconds")

    run_tests = {}
    success = True
    elapsed = args.test_duration

    if args.filter is not None:
        filter_re = re.compile(args.filter)

        def filter_fun(x):
            return filter_re is None or filter_re.match(x[1].__name__)

        tests_to_run = filter(filter_fun, enumerate(chosen_suite))
    else:
        tests_to_run = enumerate(chosen_suite)

    for i, test in tests_to_run:
        status = None
        reason = None

        if elapsed <= 0:
            LOG.warning(
                f"Test duration time ({args.test_duration} seconds) is up!")
            break

        try:
            LOG.debug(f"Running {s.test_name(test)}...")
            test_time_before = time.time()

            # Actually run the test
            new_network = test(network, args)
            status = TestStatus.success

        except reqs.TestRequirementsNotMet as ce:
            LOG.warning(f"Test requirements for {s.test_name(test)} not met")
            status = TestStatus.skipped
            reason = str(ce)
            new_network = network

        except Exception:
            LOG.exception(f"Test {s.test_name(test)} failed")
            status = TestStatus.failure
            new_network = network

        test_elapsed = time.time() - test_time_before

        # Construct test report
        run_tests[i] = {
            "name": s.test_name(test),
            "status": status.name,
            "elapsed (s)": round(test_elapsed, 2),
            "memory": mem_stats(new_network),
        }

        if reason is not None:
            run_tests[i]["reason"] = reason

        # If the test function did not return a network, it is not possible to continue
        if new_network is None:
            raise ValueError(
                f"Network returned by {s.test_name(test)} is None")

        # If the network was changed (e.g. recovery test), use the new network from now on
        if new_network != network:
            network = new_network

        LOG.debug(f"Test {s.test_name(test)} took {test_elapsed:.2f} secs")

        # For now, if a test fails, the entire test suite is stopped
        if status is TestStatus.failure:
            success = False
            break

        elapsed -= test_elapsed

    network.stop_all_nodes()

    if success:
        LOG.success(
            f"Full suite passed. Ran {len(run_tests)}/{len(chosen_suite)}")
    else:
        LOG.error(f"Suite failed. Ran {len(run_tests)}/{len(chosen_suite)}")

    if seed:
        LOG.info(f"Full suite was shuffled with seed: {seed}")

    for idx, test in run_tests.items():
        status = test["status"]
        if status == TestStatus.success.name:
            log_fn = LOG.success
        elif status == TestStatus.skipped.name:
            log_fn = LOG.warning
        else:
            log_fn = LOG.error
        log_fn(f"Test #{idx}:\n{json.dumps(test, indent=4)}")

    if not success:
        sys.exit(1)
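
mem_stats(new_network), used when building the per-test report above, is not part of this excerpt. A plausible sketch that reuses the /node/memory endpoint from the earlier examples (the exact shape of the report is an assumption):

def mem_stats(network):
    # Hypothetical helper: collect each joined node's memory report, keyed by
    # node id. Errors are swallowed so a crashed node does not break reporting.
    mem = {}
    for node in network.get_joined_nodes():
        try:
            with node.client() as c:
                r = c.get("/node/memory")
                mem[node.node_id] = r.body.json()
        except Exception:
            pass
    return mem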
Exemplo n.º 30
0
def run(args):
    # Keep track of how many propose, vote and withdraw operations are issued in this test
    proposals_issued = 0
    votes_issued = 0
    withdrawals_issued = 0

    with infra.network.network(
        args.nodes, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb
    ) as network:
        network.start_and_join(args)
        primary, _ = network.find_primary()

        ledger_directory = network.find_primary()[0].remote.ledger_path()

        ledger = ccf.ledger.Ledger(ledger_directory)
        (
            original_proposals,
            original_votes,
            original_withdrawals,
        ) = count_governance_operations(ledger)

        LOG.info("Add new member proposal (implicit vote)")
        (
            new_member_proposal,
            _,
            careful_vote,
        ) = network.consortium.generate_and_propose_new_member(
            primary, curve=infra.network.ParticipantsCurve.secp256k1
        )
        proposals_issued += 1

        LOG.info("2/3 members accept the proposal")
        p = network.consortium.vote_using_majority(
            primary, new_member_proposal, careful_vote
        )
        votes_issued += p.votes_for
        assert new_member_proposal.state == infra.proposal.ProposalState.Accepted

        LOG.info("Create new proposal but withdraw it before it is accepted")
        new_member_proposal, _, _ = network.consortium.generate_and_propose_new_member(
            primary, curve=infra.network.ParticipantsCurve.secp256k1
        )
        proposals_issued += 1

        with primary.client() as c:
            response = network.consortium.get_member_by_id(
                new_member_proposal.proposer_id
            ).withdraw(primary, new_member_proposal)
            infra.checker.Checker(c)(response)
        assert response.status_code == http.HTTPStatus.OK.value
        assert response.body.json()["state"] == ProposalState.Withdrawn.value
        withdrawals_issued += 1

    # Re-read the ledger from the beginning
    ledger = ccf.ledger.Ledger(ledger_directory)

    (
        final_proposals,
        final_votes,
        final_withdrawals,
    ) = count_governance_operations(ledger)

    assert (
        final_proposals == original_proposals + proposals_issued
    ), f"Unexpected number of propose operations recorded in the ledger (expected {original_proposals + proposals_issued}, found {final_proposals})"
    assert (
        final_votes == original_votes + votes_issued
    ), f"Unexpected number of vote operations recorded in the ledger (expected {original_votes + votes_issued}, found {final_votes})"
    assert (
        final_withdrawals == original_withdrawals + withdrawals_issued
    ), f"Unexpected number of withdraw operations recorded in the ledger (expected {original_withdrawals + withdrawals_issued}, found {final_withdrawals})"