Example #1
def test_all_nodes_cert_renewal(network, args, valid_from=None):
    primary, _ = network.find_primary()

    valid_from = valid_from or datetime.now()
    validity_period_days = args.maximum_node_certificate_validity_days

    self_signed_node_certs_before = {}
    for node in network.get_joined_nodes():
        # Note: GET /node/self_signed_certificate endpoint was added after 2.0.0-rc6
        if node.version_after("ccf-2.0.0-rc6"):
            self_signed_node_certs_before[
                node.local_node_id] = node.retrieve_self_signed_cert()

    network.consortium.set_all_nodes_certificate_validity(
        primary,
        valid_from=valid_from,
        validity_period_days=validity_period_days,
    )

    # Node certificates are updated on global commit hook
    network.wait_for_all_nodes_to_commit(primary)

    for node in network.get_joined_nodes():
        node.set_certificate_validity_period(valid_from, validity_period_days)
        if node.version_after("ccf-2.0.0-rc6"):
            assert (
                self_signed_node_certs_before[node.local_node_id] !=
                node.retrieve_self_signed_cert()
            ), f"Self-signed node certificate for node {node.local_node_id} was not renewed"
Example #2
def test_invalid_partitions(network, args):
    nodes = network.get_joined_nodes()

    try:
        network.partitioner.partition(
            [nodes[0], nodes[2]],
            [nodes[1], nodes[2]],
        )
        assert False, "Node should not appear in two or more partitions"
    except ValueError:
        pass

    try:
        network.partitioner.partition()
        assert False, "At least one partition should be specified"
    except ValueError:
        pass

    try:
        invalid_local_node_id = -1
        new_node = infra.node.Node(invalid_local_node_id, "local://localhost")
        network.partitioner.partition([new_node])
        assert False, "All nodes should belong to network"
    except ValueError:
        pass

    return network
Example #3
def test_kill_primary_no_reqs(network, args):
    old_primary, _ = network.find_primary_and_any_backup()
    old_primary.stop()
    new_primary, _ = network.wait_for_new_primary(old_primary)

    # Verify that the TxID reported just after an election is valid
    # Note that the first TxID read after an election may be of a signature
    # Tx (time-based signature generation) in the new term rather than the
    # last entry in the previous term
    for node in network.get_joined_nodes():
        with node.client() as c:
            r = c.get("/node/network")
            c.wait_for_commit(r)

            # Also verify that the reported last ack times are as expected
            r = c.get("/node/consensus")
            acks = r.body.json()["details"]["acks"]
            for ack in acks.values():
                if node is new_primary:
                    assert (ack["last_received_ms"] <
                            network.args.election_timeout_ms), acks
                else:
                    assert (
                        ack["last_received_ms"] == 0
                    ), f"Backup {node.local_node_id} should report time of last acks of 0: {acks}"

    return network
Example #4
    def check(network, args, *nargs, **kwargs):
        primary, _ = network.find_primary()
        with primary.client(
                network.consortium.get_any_active_member().local_id) as c:
            r = c.post(
                "/gov/query",
                {
                    "text":
                    """tables = ...
                        trusted_nodes_count = 0
                        tables["public:ccf.gov.nodes.info"]:foreach(function(node_id, details)
                            if details["status"] == "TRUSTED" then
                                trusted_nodes_count = trusted_nodes_count + 1
                            end
                        end)
                        return trusted_nodes_count
                        """
                },
            )

            trusted_nodes_count = r.body.json()
            running_nodes_count = len(network.get_joined_nodes())
            would_leave_nodes_count = running_nodes_count - nodes_to_kill_count
            minimum_nodes_to_run_count = ceil((trusted_nodes_count + 1) / 2)
            if args.consensus == "cft" and (would_leave_nodes_count <
                                            minimum_nodes_to_run_count):
                raise TestRequirementsNotMet(
                    f"Cannot kill {nodes_to_kill_count} node(s) as the network would not be able to make progress"
                    f" (would leave {would_leave_nodes_count} nodes but requires {minimum_nodes_to_run_count} nodes to make progress) "
                )
Example #5
def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_join(args)

        test_add_node_from_backup(network, args)
        test_add_node(network, args)
        test_retire_backup(network, args)
        test_add_as_many_pending_nodes(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        test_add_node_from_snapshot(network, args)
        test_add_node_from_snapshot(network, args, from_backup=True)
        test_add_node_from_snapshot(network, args, copy_ledger_read_only=False)
        latest_node_log = network.get_joined_nodes()[-1].remote.log_path()
        with open(latest_node_log, "r+") as log:
            assert any(
                "No snapshot found: Node will replay all historical transactions" in l
                for l in log.readlines()
            ), "New nodes shouldn't join from snapshot if snapshot evidence cannot be verified"

        test_node_filter(network, args)
Example #6
def wait_for_reconfiguration_to_complete(network, timeout=10):
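    # Poll each node's /node/consensus until every node reports a single
    # configuration and all agree on the latest reconfiguration id (rid)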
    max_num_configs = 0
    max_rid = 0
    all_same_rid = False
    end_time = time.time() + timeout
    while max_num_configs > 1 or not all_same_rid:
        max_num_configs = 0
        all_same_rid = True
        for node in network.get_joined_nodes():
            with node.client(self_signed_ok=True) as c:
                try:
                    r = c.get("/node/consensus")
                    rj = r.body.json()
                    cfgs = rj["details"]["configs"]
                    num_configs = len(cfgs)
                    max_num_configs = max(max_num_configs, num_configs)
                    if num_configs == 1 and cfgs[0]["rid"] != max_rid:
                        max_rid = max(max_rid, cfgs[0]["rid"])
                        all_same_rid = False
                except Exception as ex:
                    # OK, retiring node may be gone or a joining node may not be ready yet
                    LOG.info(f"expected RPC failure because of: {ex}")
        time.sleep(0.5)
        LOG.info(f"max num configs: {max_num_configs}, max rid: {max_rid}")
        if time.time() > end_time:
            raise Exception("Reconfiguration did not complete in time")
Example #7
def test_version(network, args):
    nodes = network.get_joined_nodes()

    for node in nodes:
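        # Each joined node should report the expected CCF version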
        with node.client() as c:
            r = c.get("/node/version")
            assert r.body.json()["ccf_version"] == args.ccf_version
Example #8
def run(args):
    txs = app.LoggingTxs()
    with infra.network.network(
            args.nodes,
            args.binary_dir,
            args.debug_nodes,
            args.perf_nodes,
            pdb=args.pdb,
            txs=txs,
    ) as network:
        network.start_and_join(args)

        test_add_node_from_backup(network, args)
        test_add_node(network, args)
        test_add_node_untrusted_code(network, args)
        test_retire_backup(network, args)
        test_add_as_many_pending_nodes(network, args)
        test_add_node(network, args)
        test_retire_primary(network, args)

        if args.snapshot_tx_interval is not None:
            test_add_node_from_snapshot(network,
                                        args,
                                        copy_ledger_read_only=True)
            test_add_node_from_snapshot(network,
                                        args,
                                        copy_ledger_read_only=False)
            errors, _ = network.get_joined_nodes()[-1].stop()
            if not any(
                    "No snapshot found: Node will request all historical transactions"
                    in s for s in errors):
                raise ValueError(
                    "New node shouldn't join from snapshot if snapshot cannot be verified"
                )
Example #9
def test_view_history(network, args):
    if args.consensus == "pbft":
        # This appears to work in PBFT, but it is unacceptably slow:
        # - Each /tx request is a write, with a non-trivial roundtrip response time
        # - Since each read (eg - /tx and /commit) has produced writes and a unique tx ID,
        #    there are too many IDs to test exhaustively
        # We could rectify this by making this test non-exhaustive (bisecting for view changes,
        # sampling within a view), but for now it is exhaustive and Raft-only
        LOG.warning("Skipping view reconstruction in PBFT")
        return network

    check = infra.checker.Checker()

    for node in network.get_joined_nodes():
        with node.client("user0") as c:
            r = c.get("/node/commit")
            check(c)

            commit_view = r.body["view"]
            commit_seqno = r.body["seqno"]

            # Retrieve status for all possible Tx IDs
            seqno_to_views = {}
            for seqno in range(1, commit_seqno + 1):
                views = []
                for view in range(1, commit_view + 1):
                    r = c.get(f"/node/tx?view={view}&seqno={seqno}")
                    check(r)
                    status = TxStatus(r.body["status"])
                    if status == TxStatus.Committed:
                        views.append(view)
                seqno_to_views[seqno] = views

            # Check we have exactly one Tx ID for each seqno
            txs_ok = True
            for seqno, views in seqno_to_views.items():
                if len(views) != 1:
                    txs_ok = False
                    LOG.error(
                        f"Node {node.node_id}: Found {len(views)} committed Tx IDs for seqno {seqno}"
                    )

            tx_ids_condensed = ", ".join(
                " OR ".join(f"{view}.{seqno}" for view in views or ["UNKNOWN"])
                for seqno, views in seqno_to_views.items())

            if txs_ok:
                LOG.success(
                    f"Node {node.node_id}: Found a valid sequence of Tx IDs:\n{tx_ids_condensed}"
                )
            else:
                LOG.error(
                    f"Node {node.node_id}: Invalid sequence of Tx IDs:\n{tx_ids_condensed}"
                )
                raise RuntimeError(
                    f"Node {node.node_id}: Incomplete or inconsistent view history"
                )

    return network
Example #10
def run(args):
    # Three nodes minimum (four for BFT) so that the network can still make
    # progress if one node stops
    hosts = ["localhost"] * (4 if args.consensus == "bft" else 3)

    with infra.network.network(hosts,
                               args.binary_dir,
                               args.debug_nodes,
                               args.perf_nodes,
                               pdb=args.pdb) as network:
        check = infra.checker.Checker()

        network.start_and_join(args)
        current_view = None

        # Number of nodes (f) to stop before the network can no longer make progress
        nodes_to_stop = math.ceil(len(hosts) / 2)
        if args.consensus == "bft":
            nodes_to_stop = math.ceil(len(hosts) / 3)

        primary_is_known = True
        for node_to_stop in range(nodes_to_stop):
            # Note that for the first iteration, the primary is known in advance anyway
            LOG.debug("Find freshly elected primary")
            # After a view change in bft, finding the new primary takes longer
            primary, current_view = network.find_primary(
                timeout=(30 if args.consensus == "bft" else 3))

            LOG.debug(
                "Commit new transactions, primary:{}, current_view:{}".format(
                    primary.node_id, current_view))
            with primary.client("user0") as c:
                res = c.post(
                    "/app/log/private",
                    {
                        "id": current_view,
                        "msg": "This log is committed in view {}".format(current_view),
                    },
                )
                check(res, result=True)
                seqno = res.seqno

            LOG.debug("Waiting for transaction to be committed by all nodes")
            wait_for_seqno_to_commit(seqno, current_view,
                                     network.get_joined_nodes())

            try:
                test_kill_primary(network, args)
            except PrimaryNotFound:
                if node_to_stop < nodes_to_stop - 1:
                    raise
                else:
                    primary_is_known = False

        assert not primary_is_known, "Primary is still known"
        LOG.success("Test ended successfully.")
Example #11
def test_quote(network, args):
    primary, _ = network.find_nodes()
    with primary.client() as c:
        oed = subprocess.run(
            [
                os.path.join(args.oe_binary, "oesign"),
                "dump",
                "-e",
                infra.path.build_lib_path(args.package, args.enclave_type),
            ],
            capture_output=True,
            check=True,
        )
        lines = [
            line for line in oed.stdout.decode().split(os.linesep)
            if line.startswith("mrenclave=")
        ]
        expected_mrenclave = lines[0].strip().split("=")[1]

        r = c.get("/node/quotes/self")
        primary_quote_info = r.body.json()
        assert primary_quote_info["node_id"] == 0
        primary_mrenclave = primary_quote_info["mrenclave"]
        assert primary_mrenclave == expected_mrenclave, (
            primary_mrenclave,
            expected_mrenclave,
        )

        r = c.get("/node/quotes")
        quotes = r.body.json()["quotes"]
        assert len(quotes) == len(network.get_joined_nodes())

        for quote in quotes:
            mrenclave = quote["mrenclave"]
            assert mrenclave == expected_mrenclave, (mrenclave,
                                                     expected_mrenclave)
            quote_path = os.path.join(network.common_dir,
                                      f"quote{quote['node_id']}")
            endorsements_path = os.path.join(
                network.common_dir, f"endorsements{quote['node_id']}")

            with open(quote_path, "wb") as q:
                q.write(bytes.fromhex(quote["raw"]))

            with open(endorsements_path, "wb") as e:
                e.write(bytes.fromhex(quote["endorsements"]))

            cafile = os.path.join(network.common_dir, "networkcert.pem")
            assert (infra.proc.ccall(
                "verify_quote.sh",
                f"https://{primary.pubhost}:{primary.pubport}",
                "--cacert",
                f"{cafile}",
                log_output=True,
            ).returncode == 0
                    ), f"Quote verification for node {quote['node_id']} failed"

    return network
Example #12
def test_each_node_cert_renewal(network, args):
    primary, _ = network.find_primary()
    now = datetime.now()
    validity_period_allowed = args.max_allowed_node_cert_validity_days - 1
    validity_period_forbidden = args.max_allowed_node_cert_validity_days + 1

    test_vectors = [
        (now, validity_period_allowed, None),
        (now, None,
         None),  # Omit validity period (deduced from service configuration)
        (now, -1, infra.proposal.ProposalNotCreated),
        (now, validity_period_forbidden, infra.proposal.ProposalNotAccepted),
    ]

    for (valid_from, validity_period_days, expected_exception) in test_vectors:
        for node in network.get_joined_nodes():
            with node.client() as c:
                c.get("/node/network/nodes")

                node_cert_tls_before = node.get_tls_certificate_pem()
                assert (infra.crypto.compute_public_key_der_hash_hex_from_pem(
                    node_cert_tls_before) == node.node_id)

                try:
                    valid_from_x509 = str(
                        infra.crypto.datetime_to_X509time(valid_from))
                    network.consortium.set_node_certificate_validity(
                        primary,
                        node,
                        valid_from=valid_from_x509,
                        validity_period_days=validity_period_days,
                    )
                    node.set_certificate_validity_period(
                        valid_from_x509,
                        validity_period_days
                        or args.max_allowed_node_cert_validity_days,
                    )
                except Exception as e:
                    assert isinstance(e, expected_exception)
                    continue
                else:
                    assert (
                        expected_exception is None
                    ), "Proposal should not have succeeded"

                node_cert_tls_after = node.get_tls_certificate_pem()
                assert (
                    node_cert_tls_before != node_cert_tls_after
                ), f"Node {node.local_node_id} certificate was not renewed"
                node.verify_certificate_validity_period()
                LOG.info(
                    f"Certificate for node {node.local_node_id} has successfully been renewed"
                )

                # Long-connected client is still connected after certificate renewal
                c.get("/node/network/nodes")

    return network
Example #13
def node_configs(network):
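    # Fetch /node/config from each joined node, skipping nodes that cannot be reached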
    configs = {}
    for node in network.get_joined_nodes():
        try:
            with node.client() as nc:
                configs[node.node_id] = nc.get("/node/config").body.json()
        except Exception:
            pass
    return configs
Example #14
def mem_stats(network):
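    # Collect /node/memory statistics from each joined node, skipping nodes that cannot be reached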
    mem = {}
    for node in network.get_joined_nodes():
        try:
            with node.client() as c:
                r = c.get("/node/memory", 0.1)
                mem[node.node_id] = r.body.json()
        except Exception:
            pass
    return mem
Example #15
def test_quote(network, args):
    if args.enclave_type == "virtual":
        LOG.warning("Quote test can only run in real enclaves, skipping")
        return network

    primary, _ = network.find_nodes()
    with primary.client() as c:
        oed = subprocess.run(
            [
                os.path.join(args.oe_binary, "oesign"),
                "dump",
                "-e",
                infra.path.build_lib_path(args.package, args.enclave_type),
            ],
            capture_output=True,
            check=True,
        )
        lines = [
            line
            for line in oed.stdout.decode().split(os.linesep)
            if line.startswith("mrenclave=")
        ]
        expected_mrenclave = lines[0].strip().split("=")[1]

        r = c.get("/node/quotes/self")
        primary_quote_info = r.body.json()
        assert primary_quote_info["node_id"] == primary.node_id
        primary_mrenclave = primary_quote_info["mrenclave"]
        assert primary_mrenclave == expected_mrenclave, (
            primary_mrenclave,
            expected_mrenclave,
        )

        r = c.get("/node/quotes")
        quotes = r.body.json()["quotes"]
        assert len(quotes) == len(network.get_joined_nodes())

        for quote in quotes:
            mrenclave = quote["mrenclave"]
            assert mrenclave == expected_mrenclave, (mrenclave, expected_mrenclave)

            cafile = os.path.join(network.common_dir, "service_cert.pem")
            assert (
                infra.proc.ccall(
                    "verify_quote.sh",
                    f"https://{primary.get_public_rpc_host()}:{primary.get_public_rpc_port()}",
                    "--cacert",
                    f"{cafile}",
                    log_output=True,
                ).returncode
                == 0
            ), f"Quote verification for node {quote['node_id']} failed"

    return network
Example #16
        def wrapper(*args, **kwargs):
            network = args[0]
            infra.e2e_args = vars(args[1])
            snapshot_tx_interval = infra.e2e_args.get("snapshot_tx_interval")
            if snapshot_tx_interval is not None:
                issue_historical_queries_with_snapshot(
                    network, int(snapshot_tx_interval))
            network = func(*args, **kwargs)
            # Only verify entries on the node that was just added
            network.txs.verify(node=network.get_joined_nodes()[-1])

            return network
Example #17
def test_version(network, args):
    if args.ccf_version is None:
        LOG.warning(
            "Skipping network version check as no expected version is specified"
        )
        return

    nodes = network.get_joined_nodes()

    for node in nodes:
        with node.client() as c:
            r = c.get("/node/version")
            assert r.body.json()["ccf_version"] == args.ccf_version
Example #18
def test_all_nodes_cert_renewal(network, args):
    primary, _ = network.find_primary()

    valid_from = str(infra.crypto.datetime_to_X509time(datetime.now()))
    validity_period_days = args.maximum_node_certificate_validity_days

    network.consortium.set_all_nodes_certificate_validity(
        primary,
        valid_from=valid_from,
        validity_period_days=validity_period_days,
    )

    for node in network.get_joined_nodes():
        node.set_certificate_validity_period(valid_from, validity_period_days)
Example #19
def test_kill_primary(network, args):
    primary, _ = network.find_primary_and_any_backup()
    primary.stop()
    network.wait_for_new_primary(primary)

    # Verify that the TxID reported just after an election is valid
    # Note that the first TxID read after an election may be of a signature
    # Tx (time-based signature generation) in the new term rather than the
    # last entry in the previous term
    for node in network.get_joined_nodes():
        with node.client() as c:
            r = c.get("/node/network")
            c.wait_for_commit(r)

    return network
Example #20
def test_node_ids(network, args):
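    # Look up each node by the public host/port of each of its RPC interfaces
    # and check the returned identity, status and interface count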
    nodes = network.get_joined_nodes()
    for node in nodes:
        for _, interface in node.host.rpc_interfaces.items():
            with node.client() as c:
                r = c.get(
                    f"/node/network/nodes?host={interface.public_host}&port={interface.public_port}"
                )

                assert r.status_code == http.HTTPStatus.OK.value
                info = r.body.json()["nodes"]
                assert len(info) == 1
                assert info[0]["node_id"] == node.node_id
                assert info[0]["status"] == NodeStatus.TRUSTED.value
                assert len(info[0]["rpc_interfaces"]) == len(
                    node.host.rpc_interfaces)
    return network
Example #21
def test_verify_quotes(network, args):
    if args.enclave_type == "virtual":
        LOG.warning("Skipping quote test with virtual enclave")
        return network

    for node in network.get_joined_nodes():
        LOG.info(f"Verifying quote for node {node.node_id}")
        cafile = os.path.join(network.common_dir, "networkcert.pem")
        assert (infra.proc.ccall(
            "verify_quote.sh",
            f"https://{node.pubhost}:{node.pubport}",
            "--cacert",
            f"{cafile}",
            log_output=True,
        ).returncode == 0
                ), f"Quote verification for node {node.node_id} failed"

    return network
Example #22
def test_new_service(
    network,
    args,
    install_path,
    binary_dir,
    library_dir,
    version,
    cycle_existing_nodes=False,
):
    LOG.info("Update constitution")
    primary, _ = network.find_primary()
    new_constitution = get_new_constitution_for_install(args, install_path)
    network.consortium.set_constitution(primary, new_constitution)

    # Note: Changes to constitution between versions should be tested here

    LOG.info(f"Add node to new service [cycle nodes: {cycle_existing_nodes}]")
    nodes_to_cycle = network.get_joined_nodes() if cycle_existing_nodes else []
    nodes_to_add_count = len(nodes_to_cycle) if cycle_existing_nodes else 1

    for _ in range(0, nodes_to_add_count):
        new_node = network.create_node(
            "local://localhost",
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
        )
        network.join_node(new_node, args.package, args)
        network.trust_node(new_node, args)
        new_node.verify_certificate_validity_period(
            expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS
        )

    for node in nodes_to_cycle:
        network.retire_node(primary, node)
        if primary == node:
            primary, _ = network.wait_for_new_primary(primary)
        node.stop()

    test_all_nodes_cert_renewal(network, args)

    LOG.info("Apply transactions to new nodes only")
    issue_activity_on_live_service(network, args)
    test_random_receipts(network, args, lts=True)
Example #23
File: recovery.py Project: lynshi/CCF
def test_recover_service(network, args, from_snapshot=False):
    network.save_service_identity(args)
    old_primary, _ = network.find_primary()

    snapshots_dir = None
    if from_snapshot:
        snapshots_dir = network.get_committed_snapshots(old_primary)

    # Start health watcher and stop nodes one by one until a recovery has to be staged
    watcher = infra.health_watcher.NetworkHealthWatcher(network,
                                                        args,
                                                        verbose=True)
    watcher.start()

    for node in network.get_joined_nodes():
        time.sleep(args.election_timeout_ms / 1000)
        node.stop()

    watcher.wait_for_recovery()

    # Stop remaining nodes
    network.stop_all_nodes()

    current_ledger_dir, committed_ledger_dirs = old_primary.get_ledger()

    recovered_network = infra.network.Network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        existing_network=network,
    )
    recovered_network.start_in_recovery(
        args,
        ledger_dir=current_ledger_dir,
        committed_ledger_dirs=committed_ledger_dirs,
        snapshots_dir=snapshots_dir,
    )

    recovered_network.recover(args)

    return recovered_network
Example #24
def test_verify_quotes(network, args):
    if args.enclave_type == "virtual":
        LOG.warning("Skipping quote test with virtual enclave")
        return network

    LOG.info("Check the network is stable")
    primary, _ = network.find_primary()
    check_can_progress(primary)

    for node in network.get_joined_nodes():
        LOG.info(f"Verifying quote for node {node.node_id}")
        cafile = os.path.join(network.common_dir, "service_cert.pem")
        assert (infra.proc.ccall(
            "verify_quote.sh",
            f"https://{node.get_public_rpc_host()}:{node.get_public_rpc_port()}",
            "--cacert",
            f"{cafile}",
            log_output=True,
        ).returncode == 0
                ), f"Quote verification for node {node.node_id} failed"

    return network
Example #25
def test_node_replacement(network, args):
    primary, backups = network.find_nodes()

    nodes = network.get_joined_nodes()
    node_to_replace = backups[-1]
    f = infra.e2e_args.max_f(args, len(nodes))
    f_backups = backups[:f]

    # Retire one node
    network.consortium.retire_node(primary, node_to_replace)
    node_to_replace.stop()
    network.nodes.remove(node_to_replace)
    check_can_progress(primary)

    # Add in a node using the same address
    replacement_node = network.create_and_trust_node(
        args.package,
        f"local://{node_to_replace.host}:{node_to_replace.rpc_port}",
        args,
        node_port=node_to_replace.node_port,
        from_snapshot=False,
    )

    assert replacement_node.node_id != node_to_replace.node_id
    assert replacement_node.host == node_to_replace.host
    assert replacement_node.node_port == node_to_replace.node_port
    assert replacement_node.rpc_port == node_to_replace.rpc_port
    LOG.info(
        f"Stopping {len(f_backups)} other nodes to make progress depend on the replacement"
    )
    for other_backup in f_backups:
        other_backup.suspend()
    # Confirm the network can make progress
    check_can_progress(primary)
    for other_backup in f_backups:
        other_backup.resume()

    return network
Example #26
def test_ack_state_digest_update(network, args):
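    # Have an active member refresh and re-ack its state digest via each joined node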
    for node in network.get_joined_nodes():
        network.consortium.get_any_active_member().update_ack_state_digest(node)
    return network
Example #27
    def check(network, args, *nargs, **kwargs):
        running_nodes = len(network.get_joined_nodes())
        if running_nodes < n:
            raise TestRequirementsNotMet(
                f"Too few nodes. Only have {running_nodes}, requires at least {n}"
            )
Example #28
def run(args):
    hosts = ["localhost", "localhost", "localhost"]

    LOG.info(f"setting seed to {args.seed}")
    random.seed(args.seed)
    txs = app.LoggingTxs()

    with infra.network.network(
        hosts, args.binary_dir, args.debug_nodes, args.perf_nodes, pdb=args.pdb, txs=txs
    ) as network:
        network.start_and_join(args)
        original_nodes = network.get_joined_nodes()
        view_info = {}

        suspend.update_view_info(network, view_info)
        app.test_run_txs(network=network, args=args, num_txs=TOTAL_REQUESTS)
        suspend.update_view_info(network, view_info)

        nodes_to_kill = [network.find_any_backup()]
        nodes_to_keep = [n for n in original_nodes if n not in nodes_to_kill]

        # check that a new node can catch up after all the requests
        late_joiner = network.create_and_trust_node(args.package, "localhost", args)
        nodes_to_keep.append(late_joiner)

        # some requests to be processed while the late joiner catches up
        # (no strict checking that these requests are actually being processed simultaneously with the node catchup)
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=int(TOTAL_REQUESTS / 2),
            nodes=original_nodes,  # doesn't contain late joiner
            verify=False,  # will try to verify for late joiner and it might not be ready yet
        )

        suspend.wait_for_late_joiner(original_nodes[0], late_joiner)

        # kill the old node(s) and ensure we are still making progress
        for backup_to_retire in nodes_to_kill:
            LOG.success(f"Stopping node {backup_to_retire.node_id}")
            backup_to_retire.stop()

        # check nodes are ok after we killed one off
        app.test_run_txs(
            network=network,
            args=args,
            nodes=nodes_to_keep,
            num_txs=len(nodes_to_keep),
            timeout=30,
            ignore_failures=True,
            # in the event of an early view change due to the late joiner this might
            # take longer than usual to complete and we don't want the test to break here
        )

        suspend.test_suspend_nodes(network, args, nodes_to_keep)

        # run txs while nodes get suspended
        app.test_run_txs(
            network=network,
            args=args,
            num_txs=4 * TOTAL_REQUESTS,
            timeout=30,
            ignore_failures=True,
            # in the event of an early view change due to the late joiner this might
            # take longer than usual to complete and we don't want the test to break here
        )

        suspend.update_view_info(network, view_info)

        # check nodes have resumed normal execution before shutting down
        app.test_run_txs(network=network, args=args, num_txs=len(nodes_to_keep))

        # we have asserted that all nodes are caught up
        # assert that view changes actually did occur
        assert len(view_info) > 1

        LOG.success("----------- views and primaries recorded -----------")
        for view, primary in view_info.items():
            LOG.success(f"view {view} - primary {primary}")
Example #29
def test_view_history(network, args):
    if args.consensus == "bft":
        # This appears to work in BFT, but it is unacceptably slow:
        # - Each /tx request is a write, with a non-trivial roundtrip response time
        # - Since each read (eg - /tx and /commit) has produced writes and a unique tx ID,
        #    there are too many IDs to test exhaustively
        # We could rectify this by making this test non-exhaustive (bisecting for view changes,
        # sampling within a view), but for now it is exhaustive and Raft-only
        LOG.warning("Skipping view reconstruction in BFT")
        return network

    check = infra.checker.Checker()

    previous_node = None
    previous_tx_ids = ""
    for node in network.get_joined_nodes():
        with node.client("user0") as c:
            r = c.get("/node/commit")
            check(c)

            commit_tx_id = TxID.from_str(r.body.json()["transaction_id"])

            # Retrieve status for all possible Tx IDs
            seqno_to_views = {}
            for seqno in range(1, commit_tx_id.seqno + 1):
                views = []
                for view in range(1, commit_tx_id.view + 1):
                    r = c.get(f"/node/tx?transaction_id={view}.{seqno}", log_capture=[])
                    check(r)
                    status = TxStatus(r.body.json()["status"])
                    if status == TxStatus.Committed:
                        views.append(view)
                seqno_to_views[seqno] = views

            # Check we have exactly one Tx ID for each seqno
            txs_ok = True
            for seqno, views in seqno_to_views.items():
                if len(views) != 1:
                    txs_ok = False
                    LOG.error(
                        f"Node {node.node_id}: Found {len(views)} committed Tx IDs for seqno {seqno}"
                    )

            tx_ids_condensed = ", ".join(
                " OR ".join(f"{view}.{seqno}" for view in views or ["UNKNOWN"])
                for seqno, views in seqno_to_views.items()
            )

            if txs_ok:
                LOG.success(
                    f"Node {node.node_id}: Found a valid sequence of Tx IDs:\n{tx_ids_condensed}"
                )
            else:
                LOG.error(
                    f"Node {node.node_id}: Invalid sequence of Tx IDs:\n{tx_ids_condensed}"
                )
                raise RuntimeError(
                    f"Node {node.node_id}: Incomplete or inconsistent view history"
                )

            # Compare view history between nodes
            if previous_tx_ids:
                # Some nodes may have a slightly longer view history so only compare the common prefix
                min_tx_ids_len = min(len(previous_tx_ids), len(tx_ids_condensed))
                assert (
                    tx_ids_condensed[:min_tx_ids_len]
                    == previous_tx_ids[:min_tx_ids_len]
                ), f"Tx IDs don't match between node {node.node_id} and node {previous_node.node_id}: {tx_ids_condensed[:min_tx_ids_len]} and {previous_tx_ids[:min_tx_ids_len]}"

            previous_tx_ids = tx_ids_condensed
            previous_node = node

    return network
Example #30
def test_new_service(
    network,
    args,
    install_path,
    binary_dir,
    library_dir,
    version,
    cycle_existing_nodes=False,
):
    LOG.info("Update constitution")
    primary, _ = network.find_primary()
    new_constitution = get_new_constitution_for_install(args, install_path)
    network.consortium.set_constitution(primary, new_constitution)

    all_nodes = network.get_joined_nodes()

    # Note: Changes to constitution between versions should be tested here

    LOG.info(f"Add node to new service [cycle nodes: {cycle_existing_nodes}]")
    nodes_to_cycle = network.get_joined_nodes() if cycle_existing_nodes else []
    nodes_to_add_count = len(nodes_to_cycle) if cycle_existing_nodes else 1

    # Pre-2.0 nodes require X509 time format
    valid_from = str(infra.crypto.datetime_to_X509time(
        datetime.datetime.now()))

    for _ in range(0, nodes_to_add_count):
        new_node = network.create_node(
            "local://localhost",
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
        )
        network.join_node(new_node, args.package, args)
        network.trust_node(
            new_node,
            args,
            valid_from=valid_from,
        )
        new_node.verify_certificate_validity_period(
            expected_validity_period_days=DEFAULT_NODE_CERTIFICATE_VALIDITY_DAYS
        )
        all_nodes.append(new_node)

    for node in nodes_to_cycle:
        network.retire_node(primary, node)
        if primary == node:
            primary, _ = network.wait_for_new_primary(primary)
        node.stop()

    test_all_nodes_cert_renewal(network, args, valid_from=valid_from)
    test_service_cert_renewal(network, args, valid_from=valid_from)

    LOG.info("Waiting for retired nodes to be automatically removed")
    for node in all_nodes:
        network.wait_for_node_in_store(
            primary,
            node.node_id,
            node_status=ccf.ledger.NodeStatus.TRUSTED
            if node.is_joined() else None,
        )

    if args.check_2tx_reconfig_migration:
        test_migration_2tx_reconfiguration(
            network,
            args,
            initial_is_1tx=False,  # Reconfiguration type added in 2.x
            binary_dir=binary_dir,
            library_dir=library_dir,
            version=version,
            valid_from=valid_from,
        )

    LOG.info("Apply transactions to new nodes only")
    issue_activity_on_live_service(network, args)
    test_random_receipts(network, args, lts=True)