Example #1
def test_catchup_with_all_nodes_sending_cons_proofs_dead(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, logsearch):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)

    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    catchup_reqs = {
        node.name: start_delaying(node.nodeIbStasher, cqDelay())
        for node in other_nodes
    }
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[
        AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(
        eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure cons proofs were gathered from all 3 other nodes
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests to reach only the nodes that didn't provide cons proofs
    for node_id, node_reqs in catchup_reqs.items():
        if node_id not in audit_catchup_service._nodes_ledger_sizes:
            stop_delaying_and_process(node_reqs)

    # Check that catchup finishes successfully and that re-asks happened
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    assert len(log_re_ask) - old_re_ask_count > 0
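
This example, and the ones below, lean on plenum's stasher helpers: start_delaying installs a delayer on a node's nodeIbStasher and returns a handle, and stop_delaying_and_process later removes it and feeds the withheld messages back in. A minimal, self-contained sketch of that hold-and-release idea (a toy queue with made-up names, not plenum's real Stasher API) might look like this:

# Toy hold-and-release queue illustrating the pattern behind
# start_delaying / stop_delaying_and_process above.
# This is NOT plenum's Stasher; names and behaviour are simplified assumptions.
from typing import Callable, List


class ToyStasher:
    def __init__(self):
        self.delayers: List[Callable[[object], bool]] = []
        self.stashed: List[object] = []
        self.processed: List[object] = []

    def start_delaying(self, predicate: Callable[[object], bool]) -> Callable[[], None]:
        """Hold back every message matching predicate; return a handle that releases them."""
        self.delayers.append(predicate)

        def stop_and_process():
            self.delayers.remove(predicate)
            held, self.stashed = self.stashed, []
            for msg in held:
                self.receive(msg)  # re-inject what was held; other delayers may still apply

        return stop_and_process

    def receive(self, msg) -> None:
        if any(d(msg) for d in self.delayers):
            self.stashed.append(msg)
        else:
            self.processed.append(msg)


stasher = ToyStasher()
release = stasher.start_delaying(lambda m: m == "COMMIT")
stasher.receive("COMMIT")
stasher.receive("PREPARE")
assert stasher.processed == ["PREPARE"]             # COMMIT is held back
release()
assert stasher.processed == ["PREPARE", "COMMIT"]   # processed once the delay is lifted
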
def test_catchup_uses_only_nodes_with_cons_proofs(looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    start_delaying(lagging_node.nodeIbStasher, delay_3pc())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 10)

    catchup_reqs = {node.name: start_delaying(node.nodeIbStasher, cqDelay())
                    for node in other_nodes}
    audit_catchup_service = lagging_node.ledgerManager._node_leecher._leechers[AUDIT_LEDGER_ID]._catchup_rep_service
    lagging_node.start_catchup()
    looper.run(eventually(lambda: assert_eq(audit_catchup_service._is_working, True)))

    # Make sure cons proofs were gathered from all 3 other nodes
    assert len(audit_catchup_service._nodes_ledger_sizes) == 3

    # Allow catchup requests to reach only the nodes that provided cons proofs
    for node_id in audit_catchup_service._nodes_ledger_sizes.keys():
        stop_delaying_and_process(catchup_reqs[node_id])

    # Check catchup finishes successfully
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=30)
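
The looper.run(eventually(...)) calls used throughout these examples poll a check until it stops failing or a timeout expires. A rough, purely illustrative stand-in for that polling idea (the real stp_core eventually is coroutine-based and configurable; this sketch is an assumption, not its API):

# Toy polling loop illustrating the idea behind eventually(...);
# not stp_core's implementation, just the retry-until-timeout pattern.
import time


def toy_eventually(check, timeout: float = 5.0, interval: float = 0.1):
    """Call check() repeatedly until it stops raising AssertionError or timeout expires."""
    deadline = time.monotonic() + timeout
    while True:
        try:
            return check()
        except AssertionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(interval)


counter = {"n": 0}

def check():
    counter["n"] += 1
    assert counter["n"] >= 3   # fails on the first two polls, then passes

toy_eventually(check, timeout=1.0, interval=0.01)
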
def test_catchup_with_skipped_commits(tdir, tconf,
                                      looper,
                                      txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    lagging_stasher = lagging_node.nodeIbStasher
    other_nodes = txnPoolNodeSet[:-1]
    other_stashers = [node.nodeIbStasher for node in other_nodes]

    def lagging_node_state() -> NodeLeecherService.State:
        return lagging_node.ledgerManager._node_leecher._state

    def check_lagging_node_is_not_syncing_audit():
        assert lagging_node_state() != NodeLeecherService.State.SyncingAudit

    def check_lagging_node_done_catchup():
        assert lagging_node_state() == NodeLeecherService.State.Idle

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Delay some commits on lagging node
    some_commits = start_delaying(lagging_stasher, delay_some_commits(0, [2, 3]))

    # Order 4 more requests in pool
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 4)

    with delay_rules(lagging_stasher, delay_catchup(AUDIT_LEDGER_ID)):
        # Start catchup
        lagging_node.start_catchup()
        looper.runFor(0.5)
        assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

        # Allow some missing commits to be finally received
        stop_delaying_and_process(some_commits)
        looper.runFor(0.5)

    # Ensure that the lagging node finishes catchup
    looper.run(eventually(check_lagging_node_done_catchup))

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
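
delay_rules, used in the example above, scopes a delayer to a with-block: the rule is installed on entry and removed on exit, after which the messages held while it was active get processed. A small sketch of that scoping idea with contextlib; the names here are hypothetical, not plenum's implementation:

# Sketch of the scoped-delay idea behind delay_rules (hypothetical names;
# the real helper also re-injects the messages it held back, omitted here).
from contextlib import contextmanager


@contextmanager
def toy_delay_rules(active_delayers: list, predicate):
    """Install predicate for the duration of the with-block, then remove it."""
    active_delayers.append(predicate)
    try:
        yield
    finally:
        active_delayers.remove(predicate)


rules = []
with toy_delay_rules(rules, lambda msg: msg == "CATCHUP_REQ"):
    assert len(rules) == 1    # the rule is active only inside the block
assert rules == []            # and is removed on exit
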
Example #4
def test_max_3pc_batches_in_flight(tdir, tconf,
                                   looper,
                                   txnPoolNodeSet,
                                   sdk_pool_handle,
                                   sdk_wallet_client):
    # Check pool initial state
    initial_3pc = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    for node in txnPoolNodeSet[1:]:
        assert node.master_replica.last_ordered_3pc == initial_3pc

    # Utility
    def check_ordered_till(pp_seq_no: int):
        for node in txnPoolNodeSet:
            last_ordered = node.master_replica.last_ordered_3pc
            assert last_ordered[0] == initial_3pc[0]
            assert last_ordered[1] == pp_seq_no

    # Delay some commits
    all_stashers = [node.nodeIbStasher for node in txnPoolNodeSet]
    delayers = []
    for num in range(BATCHES_TO_ORDER):
        pp_seq_no = initial_3pc[1] + num + 1
        delayer = start_delaying(all_stashers, delay_3pc(after=pp_seq_no - 1,
                                                         before=pp_seq_no + 1))
        delayers.append((pp_seq_no, delayer))

    # Send a number of requests
    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, BATCHES_TO_ORDER)

    # Release delayed batches one by one, checking the number of batches in flight
    for pp_seq_no, delayer in delayers:
        stop_delaying_and_process(delayer)
        looper.run(eventually(check_ordered_till, pp_seq_no))

        for node in txnPoolNodeSet:
            for replica in node.replicas.values():
                batches_in_flight = replica.lastPrePrepareSeqNo - replica.last_ordered_3pc[1]
                assert batches_in_flight <= MAX_BATCHES_IN_FLIGHT

    # Check all requests are ordered
    sdk_get_and_check_replies(looper, reqs)

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
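
The in-flight check in the loop above is plain arithmetic on the replica's 3PC counters: batches that already have a PrePrepare but are not yet ordered. A tiny hypothetical helper to make that relation explicit (MAX_BATCHES_IN_FLIGHT itself comes from test config and is assumed here to be a small constant such as 4):

# Hypothetical helper mirroring the in-flight computation inside the loop above.
def batches_in_flight(last_pre_prepare_seq_no: int, last_ordered_pp_seq_no: int) -> int:
    """Number of batches that have been pre-prepared but not yet ordered."""
    return last_pre_prepare_seq_no - last_ordered_pp_seq_no


# Example: a replica that has pre-prepared batch 10 while batch 7 is the last
# ordered one has 3 batches in flight, which satisfies a limit of 4.
assert batches_in_flight(10, 7) == 3
assert batches_in_flight(10, 7) <= 4
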
def test_view_change_during_unstash(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client, tconf):
    slow_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    slow_stasher = slow_node.nodeIbStasher
    other_stashers = [n.nodeIbStasher for n in other_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Prevent ordering of some requests
    start_delaying(all_stashers, delay_3pc(after=7, msgs=(Prepare, Commit)))

    # Stop ordering on slow node and send requests
    slow_node_after_5 = start_delaying(slow_stasher,
                                       delay_3pc(after=5, msgs=Commit))
    slow_node_until_5 = start_delaying(slow_stasher, delay_3pc(after=0))
    reqs_view_0 = sdk_send_random_requests(looper, sdk_pool_handle,
                                           sdk_wallet_client, 8)

    # Make pool order first 2 batches and pause
    pool_after_3 = start_delaying(other_stashers, delay_3pc(after=3))
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))

    # Start catchup, continue ordering everywhere (except the last two batches on the slow node)
    with delay_rules(slow_stasher, cr_delay()):
        slow_node._do_start_catchup(just_started=False)
        looper.run(eventually(check_catchup_is_started, slow_node))
        stop_delaying_and_process(pool_after_3)
        looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 7))

    # Finish catchup and continue processing on slow node
    looper.run(eventually(check_catchup_is_finished, slow_node))
    stop_delaying_and_process(slow_node_until_5)
    looper.run(eventually(check_nodes_ordered_till, [slow_node], 0, 5))

    # Start view change and allow slow node to get remaining commits
    with delay_rules(all_stashers, icDelay()):
        for node in txnPoolNodeSet:
            node.view_changer.on_master_degradation()
        looper.runFor(0.1)
    stop_delaying_and_process(slow_node_after_5)

    # Ensure that expected number of requests was ordered
    replies = sdk_get_replies(looper, reqs_view_0)
    for rep in replies[:6]:
        sdk_check_reply(rep)

    # Ensure that everything is ok
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
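
In the example above every node calls on_master_degradation() while InstanceChange messages are delayed by icDelay, so the view change only proceeds once those votes are delivered and a quorum of them agrees. A minimal sketch of the quorum arithmetic (assuming the usual BFT bound f = (n - 1) // 3 and an n - f instance-change quorum; plenum's ViewChanger is of course far richer than this):

# Toy quorum arithmetic for the instance-change votes sent above
# (assumed n - f quorum; not plenum's ViewChanger implementation).
def toy_view_change_quorum(n: int) -> int:
    f = (n - 1) // 3   # maximum faulty nodes tolerated
    return n - f       # votes for the same next view needed to start a view change


assert toy_view_change_quorum(4) == 3   # the 4-node pool used in these tests
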
def test_catchup_with_skipped_commits(tdir, tconf, looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    lagging_stasher = lagging_node.nodeIbStasher
    other_nodes = txnPoolNodeSet[:-1]
    other_stashers = [node.nodeIbStasher for node in other_nodes]

    def lagging_node_state() -> NodeLeecherService.State:
        return lagging_node.ledgerManager._node_leecher._state

    def check_lagging_node_is_not_syncing_audit():
        assert lagging_node_state() != NodeLeecherService.State.SyncingAudit

    def check_lagging_node_done_catchup():
        assert lagging_node_state() == NodeLeecherService.State.Idle

    def check_nodes_ordered_till(nodes: Iterable, view_no: int,
                                 pp_seq_no: int):
        for node in nodes:
            assert compare_3PC_keys((view_no, pp_seq_no),
                                    node.master_replica.last_ordered_3pc) >= 0

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Setup delayers
    lagging_mid_commits = start_delaying(
        lagging_stasher, delay_3pc(after=3, before=6, msgs=Commit))
    others_mid_commits = start_delaying(
        other_stashers, delay_3pc(after=3, before=6, msgs=Commit))
    start_delaying(lagging_stasher, delay_3pc(before=4, msgs=Commit))

    # Send more requests
    reqs = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                                    6)

    # Wait until pool ordered till (0, 3)
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))
    assert lagging_node.master_replica.last_ordered_3pc == (0, 1)

    with delay_rules(lagging_stasher, delay_catchup(DOMAIN_LEDGER_ID)):
        with delay_rules(lagging_stasher, delay_catchup(AUDIT_LEDGER_ID)):
            # Start catchup
            lagging_node.start_catchup()
            looper.runFor(0.5)
            assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

            # Process missing commits on lagging node
            stop_delaying_and_process(lagging_mid_commits)
            looper.runFor(0.5)

        # Allow the audit ledger to be caught up
        looper.run(eventually(check_lagging_node_is_not_syncing_audit))
        stop_delaying_and_process(others_mid_commits)

    # Ensure that the lagging node finishes catchup
    looper.run(eventually(check_lagging_node_done_catchup))

    # Ensure that all requests were ordered
    sdk_get_and_check_replies(looper, reqs)

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
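
check_nodes_ordered_till above relies on compare_3PC_keys returning a non-negative value when the node's last_ordered_3pc has reached the given (view_no, pp_seq_no). A simplified stand-in for that comparison (an assumption about its ordering; the real helper also handles None keys):

# Simplified stand-in for compare_3PC_keys (assumed lexicographic ordering by
# view number, then pp_seq_no; None handling of the real helper is omitted).
def toy_compare_3pc_keys(key1, key2) -> int:
    """> 0 if key2 is ahead of key1, 0 if they are equal, < 0 otherwise."""
    if key1[0] != key2[0]:
        return key2[0] - key1[0]
    return key2[1] - key1[1]


# As used in check_nodes_ordered_till: last_ordered must be at least (view_no, pp_seq_no)
assert toy_compare_3pc_keys((0, 3), (0, 5)) >= 0   # node already ordered past (0, 3)
assert toy_compare_3pc_keys((0, 3), (0, 1)) < 0    # node has not reached (0, 3) yet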