def testNodeCatchupAfterDisconnect(
        sdk_new_node_caught_up, txnPoolNodeSet,
        sdk_node_set_with_node_added_after_some_txns):
    """
    A node that disconnects after some transactions should eventually get the
    transactions which happened while it was disconnected
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns

    logger.debug("Disconnecting node {} with pool ledger size {}".format(
        new_node, new_node.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            new_node,
                                            stopNode=False)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Connecting the stopped node, {}".format(new_node))
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)

    logger.debug("Waiting for the node to catch up, {}".format(new_node))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 10)
    checkNodeDataForEquality(new_node, *txnPoolNodeSet[:-1])
Exemplo n.º 2
0
def test_node_catchup_after_checkpoints(looper, txnPoolNodeSet, chk_freq_patched,
                                        wallet1, client1, client1Connected,
                                        slow_node_and_others):
    """
    For some reason a node misses 3pc messages but eventually the node stashes
    some amount checkpoints and decides to catchup.
    """
    slow_node, other_nodes = slow_node_and_others
    completed_catchups_before = get_number_of_completed_catchups(slow_node)

    logger.info("Step 1: Send less than required for start the catchup procedure"
                "and check the slow node falls behind")
    batches_num = 2 * chk_freq_patched - 1
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1,
                                           num_reqs=batches_num,
                                           num_batches=batches_num,
                                           )
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    logger.info("Step 2: Send remaining requests in order to trigger the catchup"
                "procedure for the slow node, then check data equality")
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1,
                                           num_reqs=1
                                           )
    waitNodeDataEquality(looper, slow_node, *other_nodes)
    # check if there was a catchup
    completed_catchups_after = get_number_of_completed_catchups(slow_node)
    assert completed_catchups_after == completed_catchups_before + 1
def testNodeCatchupAfterDisconnect(sdk_new_node_caught_up, txnPoolNodeSet,
                                   sdk_node_set_with_node_added_after_some_txns):
    """
    A node that disconnects after some transactions should eventually get the
    transactions which happened while it was disconnected
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns

    logger.debug("Disconnecting node {} with pool ledger size {}".
                 format(new_node, new_node.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, new_node, stopNode=False)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Connecting the stopped node, {}".format(new_node))
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)

    logger.debug("Waiting for the node to catch up, {}".format(new_node))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 10)
    checkNodeDataForEquality(new_node, *txnPoolNodeSet[:-1])
def testNodeCatchupAfterDisconnect(newNodeCaughtUp, txnPoolNodeSet,
                                   nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that disconnects after some transactions should eventually get the
    transactions which happened while it was disconnected
    :return:
    """
    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns

    logger.debug("Stopping node {} with pool ledger size {}".format(
        newNode, newNode.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            newNode,
                                            stopNode=False)
    looper.removeProdable(newNode)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Starting the stopped node, {}".format(newNode))
    looper.add(newNode)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, newNode)

    logger.debug("Waiting for the node to catch up, {}".format(newNode))
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10)
    checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1])
def testNodeCatchupAfterLostConnection(newNodeCaughtUp, txnPoolNodeSet,
                                   nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that has poor internet connection and got unsynced after some
    transactions should eventually get the transactions which happened while
    it was not accessible
    :return:
    """
    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    logger.debug("Disconnecting node {}, ledger size {}".
                 format(newNode, newNode.domainLedger.size))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, newNode,
                                            stopNode=False)
    looper.removeProdable(newNode)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, newNode, *txnPoolNodeSet[:-1])

    # logger.debug("Ensure node {} gets disconnected".format(newNode))
    ensure_node_disconnected(looper, newNode, txnPoolNodeSet[:-1])

    logger.debug("Connecting the node {} back, ledger size {}".
                 format(newNode, newNode.domainLedger.size))
    looper.add(newNode)

    logger.debug("Waiting for the node to catch up, {}".format(newNode))
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10)
    checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1])
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, client1,
                                          wallet1, client1Connected):
    """
    One of the node experiences poor network and loses 3PC messages. It has to
    do multiple rounds of catchup to be caught up
    """

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # Bad node does not process any 3 phase messages, equivalent to messages
    # being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad_node needs to do catchup again.

    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(
            getAllReturnVals(bad_node,
                             bad_node.is_catchup_needed,
                             compare_val_to=True))

    def caught_up_for_current_view_count():
        return len(
            getAllReturnVals(bad_node,
                             bad_node.caught_up_for_current_view,
                             compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = caught_up_for_current_view_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    # The bad_node caught up due to receiving sufficient ViewChangeDone
    # messages
    assert caught_up_for_current_view_count() > old_count_2

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        orig_method, bad_node.master_replica)
def testNodeCatchupFPlusOne(looper, txnPoolNodeSet, sdk_pool_handle,
                            sdk_wallet_steward, tconf, tdir, tdirWithPoolTxns,
                            allPluginsPath, testNodeClass):
    """
    Check that f+1 nodes is enough for catchup
    """

    assert len(txnPoolNodeSet) == 4

    node1 = txnPoolNodeSet[-1]
    node0 = txnPoolNodeSet[-2]

    logger.debug("Stopping node0 with pool ledger size {}".format(
        node0.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node0,
                                            stopNode=True)
    looper.removeProdable(node0)

    logger.debug("Sending requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    logger.debug("Stopping node1 with pool ledger size {}".format(
        node1.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node1,
                                            stopNode=True)
    looper.removeProdable(node1)

    # Make sure new node got out of sync
    # Excluding state check since the node is stopped hence the state db is closed
    waitNodeDataInequality(looper,
                           node0,
                           *txnPoolNodeSet[:-2],
                           exclude_from_check=['check_state'])

    # TODO: Check if the node has really stopped processing requests?

    logger.debug("Starting the stopped node0")
    nodeHa, nodeCHa = HA(*node0.nodestack.ha), HA(*node0.clientstack.ha)
    config_helper = PNodeConfigHelper(node0.name, tconf, chroot=tdir)
    node0 = testNodeClass(node0.name,
                          config_helper=config_helper,
                          ha=nodeHa,
                          cliha=nodeCHa,
                          config=tconf,
                          pluginPaths=allPluginsPath)
    looper.add(node0)

    logger.debug("Waiting for the node0 to catch up")
    waitNodeDataEquality(looper, node0, *txnPoolNodeSet[:-2])

    logger.debug("Sending more requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2)
    checkNodeDataForEquality(node0, *txnPoolNodeSet[:-2])
Exemplo n.º 8
0
def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages so after a view change it
    starts catchup. But before it can start requesting txns, the COMMITs messages
    are received and are ordered. The node should not request any transactions.
    :return:
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    delay_cm = 30
    delat_cp = 100
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay, this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delat_cp))

    # Count of `getCatchupReqs` which is called to construct the `CatchupReq`
    # to be sent
    def domain_cr_count():
        return sum(1 for entry in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.getCatchupReqs)
                   if entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           2 * sent_batches, sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count
    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_node_catchup_after_checkpoints(
        looper,
        chkFreqPatched,
        reqs_for_checkpoint,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        broken_node_and_others):
    """
    A node misses 3pc messages and checkpoints during some period but later it
    stashes some amount of checkpoints and decides to catchup.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize
    broken_node, other_nodes = broken_node_and_others

    logger.info("Step 1: The node misses quite a lot of requests")

    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint + max_batch_size)

    waitNodeDataInequality(looper, broken_node, *other_nodes)

    logger.info(
        "Step 2: The node gets requests but cannot process them because of "
        "missed ones. But the nodes eventually stashes some amount checkpoints "
        "after that the node starts catch up")

    repaired_node = repair_broken_node(broken_node)
    completed_catchups_before = get_number_of_completed_catchups(broken_node)

    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                           reqs_for_checkpoint - max_batch_size)

    waitNodeDataEquality(looper, repaired_node, *other_nodes)
    # Note that the repaired node might not fill the gap of missed 3PC-messages
    # by requesting them from other nodes because these 3PC-batches start from
    # an already stabilized checkpoint, so a part of these 3PC-messages are
    # already unavailable

    # Verify that a catch-up was done
    completed_catchups_after = get_number_of_completed_catchups(repaired_node)
    assert completed_catchups_after > completed_catchups_before

    logger.info("Step 3: Check if the node is able to process requests")

    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint + max_batch_size)

    waitNodeDataEquality(looper, repaired_node, *other_nodes)
def testNodeCatchupFPlusOne(looper,
                            txnPoolNodeSet,
                            sdk_pool_handle,
                            sdk_wallet_steward,
                            tconf, tdir,
                            tdirWithPoolTxns, allPluginsPath, testNodeClass):
    """
    Check that f+1 nodes is enough for catchup
    """

    assert len(txnPoolNodeSet) == 4

    node1 = txnPoolNodeSet[-1]
    node0 = txnPoolNodeSet[-2]

    logger.debug("Stopping node0 with pool ledger size {}".
                 format(node0.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, node0, stopNode=True)
    looper.removeProdable(node0)

    logger.debug("Sending requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)

    logger.debug("Stopping node1 with pool ledger size {}".
                 format(node1.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, node1, stopNode=True)
    looper.removeProdable(node1)

    # Make sure new node got out of sync
    # Excluding state check since the node is stopped hence the state db is closed
    waitNodeDataInequality(looper, node0, *txnPoolNodeSet[:-2],
                           exclude_from_check=['check_state'])

    # TODO: Check if the node has really stopped processing requests?

    logger.debug("Starting the stopped node0")
    nodeHa, nodeCHa = HA(*node0.nodestack.ha), HA(*node0.clientstack.ha)
    config_helper = PNodeConfigHelper(node0.name, tconf, chroot=tdir)
    node0 = testNodeClass(node0.name,
                          config_helper=config_helper,
                          ha=nodeHa, cliha=nodeCHa,
                          config=tconf, pluginPaths=allPluginsPath)
    looper.add(node0)

    logger.debug("Waiting for the node0 to catch up")
    waitNodeDataEquality(looper, node0, *txnPoolNodeSet[:-2])

    logger.debug("Sending more requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2)
    checkNodeDataForEquality(node0, *txnPoolNodeSet[:-2])
Exemplo n.º 11
0
def testNodeCatchupFPlusOne(txnPoolNodeSet, poolAfterSomeTxns, tconf, tdir,
                            tdirWithPoolTxns, allPluginsPath, testNodeClass):
    """
    Check that f+1 nodes is enough for catchup
    """
    looper, client, wallet = poolAfterSomeTxns

    assert len(txnPoolNodeSet) == 4

    node1 = txnPoolNodeSet[-1]
    node0 = txnPoolNodeSet[-2]

    logger.debug("Stopping node0 with pool ledger size {}".format(
        node0.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node0,
                                            stopNode=True)
    looper.removeProdable(node0)

    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)

    logger.debug("Stopping node1 with pool ledger size {}".format(
        node1.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node1,
                                            stopNode=True)
    looper.removeProdable(node1)

    # Make sure new node got out of sync
    waitNodeDataInequality(looper, node0, *txnPoolNodeSet[:-2])

    # TODO: Check if the node has really stopped processing requests?

    logger.debug("Starting the stopped node0")
    nodeHa, nodeCHa = HA(*node0.nodestack.ha), HA(*node0.clientstack.ha)
    config_helper = PNodeConfigHelper(node0.name, tconf, chroot=tdir)
    node0 = testNodeClass(node0.name,
                          config_helper=config_helper,
                          ha=nodeHa,
                          cliha=nodeCHa,
                          config=tconf,
                          pluginPaths=allPluginsPath)
    looper.add(node0)

    logger.debug("Waiting for the node0 to catch up")
    waitNodeDataEquality(looper, node0, *txnPoolNodeSet[:-2])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2)
    checkNodeDataForEquality(node0, *txnPoolNodeSet[:-2])
def test_node_catchup_causes_no_desync(looper, txnPoolNodeSet, sdk_pool_handle,
                                       sdk_wallet_client, monkeypatch,
                                       chkFreqPatched, reqs_for_checkpoint):
    """
    Checks that transactions received by catchup do not
    break performance monitoring
    """

    max_batch_size = chkFreqPatched.Max3PCBatchSize
    lagging_node = get_any_non_primary_node(txnPoolNodeSet)
    rest_nodes = set(txnPoolNodeSet).difference({lagging_node})

    # Make master replica lagging by hiding all messages sent to it
    make_master_replica_lag(lagging_node)
    monkeypatch.setattr(lagging_node.master_replica,
                        '_request_missing_three_phase_messages',
                        lambda *x, **y: None)

    # Send some requests and check that all replicas except master executed it
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client,
                              reqs_for_checkpoint - max_batch_size)
    waitNodeDataInequality(looper, lagging_node, *rest_nodes)
    looper.run(eventually(backup_replicas_run_forward, lagging_node))

    assert not lagging_node.monitor.isMasterDegraded()

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client,
                              reqs_for_checkpoint + max_batch_size)
    # Check that catchup done
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)

    lagging_node.reset_delays_and_process_delayeds()

    # Send some more requests to ensure that backup and master replicas
    # are in the same state
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client,
                              reqs_for_checkpoint - max_batch_size)
    looper.run(eventually(replicas_synced, lagging_node))

    # Check that master is not considered to be degraded
    assert not lagging_node.monitor.isMasterDegraded()
def test_node_catchup_causes_no_desync(looper, txnPoolNodeSet, sdk_pool_handle,
                                       sdk_wallet_client, monkeypatch,
                                       chkFreqPatched, reqs_for_checkpoint):
    """
    Checks that transactions received by catchup do not
    break performance monitoring
    """

    max_batch_size = chkFreqPatched.Max3PCBatchSize
    lagging_node = get_any_non_primary_node(txnPoolNodeSet)
    rest_nodes = set(txnPoolNodeSet).difference({lagging_node})

    # Make master replica lagging by hiding all messages sent to it
    make_master_replica_lag(lagging_node)
    monkeypatch.setattr(lagging_node.master_replica,
                        '_request_missing_three_phase_messages',
                        lambda *x, **y: None)

    # Send some requests and check that all replicas except master executed it
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client,
                              reqs_for_checkpoint - max_batch_size)
    waitNodeDataInequality(looper, lagging_node, *rest_nodes)
    looper.run(eventually(backup_replicas_run_forward, lagging_node))

    assert not lagging_node.monitor.isMasterDegraded()

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client,
                              reqs_for_checkpoint + max_batch_size)
    # Check that catchup done
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)

    lagging_node.reset_delays_and_process_delayeds()

    # Send some more requests to ensure that backup and master replicas
    # are in the same state
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client,
                              reqs_for_checkpoint - max_batch_size)
    looper.run(eventually(replicas_synced, lagging_node))

    # Check that master is not considered to be degraded
    assert not lagging_node.monitor.isMasterDegraded()
def test_node_catchup_after_checkpoints(looper, chk_freq_patched,
                                        txnPoolNodeSet, wallet1, client1,
                                        client1Connected,
                                        broken_node_and_others):
    """
    For some reason a node misses 3pc messages but eventually the node stashes
    some amount checkpoints and decides to catchup.
    """
    broken_node, other_nodes = broken_node_and_others
    completed_catchups_before = get_number_of_completed_catchups(broken_node)

    logger.info("Step 1: The node misses quite a lot requests")
    send_reqs_batches_and_get_suff_replies(
        looper,
        wallet1,
        client1,
        num_reqs=chk_freq_patched + 1,
        num_batches=chk_freq_patched + 1,
    )
    waitNodeDataInequality(looper, broken_node, *other_nodes)

    logger.info(
        "Step 2: The node gets requests but cannot process them because of "
        "missed ones. But the nodes eventually stashes some amount checkpoints "
        "after that the node starts catch up")
    repaired_node = repair_broken_node(broken_node)
    send_reqs_batches_and_get_suff_replies(looper,
                                           wallet1,
                                           client1,
                                           num_reqs=2 * chk_freq_patched,
                                           num_batches=2 * chk_freq_patched)
    waitNodeDataEquality(looper, repaired_node, *other_nodes)

    # check if there was at least 1 catchup
    completed_catchups_after = get_number_of_completed_catchups(repaired_node)
    assert completed_catchups_after >= completed_catchups_before + 1

    logger.info("Step 3: Check if the node is able to process requests")
    send_reqs_batches_and_get_suff_replies(looper,
                                           wallet1,
                                           client1,
                                           num_reqs=chk_freq_patched + 2,
                                           num_batches=chk_freq_patched + 2)
    waitNodeDataEquality(looper, repaired_node, *other_nodes)
def test_node_catchup_causes_no_desync(looper, txnPoolNodeSet, client1,
                                       wallet1, client1Connected, monkeypatch):
    """
    Checks that transactions received by catchup do not
    break performance monitoring
    """

    client, wallet = client1, wallet1
    lagging_node = get_any_non_primary_node(txnPoolNodeSet)
    rest_nodes = set(txnPoolNodeSet).difference({lagging_node})

    # Make master replica lagging by hiding all messages sent to it
    make_master_replica_lag(lagging_node)
    monkeypatch.setattr(lagging_node.master_replica,
                        '_request_missing_three_phase_messages',
                        lambda *x, **y: None)

    # Send some requests and check that all replicas except master executed it
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    waitNodeDataInequality(looper, lagging_node, *rest_nodes)
    looper.run(eventually(backup_replicas_run_forward, lagging_node))

    # Disconnect lagging node, send some more requests and start it back
    # After start it should fall in a such state that it needs to make catchup
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagging_node,
                                            stopNode=False)
    looper.removeProdable(lagging_node)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    looper.add(lagging_node)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, lagging_node)

    # Check that catchup done
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)

    # Send some more requests to ensure that backup and master replicas
    # are in the same state
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    looper.run(eventually(replicas_synced, lagging_node))

    # Check that master is not considered to be degraded
    assert not lagging_node.monitor.isMasterDegraded()
Exemplo n.º 16
0
def testNodeCatchupFPlusOne(txnPoolNodeSet, poolAfterSomeTxns):
    """
    Check that f+1 nodes is enough for catchup
    """
    looper, client, wallet = poolAfterSomeTxns

    assert len(txnPoolNodeSet) == 4

    node1 = txnPoolNodeSet[-1]
    node0 = txnPoolNodeSet[-2]

    logger.debug("Stopping node0 with pool ledger size {}".
                 format(node0.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, node0, stopNode=False)
    looper.removeProdable(node0)

    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)

    logger.debug("Stopping node1 with pool ledger size {}".
                 format(node1.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, node1, stopNode=False)
    looper.removeProdable(node1)

    # Make sure new node got out of sync
    waitNodeDataInequality(looper, node0, *txnPoolNodeSet[:-2])

    # TODO: Check if the node has really stopped processing requests?

    logger.debug("Starting the stopped node0")
    looper.add(node0)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet[:-1], node0)

    logger.debug("Waiting for the node0 to catch up")
    waitNodeDataEquality(looper, node0, *txnPoolNodeSet[:-2])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2)
    checkNodeDataForEquality(node0, *txnPoolNodeSet[:-2])
Exemplo n.º 17
0
def test_disconnected_node_catchup_plugin_ledger_txns(looper,
                                                      txnPoolNodeSet,
                                                      sdk_wallet_client,
                                                      sdk_pool_handle,
                                                      sdk_new_node_caught_up):
    """
    A node gets disconnected, a few config ledger txns happen,
    the disconnected node comes back up and catches up the config ledger
    """
    new_node = sdk_new_node_caught_up
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, new_node, stopNode=False)

    # Do some demo txns;
    some_demo_txns(looper, sdk_wallet_client, sdk_pool_handle)

    # Make sure new node got out of sync
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
Exemplo n.º 18
0
def test_disconnected_node_catchup_config_ledger_txns(
        looper, some_config_txns_done, txnPoolNodeSet, sdk_wallet_client,
        sdk_pool_handle, newNodeCaughtUp, keys):
    """
    A node gets disconnected, a few config ledger txns happen,
    the disconnected node comes back up and catches up the config ledger
    """
    new_node = newNodeCaughtUp
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            new_node,
                                            stopNode=False)

    # Do some config txns; using a fixture as a method, passing some arguments
    # as None as they only make sense for the fixture (pre-requisites)
    send_some_config_txns(looper, sdk_pool_handle, sdk_wallet_client, keys)

    # Make sure new node got out of sync
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
Exemplo n.º 19
0
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        looper, txnPoolNodeSet, steward1, stewardWallet, tconf, slow_node):
    """
    Delay 3PC to 1 node and then cause view change so by the time the view
    change happens(each node gets >n-f `INSTANCE_CHANGE`s), the slow node is
    behind other nodes. The should initiate catchup to come to the same state
    as other nodes.
    """

    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = tconf.PerfCheckFreq

    # Bad network introduced
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(2 * delay, 0))
    slow_node.nodeIbStasher.delay(cDelay(3 * delay, 0))
    for i in range(2):
        sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, steward1,
                                            20)
        waitNodeDataInequality(looper, slow_node, *fast_nodes)

    catchup_reply_counts = {
        n.name: n.ledgerManager.spylog.count(n.ledgerManager.processCatchupRep)
        for n in txnPoolNodeSet
    }
    catchup_done_counts = {
        n.name: n.spylog.count(n.allLedgersCaughtUp)
        for n in txnPoolNodeSet
    }

    def slow_node_processed_some():
        assert slow_node.master_replica.batches

    # The slow node has received some PRE-PREPAREs
    looper.run(eventually(slow_node_processed_some, retryWait=1,
                          timeout=delay))

    # No reverts have been called by the slow node
    rv = getAllReturnVals(slow_node.replicas[0],
                          TestReplica.revert_unordered_batches)
    assert not rv or max(rv) == 0

    # Delay reception of catchup replies so ViewChangeDone can be received
    # before catchup completes
    delay_catchup_reply = 2
    slow_node.nodeIbStasher.delay(cr_delay(delay_catchup_reply))

    ensure_view_change(looper, txnPoolNodeSet)
    # `slow_node` will not have elections done but others will.
    checkProtocolInstanceSetup(looper,
                               fast_nodes,
                               numInstances=len(slow_node.replicas),
                               retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # `slow_node` does catchup, `fast_nodes` don't
    for n in txnPoolNodeSet:
        assert n.spylog.count(
            n.allLedgersCaughtUp) > catchup_done_counts[n.name]
        if n == slow_node:
            assert n.ledgerManager.spylog.count(
                n.ledgerManager.processCatchupRep) > catchup_reply_counts[
                    n.name]
        else:
            assert n.ledgerManager.spylog.count(
                n.ledgerManager.processCatchupRep) == catchup_reply_counts[
                    n.name]

    # Greater than 0 batches were reverted by the slow node
    assert max(
        getAllReturnVals(
            slow_node.master_replica,
            slow_node.master_replica.revert_unordered_batches)) > 0

    # Bad network repaired
    slow_node.reset_delays_and_process_delayeds()

    # Make sure pool is functional
    sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, steward1, 5)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    One of the node experiences poor network and loses 3PC messages. It has to
    do multiple rounds of catchup to be caught up
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # Bad node does not process any 3 phase messages, equivalent to messages
    # being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    # Delay LEDGER_STAUS on slow node, so that only MESSAGE_REQUEST(LEDGER_STATUS) is sent, and the
    # node catch-ups 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from slow node, and return Consistency proof for all
    # missing txns, so no stashed ones are applied
    bad_node.nodeIbStasher.delay(lsDelay(1000))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad_node needs to do catchup again.

    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                    compare_val_to=True))

    def is_catchup_not_needed_count():
        return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                    compare_val_to=False))

    def has_ordered_till_last_prepared_certificate_count():
        return len(getAllReturnVals(bad_node,
                                    bad_node.has_ordered_till_last_prepared_certificate,
                                    compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = has_ordered_till_last_prepared_certificate_count()
    old_count_3 = is_catchup_not_needed_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    assert is_catchup_not_needed_count() > old_count_3
    # The bad_node caught up due to ordering till last prepared certificate
    assert has_ordered_till_last_prepared_certificate_count() > old_count_2

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        orig_method, bad_node.master_replica)
def test_catchup_not_triggered_if_another_in_progress(
        looper,
        chkFreqPatched,
        reqs_for_checkpoint,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        broken_node_and_others):
    """
    A node misses 3pc messages and checkpoints during some period but later it
    stashes some amount of checkpoints and starts catchup. When the node is
    performing the catchup, it receives more checkpoints enough to start a new
    catchup but it does not start it because the first catchup is in progress.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize
    broken_node, other_nodes = broken_node_and_others

    logger.info("Step 1: The node misses quite a lot of 3PC-messages and checkpoints")

    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint + max_batch_size)

    waitNodeDataInequality(looper, broken_node, *other_nodes)

    logger.info(
        "Step 2: The node receives 3PC-messages but cannot process them because of "
        "missed ones. But the node eventually stashes some amount of checkpoints "
        "and after that starts catchup")

    repaired_node = repair_broken_node(broken_node)

    initial_do_start_catchup_times = repaired_node.spylog.count(Node._do_start_catchup)
    initial_all_ledgers_caught_up = repaired_node.spylog.count(Node.allLedgersCaughtUp)

    with delay_rules(repaired_node.nodeIbStasher, cr_delay()):
        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client,
                                               (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                               reqs_for_checkpoint - max_batch_size)

        ensure_all_nodes_have_same_data(looper, other_nodes)
        target_ledger_size = other_nodes[0].domainLedger.size

        looper.run(eventually(lambda: assertExp(repaired_node.mode == Mode.syncing),
                              timeout=waits.expectedPoolInterconnectionTime(len(txnPoolNodeSet)) +
                                      waits.expectedPoolConsistencyProof(len(txnPoolNodeSet))))
    
        assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1

        logger.info(
            "Step 3: While doing the catchup, the node receives new checkpoints "
            "enough to start a new catchup but the node does not start it because "
            "the former is in progress")

        process_checkpoint_times_before = repaired_node.master_replica.spylog.count(Replica.processCheckpoint)

        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client,
                                               (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                               reqs_for_checkpoint)

        # Wait until the node receives the new checkpoints from all the other nodes
        looper.run(
            eventually(lambda: assertExp(repaired_node.master_replica.spylog.count(Replica.processCheckpoint) -
                                         process_checkpoint_times_before ==
                                         (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                         (len(txnPoolNodeSet) - 1)),
                       timeout=waits.expectedPoolInterconnectionTime(len(txnPoolNodeSet))))
    
        # New catchup is not started when another one is in progress
        assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1
        assert repaired_node.mode == Mode.syncing

    logger.info("Step 4: The node completes the catchup. The ledger has been "
                "updated to the level determined on its start")

    looper.run(eventually(lambda: assertExp(repaired_node.mode == Mode.participating),
                          timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))
    assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1
    assert repaired_node.spylog.count(Node.allLedgersCaughtUp) - initial_all_ledgers_caught_up == 1
    assert repaired_node.domainLedger.size == target_ledger_size
def test_complete_short_checkpoint_not_included_in_lag_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client,
        tdir, tconf, allPluginsPath):
    """
    Verifies that if the first stored own checkpoint has a not aligned lower
    bound (this means that it was started after a catch-up), is complete
    and there is a quorumed stashed checkpoint from other replicas with
    the same end then this stashed checkpoint is not included into the lag
    for a catch-up, i.e. in such a case the lag which makes the node perform
    catch-up is Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    received checkpoints.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    # To complete the first checkpoint send requests for 1 checkpoint minus
    # 2 3PC-batches (since there are already 2 3PC-batches in the first
    # checkpoint : with EpsilonSteward NYM transaction and with Epsilon NODE
    # transaction). This checkpoint has a not aligned lower bound
    # on the new node replicas so it will not be stabilized on them.
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint - 2 * max_batch_size)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # The master replica of the new node stops to receive 3PC-messages
    new_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(new_node)

    # Send requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                                           reqs_for_checkpoint)

    # Give time for the new node to catch up if it is going to do it
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    # Verify that the new node has not caught up
    assert get_number_of_completed_catchups(new_node) == completed_catchups_before_reqs

    # Send more requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # Verify that the new node has caught up
    assert get_number_of_completed_catchups(new_node) > completed_catchups_before_reqs
Exemplo n.º 23
0
def test_catchup_not_triggered_if_another_in_progress(
        looper,
        chkFreqPatched,
        reqs_for_checkpoint,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        broken_node_and_others):
    """
    A node misses 3pc messages and checkpoints during some period but later it
    stashes some amount of checkpoints and starts catchup. When the node is
    performing the catchup, it receives more checkpoints enough to start a new
    catchup but it does not start it because the first catchup is in progress.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize
    broken_node, other_nodes = broken_node_and_others

    logger.info("Step 1: The node misses quite a lot of 3PC-messages and checkpoints")

    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint + max_batch_size)

    waitNodeDataInequality(looper, broken_node, *other_nodes)

    logger.info(
        "Step 2: The node receives 3PC-messages but cannot process them because of "
        "missed ones. But the node eventually stashes some amount of checkpoints "
        "and after that starts catchup")

    repaired_node = repair_broken_node(broken_node)

    initial_do_start_catchup_times = repaired_node.spylog.count(Node._do_start_catchup)
    initial_all_ledgers_caught_up = repaired_node.spylog.count(Node.allLedgersCaughtUp)

    with delay_rules(repaired_node.nodeIbStasher, cr_delay()):
        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client,
                                               (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                               reqs_for_checkpoint - max_batch_size)

        ensure_all_nodes_have_same_data(looper, other_nodes)

        looper.run(eventually(lambda: assertExp(repaired_node.mode == Mode.discovering),
                              timeout=waits.expectedPoolInterconnectionTime(len(txnPoolNodeSet)) +
                                      waits.expectedPoolConsistencyProof(len(txnPoolNodeSet))))

        assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1

        logger.info(
            "Step 3: While doing the catchup, the node receives new checkpoints "
            "enough to start a new catchup but the node does not start it because "
            "the former is in progress")

        process_checkpoint_times_before = \
            repaired_node.master_replica._checkpointer.spylog.count(CheckpointService.process_checkpoint)

        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client,
                                               (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                               reqs_for_checkpoint)

        # Wait until the node receives the new checkpoints from all the other nodes
        looper.run(
            eventually(lambda: assertExp(
                repaired_node.master_replica._checkpointer.spylog.count(CheckpointService.process_checkpoint) -
                process_checkpoint_times_before ==
                (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                (len(txnPoolNodeSet) - 1)),
                       timeout=waits.expectedPoolInterconnectionTime(len(txnPoolNodeSet))))

        # New catchup is not started when another one is in progress
        assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1
        assert repaired_node.mode == Mode.discovering

    logger.info("Step 4: The node completes the catchup. The ledger has been "
                "updated to the level determined on its start")

    looper.run(eventually(lambda: assertExp(repaired_node.mode == Mode.participating),
                          timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))
    assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1
    assert repaired_node.spylog.count(Node.allLedgersCaughtUp) - initial_all_ledgers_caught_up == 1
    assert repaired_node.domainLedger.size == other_nodes[0].domainLedger.size
Exemplo n.º 24
0
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    One of the node experiences poor network and loses 3PC messages. It has to
    do multiple rounds of catchup to be caught up
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]

    # Bad node does not process any 3 Commit messages, equivalent to messages
    with delay_rules(bad_node.nodeIbStasher, cDelay()):

        # Delay LEDGER_STAUS on slow node, so that only MESSAGE_REQUEST(LEDGER_STATUS) is sent, and the
        # node catch-ups 2 times.
        # Otherwise other nodes may receive multiple LEDGER_STATUSes from slow node, and return Consistency proof for all
        # missing txns, so no stashed ones are applied

        bad_node.nodeIbStasher.delay(lsDelay(1000))

        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 6 * Max3PCBatchSize)
        waitNodeDataInequality(looper, bad_node, *other_nodes)

        # Patch all nodes to return ConsistencyProof of a smaller ledger to the
        # bad node but only once, so that the bad_node needs to do catchup again.

        make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                                  Max3PCBatchSize)

        def is_catchup_needed_count():
            return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                        compare_val_to=True))

        def is_catchup_not_needed_count():
            return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                        compare_val_to=False))

        def has_ordered_till_last_prepared_certificate_count():
            return len(getAllReturnVals(bad_node,
                                        bad_node.has_ordered_till_last_prepared_certificate,
                                        compare_val_to=True))

        old_is_catchup_needed_count = is_catchup_needed_count()
        old_is_catchup_not_needed_count = is_catchup_not_needed_count()
        old_has_ordered_till_last_prepared_certificate_count = has_ordered_till_last_prepared_certificate_count()
        ensure_view_change(looper, txnPoolNodeSet)
        checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
        ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet,
                                        exclude_from_check=['check_last_ordered_3pc',
                                                            'check_audit',
                                                            'check_last_ordered_3pc_backup'])

        assert is_catchup_needed_count() == old_is_catchup_needed_count
        assert is_catchup_not_needed_count() > old_is_catchup_not_needed_count
        # The bad_node caught up due to ordering till last prepared certificate
        assert has_ordered_till_last_prepared_certificate_count() > old_has_ordered_till_last_prepared_certificate_count

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet,
                                    exclude_from_check=['check_last_ordered_3pc_backup'])
def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet,
                                    sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages so after a view change it
    starts catchup. But before it can start requesting txns, the COMMITs messages
    are received and are ordered. The node should not request any transactions.
    :return:
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * 3,
                                           3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    delay_cm = 30
    delat_cp = 100
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay, this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delat_cp))

    # Count of `getCatchupReqs` which is called to construct the `CatchupReq`
    # to be sent
    def domain_cr_count(): return sum(1 for entry in
                                      slow_node.ledgerManager.spylog.getAll(
                                          slow_node.ledgerManager.getCatchupReqs) if
                                      entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * sent_batches,
                                           sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count
    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)
def test_slow_node_reverts_unordered_state_during_catchup(looper,
                                                          txnPoolNodeSet,
                                                          sdk_pool_handle,
                                                          sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catchup, it needs to
    revert some unordered state. Also till this time the node should have
    receive all COMMITs such that it will apply some of the COMMITs (
    for which it has not received txns from catchup).
    For this delay COMMITs by long, do catchup for a little older than the
    state received in LedgerStatus, once catchup completes, reset delays and
    try to process delayed COMMITs, some COMMITs will be rejected but some will
    be processed since catchup was done for older ledger.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STAUS on slow node, so that only MESSAGE_REQUEST(LEDGER_STATUS) is sent, and the
    # node catch-ups 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from slow node, and return Consistency proof for all
    # missing txns, so no stashed ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        lst_3pc = check_last_ordered_3pc(n1, n2)

    def chk1():
        # `slow_node` has prepared all 3PC messages which
        # `other_nodes` have ordered
        assertEquality(slow_master_replica.last_prepared_before_view_change, lst_3pc)

    looper.run(eventually(chk1, retryWait=1))

    old_pc_count = slow_master_replica.spylog.count(
        slow_master_replica.can_process_since_view_change_in_progress)

    assert len(slow_node.stashedOrderedReqs) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica.spylog.count(
            slow_master_replica.can_process_since_view_change_in_progress) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # COMMITs are stashed
        assert len(slow_node.stashedOrderedReqs) == delay_batches * Max3PCBatchSize

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # Some COMMITs were ordered but stashed and they were processed
        rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
        assert delay_batches in rv

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 5))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(
            chk5,
            retryWait=1,
            timeout=waits.expectedPoolCatchupTime(
                len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_complete_short_checkpoint_not_included_in_lag_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client,
        tdir, tconf, allPluginsPath):
    """
    Verifies that if the first stored own checkpoint has a not aligned lower
    bound (this means that it was started after a catch-up), is complete
    and there is a quorumed stashed checkpoint from other replicas with
    the same end then this stashed checkpoint is not included into the lag
    for a catch-up, i.e. in such a case the lag which makes the node perform
    catch-up is Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    received checkpoints.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], exclude_from_check=['check_last_ordered_3pc_backup'])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    # To complete the first checkpoint send requests for 1 checkpoint minus
    # 2 3PC-batches (since there are already 2 3PC-batches in the first
    # checkpoint : with EpsilonSteward NYM transaction and with Epsilon NODE
    # transaction). This checkpoint has a not aligned lower bound
    # on the new node replicas so it will not be stabilized on them.
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint - 2 * max_batch_size)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], exclude_from_check=['check_last_ordered_3pc_backup'])

    # The master replica of the new node stops to receive 3PC-messages
    new_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(new_node)

    # Send requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                                           reqs_for_checkpoint)

    # Give time for the new node to catch up if it is going to do it
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1], exclude_from_check=['check_last_ordered_3pc_backup'])

    # Verify that the new node has not caught up
    assert get_number_of_completed_catchups(new_node) == completed_catchups_before_reqs

    # Send more requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1], exclude_from_check=['check_last_ordered_3pc_backup'])

    # Verify that the new node has caught up
    assert get_number_of_completed_catchups(new_node) > completed_catchups_before_reqs
def test_slow_node_reverts_unordered_state_during_catchup(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catchup, it needs to
    revert some unordered state. Also till this time the node should have
    receive all COMMITs such that it will apply some of the COMMITs (
    for which it has not received txns from catchup).
    For this delay COMMITs by long, do catchup for a little older than the
    state received in LedgerStatus, once catchup completes, reset delays and
    try to process delayed COMMITs, some COMMITs will be rejected but some will
    be processed since catchup was done for older ledger.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STAUS on slow node, so that only MESSAGE_REQUEST(LEDGER_STATUS) is sent, and the
    # node catch-ups 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from slow node, and return Consistency proof for all
    # missing txns, so no stashed ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        check_last_ordered_3pc(n1, n2)

    assert slow_master_replica.last_prepared_before_view_change == old_last_ordered

    old_pc_count = slow_master_replica._ordering_service.spylog.count(
        slow_master_replica._ordering_service._validate)

    assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica._ordering_service.spylog.count(
            slow_master_replica._ordering_service._validate) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # (delay_batches * Max3PCBatchSize * commits_count_in_phase) COMMITs are stashed
        assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == \
               delay_batches * Max3PCBatchSize * (len(txnPoolNodeSet) - 1)

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # Some COMMITs were received but stashed and
        # they will processed after catchup
        assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == 0

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 50))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(chk5,
                   retryWait=1,
                   timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_slow_node_reverts_unordered_state_during_catchup(
        looper, txnPoolNodeSet, client1, wallet1, client1Connected):
    """
    Delay COMMITs to a node such that when it needs to catchup, it needs to
    revert some unordered state. Also till this time the node should have
    receive all COMMITs such that it will apply some of the COMMITs (
    for which it has not received txns from catchup).
    For this delay COMMITs by long, do catchup for a little older than the
    state received in LedgerStatus, once catchup completes, reset delays and
    try to process delayed COMMITs, some COMMITs will be rejected but some will
    be processed since catchup was done for older ledger.
    """
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150
    catchup_rep_delay = 15

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        6 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_twice(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                              delay_batches * Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(
            getAllReturnVals(slow_node,
                             slow_node.is_catchup_needed,
                             compare_val_to=True))

    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)
    old_cn_count = is_catchup_needed_count()

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        lst_3pc = check_last_ordered_3pc(n1, n2)

    def chk1():
        # `slow_node` has prepared all 3PC messages which
        # `other_nodes` have ordered
        assertEquality(slow_master_replica.last_prepared_before_view_change,
                       lst_3pc)

    looper.run(eventually(chk1, retryWait=1))

    old_pc_count = slow_master_replica.spylog.count(
        slow_master_replica.can_process_since_view_change_in_progress)

    # Repair the network so COMMITs are received and processed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica.spylog.count(
            slow_master_replica.can_process_since_view_change_in_progress
        ) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # Some COMMITs were ordered but stashed and they were processed
        rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
        assert delay_batches in rv

    looper.run(eventually(chk3, retryWait=1, timeout=catchup_rep_delay + 5))

    def chk4():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(chk4,
                   retryWait=1,
                   timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

    def chk5():
        # Once catchup was done, need of other catchup was not found
        assertEquality(is_catchup_needed_count(), old_cn_count)

    looper.run(eventually(chk5, retryWait=1, timeout=5))

    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)