Example #1
def test_lag_size_for_catchup(looper, chkFreqPatched, reqs_for_checkpoint,
                              txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client):
    """
    Verifies that if the stored own checkpoints have aligned bounds, then
    the master replica lag that makes the node perform catch-up is
    Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed received
    checkpoints.
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    # The master replica of the slow node stops receiving 3PC-messages
    slow_node.master_replica.threePhaseRouter.extend((
        (PrePrepare, lambda *x, **y: None),
        (Prepare, lambda *x, **y: None),
        (Commit, lambda *x, **y: None),
    ))
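    # With these no-op handlers registered, incoming PrePrepare, Prepare and
    # Commit messages are silently discarded, so the master replica falls
    # behind and starts stashing the checkpoints it receives from the other
    # nodes.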

    completed_catchups_before_reqs = get_number_of_completed_catchups(
        slow_node)

    # Send requests for the slow node's master replica to get
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP quorumed stashed checkpoints
    # from others
    send_reqs_batches_and_get_suff_replies(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_for_checkpoint)

    # Give the slow node time to catch up, in case it is going to do so
    looper.runFor(
        waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
        waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    checkNodeDataForInequality(slow_node, *other_nodes)

    # Verify that the slow node has not caught up
    assert get_number_of_completed_catchups(
        slow_node) == completed_catchups_before_reqs

    # Send more requests for the slow node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           reqs_for_checkpoint)

    waitNodeDataEquality(looper,
                         slow_node,
                         *other_nodes,
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    # Verify that the slow node has caught up
    assert get_number_of_completed_catchups(
        slow_node) > completed_catchups_before_reqs
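
The `chkFreqPatched` and `reqs_for_checkpoint` fixtures used above are defined
in the tests' conftest rather than here. A minimal sketch of what they
presumably provide; the exact CHK_FREQ and Max3PCBatchSize values below are
assumptions, not taken from this file:

import pytest

CHK_FREQ = 5            # assumed checkpoint frequency, in 3PC-batches
MAX_3PC_BATCH_SIZE = 3  # assumed maximum number of requests per 3PC-batch


@pytest.fixture(scope="module")
def chkFreqPatched(tconf, request):
    # Shrink the checkpoints so the tests can fill them with a handful of
    # requests; restore the original config afterwards.
    old_chk_freq = tconf.CHK_FREQ
    old_max_batch_size = tconf.Max3PCBatchSize
    tconf.CHK_FREQ = CHK_FREQ
    tconf.Max3PCBatchSize = MAX_3PC_BATCH_SIZE

    def reset():
        tconf.CHK_FREQ = old_chk_freq
        tconf.Max3PCBatchSize = old_max_batch_size

    request.addfinalizer(reset)
    return tconf


@pytest.fixture(scope="module")
def reqs_for_checkpoint(chkFreqPatched):
    # One checkpoint spans CHK_FREQ 3PC-batches of at most Max3PCBatchSize
    # requests each, so this many requests fill exactly one checkpoint.
    return chkFreqPatched.CHK_FREQ * chkFreqPatched.Max3PCBatchSize
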
def test_lag_size_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_client):
    """
    Verifies that if the stored own checkpoints have aligned bounds, then
    the master replica lag that makes the node perform catch-up is
    Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed received
    checkpoints.
    """
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    # The master replica of the slow node stops receiving 3PC-messages
    slow_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(slow_node)

    # Send requests for the slow node's master replica to get
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP quorumed stashed checkpoints
    # from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                                           reqs_for_checkpoint)

    # Give the slow node time to catch up, in case it is going to do so
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    checkNodeDataForInequality(slow_node, *other_nodes)

    # Verify that the slow node has not caught up
    assert get_number_of_completed_catchups(slow_node) == completed_catchups_before_reqs

    # Send more requests for the slow node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Verify that the slow node has caught up
    assert get_number_of_completed_catchups(slow_node) > completed_catchups_before_reqs
Example #3
def test_catchup_not_triggered_if_another_in_progress(
        looper,
        chkFreqPatched,
        reqs_for_checkpoint,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        broken_node_and_others):
    """
    A node misses 3PC-messages and checkpoints during some period, but later
    it stashes some amount of checkpoints and starts a catchup. While the
    node is performing the catchup, it receives enough new checkpoints to
    start another catchup but does not start it because the first one is
    still in progress.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize
    broken_node, other_nodes = broken_node_and_others

    logger.info("Step 1: The node misses quite a lot of 3PC-messages and checkpoints")

    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint + max_batch_size)

    waitNodeDataInequality(looper, broken_node, *other_nodes)

    logger.info(
        "Step 2: The node receives 3PC-messages but cannot process them "
        "because of the missed ones. However, the node eventually stashes "
        "some amount of checkpoints and after that starts a catchup")

    repaired_node = repair_broken_node(broken_node)

    initial_do_start_catchup_times = repaired_node.spylog.count(Node._do_start_catchup)
    initial_all_ledgers_caught_up = repaired_node.spylog.count(Node.allLedgersCaughtUp)
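    # Baseline spy counters: the assertions below count only the catch-ups
    # started after this point.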

    with delay_rules(repaired_node.nodeIbStasher, cr_delay()):
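        # cr_delay() delays incoming CatchupRep messages, so once the catch-up
        # starts it cannot finish while this delay rule is active.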
        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client,
                                               (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                               reqs_for_checkpoint - max_batch_size)

        ensure_all_nodes_have_same_data(looper, other_nodes)

        looper.run(eventually(lambda: assertExp(repaired_node.mode == Mode.discovering),
                              timeout=waits.expectedPoolInterconnectionTime(len(txnPoolNodeSet)) +
                                      waits.expectedPoolConsistencyProof(len(txnPoolNodeSet))))

        assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1

        logger.info(
            "Step 3: While doing the catchup, the node receives enough new "
            "checkpoints to start a new catchup but does not start it because "
            "the former is in progress")

        process_checkpoint_times_before = \
            repaired_node.master_replica._checkpointer.spylog.count(CheckpointService.process_checkpoint)

        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client,
                                               (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                               reqs_for_checkpoint)

        # Wait until the node receives the new checkpoints from all the other nodes
        looper.run(
            eventually(lambda: assertExp(
                repaired_node.master_replica._checkpointer.spylog.count(CheckpointService.process_checkpoint) -
                process_checkpoint_times_before ==
                (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                (len(txnPoolNodeSet) - 1)),
                       timeout=waits.expectedPoolInterconnectionTime(len(txnPoolNodeSet))))

        # New catchup is not started when another one is in progress
        assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1
        assert repaired_node.mode == Mode.discovering

    logger.info("Step 4: The node completes the catchup. The ledger has been "
                "updated to the level determined on its start")

    looper.run(eventually(lambda: assertExp(repaired_node.mode == Mode.participating),
                          timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))
    assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1
    assert repaired_node.spylog.count(Node.allLedgersCaughtUp) - initial_all_ledgers_caught_up == 1
    assert repaired_node.domainLedger.size == other_nodes[0].domainLedger.size
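
`assertExp`, used with `eventually` above, merely turns a boolean expression
into an assertion so that the check can be retried until it holds or times
out; in Plenum's test helpers it is essentially:

def assertExp(condition):
    # Raise AssertionError while the condition is false so that `eventually`
    # keeps retrying until it becomes true or the timeout expires.
    assert condition
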
def test_node_requests_missing_preprepares_prepares_and_commits(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    1 of 4 nodes goes down. A new request comes in and is ordered by
    the 3 remaining nodes. After a while the previously disconnected node
    comes back alive. Another request comes in. Check that the previously
    disconnected node requests the missing PREPREPARES, PREPARES and COMMITS,
    orders the previous request, and that all the nodes successfully handle
    the last request.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_node = txnPoolNodeSet[3]
    alive_nodes = txnPoolNodeSet[:3]

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            disconnected_node, stopNode=False)
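    # stopNode=False: the node is only disconnected from the pool, not
    # stopped, so it keeps its in-memory state and can later request the
    # missed 3PC-messages instead of triggering a catch-up.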

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              MISSING_REQS_CNT)

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          [disconnected_node],
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, disconnected_node)
    # Give the reconnected node time to catch up, in case it is going to do so
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    for node in alive_nodes:
        assert node.domainLedger.size == init_ledger_size + MISSING_REQS_CNT
    # Ensure that the reconnected node has not caught up though
    assert disconnected_node.domainLedger.size == init_ledger_size

    assert disconnected_node.master_replica.spylog.count(Replica._request_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_commit) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_commit) == 0
    doOrderTimesBefore = disconnected_node.master_replica.spylog.count(Replica.doOrder)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_node, *alive_nodes)

    assert disconnected_node.master_replica.spylog.count(Replica._request_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_commit) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_commit) > 0
    doOrderTimesAfter = disconnected_node.master_replica.spylog.count(Replica.doOrder)
    # Ensure that the reconnected node has ordered both the missed 3PC-batch and the new 3PC-batch
    assert doOrderTimesAfter - doOrderTimesBefore == 2

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
Example #5
def test_node_requests_missing_preprepares_prepares_and_commits(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    1 of 4 nodes goes down. A new request comes in and is ordered by
    the 3 remaining nodes. After a while the previously disconnected node
    comes back alive. Another request comes in. Check that the previously
    disconnected node requests the missing PREPREPARES, PREPARES and COMMITS,
    orders the previous request, and that all the nodes successfully handle
    the last request.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_node = txnPoolNodeSet[3]
    alive_nodes = txnPoolNodeSet[:3]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            disconnected_node,
                                            stopNode=False)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, MISSING_REQS_CNT)

    looper.run(
        eventually(check_pp_out_of_sync,
                   alive_nodes, [disconnected_node],
                   retryWait=1,
                   timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet,
                                        disconnected_node)
    # Give the reconnected node time to catch up, in case it is going to do so
    looper.runFor(
        waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
        waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    for node in alive_nodes:
        assert node.domainLedger.size == init_ledger_size + MISSING_REQS_CNT
    # Ensure that the reconnected node has not caught up though
    assert disconnected_node.domainLedger.size == init_ledger_size

    assert disconnected_node.master_replica.spylog.count(
        Replica._request_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica._request_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica._request_commit) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_commit) == 0
    doOrderTimesBefore = disconnected_node.master_replica.spylog.count(
        Replica.doOrder)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_node, *alive_nodes)

    assert disconnected_node.master_replica.spylog.count(
        Replica._request_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica._request_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica._request_commit) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_commit) > 0
    doOrderTimesAfter = disconnected_node.master_replica.spylog.count(
        Replica.doOrder)
    # Ensure that the reconnected node has ordered both the missed 3PC-batch and the new 3PC-batch
    assert doOrderTimesAfter - doOrderTimesBefore == 2

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
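
The `spylog` assertions in these tests rely on Plenum's test-time spy
instrumentation, which records calls to selected methods so tests can count
them. Conceptually it behaves like the toy recorder below (a hypothetical
sketch, not Plenum's actual implementation):

import functools


class SpyLog:
    """Toy call recorder: remembers which methods were called and how often."""

    def __init__(self):
        self._calls = []

    def record(self, method, args, kwargs):
        self._calls.append((method.__name__, args, kwargs))

    def count(self, method):
        # The tests pass the unbound method, e.g. Replica.doOrder, and get
        # back the number of recorded calls to it.
        return sum(1 for name, _, _ in self._calls if name == method.__name__)


def spyable(method):
    # Decorator sketch: wrap a method so every call is recorded in the
    # owner's spylog before the real implementation runs.
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        self.spylog.record(method, args, kwargs)
        return method(self, *args, **kwargs)
    return wrapper
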
def test_complete_short_checkpoint_not_included_in_lag_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client,
        tdir, tconf, allPluginsPath):
    """
    Verifies that if the first stored own checkpoint has an unaligned lower
    bound (which means that it was started after a catch-up), is complete,
    and there is a quorumed stashed checkpoint from other replicas with
    the same end, then this stashed checkpoint is not included in the lag
    for a catch-up, i.e. in such a case the lag that makes the node perform
    catch-up is Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    received checkpoints.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    # To complete the first checkpoint, send requests for 1 checkpoint minus
    # 2 3PC-batches (since there are already 2 3PC-batches in the first
    # checkpoint: one with the EpsilonSteward NYM transaction and one with
    # the Epsilon NODE transaction). This checkpoint has an unaligned lower
    # bound on the new node's replicas, so it will not be stabilized on them.
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint - 2 * max_batch_size)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    # The master replica of the new node stops receiving 3PC-messages
    new_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(new_node)

    # Send requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                                           reqs_for_checkpoint)

    # Give the new node time to catch up, in case it is going to do so
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1],
                           exclude_from_check=['check_last_ordered_3pc_backup'])

    # Verify that the new node has not caught up
    assert get_number_of_completed_catchups(new_node) == completed_catchups_before_reqs

    # Send more requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    # Verify that the new node has caught up
    assert get_number_of_completed_catchups(new_node) > completed_catchups_before_reqs
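
`Mode`, checked in the catch-up tests here, is Plenum's node-mode enum: a node
advances through these modes during catch-up, which is why one variant of the
test pins the node at `Mode.discovering` or `Mode.syncing` while the catch-up
replies are withheld, and Step 4 waits for `Mode.participating`. A sketch with
Plenum's member names (the exact integer values are an assumption):

from enum import IntEnum


class Mode(IntEnum):
    # Catch-up progression: a node moves through these modes in order and
    # participates in consensus only in the last one.
    starting = 100
    discovering = 200    # looking for consistency proofs
    discovered = 300     # consistency proofs found
    syncing = 400        # fetching and applying missed transactions
    synced = 500         # ledgers are up to date
    participating = 600  # full participant in ordering
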
def test_catchup_not_triggered_if_another_in_progress(
        looper,
        chkFreqPatched,
        reqs_for_checkpoint,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        broken_node_and_others):
    """
    A node misses 3PC-messages and checkpoints during some period, but later
    it stashes some amount of checkpoints and starts a catchup. While the
    node is performing the catchup, it receives enough new checkpoints to
    start another catchup but does not start it because the first one is
    still in progress.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize
    broken_node, other_nodes = broken_node_and_others

    logger.info("Step 1: The node misses quite a lot of 3PC-messages and checkpoints")

    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint + max_batch_size)

    waitNodeDataInequality(looper, broken_node, *other_nodes)

    logger.info(
        "Step 2: The node receives 3PC-messages but cannot process them "
        "because of the missed ones. However, the node eventually stashes "
        "some amount of checkpoints and after that starts a catchup")

    repaired_node = repair_broken_node(broken_node)

    initial_do_start_catchup_times = repaired_node.spylog.count(Node._do_start_catchup)
    initial_all_ledgers_caught_up = repaired_node.spylog.count(Node.allLedgersCaughtUp)

    with delay_rules(repaired_node.nodeIbStasher, cr_delay()):
        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client,
                                               (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                               reqs_for_checkpoint - max_batch_size)

        ensure_all_nodes_have_same_data(looper, other_nodes)
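        # The catch-up target is captured now, while the catch-up is starting;
        # the requests sent in Step 3 below are beyond this target.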
        target_ledger_size = other_nodes[0].domainLedger.size

        looper.run(eventually(lambda: assertExp(repaired_node.mode == Mode.syncing),
                              timeout=waits.expectedPoolInterconnectionTime(len(txnPoolNodeSet)) +
                                      waits.expectedPoolConsistencyProof(len(txnPoolNodeSet))))
    
        assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1

        logger.info(
            "Step 3: While doing the catchup, the node receives enough new "
            "checkpoints to start a new catchup but does not start it because "
            "the former is in progress")

        process_checkpoint_times_before = repaired_node.master_replica.spylog.count(Replica.processCheckpoint)

        send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                               sdk_pool_handle,
                                               sdk_wallet_client,
                                               (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                               reqs_for_checkpoint)

        # Wait until the node receives the new checkpoints from all the other nodes
        looper.run(
            eventually(lambda: assertExp(repaired_node.master_replica.spylog.count(Replica.processCheckpoint) -
                                         process_checkpoint_times_before ==
                                         (Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) *
                                         (len(txnPoolNodeSet) - 1)),
                       timeout=waits.expectedPoolInterconnectionTime(len(txnPoolNodeSet))))
    
        # New catchup is not started when another one is in progress
        assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1
        assert repaired_node.mode == Mode.syncing

    logger.info("Step 4: The node completes the catchup. The ledger has been "
                "updated to the level determined on its start")

    looper.run(eventually(lambda: assertExp(repaired_node.mode == Mode.participating),
                          timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))
    assert repaired_node.spylog.count(Node._do_start_catchup) - initial_do_start_catchup_times == 1
    assert repaired_node.spylog.count(Node.allLedgersCaughtUp) - initial_all_ledgers_caught_up == 1
    assert repaired_node.domainLedger.size == target_ledger_size
def test_complete_short_checkpoint_not_included_in_lag_for_catchup(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client,
        tdir, tconf, allPluginsPath):
    """
    Verifies that if the first stored own checkpoint has an unaligned lower
    bound (which means that it was started after a catch-up), is complete,
    and there is a quorumed stashed checkpoint from other replicas with
    the same end, then this stashed checkpoint is not included in the lag
    for a catch-up, i.e. in such a case the lag that makes the node perform
    catch-up is Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    received checkpoints.
    """
    max_batch_size = chkFreqPatched.Max3PCBatchSize

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # Epsilon did not participate in ordering of the batch with EpsilonSteward
    # NYM transaction and the batch with Epsilon NODE transaction.
    # Epsilon got these transactions via catch-up.

    # To complete the first checkpoint, send requests for 1 checkpoint minus
    # 2 3PC-batches (since there are already 2 3PC-batches in the first
    # checkpoint: one with the EpsilonSteward NYM transaction and one with
    # the Epsilon NODE transaction). This checkpoint has an unaligned lower
    # bound on the new node's replicas, so it will not be stabilized on them.
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint - 2 * max_batch_size)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # The master replica of the new node stops receiving 3PC-messages
    new_node.master_replica.threePhaseRouter.extend(
        (
            (PrePrepare, lambda *x, **y: None),
            (Prepare, lambda *x, **y: None),
            (Commit, lambda *x, **y: None),
        )
    )

    completed_catchups_before_reqs = get_number_of_completed_catchups(new_node)

    # Send requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP *
                                           reqs_for_checkpoint)

    # Give the new node time to catch up, in case it is going to do so
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    # Verify that the new node has not caught up
    assert get_number_of_completed_catchups(new_node) == completed_catchups_before_reqs

    # Send more requests for the new node's master replica to reach
    # Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 2 quorumed stashed
    # checkpoints from others
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           reqs_for_checkpoint)

    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # Verify that the new node has caught up
    assert get_number_of_completed_catchups(new_node) > completed_catchups_before_reqs
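
`get_number_of_completed_catchups` is imported from the test helpers and is
not shown here; presumably it counts completed catch-up rounds for the domain
ledger via the node's spy log, along the lines of this hypothetical sketch
(the attribute and hook names are assumptions):

DOMAIN_LEDGER_ID = 1  # Plenum's id for the domain ledger


def get_number_of_completed_catchups(node):
    # Hypothetical sketch: count spy-log entries for the ledger manager's
    # catch-up-completed hook, keeping only those for the domain ledger.
    count = 0
    for entry in node.ledgerManager.spylog.getAll(node.ledgerManager.catchupCompleted):
        if entry.params['ledgerId'] == DOMAIN_LEDGER_ID:
            count += 1
    return count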