Пример #1
0
def test_second_checkpoint_after_catchup_can_be_stabilized(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_wallet_steward,
        sdk_wallet_client, sdk_pool_handle, tdir, tconf,
        allPluginsPath):
    """
    A freshly added node (Epsilon) gets the already ordered transactions via
    catch-up. The test then sends more requests and verifies on the replicas
    of that node that the stable checkpoint moves 0 -> 5 -> 10 and that the
    watermarks (h, H) move (2, 17) -> (5, 20) -> (10, 25).
    """

    def send_requests(count):
        # Thin wrapper so that the fixture arguments are spelled out once.
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, count)

    def assert_watermarks(replica, low, high):
        assert replica.h == low
        assert replica.H == high

    def assert_stabilized(node, stable_seq_no, low, high):
        # For each replica of the node: the expected stable checkpoint, no
        # unstable and no received checkpoints, the expected watermarks.
        for replica in node.replicas.values():
            check_stable_checkpoint(replica, stable_seq_no)
            check_num_unstable_checkpoints(replica, 0)

            # nothing is stashed since it's ordered during catch-up
            check_num_received_checkpoints(replica, 0)

            assert_watermarks(replica, low, high)

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    # Epsilon did not participate in ordering of the batch with the
    # EpsilonSteward NYM transaction and of the batch with the Epsilon NODE
    # transaction. Epsilon got these transactions via catch-up.
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    master_replica = new_node.replicas._master_replica

    # State of the master replica right after the catch-up.
    check_stable_checkpoint(master_replica, 0)
    check_num_received_checkpoints(master_replica, 0)
    assert_watermarks(master_replica, 2, 17)

    # A single request must not shift the watermarks of any replica.
    send_requests(1)
    for replica in new_node.replicas.values():
        assert_watermarks(replica, 2, 17)

    send_requests(6)
    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)
    assert_stabilized(new_node, 5, 5, 20)

    send_requests(1)
    looper.runFor(stabilization_timeout)
    assert_stabilized(new_node, 10, 10, 25)
 def check():
     # Expectation for every replica of the slow node: stable checkpoint
     # at 0, no unstable checkpoints, one received checkpoint (pp_seq_no 5)
     # with a vote from each of the other nodes of the pool.
     votes_from_others = len(txnPoolNodeSet) - 1
     for slow_replica in slow_node.replicas.values():
         check_stable_checkpoint(slow_replica, 0)
         check_num_unstable_checkpoints(slow_replica, 0)
         check_num_received_checkpoints(slow_replica, 1)
         check_received_checkpoint_votes(slow_replica,
                                         pp_seq_no=5,
                                         num_votes=votes_from_others)
Пример #3
0
    def check():
        """
        Assert for every replica of epsilon that the stable checkpoint is
        still 0 while one unstable checkpoint (the last one being 5) and one
        received checkpoint (the last one being 5) are present.
        """
        # Only the replicas themselves are needed, not their instance ids.
        for replica in epsilon.replicas.values():
            check_stable_checkpoint(replica, 0)
            check_num_unstable_checkpoints(replica, 1)
            check_last_checkpoint(replica, 5)

            check_num_received_checkpoints(replica, 1)
            check_last_received_checkpoint(replica, 5)
Пример #4
0
def test_stashed_checkpoint_processing(chkFreqPatched, looper, txnPoolNodeSet,
                                       sdk_wallet_client, sdk_pool_handle):
    """
    One node in a pool of 5 nodes lags to order the last 3PC-batch in a
    checkpoint. By the moment when it eventually orders the 3PC-batch it has
    already received and stashed Checkpoint messages from two nodes, so it
    processes these stashed messages on completing the checkpoint. After this
    it receives Checkpoint messages from the two other nodes, processes them
    and stabilizes the checkpoint.
    """
    epsilon = txnPoolNodeSet[-1]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    # On epsilon, delay all Commits as well as the Checkpoint messages sent
    # by Gamma and Delta.
    epsilon.nodeIbStasher.delay(cDelay())
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Gamma'))
    epsilon.nodeIbStasher.delay(chk_delay(sender_filter='Delta'))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    # The pool size does not change during the test, so the timeout is
    # computed once and reused for both waiting phases.
    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)

    # Only two votes are expected for the received checkpoint at this point
    # because the Checkpoint messages from Gamma and Delta are still delayed.
    for replica in epsilon.replicas.values():
        check_stable_checkpoint(replica, 0)
        check_num_unstable_checkpoints(replica, 0)
        check_num_received_checkpoints(replica, 1)
        check_received_checkpoint_votes(replica, pp_seq_no=5, num_votes=2)

    # Let the delayed Commits through.
    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(COMMIT)

    def check():
        # One unstable checkpoint (the last one being 5) is expected now,
        # while the stable checkpoint is still 0.
        for replica in epsilon.replicas.values():
            check_stable_checkpoint(replica, 0)
            check_num_unstable_checkpoints(replica, 1)
            check_last_checkpoint(replica, 5)

            check_num_received_checkpoints(replica, 1)
            check_last_received_checkpoint(replica, 5)

    looper.run(
        eventually(check,
                   timeout=waits.expectedOrderingTime(len(txnPoolNodeSet))))

    # Let the delayed Checkpoint messages through.
    epsilon.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    looper.runFor(stabilization_timeout)

    for replica in epsilon.replicas.values():
        check_stable_checkpoint(replica, 5)
        check_num_unstable_checkpoints(replica, 0)
        check_num_received_checkpoints(replica, 0)
Пример #5
0
def test_second_checkpoint_after_catchup_can_be_stabilized(
        chkFreqPatched, looper, txnPoolNodeSet, sdk_wallet_steward,
        sdk_wallet_client, sdk_pool_handle, tdir, tconf, allPluginsPath):
    """
    The last node of the pool has its Commits delayed while requests are
    sent and gets the lost transactions via catch-up. The test then sends
    more requests and verifies on the replicas of that node that the stable
    checkpoint moves 10 -> 15 -> 20 and that the watermarks (h, H) move
    (10, 25) -> (15, 30) -> (20, 35).
    """

    def send_requests(count):
        # Thin wrapper so that the fixture arguments are spelled out once.
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, count)

    def assert_watermarks(replica, low, high):
        assert replica.h == low
        assert replica.H == high

    def assert_stabilized(node, stable_seq_no, low, high):
        # For each replica of the node: the expected stable checkpoint, no
        # unstable and no received checkpoints, the expected watermarks.
        for replica in node.replicas.values():
            check_stable_checkpoint(replica, stable_seq_no)
            check_num_unstable_checkpoints(replica, 0)

            # nothing is stashed since it's ordered during catch-up
            check_num_received_checkpoints(replica, 0)

            assert_watermarks(replica, low, high)

    lagging_node = txnPoolNodeSet[-1]
    with delay_rules_without_processing(lagging_node.nodeIbStasher, cDelay()):
        send_requests(tconf.Max3PCBatchSize * CHK_FREQ * 2)
    # Epsilon got lost transactions via catch-up.
    waitNodeDataEquality(looper, lagging_node, *txnPoolNodeSet[:-1])
    send_requests(2)

    master_replica = lagging_node.master_replica

    # State of the master replica before any further checkpoint is reached.
    check_stable_checkpoint(master_replica, 10)
    check_num_received_checkpoints(master_replica, 0)
    assert_watermarks(master_replica, 10, 25)

    # A single request must not shift the watermarks of any replica.
    send_requests(1)
    for replica in lagging_node.replicas.values():
        assert_watermarks(replica, 10, 25)

    send_requests(6)
    stabilization_timeout = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    looper.runFor(stabilization_timeout)
    assert_stabilized(lagging_node, 15, 15, 30)

    send_requests(1)
    looper.runFor(stabilization_timeout)
    assert_stabilized(lagging_node, 20, 20, 35)
def test_lagged_checkpoint_completion(chkFreqPatched, looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    A single node of the pool is late with ordering the final 3PC-batch of a
    checkpoint. When it finally orders that batch, and thereby completes the
    checkpoint, the corresponding checkpoint messages from all the other
    nodes have already been received and stashed by it. The test makes sure
    that the node processes the stashed checkpoint messages and stabilizes
    the checkpoint.
    """
    slow_node = txnPoolNodeSet[-1]

    # Every 3PC-batch of the checkpoint but the last one is ordered normally
    # by all the nodes.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    # The last 3PC-batch of the checkpoint is ordered by all the nodes except
    # the slow one because that node lags to receive Commits.
    slow_node.nodeIbStasher.delay(cDelay())
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    def check():
        # The other nodes have completed the checkpoint and sent their
        # Checkpoint messages. The slow node has not completed it yet, so it
        # keeps the messages stashed: one vote per other node is expected.
        votes_from_others = len(txnPoolNodeSet) - 1
        for slow_replica in slow_node.replicas.values():
            check_stable_checkpoint(slow_replica, 0)
            check_num_unstable_checkpoints(slow_replica, 0)
            check_num_received_checkpoints(slow_replica, 1)
            check_received_checkpoint_votes(slow_replica,
                                            pp_seq_no=5,
                                            num_votes=votes_from_others)

    looper.run(eventually(
        check,
        timeout=waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))))

    # Once the delays are dropped the slow node receives the Commits, orders
    # the last 3PC-batch of the checkpoint and so completes it, processes the
    # stashed checkpoint messages and stabilizes the checkpoint.
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds()

    ordering_timeout = waits.expectedOrderingTime(len(txnPoolNodeSet))
    looper.runFor(ordering_timeout)

    for slow_replica in slow_node.replicas.values():
        check_stable_checkpoint(slow_replica, 5)
        check_num_unstable_checkpoints(slow_replica, 0)
        check_num_received_checkpoints(slow_replica, 0)
Пример #7
0
def test_backup_replica_resumes_ordering_on_lag_in_checkpoints(
        looper, chkFreqPatched, reqs_for_checkpoint,
        one_replica_and_others_in_backup_instance, sdk_pool_handle,
        sdk_wallet_client, view_change_done, txnPoolNodeSet):
    """
    Verifies resumption of ordering 3PC-batches on a backup replica
    on detection of a lag in checkpoints

    Outline of the scenario as written below:
    1. The slow replica orders one batch normally.
    2. Commits of instance 1 coming from two other replicas are delayed on
       the slow replica's node, one more request is sent, and afterwards the
       delayed messages are dropped and the delays are reset.
    3. Requests are sent in a quantity that is not enough to trigger the
       catch-up; the replica must keep its last_ordered_3pc and watermarks.
    4. More requests are sent; the replica is expected to adjust
       last_ordered_3pc, shift the watermarks and clear its collections.
    5. One more request is sent and must be ordered by the replica.
    """
    slow_replica, other_replicas = one_replica_and_others_in_backup_instance
    view_no = slow_replica.viewNo
    batches_count = slow_replica.last_ordered_3pc[1]

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    batches_count += 1
    # Remember the lower watermark to compare against it later
    low_watermark = slow_replica.h

    # NOTE(review): nodeCount is not defined in this function; presumably it
    # is a module-level name - confirm against the full file.
    looper.run(
        eventually(lambda: assert_eq(slow_replica.last_ordered_3pc,
                                     (view_no, batches_count)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Don't receive Commits from two replicas
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[0].node.name))
    slow_replica.node.nodeIbStasher.delay(
        cDelay(instId=1, sender_filter=other_replicas[1].node.name))

    # Send a request for which the replica will not be able to order the batch
    # due to an insufficient count of Commits
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Recover reception of Commits
    # (the messages delayed so far are dropped, not processed)
    slow_replica.node.nodeIbStasher.drop_delayeds()
    slow_replica.node.nodeIbStasher.resetDelays()

    # Send requests but in a quantity insufficient
    # for catch-up number of checkpoints
    reqs_until_checkpoints = reqs_for_checkpoint - other_replicas[
        0].last_ordered_3pc[1]
    sdk_send_random_requests(
        looper, sdk_pool_handle, sdk_wallet_client,
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP * reqs_until_checkpoints)
    looper.runFor(waits.expectedTransactionExecutionTime(nodeCount))

    # Ensure that the replica has not ordered any batches
    # after the very first one
    assert slow_replica.last_ordered_3pc == (view_no, batches_count)

    # Ensure that the watermarks have not been shifted since the view start
    assert slow_replica.h == low_watermark
    assert slow_replica.H == low_watermark + LOG_SIZE

    # Ensure that the collections related to requests, batches and
    # own checkpoints are not empty.
    # (Note that a primary replica removes requests from requestQueues
    # when creating a batch with them.)
    if slow_replica.isPrimary:
        assert slow_replica._ordering_service.sent_preprepares
    else:
        assert slow_replica._ordering_service.requestQueues[DOMAIN_LEDGER_ID]
        assert slow_replica._ordering_service.prePrepares
    assert slow_replica._ordering_service.prepares
    assert slow_replica._ordering_service.commits
    assert slow_replica._ordering_service.batches

    check_num_unstable_checkpoints(slow_replica, 0)
    check_num_quorumed_received_checkpoints(slow_replica, 1)

    # Send more requests to reach catch-up number of checkpoints
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_for_checkpoint)
    # Build the pp_seq_no that last_ordered_3pc is expected to reach
    batches_count += 1
    batches_count += reqs_until_checkpoints
    batches_count += reqs_for_checkpoint
    # Ensure that the replica has adjusted last_ordered_3pc to the end
    # of the last checkpoint
    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc == \
                        (view_no, batches_count)),
                   slow_replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that the watermarks have been shifted so that the lower watermark
    # has the same value as last_ordered_3pc
    assert slow_replica.h == low_watermark + (
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ
    assert slow_replica.H == low_watermark + (
        Replica.STASHED_CHECKPOINTS_BEFORE_CATCHUP + 1) * CHK_FREQ + LOG_SIZE

    # Ensure that the collections related to requests, batches and
    # own checkpoints have been cleared
    assert not slow_replica._ordering_service.requestQueues[DOMAIN_LEDGER_ID]
    assert not slow_replica._ordering_service.sent_preprepares
    assert not slow_replica._ordering_service.prePrepares
    assert not slow_replica._ordering_service.prepares
    assert not slow_replica._ordering_service.commits
    assert not slow_replica._ordering_service.batches

    check_num_unstable_checkpoints(slow_replica, 0)
    check_num_quorumed_received_checkpoints(slow_replica, 0)

    # Send a request and ensure that the replica orders the batch for it
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 1)
    batches_count += 1

    looper.run(
        eventually(lambda *args: assertExp(slow_replica.last_ordered_3pc ==
                                           (view_no, batches_count)),
                   slow_replica,
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))
    # NOTE(review): the received checkpoints are cleared and batches_count is
    # reassigned here, but neither is used again in the visible part of this
    # function - the snippet may be truncated; confirm against the full file.
    slow_replica._checkpointer._received_checkpoints.clear()
    batches_count = get_pp_seq_no(txnPoolNodeSet)