Example #1
def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1,
                     stewardWallet, allPluginsPath):
    """
    Add 2 new nodes to trigger replica addition and primary election
    """
    for nodeName in ("Zeta", "Eta"):
        newStewardName = "testClientSteward"+randomString(3)
        newSteward, newStewardWallet, newNode = addNewStewardAndNode(looper,
                                                   steward1,
                                                   stewardWallet,
                                                   newStewardName,
                                                   nodeName,
                                                   tdirWithPoolTxns, tconf,
                                                   allPluginsPath)
        txnPoolNodeSet.append(newNode)
        looper.run(checkNodesConnected(txnPoolNodeSet))
        logger.debug("{} connected to the pool".format(newNode))
        looper.run(eventually(checkNodeLedgersForEquality, newNode,
                              *txnPoolNodeSet[:-1], retryWait=1, timeout=7))

    f = getMaxFailures(len(txnPoolNodeSet))

    def checkFValue():
        for node in txnPoolNodeSet:
            assert node.f == f
            assert len(node.replicas) == (f + 1)

    looper.run(eventually(checkFValue, retryWait=1, timeout=5))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1,
                               timeout=5)
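
# A minimal standalone sketch of the BFT arithmetic that checkFValue asserts
# above, assuming getMaxFailures follows the usual f = (n - 1) // 3 rule
# (the helper names below are illustrative, not the plenum utilities):
def max_failures(node_count: int) -> int:
    # Standard BFT bound: n >= 3f + 1, hence f = floor((n - 1) / 3)
    return (node_count - 1) // 3


def expected_replica_count(node_count: int) -> int:
    # One master protocol instance plus f backup instances per node
    return max_failures(node_count) + 1


# 4 nodes -> f=1, 2 replicas per node; 7 nodes -> f=2, 3 replicas per node
assert (max_failures(4), expected_replica_count(4)) == (1, 2)
assert (max_failures(7), expected_replica_count(7)) == (2, 3)
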
def testPrimarySelectionAfterViewChange(  # noqa
        looper,
        txnPoolNodeSet,
        primaryReplicas,
        catchup_complete_count):
    """
    Test that primary replica of a protocol instance shifts to a new node after
    a view change.
    """
    # TODO: This test can fail due to view change.

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    for n in txnPoolNodeSet:
        assert n.spylog.count(
            n.allLedgersCaughtUp) > catchup_complete_count[n.name]

    # Primary replicas before view change
    prBeforeVC = primaryReplicas

    # Primary replicas after view change
    instanceCount = getNoInstances(nodeCount)
    prAfterVC = [getPrimaryReplica(txnPoolNodeSet, i) for i in range(instanceCount)]

    # Primary replicas have moved to the next node
    for br, ar in zip(prBeforeVC, prAfterVC):
        assert ar.node.rank - br.node.rank == 1

    check_rank_consistent_across_each_node(txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
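
# A toy sketch of the rotation rule behind the `rank difference == 1` assert
# above, assuming round-robin primary selection by node rank (the primary of
# instance i in view v sits on the node of rank (v + i) % n); illustrative
# helper, not the plenum selector:
def primary_rank(view_no: int, inst_id: int, node_count: int) -> int:
    return (view_no + inst_id) % node_count


# After one view change (view 0 -> 1) each instance's primary rank shifts by
# exactly one, provided the rotation does not wrap around the pool
assert primary_rank(1, 0, 4) - primary_rank(0, 0, 4) == 1
assert primary_rank(1, 1, 4) - primary_rank(0, 1, 4) == 1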
Example #3
def testNodesConnectsWhenOneNodeIsLate(allPluginsPath, tdirAndLooper, nodeReg,
                                       conf):
    tdir, looper = tdirAndLooper
    nodes = []
    names = list(nodeReg.keys())
    logger.debug("Node names: {}".format(names))

    def create(name):
        node = TestNode(name,
                        nodeReg,
                        basedirpath=tdir,
                        pluginPaths=allPluginsPath)
        looper.add(node)
        nodes.append(node)

    # TODO: This will be moved to a fixture
    if conf.UseZStack:
        genKeys(tdir, names + [_ + CLIENT_STACK_SUFFIX for _ in names])

    for name in names[:3]:
        create(name)

    looper.run(checkNodesConnected(nodes))

    # wait for the election to complete with the first three nodes
    looper.runFor(10)

    # create the fourth and see that it learns who the primaries are
    # from the other nodes
    create(names[3])
    checkProtocolInstanceSetup(looper, nodes, timeout=10)
    stopNodes(nodes, looper)
def testElectionsAfterViewChange(delayedPerf, looper: Looper,
                                 nodeSet: TestNodeSet, up, wallet1, client1):
    """
    Test that a primary election does happen after a view change
    """

    # Delay processing of PRE-PREPARE on all non-primary replicas of the master
    # instance so the master's throughput falls and a view change is triggered
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelay(10, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    # Ensure view change happened for both node and its primary elector
    for node in nodeSet:
        looper.run(
            eventually(partial(checkViewChangeInitiatedForNode, node, 1),
                       retryWait=1,
                       timeout=20))

    # Ensure elections are done again and pool is setup again with appropriate
    # protocol instances and each protocol instance is setup properly too
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=30)
Example #5
def test_no_propagated_future_view_change_while_view_change(
        txnPoolNodeSet, looper):
    # the last node is a lagging one, which will receive ViewChangeDone messages for a future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate view change in progress
    lagged_node.view_changer.view_change_in_progress = True
    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = \
        lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE_CHANGE on the lagged node so that all nodes except the lagging one finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper,
                                   nodes=other_nodes,
                                   numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received a Future VCD from each other node, but didn't start a new view change
        assert len(other_nodes) + initial_vhdc ==\
               lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])
def elections_done(case_6_setup, looper, txnPoolNodeSet):
    # Make sure elections are done successfully
    A, B, C, D = txnPoolNodeSet
    looper.run(checkNodesConnected(txnPoolNodeSet))

    inst_ids = (0, 1)

    def chk():
        # Check that each Primary is received by A before A has sent any
        # Primary
        primary_recv_times = {
            i: [entry.starttime for entry in A.elector.spylog.getAll(
                A.elector.processPrimary) if entry.params['prim'].instId == i]
            for i in inst_ids
        }
        primary_send_times = {
            i: [entry.starttime for entry in A.elector.spylog.getAll(
                A.elector.sendPrimary) if entry.params['instId'] == 0]
            for i in inst_ids
        }

        for i in inst_ids:
            assert primary_send_times[i][0] > max(primary_recv_times[i])

    looper.run(eventually(chk, retryWait=1, timeout=15))
    checkProtocolInstanceSetup(looper=looper, nodes=txnPoolNodeSet, retryWait=1)

    # Make sure no Nomination or Primary messages are received by A from B
    for i in inst_ids:
        assert B.replicas[i].name not in A.elector.nominations[i]
        assert B.replicas[i].name not in A.elector.primaryDeclarations[i]
def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet,
                                             client1, wallet1, one_node_added,
                                             client1Connected, tdir,
                                             client_tdir, tdirWithPoolTxns,
                                             steward1, stewardWallet,
                                             allPluginsPath):
    """
    Send pool and domain ledger requests such that they interleave, and do
    view change in between and verify the pool is functional
    """
    new_node = one_node_added
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Send domain ledger requests but don't wait for replies
    requests = sendRandomRequests(wallet1, client1, 2)
    # Add another node by sending pool ledger request
    _, _, new_theta = nodeThetaAdded(looper,
                                     txnPoolNodeSet,
                                     tdir,
                                     client_tdir,
                                     tconf,
                                     steward1,
                                     stewardWallet,
                                     allPluginsPath,
                                     name='new_theta')

    # Send more domain ledger requests but don't wait for replies
    requests.extend(sendRandomRequests(wallet1, client1, 3))

    # Do view change without waiting for replies
    ensure_view_change(looper, nodes=txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, client1, requests=requests)

    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)

    new_steward, new_steward_wallet = addNewSteward(looper, client_tdir,
                                                    steward1, stewardWallet,
                                                    'another_ste')

    # Send another pool ledger request (NODE) but don't wait for completion of
    # request
    next_node_name = 'next_node'
    r = sendAddNewNode(tdir, tconf, next_node_name, new_steward,
                       new_steward_wallet)
    node_req = r[0]

    # Send more domain ledger requests but don't wait for replies
    requests = [
        node_req, *sendRandomRequests(new_steward_wallet, new_steward, 5)
    ]

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, new_steward, requests=requests)

    # Make sure pool is functional
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper, mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for a future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE_CHANGE on the lagged node so that all nodes except the lagging one finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

        # emulate finishing of catchup by setting Participating status
        lagged_node.mode = Mode.participating

        # make sure that View Change happened on lagging node
        waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                          customTimeout=10)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet, looper,
                                                       mode):
    # the last node is a lagging one, which will receive ViewChangeDone messages for a future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting non-synced status
    lagged_node.mode = mode
    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE_CHANGE on the lagged node so that all nodes except the lagging one finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper,
                                   nodes=other_nodes,
                                   numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

        # emulate finishing of catchup by setting Participating status
        lagged_node.mode = Mode.participating

        # make sure that View Change happened on lagging node
        waitForViewChange(looper, [lagged_node],
                          expectedViewNo=old_view_no + 1,
                          customTimeout=10)
        ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
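
# The propagated-future-view tests above hinge on a single gate; a hypothetical
# one-line distillation of it (an assumption drawn from the tests, not a
# function in the node code): a node acts on propagated future-view
# ViewChangeDone messages only once it is neither mid view change nor still
# catching up.
def should_act_on_future_vcd(view_change_in_progress: bool, synced: bool) -> bool:
    return (not view_change_in_progress) and synced


assert not should_act_on_future_vcd(True, True)    # while_view_change case
assert not should_act_on_future_vcd(False, False)  # until_synced case
assert should_act_on_future_vcd(False, True)       # view change may proceed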
Example #10
def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1,
                     stewardWallet, allPluginsPath):
    """
    Add 2 new nodes to trigger replica addition and primary election
    """
    for nodeName in ("Zeta", "Eta"):
        newStewardName = "testClientSteward" + randomString(3)
        newSteward, newStewardWallet, newNode = addNewStewardAndNode(
            looper, steward1, stewardWallet, newStewardName, nodeName,
            tdirWithPoolTxns, tconf, allPluginsPath)
        txnPoolNodeSet.append(newNode)
        looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=30))
        logger.debug("{} connected to the pool".format(newNode))
        looper.run(
            eventually(checkNodeLedgersForEquality,
                       newNode,
                       *txnPoolNodeSet[:-1],
                       retryWait=1,
                       timeout=7))

    f = getMaxFailures(len(txnPoolNodeSet))

    def checkFValue():
        for node in txnPoolNodeSet:
            assert node.f == f
            assert len(node.replicas) == (f + 1)

    looper.run(eventually(checkFValue, retryWait=1, timeout=5))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1, timeout=5)
Example #11
def testNodesConnectsWhenOneNodeIsLate(allPluginsPath, tdirAndLooper, nodeReg):
    tdir, looper = tdirAndLooper
    nodes = []
    names = list(nodeReg.keys())
    logger.debug("Node names: {}".format(names))

    def create(name):
        node = TestNode(name, nodeReg, basedirpath=tdir,
                        pluginPaths=allPluginsPath)
        looper.add(node)
        node.startKeySharing()
        nodes.append(node)

    for name in names[:3]:
        create(name)

    looper.run(checkNodesConnected(nodes))

    # wait for the election to complete with the first three nodes
    looper.runFor(10)

    # create the fourth and see that it learns who the primaries are
    # from the other nodes
    create(names[3])
    checkProtocolInstanceSetup(looper, nodes, timeout=10)
    stopNodes(nodes, looper)
def elections_done(case_6_setup, looper, keySharedNodes):
    # Make sure elections are done successfully
    nodeSet = keySharedNodes
    A, B, C, D = nodeSet.nodes.values()
    looper.run(checkNodesConnected(nodeSet))

    inst_ids = (0, 1)

    def chk():
        # Check that each Primary is received by A before A has sent any
        # Primary
        primary_recv_times = {
            i: [entry.starttime for entry in A.elector.spylog.getAll(
                A.elector.processPrimary) if entry.params['prim'].instId == i]
            for i in inst_ids
        }
        primary_send_times = {
            i: [entry.starttime for entry in A.elector.spylog.getAll(
                A.elector.sendPrimary) if entry.params['instId'] == 0]
            for i in inst_ids
        }

        for i in inst_ids:
            assert primary_send_times[i][0] > max(primary_recv_times[i])

    looper.run(eventually(chk, retryWait=1, timeout=15))
    checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1)

    # Make sure no Nomination or Primary messages are received by A from B
    for i in inst_ids:
        assert B.replicas[i].name not in A.elector.nominations[i]
        assert B.replicas[i].name not in A.elector.primaryDeclarations[i]
def testPrimaryElectionWithTie(electTieFixture, looper, keySharedNodes):
    """
    Primary selection (Rainy Day)
    A, B, C, D, E
    A, B, C, D startup. E is lagging.
    A sees the minimum number of nodes, and then sends Nominate(A)
    At the same exact time, B sees the minimum number of nodes, and then sends out Nominate(B)
    A sees B sending Nominate(B), but it has already nominated itself, so it does nothing
    B sees A sending Nominate(A), but it has already nominated itself, so it does nothing
    C sees A sending Nominate(A), and sends Nominate(A)
    D sees B sending Nominate(B), and sends Nominate(B)
    There's a split. C and A think A is the primary, B and D think B is the primary
    All nodes can see that there is a split. Each sends out Reelection([A,B])

    A and B both see Reelection([A,B]) from themselves as well as from the other 3 (the number from others should be at least f+1), then:

    1. they wait a random amount of time (between 0 and 2 seconds),
    2. they each send out a Nominate(self)

    Voting is repeated until we have a good election.
    """

    # TODO: optimize by sending messages in batches; for example, don't send
    #     messages more often than every 400 milliseconds, and once those
    #     400 ms have passed, send the queued messages in one batch.

    nodeSet = keySharedNodes
    A, B, C, D = nodeSet.nodes.values()

    checkPoolReady(looper, nodeSet.nodes.values())

    for node in nodeSet.nodes.values():
        for instId, replica in enumerate(node.elector.replicas):
            logger.debug("replica {} {} with votes {}".
                          format(replica.name, replica.instId,
                                 node.elector.nominations.get(instId, {})))

    logger.debug("Check nomination")
    # Checking whether Node A nominated itself
    looper.run(eventually(checkNomination, A, A.name, retryWait=1, timeout=10))

    # Checking whether Node B nominated itself
    looper.run(eventually(checkNomination, B, B.name, retryWait=1, timeout=10))

    # Checking whether Node C nominated Node A
    looper.run(eventually(checkNomination, C, A.name, retryWait=1, timeout=10))

    # Checking whether Node D nominated Node B
    looper.run(eventually(checkNomination, D, B.name, retryWait=1, timeout=10))

    # No node should be primary
    for node in nodeSet.nodes.values():
        assert node.hasPrimary is False

    for node in nodeSet.nodes.values():
        node.resetDelays()

    checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1,
                               timeout=60)
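
# A toy tally showing why the split vote in the docstring forces a re-election,
# with the quorum taken as 2f + 1 where f = (n - 1) // 3 (illustrative helper,
# not the plenum elector):
from collections import Counter


def winner(nominations: dict, quorum: int):
    candidate, votes = Counter(nominations.values()).most_common(1)[0]
    return candidate if votes >= quorum else None


votes = {"A": "A", "C": "A", "B": "B", "D": "B"}  # the split from the docstring
f = (5 - 1) // 3                                  # pool of A..E, E is lagging
assert winner(votes, quorum=2 * f + 1) is None    # 2 votes < 3, so nodes re-elect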
Example #14
def testPrimarySelectionAfterViewChange(  # noqa
        looper, txnPoolNodeSet, primaryReplicas, catchup_complete_count,
        view_change_done):
    """
    Test that primary replica of a protocol instance shifts to a new node after
    a view change.
    """
    # TODO: This test can fail due to view change.

    for n in txnPoolNodeSet:
        assert n.spylog.count(
            n.allLedgersCaughtUp) > catchup_complete_count[n.name]

    # Primary replicas before view change
    prBeforeVC = primaryReplicas

    # Primary replicas after view change
    instanceCount = getNoInstances(nodeCount)
    prAfterVC = [
        getPrimaryReplica(txnPoolNodeSet, i) for i in range(instanceCount)
    ]

    # Primary replicas have moved to the next node
    for br, ar in zip(prBeforeVC, prAfterVC):
        assert ar.node.rank - br.node.rank == 1

    check_rank_consistent_across_each_node(txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
Example #15
def test_catchup_to_next_view_during_view_change_0_to_2(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    1) Lagging node is not a primary for new views
    2) All nodes except the lagging one go to view=1
    3) All nodes except the lagging one order txns on view=1
    4) All nodes except the lagging one go to view=2
    5) All nodes except the lagging one order txns on view=2
    6) Lagging node gets InstanceChanges for view=1 and view=2 => it changes to view=2, and catches up till txns from view=2
    7) Make sure that the lagging node is up to date, and can participate in consensus
    '''
    lagging_node = txnPoolNodeSet[0]
    other_nodes = txnPoolNodeSet[1:]
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    initial_last_ordered = lagging_node.master_last_ordered_3PC

    with delay_rules(lagging_node.nodeIbStasher, delay_for_view(viewNo=0),
                     delay_for_view(viewNo=1), delay_for_view(viewNo=2)):
        # view change to viewNo=1
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper,
                          other_nodes,
                          expectedViewNo=initial_view_no + 1)
        checkProtocolInstanceSetup(looper=looper,
                                   nodes=other_nodes,
                                   instances=range(3))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # order some txns
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 5)

        # view change to viewNo=2
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper,
                          other_nodes,
                          expectedViewNo=initial_view_no + 2)
        checkProtocolInstanceSetup(looper=looper,
                                   nodes=other_nodes,
                                   instances=range(3))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # order some txns
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_steward, 5)

        assert initial_view_no == lagging_node.viewNo
        assert initial_last_ordered == lagging_node.master_last_ordered_3PC

    # make sure that the second View Change happened on the lagging node
    waitForViewChange(looper, [lagging_node],
                      expectedViewNo=initial_view_no + 2,
                      customTimeout=20)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, client1,
                                          wallet1, client1Connected):
    """
    One of the node experiences poor network and loses 3PC messages. It has to
    do multiple rounds of catchup to be caught up
    """

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # Bad node does not process any 3 phase messages, equivalent to messages
    # being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad_node needs to do catchup again.

    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(
            getAllReturnVals(bad_node,
                             bad_node.is_catchup_needed,
                             compare_val_to=True))

    def caught_up_for_current_view_count():
        return len(
            getAllReturnVals(bad_node,
                             bad_node.caught_up_for_current_view,
                             compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = caught_up_for_current_view_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    # The bad_node caught up due to receiving sufficient ViewChangeDone
    # messages
    assert caught_up_for_current_view_count() > old_count_2

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        orig_method, bad_node.master_replica)
Example #17
def check_newly_added_nodes(looper, all_nodes, new_nodes):
    # New nodes should be given in the order they were added
    assert all(n in all_nodes for n in new_nodes)
    check_rank_consistent_across_each_node(all_nodes)
    old_nodes = [node for node in all_nodes if node not in new_nodes]
    for new_node in new_nodes:
        assert all(new_node.rank > n.rank for n in old_nodes)
        old_nodes.append(new_node)
    checkProtocolInstanceSetup(looper, all_nodes, retryWait=1)
Example #18
def check_newly_added_nodes(looper, all_nodes, new_nodes):
    # New nodes should be given in the order they were added
    assert all(n in all_nodes for n in new_nodes)
    check_rank_consistent_across_each_node(all_nodes)
    old_nodes = [node for node in all_nodes if node not in new_nodes]
    for new_node in new_nodes:
        assert all(new_node.rank > n.rank for n in old_nodes)
        old_nodes.append(new_node)
    checkProtocolInstanceSetup(looper, all_nodes, retryWait=1)
def test_catchup_to_next_view_during_view_change_by_primary(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_steward):
    '''
    1) Lagging node is a primary for view=1
    2) All nodes except the lagging one start a view change (to view=1)
    3) The nodes can not finish it on time since the Primary for view=1 is lagging
    4) All nodes except the lagging one go to view=2 then
    5) All nodes except the lagging one order txns on view=2
    6) Lagging node gets InstanceChanges for view=1 => it changes to view=2, and catches up till txns from view=2
    7) Lagging node gets InstanceChanges for view=2 => it changes to view=2
    8) Make sure that the lagging node is up to date, and can participate in consensus
    '''
    lagging_node = txnPoolNodeSet[1]
    other_nodes = list(set(txnPoolNodeSet) - {lagging_node})
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    initial_last_ordered = lagging_node.master_last_ordered_3PC

    with delay_rules(lagging_node.nodeIbStasher, delay_for_view(viewNo=2)):
        with delay_rules(lagging_node.nodeIbStasher, delay_for_view(viewNo=0),
                         delay_for_view(viewNo=1)):
            # view change to viewNo=2 since the primary for viewNo=1 is the lagging node
            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            waitForViewChange(looper,
                              other_nodes,
                              expectedViewNo=initial_view_no + 2,
                              customTimeout=30)
            checkProtocolInstanceSetup(looper=looper,
                                       nodes=other_nodes,
                                       instances=range(3))
            ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

            # order some txns
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_steward, 5)

            assert initial_view_no == lagging_node.viewNo
            assert initial_last_ordered == lagging_node.master_last_ordered_3PC
            assert len(lagging_node.master_replica._ordering_service.
                       requestQueues[DOMAIN_LEDGER_ID]) > 0

        # make sure that the first View Change happened on lagging node
        waitForViewChange(looper, [lagging_node],
                          expectedViewNo=initial_view_no + 1,
                          customTimeout=20)
        assert initial_view_no + 1 == lagging_node.viewNo

    # make sure that the second View Change happened on lagging node
    waitForViewChange(looper, [lagging_node],
                      expectedViewNo=initial_view_no + 2,
                      customTimeout=20)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

    # make sure that the pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def testPrimaryElectionWithAClearWinner(electContFixture, looper,
                                        keySharedNodes):
    """
    Primary selection (Sunny Day)
    A, B, C, D, E
    A, B, C, D startup. E is lagging.
    A sees the minimum number of nodes first, and then sends out a NOMINATE(A) message
    B, C, D all see the NOMINATE(A) message from A, and respond with NOMINATE(A) message to all other nodes

    A sees three other NOMINATE(A) votes (from B, C, D)
    A sees that A is the clear winner (2f+1 total), and sends PRIMARY(A) to all nodes

    B sees two more NOMINATE(A) votes (from C and D)
    B sees that A is the clear winner (2f+1 total), and sends PRIMARY(A) to all nodes

    C sees two more NOMINATE(A) votes (from B and D)
    C sees that A is the clear winner (2f+1 total), and sends PRIMARY(A) to all nodes

    D sees two more NOMINATE(A) votes (from B and C)
    D sees that A is the clear winner (2f+1 total), and sends PRIMARY(A) to all nodes

    A sees at least two other PRIMARY(A) votes (3 including its own)
    selects A as primary

    B sees at least two other PRIMARY(A) votes (3 including its own)
    selects A as primary

    C sees at least two other PRIMARY(A) votes (3 including its own)
    selects A as primary

    D sees at least two other PRIMARY(A) votes (3 including its own)
    selects A as primary
    """

    nodeSet = keySharedNodes
    A, B, C, D = nodeSet.nodes.values()
    nodesBCD = [B, C, D]

    checkPoolReady(looper, nodeSet)

    # Checking whether one of the replicas of Node A nominated itself
    timeout = waits.expectedPoolNominationTimeout(len(nodeSet))
    looper.run(
        eventually(checkNomination, A, A.name, retryWait=1, timeout=timeout))

    timeout = waits.expectedPoolNominationTimeout(len(nodeSet))
    for n in nodesBCD:
        # Checking whether Node B, C and D nominated Node A
        looper.run(
            eventually(checkNomination,
                       n,
                       A.name,
                       retryWait=1,
                       timeout=timeout))

    checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1)
    assert A.hasPrimary
Example #21
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including primary) so the node that does not receive enough 3PC messages is
    behind but learns of the view change quickly and starts catchup.
    Other nodes learn of the view change late and thus keep on processing
    requests
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([
            1 for e in slow_node.ledgerManager.spylog.getAll(
                slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID
        ])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client,
                                        10 * Max3PCBatchSize)

    ensure_view_change(looper,
                       nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf,
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including primary) so the node that does not receive enough 3PC messages is
    behind but learns of the view change quickly and starts catchup.
    Other nodes learn of the view change late and thus keep on processing
    requests
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([
            1 for e in slow_node.ledgerManager.spylog.getAll(
                slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID
        ])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, 10 * Max3PCBatchSize)

    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
Example #23
def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet,
                                          nodeSetWithNodeAddedAfterSomeTxns,
                                          newNodeCaughtUp, tdirWithPoolTxns,
                                          tconf, allPluginsPath):
    """
    Node discards 3-phase and election messages from view nos that it does not
    know of (view nos before it joined the pool)
    :return:
    """
    looper, nodeX, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    viewNo = nodeX.viewNo

    # Delay processing of PRE-PREPARE on all non-primary replicas of the master
    # instance so the master's performance falls and a view change occurs
    delayNonPrimaries(txnPoolNodeSet, 0, 10)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 4)
    looper.run(eventually(partial(checkViewNoForNodes, txnPoolNodeSet,
                                  viewNo + 1), retryWait=1, timeout=20))

    newStewardName = "testClientSteward" + randomString(3)
    nodeName = "Theta"
    _, _, nodeTheta = addNewStewardAndNode(looper, client,
                                           wallet,
                                           newStewardName,
                                           nodeName,
                                           tdirWithPoolTxns, tconf,
                                           allPluginsPath)
    txnPoolNodeSet.append(nodeTheta)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    looper.run(client.ensureConnectedToNodes())
    looper.run(eventually(checkNodeLedgersForEquality, nodeTheta,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=5))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1,
                               timeout=10)
    electMsg = Nomination(nodeX.name, 0, viewNo)
    threePMsg = PrePrepare(
            0,
            viewNo,
            10,
            wallet.defaultId,
            wallet._getIdData().lastReqId+1,
            "random digest",
            time.time()
            )
    ridTheta = nodeX.nodestack.getRemote(nodeTheta.name).uid
    nodeX.send(electMsg, ridTheta)
    nodeX.send(threePMsg, ridTheta)
    nodeX.send(electMsg, ridTheta)
    looper.run(eventually(checkDiscardMsg, [nodeTheta, ], electMsg,
                          'un-acceptable viewNo', retryWait=1, timeout=5))
    nodeX.send(threePMsg, ridTheta)
    looper.run(eventually(checkDiscardMsg, [nodeTheta, ], threePMsg,
                          'un-acceptable viewNo', retryWait=1, timeout=5))
def test_view_change_without_primary(nodeSet, looper,
                                     patched_view_change_timeout):

    first, others = stop_nodes_and_remove_first(looper, nodeSet)

    start_and_connect_nodes(looper, others)

    timeout = waits.expectedPoolElectionTimeout(len(nodeSet)) + patched_view_change_timeout

    checkProtocolInstanceSetup(looper=looper, nodes=others, retryWait=1,
                               customTimeout=timeout,
                               numInstances=getRequiredInstances(len(nodeSet)))
Example #25
def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet,
                                          nodeSetWithNodeAddedAfterSomeTxns,
                                          newNodeCaughtUp, tdirWithPoolTxns,
                                          tconf, allPluginsPath):
    """
    Node discards 3-phase and election messages from view nos that it does not
    know of (view nos before it joined the pool)
    :return:
    """
    looper, nodeX, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    viewNo = nodeX.viewNo

    # Delay processing of PRE-PREPARE on all non-primary replicas of the master
    # instance so the master's performance falls and a view change occurs
    delayNonPrimaries(txnPoolNodeSet, 0, 10)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 4)
    looper.run(eventually(partial(checkViewNoForNodes, txnPoolNodeSet,
                                  viewNo + 1), retryWait=1, timeout=20))

    newStewardName = "testClientSteward" + randomString(3)
    nodeName = "Theta"
    _, _, nodeTheta = addNewStewardAndNode(looper, client,
                                           wallet,
                                           newStewardName,
                                           nodeName,
                                           tdirWithPoolTxns, tconf,
                                           allPluginsPath)
    txnPoolNodeSet.append(nodeTheta)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    looper.run(client.ensureConnectedToNodes())
    looper.run(eventually(checkNodeLedgersForEquality, nodeTheta,
                          *txnPoolNodeSet[:-1], retryWait=1, timeout=5))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1,
                               timeout=10)
    electMsg = Nomination(nodeX.name, 0, viewNo)
    threePMsg = PrePrepare(
            0,
            viewNo,
            10,
            wallet.defaultId,
            wallet._getIdData().lastReqId+1,
            "random digest",
            time.time()
            )
    ridTheta = nodeX.nodestack.getRemote(nodeTheta.name).uid
    nodeX.send(electMsg, ridTheta)
    nodeX.send(threePMsg, ridTheta)
    nodeX.send(electMsg, ridTheta)
    looper.run(eventually(checkDiscardMsg, [nodeTheta, ], electMsg,
                          'un-acceptable viewNo', retryWait=1, timeout=5))
    nodeX.send(threePMsg, ridTheta)
    looper.run(eventually(checkDiscardMsg, [nodeTheta, ], threePMsg,
                          'un-acceptable viewNo', retryWait=1, timeout=5))
def test_view_change_without_primary(txnPoolNodeSet, looper,
                                     patched_view_change_timeout):
    first, others = stop_nodes_and_remove_first(looper, txnPoolNodeSet)

    start_and_connect_nodes(looper, others)

    timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + patched_view_change_timeout

    #looper.runFor(40)

    checkProtocolInstanceSetup(looper=looper, nodes=txnPoolNodeSet, retryWait=1,
                               customTimeout=timeout,
                               instances=range(getRequiredInstances(len(txnPoolNodeSet))))
def testPrimaryElectionWithAClearWinner(electContFixture, looper, keySharedNodes):
    """
    Primary selection (Sunny Day)
    A, B, C, D, E
    A, B, C, D startup. E is lagging.
    A sees the minimum number of nodes first, and then sends out a NOMINATE(A) message
    B, C, D all see the NOMINATE(A) message from A, and respond with NOMINATE(A) message to all other nodes

    A sees three other NOMINATE(A) votes (from B, C, D)
    A sees that A is the clear winner (2f+1 total), and sends PRIMARY(A) to all nodes

    B sees two more NOMINATE(A) votes (from C and D)
    B sees that A is the clear winner (2f+1 total), and sends PRIMARY(A) to all nodes

    C sees two more NOMINATE(A) votes (from B and D)
    C sees that A is the clear winner (2f+1 total), and sends PRIMARY(A) to all nodes

    D sees two more NOMINATE(A) votes (from B and C)
    D sees that A is the clear winner (2f+1 total), and sends PRIMARY(A) to all nodes

    A sees at least two other PRIMARY(A) votes (3 including its own)
    selects A as primary

    B sees at least two other PRIMARY(A) votes (3 including its own)
    selects A as primary

    C sees at least two other PRIMARY(A) votes (3 including its own)
    selects A as primary

    D sees at least two other PRIMARY(A) votes (3 including its own)
    selects A as primary
    """

    nodeSet = keySharedNodes
    A, B, C, D = nodeSet.nodes.values()
    nodesBCD = [B, C, D]

    checkPoolReady(looper, nodeSet)

    # Checking whether one of the replicas of Node A nominated itself
    looper.run(eventually(checkNomination, A, A.name, retryWait=1, timeout=10))

    for n in nodesBCD:
        # Checking whether Node B, C and D nominated Node A
        looper.run(eventually(checkNomination, n, A.name, retryWait=1,
                              timeout=10))

    checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1,
                               timeout=10)
    assert A.hasPrimary
Example #28
def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet,
                                          nodeSetWithNodeAddedAfterSomeTxns,
                                          newNodeCaughtUp, tdirWithPoolTxns,
                                          tconf, allPluginsPath):
    """
    Node discards 3-phase or ViewChangeDone messages from view nos that it does not
    know of (view nos before it joined the pool)
    :return:
    """
    looper, nodeX, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    viewNo = nodeX.viewNo

    # Force two view changes: node discards msgs which have viewNo
    # at least two less than node's. Current protocol implementation
    # needs to hold messages from the previous view as well as
    # from the current view.
    for i in range(2):
        ensure_view_change(looper, txnPoolNodeSet)
        waitNodeDataEquality(looper, nodeX, *txnPoolNodeSet[:-1])
        checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sender = txnPoolNodeSet[0]
    rid_x_node = sender.nodestack.getRemote(nodeX.name).uid
    messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime()

    # 3 pc msg (PrePrepare) needs to be discarded
    primaryRepl = getPrimaryReplica(txnPoolNodeSet)
    three_pc = PrePrepare(
        0,
        viewNo,
        10,
        time.time(),
        [[wallet.defaultId,
          wallet._getIdData().lastReqId + 1]],
        1,
        "random digest",
        DOMAIN_LEDGER_ID,
        primaryRepl.stateRootHash(DOMAIN_LEDGER_ID),
        primaryRepl.txnRootHash(DOMAIN_LEDGER_ID),
    )
    sender.send(three_pc, rid_x_node)
    looper.run(
        eventually(checkDiscardMsg, [
            nodeX,
        ],
                   three_pc,
                   'un-acceptable viewNo',
                   retryWait=1,
                   timeout=messageTimeout))
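
# A minimal sketch of the discard boundary described in the comment above (an
# assumption read off the comment, not the node implementation): messages from
# the current and the immediately previous view are kept, anything older is
# discarded with an 'un-acceptable viewNo' reason.
def is_unacceptable_view_no(msg_view_no: int, node_view_no: int) -> bool:
    return msg_view_no < node_view_no - 1


assert is_unacceptable_view_no(0, 2)      # two views behind -> discarded
assert not is_unacceptable_view_no(1, 2)  # previous view -> still held
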
def testPrimaryElectionContested(electContFixture, looper, txnPoolNodeSet):
    """
    Primary selection (Rainy Day)
    A, B, C, D, E
    A, B, C, D startup. E is lagging.
    A sees the minimum number of nodes, and then sends Nominate(A)
    At the same exact time, B sees the minimum number of nodes, and then sends out Nominate(B)
    A sees B sending Nominate(B), but it has already nominated itself, so it does nothing
    B sees A sending Nominate(A), but it has already nominated itself, so it does nothing
    C sees A sending Nominate(A), and sends Nominate(A)
    D sees A sending Nominate(A), and sends Nominate(A)
    All nodes see that B nominated B and A, C, and D all nominated A
    Because the votes for A exceeds the votes for B, all send out Primary(A)
    TODO's (see below)
    All see the others have sent Primary A, and then the nodes record who is the Primary.
    """

    A, B, C, D = txnPoolNodeSet

    checkPoolReady(looper, txnPoolNodeSet)

    logger.debug("Check nomination")
    timeout = waits.expectedPoolNominationTimeout(nodeCount)

    # Checking whether Node A nominated itself
    looper.run(
        eventually(checkNomination, A, A.name, retryWait=1, timeout=timeout))

    # Checking whether Node B nominated itself
    looper.run(
        eventually(checkNomination, B, B.name, retryWait=1, timeout=timeout))

    for n in [C, D]:
        # Checking whether Node C and Node D nominated Node A
        looper.run(
            eventually(checkNomination,
                       n,
                       A.name,
                       retryWait=1,
                       timeout=timeout))

    checkProtocolInstanceSetup(looper=looper,
                               nodes=txnPoolNodeSet,
                               retryWait=1)

    # Node D should not be primary
    assert not D.hasPrimary
    # A should have at least one primary
    assert A.hasPrimary
def testNodeDiscardMessageFromUnknownView(
        txnPoolNodeSet, sdk_node_set_with_node_added_after_some_txns,
        sdk_new_node_caught_up, allPluginsPath, wallet1):
    """
    Node discards 3-phase or ViewChangeDone messages from view nos that it does not
    know of (view nos before it joined the pool)
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    viewNo = new_node.viewNo

    # Force two view changes: node discards msgs which have viewNo
    # at least two less than node's. Current protocol implementation
    # needs to hold messages from the previous view as well as
    # from the current view.
    for i in range(2):
        ensure_view_change(looper, txnPoolNodeSet)
        waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
        checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sender = txnPoolNodeSet[0]
    rid_x_node = sender.nodestack.getRemote(new_node.name).uid
    messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime()

    # 3 pc msg (PrePrepare) needs to be discarded
    primaryRepl = getPrimaryReplica(txnPoolNodeSet)
    three_pc = PrePrepare(
        0,
        viewNo,
        10,
        get_utc_epoch(),
        [[wallet1.defaultId, Request.gen_req_id()]],
        1,
        "random digest",
        DOMAIN_LEDGER_ID,
        primaryRepl.stateRootHash(DOMAIN_LEDGER_ID),
        primaryRepl.txnRootHash(DOMAIN_LEDGER_ID),
    )
    sender.send(three_pc, rid_x_node)
    looper.run(
        eventually(checkDiscardMsg, [
            new_node,
        ],
                   three_pc,
                   'un-acceptable viewNo',
                   retryWait=1,
                   timeout=messageTimeout))
def test_view_change_without_primary(txnPoolNodeSet, looper, tconf):
    first, others = stop_nodes_and_remove_first(looper, txnPoolNodeSet)

    start_and_connect_nodes(looper, others)

    timeout = waits.expectedPoolElectionTimeout(
        len(txnPoolNodeSet)) + tconf.NEW_VIEW_TIMEOUT

    # looper.runFor(40)

    checkProtocolInstanceSetup(looper=looper,
                               nodes=txnPoolNodeSet,
                               retryWait=1,
                               customTimeout=timeout,
                               instances=range(
                                   getRequiredInstances(len(txnPoolNodeSet))))
def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet,
                                          sdk_node_set_with_node_added_after_some_txns,
                                          sdk_new_node_caught_up,
                                          allPluginsPath, sdk_wallet_client):
    """
    Node discards 3-phase or ViewChangeDone messages from view nos that it does not
    know of (view nos before it joined the pool)
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    viewNo = new_node.viewNo

    # Force two view changes: node discards msgs which have viewNo
    # at least two less than node's. Current protocol implementation
    # needs to hold messages from the previous view as well as
    # from the current view.
    for i in range(2):
        ensure_view_change(looper, txnPoolNodeSet)
        waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
        checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sender = txnPoolNodeSet[0]
    rid_x_node = sender.nodestack.getRemote(new_node.name).uid
    messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime()

    # 3 pc msg (PrePrepare) needs to be discarded
    _, did = sdk_wallet_client
    primaryRepl = getPrimaryReplica(txnPoolNodeSet)
    three_pc = PrePrepare(
        0,
        viewNo,
        10,
        get_utc_epoch(),
        ["random request digest"],
        init_discarded(),
        "random digest",
        DOMAIN_LEDGER_ID,
        primaryRepl.stateRootHash(DOMAIN_LEDGER_ID),
        primaryRepl.txnRootHash(DOMAIN_LEDGER_ID),
        0,
        True
    )
    sender.send(three_pc, rid_x_node)
    looper.run(eventually(checkDiscardMsg, [new_node, ], three_pc,
                          'un-acceptable viewNo',
                          retryWait=1, timeout=messageTimeout))
def testPrimarySelectionAfterPoolReady(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward):
    """
    Once the pool is ready (a node has connected to at least 3 other nodes),
    appropriate primary replicas should be selected.
    """

    def checkPrimaryPlacement():
        # Node names sorted by rank
        sortedNodes = sorted(txnPoolNodeSet,
                             key=operator.attrgetter("rank"))

        for idx, node in enumerate(sortedNodes):
            # For instance 0, the primary replica should be on the node with
            # rank 0
            if idx == 0:
                Replica.generateName(sortedNodes[idx].name, 0)
                assert node.replicas[0].isPrimary
                assert not node.replicas[1].isPrimary
                assert not node.replicas[2].isPrimary

            # For instance 1, the primary replica should be on the node with
            # rank 1
            if idx == 1:
                Replica.generateName(sortedNodes[idx].name, 1)
                assert not node.replicas[0].isPrimary
                assert node.replicas[1].isPrimary
                assert not node.replicas[2].isPrimary

            # For instance 2, the primary replica should be on the node with
            # rank 2
            if idx == 2:
                Replica.generateName(sortedNodes[idx].name, 2)
                assert not node.replicas[0].isPrimary
                assert not node.replicas[1].isPrimary
                assert node.replicas[2].isPrimary

    check_rank_consistent_across_each_node(txnPoolNodeSet)
    # Check if the primary is on the correct node
    timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    looper.run(eventually(checkPrimaryPlacement, retryWait=1, timeout=timeout))
    # Check if every protocol instance has one and only one primary and any node
    #  has no more than one primary
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
def testPrimarySelectionAfterPoolReady(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward):
    """
    Once the pool is ready (a node has connected to at least 3 other nodes),
    appropriate primary replicas should be selected.
    """

    def checkPrimaryPlacement():
        # Node names sorted by rank
        sortedNodes = sorted(txnPoolNodeSet,
                             key=operator.attrgetter("rank"))

        for idx, node in enumerate(sortedNodes):
            # For instance 0, the primary replica should be on the node with
            # rank 0
            if idx == 0:
                Replica.generateName(sortedNodes[idx].name, 0)
                assert node.replicas[0].isPrimary
                assert not node.replicas[1].isPrimary
                assert not node.replicas[2].isPrimary

            # For instance 1, the primary replica should be on the node with
            # rank 1
            if idx == 1:
                Replica.generateName(sortedNodes[idx].name, 1)
                assert not node.replicas[0].isPrimary
                assert node.replicas[1].isPrimary
                assert not node.replicas[2].isPrimary

            # For instance 2, the primary replica should be on the node with
            # rank 2
            if idx == 2:
                Replica.generateName(sortedNodes[idx].name, 2)
                assert not node.replicas[0].isPrimary
                assert not node.replicas[1].isPrimary
                assert node.replicas[2].isPrimary

    check_rank_consistent_across_each_node(txnPoolNodeSet)
    # Check if the primary is on the correct node
    timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    looper.run(eventually(checkPrimaryPlacement, retryWait=1, timeout=timeout))
    # Check if every protocol instance has one and only one primary and any node
    #  has no more than one primary
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
Example #35
def testNodeDiscardMessageFromUnknownView(
        txnPoolNodeSet, sdk_node_set_with_node_added_after_some_txns,
        sdk_new_node_caught_up, allPluginsPath, sdk_wallet_client):
    """
    Node discards 3-phase or ViewChangeDone messages from view nos that it does not
    know of (view nos before it joined the pool)
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    viewNo = new_node.viewNo

    pp_seq_no = get_pp_seq_no(txnPoolNodeSet)
    # Force a view change. The node discards msgs whose viewNo is at least two
    # less than its own; the current protocol implementation needs to hold
    # messages from the previous view as well as from the current view.
    for i in range(1):
        ensure_view_change(looper, txnPoolNodeSet)
        waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
        checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
        pp_seq_no += 1

    sender = txnPoolNodeSet[1]
    rid_x_node = sender.nodestack.getRemote(new_node.name).uid
    messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime()

    # 3 pc msg (PrePrepare) needs to be discarded
    _, did = sdk_wallet_client
    primaryRepl = getPrimaryReplica(txnPoolNodeSet)
    inst_id = 0
    three_pc = create_pre_prepare_no_bls(
        primaryRepl.node.db_manager.get_state_root_hash(DOMAIN_LEDGER_ID),
        viewNo,
        pp_seq_no=pp_seq_no + 1,
        inst_id=inst_id)
    sender.send(three_pc, rid_x_node)
    looper.run(
        eventually(checkDiscardMsg,
                   [new_node.replicas[inst_id].stasher],
                   three_pc,
                   OLD_VIEW,
                   retryWait=1,
                   timeout=messageTimeout))
def testPrimarySelectionAfterViewChange(looper, nodeSet, ready,
                                        primaryReplicas, viewChangeDone):
    """
    Test that primary replica of a protocol instance shifts to a new node after
    a view change.
    """

    # Primary replicas before view change
    prBeforeVC = primaryReplicas

    # Primary replicas after view change
    instanceCount = getNoInstances(nodeCount)
    prAfterVC = [getPrimaryReplica(nodeSet, i) for i in range(instanceCount)]

    # Primary replicas have moved to the next node
    for br, ar in zip(prBeforeVC, prAfterVC):
        assert ar.node.rank - br.node.rank == 1

    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=5)
Example #37
0
def testPrimarySelectionAfterViewChange(looper, nodeSet, ready, primaryReplicas,
                                        viewChangeDone):
    """
    Test that primary replica of a protocol instance shifts to a new node after
    a view change.
    """

    # Primary replicas before view change
    prBeforeVC = primaryReplicas

    # Primary replicas after view change
    instanceCount = getNoInstances(nodeCount)
    prAfterVC = [getPrimaryReplica(nodeSet, i) for i in range(instanceCount)]

    # Primary replicas have moved to the next node
    for br, ar in zip(prBeforeVC, prAfterVC):
        assert ar.node.rank - br.node.rank == 1

    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=5)
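Both copies above check the same rotation rule. A hedged sketch of that rule (it mirrors the `ar.node.rank - br.node.rank == 1` assertion; the modulo wrap-around and the helper name are assumptions of this sketch, not something the test verifies):

def expected_primary_rank_after_view_change(old_primary_rank, node_count,
                                            view_changes=1):
    # Primaries move to the node with the next rank on each view change.
    return (old_primary_rank + view_changes) % node_count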
def testPrimaryElectionContested(electContFixture, looper, txnPoolNodeSet):
    """
    Primary selection (Rainy Day)
    A, B, C, D, E
    A, B, C, D startup. E is lagging.
    A sees the minimum number of nodes, and then sends Nominate(A)
    At the same exact time, B sees the minimum number of nodes, and then sends out Nominate(B)
    A sees B sending Nominate(B), but it has already nominated itself, so it does nothing
    B sees A sending Nominate(A), but it has already nominated itself, so it does nothing
    C sees A sending Nominate(A), and sends Nominate(A)
    D sees A sending Nominate(A), and sends Nominate(A)
    All nodes see that B nominated B while A, C, and D all nominated A
    Because the votes for A exceed the votes for B, all send out Primary(A)
    TODO's (see below)
    All see that the others have sent Primary(A), and then the nodes record who the Primary is.
    """

    A, B, C, D = txnPoolNodeSet

    checkPoolReady(looper, txnPoolNodeSet)

    logger.debug("Check nomination")
    timeout = waits.expectedPoolNominationTimeout(nodeCount)

    # Checking whether Node A nominated itself
    looper.run(eventually(checkNomination, A, A.name,
                          retryWait=1, timeout=timeout))

    # Checking whether Node B nominated itself
    looper.run(eventually(checkNomination, B, B.name,
                          retryWait=1, timeout=timeout))

    for n in [C, D]:
        # Checking whether Node C and Node D nominated Node A
        looper.run(eventually(checkNomination, n, A.name,
                              retryWait=1, timeout=timeout))

    checkProtocolInstanceSetup(looper=looper, nodes=txnPoolNodeSet, retryWait=1)

    # Node D should not be primary
    assert not D.hasPrimary
    # A should have at least one primary
    assert A.hasPrimary
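An illustrative tally of the election narrative in the docstring above (a sketch of the decision rule only, not the elector's implementation; nomination_outcome is a hypothetical helper):

from collections import Counter

def nomination_outcome(nominations):
    # nominations: mapping of voter node name -> nominated node name
    tally = Counter(nominations.values()).most_common()
    if len(tally) == 1 or tally[0][1] > tally[1][1]:
        return "PRIMARY", tally[0][0]       # clear winner, e.g. A above
    tied = [name for name, votes in tally if votes == tally[0][1]]
    return "REELECTION", tied               # split vote, as in the tie test below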
Example #39
0
def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf, steward1,
                     stewardWallet, allPluginsPath):
    """
    Add 2 new nodes to trigger replica addition and primary election
    """
    new_nodes = add_2_nodes(looper, txnPoolNodeSet, steward1, stewardWallet,
                            tdirWithPoolTxns, tconf, allPluginsPath)
    for n in new_nodes:
        logger.debug("{} connected to the pool".format(n))

    f = getMaxFailures(len(txnPoolNodeSet))

    def checkFValue():
        for node in txnPoolNodeSet:
            assert node.f == f
            assert len(node.replicas) == (f + 1)

    timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet))
    looper.run(eventually(checkFValue, retryWait=1, timeout=timeout))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
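The checkFValue closure above pins down the relationship between pool size, tolerated failures and replica count. A small sketch of that relationship, assuming the usual BFT bound that getMaxFailures encodes (stated here as an assumption, not taken from the library source):

def expected_f_and_replica_count(node_count):
    # f faulty nodes tolerated out of N, with f + 1 protocol instances
    # (one master plus f backups) running on every node.
    f = (node_count - 1) // 3
    return f, f + 1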
Example #40
0
def testPrimarySelectionAfterPoolReady(looper, nodeSet, ready):
    """
    Once the pool is ready (each node has connected to at least 3 other nodes),
    appropriate primary replicas should be selected.
    """

    def checkPrimaryPlacement():
        # Node names sorted by rank
        sortedNodeNames = sorted(nodeSet.nodes.values(),
                                 key=operator.attrgetter("rank"))

        for idx, node in enumerate(sortedNodeNames):
            # For instance 0, the primary replica should be on the node with rank 0
            if idx == 0:
                Replica.generateName(sortedNodeNames[idx], 0)
                assert node.replicas[0].isPrimary
                assert not node.replicas[1].isPrimary
                assert not node.replicas[2].isPrimary

            # For instance 1, the primary replica should be on the node with rank 1
            if idx == 1:
                Replica.generateName(sortedNodeNames[idx], 1)
                assert not node.replicas[0].isPrimary
                assert node.replicas[1].isPrimary
                assert not node.replicas[2].isPrimary

            # For instance 2, the primary replica should be on the node with rank 2
            if idx == 2:
                Replica.generateName(sortedNodeNames[idx], 2)
                assert not node.replicas[0].isPrimary
                assert not node.replicas[1].isPrimary
                assert node.replicas[2].isPrimary

    # Check if the primary is on the correct node
    looper.run(eventually(checkPrimaryPlacement, retryWait=1, timeout=10))
    # Check if every protocol instance has one and only one primary and any node
    #  has no more than one primary
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=5)
def testElectionsAfterViewChange(delayedPerf, looper: Looper,
                                 nodeSet: TestNodeSet, up, wallet1, client1):
    """
    Test that a primary election does happen after a view change
    """

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls
    # and view changes
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelay(10, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    # Ensure view change happened for both node and its primary elector
    for node in nodeSet:
        looper.run(eventually(partial(checkViewChangeInitiatedForNode, node, 1),
                              retryWait=1, timeout=20))

    # Ensure elections are done again and pool is setup again with appropriate
    # protocol instances and each protocol instance is setup properly too
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=30)
def testAdd2NewNodes(looper, txnPoolNodeSet,
                     sdk_pool_handle, sdk_wallet_steward,
                     tdir, tconf, allPluginsPath):
    """
    Add 2 new nodes to trigger replica addition and primary election
    """
    new_nodes = sdk_add_2_nodes(looper, txnPoolNodeSet,
                                sdk_pool_handle, sdk_wallet_steward,
                                tdir, tconf, allPluginsPath)
    for n in new_nodes:
        logger.debug("{} connected to the pool".format(n))

    f = getMaxFailures(len(txnPoolNodeSet))

    def checkFValue():
        for node in txnPoolNodeSet:
            assert node.f == f
            assert len(node.replicas) == (f + 1)

    timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet))
    looper.run(eventually(checkFValue, retryWait=1, timeout=timeout))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    sdk_pool_refresh(looper, sdk_pool_handle)
def testPrimarySelectionAfterPoolReady(looper, nodeSet, ready):
    """
    Once the pool is ready (each node has connected to at least 3 other nodes),
    appropriate primary replicas should be selected.
    """
    def checkPrimaryPlacement():
        # Node names sorted by rank
        sortedNodeNames = sorted(nodeSet.nodes.values(),
                                 key=operator.attrgetter("rank"))

        for idx, node in enumerate(sortedNodeNames):
            # For instance 0, the primary replica should be on the node with rank 0
            if idx == 0:
                Replica.generateName(sortedNodeNames[idx], 0)
                assert node.replicas[0].isPrimary
                assert not node.replicas[1].isPrimary
                assert not node.replicas[2].isPrimary

            # For instance 1, the primary replica should be on the node with rank 1
            if idx == 1:
                Replica.generateName(sortedNodeNames[idx], 1)
                assert not node.replicas[0].isPrimary
                assert node.replicas[1].isPrimary
                assert not node.replicas[2].isPrimary

            # For instance 2, the primary replica should be on the node with rank 2
            if idx == 2:
                Replica.generateName(sortedNodeNames[idx], 2)
                assert not node.replicas[0].isPrimary
                assert not node.replicas[1].isPrimary
                assert node.replicas[2].isPrimary

    # Check if the primary is on the correct node
    looper.run(eventually(checkPrimaryPlacement, retryWait=1, timeout=10))
    # Check if every protocol instance has one and only one primary and any node
    #  has no more than one primary
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=5)
def test_no_propagated_future_view_change_while_view_change(txnPoolNodeSet, looper):
    # the last node is a lagging one, which will receive ViewChangeDone messages for future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate view change in progress
    lagged_node.view_changer.view_change_in_progress = True
    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = \
        lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE_CHANGE on the lagging node, so all nodes except it finish the View Change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that View Change happened on all nodes but the lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes, instances=range(2))
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received a Future VCD from each other node but didn't start a new view change
        assert len(other_nodes) + initial_vhdc ==\
               lagged_node.view_changer.spylog.count(lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])
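The general shape of the delay_rules pattern used above, as a hedged sketch (delay_rules and icDelay are the same helpers the test uses; the wrapper name and the action callback are ours):

def run_while_node_lags(looper, lagged_node, other_nodes, action):
    # While the rule is active, the lagged node's inbound stasher holds back
    # INSTANCE_CHANGE messages; leaving the block removes the rule so the
    # held-back messages can eventually be delivered.
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        action(looper, other_nodes)   # e.g. ensure_view_change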
def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet,
                                             sdk_one_node_added,
                                             tdir,
                                             tdirWithPoolTxns,
                                             allPluginsPath,
                                             sdk_pool_handle, sdk_wallet_client,
                                             sdk_wallet_steward):
    """
    Send pool and domain ledger requests such that they interleave, and do
    view change in between and verify the pool is functional
    """
    new_node = sdk_one_node_added
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Send domain ledger requests but don't wait for replies
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, 2)

    # Add another node by sending pool ledger request
    _, new_theta = sdk_node_theta_added(looper,
                                        txnPoolNodeSet,
                                        tdir,
                                        tconf,
                                        sdk_pool_handle,
                                        sdk_wallet_steward,
                                        allPluginsPath,
                                        name='new_theta')

    # Send more domain ledger requests but don't wait for replies
    requests.extend(sdk_send_random_requests(looper, sdk_pool_handle,
                                             sdk_wallet_client, 3))

    # Do view change without waiting for replies
    ensure_view_change(looper, nodes=txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    # Make sure all requests are completed
    total_timeout = sdk_eval_timeout(len(requests), len(txnPoolNodeSet))
    sdk_get_and_check_replies(looper, requests, timeout=total_timeout)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)
    new_steward_wallet, steward_did = sdk_add_new_nym(looper,
                                                      sdk_pool_handle,
                                                      sdk_wallet_steward,
                                                      'another_ste',
                                                      role='STEWARD')

    # Send another pool ledger request (NODE) but don't wait for completion of
    # request
    next_node_name = 'next_node'

    sigseed, verkey, bls_key, nodeIp, nodePort, clientIp, clientPort, key_proof = \
        prepare_new_node_data(tconf, tdir, next_node_name)
    node_req = looper.loop.run_until_complete(
        prepare_node_request(steward_did,
                             new_node_name=next_node_name,
                             clientIp=clientIp,
                             clientPort=clientPort,
                             nodeIp=nodeIp,
                             nodePort=nodePort,
                             bls_key=bls_key,
                             sigseed=sigseed,
                             key_proof=key_proof))

    sdk_wallet = (new_steward_wallet, steward_did)
    request_couple = sdk_sign_and_send_prepared_request(looper, sdk_wallet,
                                                        sdk_pool_handle,
                                                        node_req)

    # Send more domain ledger requests but don't wait for replies
    request_couples = [request_couple,
                       *sdk_send_random_requests(looper, sdk_pool_handle,
                                                 sdk_wallet_client, 5)]

    # Make sure all requests are completed
    total_timeout = sdk_eval_timeout(len(request_couples), len(txnPoolNodeSet))
    sdk_get_and_check_replies(looper, request_couples, timeout=total_timeout)

    # Make sure pool is functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)
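A condensed sketch of the fire-then-collect pattern this test leans on (the helper names and signatures are the same ones the test already calls; the wrapper itself is hypothetical):

def send_then_collect(looper, pool_handle, wallet, nodes, count):
    # Send without waiting, keep the request handles, and only later wait for
    # all replies with a timeout scaled to request count and pool size.
    requests = sdk_send_random_requests(looper, pool_handle, wallet, count)
    total_timeout = sdk_eval_timeout(len(requests), len(nodes))
    return sdk_get_and_check_replies(looper, requests, timeout=total_timeout)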
Example #46
0
def pool(looper, nodeSet):
    for n in nodeSet:  # type: TestNode
        n.startKeySharing()
    looper.run(checkNodesConnected(nodeSet))
    checkProtocolInstanceSetup(looper, nodeSet, timeout=5)
    return adict(looper=looper, nodeset=nodeSet)
Example #47
0
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        looper, txnPoolNodeSet, steward1, stewardWallet, tconf, slow_node):
    """
    Delay 3PC to 1 node and then cause a view change so that by the time the
    view change happens (each node gets > n-f `INSTANCE_CHANGE`s), the slow
    node is behind the other nodes. It should initiate catchup to come to the
    same state as the other nodes.
    """

    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = tconf.PerfCheckFreq

    # Bad network introduced
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(2 * delay, 0))
    slow_node.nodeIbStasher.delay(cDelay(3 * delay, 0))
    for i in range(2):
        sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, steward1,
                                            20)
        waitNodeDataInequality(looper, slow_node, *fast_nodes)

    catchup_reply_counts = {
        n.name: n.ledgerManager.spylog.count(n.ledgerManager.processCatchupRep)
        for n in txnPoolNodeSet
    }
    catchup_done_counts = {
        n.name: n.spylog.count(n.allLedgersCaughtUp)
        for n in txnPoolNodeSet
    }

    def slow_node_processed_some():
        assert slow_node.master_replica.batches

    # The slow node has received some PRE-PREPAREs
    looper.run(eventually(slow_node_processed_some, retryWait=1,
                          timeout=delay))

    # No reverts have been called by the slow node
    rv = getAllReturnVals(slow_node.replicas[0],
                          TestReplica.revert_unordered_batches)
    assert not rv or max(rv) == 0

    # Delay reception of catchup replies so ViewChangeDone can be received
    # before catchup completes
    delay_catchup_reply = 2
    slow_node.nodeIbStasher.delay(cr_delay(delay_catchup_reply))

    ensure_view_change(looper, txnPoolNodeSet)
    # `slow_node` will not have elections done but others will.
    checkProtocolInstanceSetup(looper,
                               fast_nodes,
                               numInstances=len(slow_node.replicas),
                               retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # `slow_node` does catchup, `fast_nodes` don't
    for n in txnPoolNodeSet:
        assert n.spylog.count(
            n.allLedgersCaughtUp) > catchup_done_counts[n.name]
        if n == slow_node:
            assert n.ledgerManager.spylog.count(
                n.ledgerManager.processCatchupRep) > catchup_reply_counts[
                    n.name]
        else:
            assert n.ledgerManager.spylog.count(
                n.ledgerManager.processCatchupRep) == catchup_reply_counts[
                    n.name]

    # Greater than 0 batches were reverted by the slow node
    assert max(
        getAllReturnVals(
            slow_node.master_replica,
            slow_node.master_replica.revert_unordered_batches)) > 0

    # Bad network repaired
    slow_node.reset_delays_and_process_delayeds()

    # Make sure pool is functional
    sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, steward1, 5)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
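A sketch of the "bad network" setup at the top of this test (ppDelay/pDelay/cDelay are the same delayers the test uses; the wrapper and the scaling factors simply restate the calls above):

def slow_down_3pc(node, base_delay):
    node.nodeIbStasher.delay(ppDelay(base_delay, 0))      # PRE-PREPARE, instance 0
    node.nodeIbStasher.delay(pDelay(2 * base_delay, 0))   # PREPARE
    node.nodeIbStasher.delay(cDelay(3 * base_delay, 0))   # COMMIT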
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    One of the nodes experiences a poor network and loses 3PC messages. It has
    to do multiple rounds of catchup to become caught up
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # Bad node does not process any 3 phase messages, equivalent to messages
    # being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    # Delay LEDGER_STATUS on the slow node, so that only
    # MESSAGE_REQUEST(LEDGER_STATUS) is sent and the node catches up 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from the slow
    # node and return a consistency proof for all missing txns, so no stashed
    # ones are applied
    bad_node.nodeIbStasher.delay(lsDelay(1000))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad_node needs to do catchup again.

    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                    compare_val_to=True))

    def is_catchup_not_needed_count():
        return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                    compare_val_to=False))

    def has_ordered_till_last_prepared_certificate_count():
        return len(getAllReturnVals(bad_node,
                                    bad_node.has_ordered_till_last_prepared_certificate,
                                    compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = has_ordered_till_last_prepared_certificate_count()
    old_count_3 = is_catchup_not_needed_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    assert is_catchup_not_needed_count() > old_count_3
    # The bad_node caught up due to ordering till last prepared certificate
    assert has_ordered_till_last_prepared_certificate_count() > old_count_2

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        orig_method, bad_node.master_replica)
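The three counters above share one idiom. A sketch of it (getAllReturnVals and compare_val_to are used exactly as in the test; the wrapper name is ours):

def count_returns(node, spied_method, value):
    # Count how many recorded calls of the spied method returned `value`.
    return len(getAllReturnVals(node, spied_method, compare_val_to=value))

# e.g. count_returns(bad_node, bad_node.is_catchup_needed, True)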
def testPrimaryElectionWithTie(electTieFixture, looper, keySharedNodes):
    """
    Primary selection (Rainy Day)
    A, B, C, D, E
    A, B, C, D startup. E is lagging.
    A sees the minimum number of nodes, and then sends Nominate(A)
    At the same exact time, B sees the minimum number of nodes, and then sends out Nominate(B)
    A sees B sending Nominate(B), but it has already nominated itself, so it does nothing
    B sees A sending Nominate(A), but it has already nominated itself, so it does nothing
    C sees A sending Nominate(A), and sends Nominate(A)
    D sees B sending Nominate(B), and sends Nominate(B)
    There's a split. C and A think A is the primary, B and D think B is the primary
    All nodes can see that there is a split. Each sends out Reelection([A,B])

    A and B both see Reelection([A,B]) from themselves as well as from the other 3 (the number from the others should be at least f+1):

    1. they wait a random amount of time (between 0 and 2 seconds),
    2. they each send out a Nominate(self)

    Voting is repeated until we have a good election.
    """

    # TODO: optimize by sending messages in batches; for example, don't send
    #     messages more often than once every 400 milliseconds. Once those 400
    #     millis have passed, send the queued messages in one batch.

    nodeSet = keySharedNodes
    A, B, C, D = nodeSet.nodes.values()

    checkPoolReady(looper, nodeSet.nodes.values())

    for node in nodeSet.nodes.values():
        for instId, replica in enumerate(node.elector.replicas):
            logger.debug("replica {} {} with votes {}".format(
                replica.name, replica.instId,
                node.elector.nominations.get(instId, {})))

    nominationTimeout = waits.expectedPoolNominationTimeout(len(nodeSet))
    logger.debug("Check nomination")
    # Checking whether Node A nominated itself
    looper.run(
        eventually(checkNomination,
                   A,
                   A.name,
                   retryWait=1,
                   timeout=nominationTimeout))

    # Checking whether Node B nominated itself
    looper.run(
        eventually(checkNomination,
                   B,
                   B.name,
                   retryWait=1,
                   timeout=nominationTimeout))

    # Checking whether Node C nominated Node A
    looper.run(
        eventually(checkNomination,
                   C,
                   A.name,
                   retryWait=1,
                   timeout=nominationTimeout))

    # Checking whether Node D nominated Node B
    looper.run(
        eventually(checkNomination,
                   D,
                   B.name,
                   retryWait=1,
                   timeout=nominationTimeout))

    # No node should be primary
    for node in nodeSet.nodes.values():
        assert node.hasPrimary is False

    for node in nodeSet.nodes.values():
        node.resetDelays()

    checkProtocolInstanceSetup(looper=looper, nodes=nodeSet, retryWait=1)
def test_slow_node_reverts_unordered_state_during_catchup(looper,
                                                          txnPoolNodeSet,
                                                          sdk_pool_handle,
                                                          sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catch up, it has to
    revert some unordered state. By that time the node should have received
    all COMMITs, so it will apply some of them (those for which it has not
    received txns from catchup).
    To do this, delay COMMITs for a long time, catch up to a state a little
    older than the one received in LedgerStatus, and once catchup completes,
    reset the delays and try to process the delayed COMMITs; some COMMITs will
    be rejected but some will be processed, since catchup was done for an
    older ledger.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STATUS on the slow node, so that only
    # MESSAGE_REQUEST(LEDGER_STATUS) is sent and the node catches up 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from the slow
    # node and return a consistency proof for all missing txns, so no stashed
    # ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        lst_3pc = check_last_ordered_3pc(n1, n2)

    def chk1():
        # `slow_node` has prepared all 3PC messages which
        # `other_nodes` have ordered
        assertEquality(slow_master_replica.last_prepared_before_view_change, lst_3pc)

    looper.run(eventually(chk1, retryWait=1))

    old_pc_count = slow_master_replica.spylog.count(
        slow_master_replica.can_process_since_view_change_in_progress)

    assert len(slow_node.stashedOrderedReqs) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica.spylog.count(
            slow_master_replica.can_process_since_view_change_in_progress) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # COMMITs are stashed
        assert len(slow_node.stashedOrderedReqs) == delay_batches * Max3PCBatchSize

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # Some COMMITs were ordered but stashed and they were processed
        rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
        assert delay_batches in rv

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 5))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(
            chk5,
            retryWait=1,
            timeout=waits.expectedPoolCatchupTime(
                len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
Example #51
0
def pool(looper, nodeSet):
    for n in nodeSet:  # type: TestNode
        n.startKeySharing()
    looper.run(checkNodesConnected(nodeSet))
    checkProtocolInstanceSetup(looper, nodeSet, timeout=5)
    return adict(looper=looper, nodeset=nodeSet)
def test_slow_node_reverts_unordered_state_during_catchup(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catch up, it has to
    revert some unordered state. By that time the node should have received
    all COMMITs, so it will apply some of them (those for which it has not
    received txns from catchup).
    To do this, delay COMMITs for a long time, catch up to a state a little
    older than the one received in LedgerStatus, and once catchup completes,
    reset the delays and try to process the delayed COMMITs; some COMMITs will
    be rejected but some will be processed, since catchup was done for an
    older ledger.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STATUS on the slow node, so that only
    # MESSAGE_REQUEST(LEDGER_STATUS) is sent and the node catches up 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from the slow
    # node and return a consistency proof for all missing txns, so no stashed
    # ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        check_last_ordered_3pc(n1, n2)

    assert slow_master_replica.last_prepared_before_view_change == old_last_ordered

    old_pc_count = slow_master_replica._ordering_service.spylog.count(
        slow_master_replica._ordering_service._validate)

    assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica._ordering_service.spylog.count(
            slow_master_replica._ordering_service._validate) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # (delay_batches * Max3PCBatchSize * commits_count_in_phase) COMMITs are stashed
        assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == \
               delay_batches * Max3PCBatchSize * (len(txnPoolNodeSet) - 1)

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # Some COMMITs were received but stashed, and
        # they will be processed after catchup
        assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == 0

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 50))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(chk5,
                   retryWait=1,
                   timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
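A sketch of the count asserted in chk3 above, taking the test's own formula as given (the node_count - 1 factor corresponds to the "commits_count_in_phase" mentioned in the in-code comment):

def expected_stashed_commits(delay_batches, max_3pc_batch_size, node_count):
    return delay_batches * max_3pc_batch_size * (node_count - 1)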