def test_view_change_on_empty_ledger(txnPoolNodeSet, looper):
    """
    Check that a view change completes when there are no txns in the ledger
    """
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

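# The tests collected here rely on helpers from the indy-plenum test suite.
# A minimal sketch of the common imports they assume is shown below; the
# exact module paths are assumptions based on the plenum test package layout
# and may differ between plenum versions.
from plenum.test.helper import sdk_send_random_and_check
from plenum.test.view_change.helper import ensure_view_change
from plenum.test.test_node import ensureElectionsDone
from plenum.test.node_catchup.helper import ensure_all_nodes_have_same_data
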
def test_restarted_node_complete_vc_by_current_state(looper,
                                                     txnPoolNodeSet,
                                                     tconf,
                                                     tdir,
                                                     allPluginsPath):
    node_to_restart = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_restart,
                                            stopNode=True)
    looper.removeProdable(node_to_restart)
    old_completed_view_no = get_last_completed_view_no(txnPoolNodeSet[:-1])
    ensure_view_change(looper, txnPoolNodeSet[:-1])
    ensureElectionsDone(looper, txnPoolNodeSet[:-1],
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    current_completed_view_no = get_last_completed_view_no(txnPoolNodeSet[:-1])
    assert current_completed_view_no > old_completed_view_no

    # Delay VIEW_CHANGE_DONE messages for all nodes
    for node in txnPoolNodeSet[:-1]:
        node.nodeIbStasher.delay(vcd_delay(1000))
    ensure_view_change(looper, txnPoolNodeSet[:-1])

    # Start the stopped node while the other nodes are doing a view change
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf,
                                         tdir, allPluginsPath)
    node_to_restart.nodeIbStasher.delay(vcd_delay(1000))

    # Check that the restarted node uses the pool's last completed view no,
    # not the proposed one
    looper.run(eventually(complete_propagate_primary,
                          node_to_restart,
                          current_completed_view_no,
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))

def testPrimarySelectionAfterViewChange(  # noqa
        looper,
        txnPoolNodeSet,
        primaryReplicas,
        catchup_complete_count):
    """
    Test that primary replica of a protocol instance shifts to a new node
    after a view change.
    """
    # TODO: This test can fail due to view change.
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    for n in txnPoolNodeSet:
        assert n.spylog.count(
            n.allLedgersCaughtUp) > catchup_complete_count[n.name]

    # Primary replicas before view change
    prBeforeVC = primaryReplicas

    # Primary replicas after view change
    instanceCount = getNoInstances(nodeCount)
    prAfterVC = [getPrimaryReplica(txnPoolNodeSet, i)
                 for i in range(instanceCount)]

    # Primary replicas have moved to the next node
    for br, ar in zip(prBeforeVC, prAfterVC):
        assert ar.node.rank - br.node.rank == 1

    check_rank_consistent_across_each_node(txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

def test_different_ledger_request_interleave(tconf, looper, txnPoolNodeSet,
                                             client1, wallet1, one_node_added,
                                             client1Connected, tdir,
                                             client_tdir, tdirWithPoolTxns,
                                             steward1, stewardWallet,
                                             allPluginsPath):
    """
    Send pool and domain ledger requests such that they interleave, do a
    view change in between, and verify that the pool stays functional
    """
    new_node = one_node_added
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Send domain ledger requests but don't wait for replies
    requests = sendRandomRequests(wallet1, client1, 2)

    # Add another node by sending a pool ledger request
    _, _, new_theta = nodeThetaAdded(looper, txnPoolNodeSet, tdir,
                                     client_tdir, tconf, steward1,
                                     stewardWallet, allPluginsPath,
                                     name='new_theta')

    # Send more domain ledger requests but don't wait for replies
    requests.extend(sendRandomRequests(wallet1, client1, 3))

    # Do view change without waiting for replies
    ensure_view_change(looper, nodes=txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, client1, requests=requests)

    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)

    new_steward, new_steward_wallet = addNewSteward(looper, client_tdir,
                                                    steward1, stewardWallet,
                                                    'another_ste')

    # Send another pool ledger request (NODE) but don't wait for its
    # completion
    next_node_name = 'next_node'
    r = sendAddNewNode(tdir, tconf, next_node_name, new_steward,
                       new_steward_wallet)
    node_req = r[0]

    # Send more domain ledger requests but don't wait for replies
    requests = [node_req,
                *sendRandomRequests(new_steward_wallet, new_steward, 5)]

    # Make sure all requests are completed
    waitForSufficientRepliesForRequests(looper, new_steward,
                                        requests=requests)

    # Make sure the pool is functional
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)

def test_set_H_as_maxsize_for_backup_if_is_primary(looper,
                                                   txnPoolNodeSet,
                                                   sdk_pool_handle,
                                                   sdk_wallet_steward,
                                                   tconf,
                                                   tdir,
                                                   allPluginsPath):
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    primary_on_backup = txnPoolNodeSet[2]
    assert primary_on_backup.replicas._replicas[1].isPrimary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_on_backup,
                                            stopNode=True)
    looper.removeProdable(primary_on_backup)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              LOG_SIZE)
    restarted_node = start_stopped_node(primary_on_backup,
                                        looper,
                                        tconf,
                                        tdir,
                                        allPluginsPath)
    txnPoolNodeSet[2] = restarted_node
    ensureElectionsDone(looper, txnPoolNodeSet,
                        customTimeout=tconf.NEW_VIEW_TIMEOUT)
    # Gamma catches up 1 txn
    assert restarted_node.replicas._replicas[1].isPrimary
    assert restarted_node.replicas._replicas[1].h == 1
    assert restarted_node.replicas._replicas[1].H == LOG_SIZE + 1

def test_no_propagated_future_view_change_until_synced(txnPoolNodeSet,
                                                       looper, mode):
    # The last node is a lagging one, which will receive ViewChangeDone
    # messages for a future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node_index = (viewNo + 3) % len(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[lagged_node_index]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate catchup by setting a non-synced status
    lagged_node.mode = mode

    old_view_no = checkViewNoForNodes([lagged_node])

    check_future_vcd_count(lagged_node, 0)

    # delay INSTANCE_CHANGE on the lagged node, so all nodes except the
    # lagging one finish the view change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that the view change happened on all nodes but the
        # lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                   numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        check_no_view_change(looper, lagged_node)
        assert old_view_no == checkViewNoForNodes([lagged_node])

    # emulate the end of catchup by setting the Participating status
    lagged_node.mode = Mode.participating

    # make sure that the view change happened on the lagging node
    waitForViewChange(looper, [lagged_node], expectedViewNo=old_view_no + 1,
                      customTimeout=10)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

def test_resend_inst_ch_in_progress_v_ch(txnPoolNodeSet, looper,
                                         sdk_pool_handle, sdk_wallet_client,
                                         tdir, tconf, allPluginsPath):
    old_view = viewNoForNodes(txnPoolNodeSet)

    # disconnect two nodes; one of them would be the next master primary in
    # case of a view change
    for node in [txnPoolNodeSet[1], txnPoolNodeSet[-1]]:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                node,
                                                stopNode=True)
        looper.removeProdable(node)
        txnPoolNodeSet.remove(node)

    # delay I_CH on every node except the last one and initiate a view change
    stashers = [n.nodeIbStasher for n in txnPoolNodeSet[:-1]]
    with delay_rules_without_processing(stashers, icDelay(viewNo=2)):
        ensure_view_change(looper, txnPoolNodeSet)
        looper.runFor(tconf.NEW_VIEW_TIMEOUT + 1)

    # checks
    def checks():
        assert all(not node.view_change_in_progress
                   for node in txnPoolNodeSet)
        assert all(node.viewNo == old_view + 2 for node in txnPoolNodeSet)

    looper.run(eventually(checks,
                          timeout=tconf.NEW_VIEW_TIMEOUT * 2.5,
                          retryWait=1))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)

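# delay_rules / delay_rules_without_processing are used throughout these
# tests to hold back selected node-to-node messages while a view change is
# forced. A minimal self-contained sketch of the stash-and-release pattern
# follows (illustrative names only, not the plenum API):
from contextlib import contextmanager


class StasherSketch:
    """Holds back messages matching any active rule until released."""

    def __init__(self):
        self.stashed = []
        self.rules = []

    def process(self, msg, handler):
        # Stash the message if an active rule matches, else handle it now.
        if any(rule(msg) for rule in self.rules):
            self.stashed.append((msg, handler))
        else:
            handler(msg)

    def release(self):
        # Replay everything stashed while the rules were active.
        for msg, handler in self.stashed:
            handler(msg)
        self.stashed.clear()


@contextmanager
def delay_rules_sketch(stasher, *rules):
    stasher.rules.extend(rules)
    try:
        yield
    finally:
        for rule in rules:
            stasher.rules.remove(rule)
        stasher.release()


# Usage: hold back "COMMIT" messages while some action runs.
if __name__ == '__main__':
    stasher = StasherSketch()
    seen = []
    with delay_rules_sketch(stasher, lambda m: m == 'COMMIT'):
        stasher.process('COMMIT', seen.append)   # stashed
        stasher.process('PREPARE', seen.append)  # handled immediately
    assert seen == ['PREPARE', 'COMMIT']         # released on exit
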
def all_nodes_view_change(looper,
                          txnPoolNodeSet,
                          stewardWallet,
                          steward1,
                          client1,
                          wallet1,
                          client1Connected):
    for _ in range(5):
        send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1, 2)
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_nodes_removes_request_keys_for_ordered(setup, looper, txnPoolNodeSet,
                                                client1, wallet1):
    """
    A node does not order requests since it is missing some 3PC messages;
    it gets them from catchup and then clears them from its request queues
    """
    slow_node, fast_nodes = setup
    reqs = send_reqs_batches_and_get_suff_replies(looper, wallet1,
                                                  client1, 10, 5)
    ensure_all_nodes_have_same_data(looper, fast_nodes)
    assert slow_node.master_replica.last_ordered_3pc != \
        fast_nodes[0].master_replica.last_ordered_3pc

    def chk(key, nodes, present):
        for node in nodes:
            assert (key in node.master_replica.requestQueues[
                DOMAIN_LEDGER_ID]) == present

    for req in reqs:
        chk(req.key, fast_nodes, False)
        chk(req.key, [slow_node], True)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    for req in reqs:
        chk(req.key, txnPoolNodeSet, False)

def test_not_set_H_as_maxsize_for_backup_if_is_primary(looper,
                                                       txnPoolNodeSet,
                                                       sdk_pool_handle,
                                                       sdk_wallet_steward,
                                                       tconf,
                                                       tdir,
                                                       allPluginsPath):
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    primary_on_backup = txnPoolNodeSet[2]
    assert primary_on_backup.replicas._replicas[1].isPrimary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_on_backup,
                                            stopNode=True)
    looper.removeProdable(primary_on_backup)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              LOG_SIZE)
    restarted_node = start_stopped_node(primary_on_backup,
                                        looper,
                                        tconf,
                                        tdir,
                                        allPluginsPath)
    txnPoolNodeSet[2] = restarted_node
    ensureElectionsDone(looper, txnPoolNodeSet,
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    assert restarted_node.replicas._replicas[1].isPrimary
    assert restarted_node.replicas._replicas[1].h == 0
    assert restarted_node.replicas._replicas[1].H == LOG_SIZE

def testQueueingReqFromFutureView(delayed_perf_chk, looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client):
    """
    Test that every node queues 3-phase requests (PRE-PREPARE, PREPARE and
    COMMIT) that come from a view greater than the current view.
    - Delay reception and processing of view change messages by a non-primary
      of the master instance => it starts receiving 3-phase commit messages
      for the next view
    """
    lagging_node = get_last_master_non_primary_node(txnPoolNodeSet)
    old_view_no = lagging_node.viewNo

    # Delay processing of InstanceChange and ViewChangeDone so the node
    # stashes 3PC messages
    delay_ic = 60
    lagging_node.nodeIbStasher.delay(icDelay(delay_ic))
    lagging_node.nodeIbStasher.delay(vcd_delay(delay_ic))
    logger.debug('{} will delay its view change'.format(lagging_node))

    def chk_fut_view(view_no, is_empty):
        length = len(lagging_node.msgsForFutureViews.get(view_no, ()))
        if is_empty:
            assert length == 0
        else:
            assert length > 0
        return length

    # No messages queued for the future view
    chk_fut_view(old_view_no + 1, is_empty=True)
    logger.debug('{} does not have any messages for future views'
                 .format(lagging_node))

    # Every node except the lagging one should do a view change
    ensure_view_change(looper,
                       [n for n in txnPoolNodeSet if n != lagging_node],
                       [lagging_node])

    # Send more requests that will be queued for the lagging node
    reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                    sdk_wallet_client, 5)
    l = looper.run(eventually(chk_fut_view, old_view_no + 1, False,
                              retryWait=1))
    logger.debug('{} has {} messages for future views'
                 .format(lagging_node, l))
    sdk_get_replies(looper, reqs)

    # Reset delays for the lagging node so that it finally makes the view
    # change
    lagging_node.reset_delays_and_process_delayeds()

    # Eventually no messages queued for the future view
    looper.run(eventually(chk_fut_view, old_view_no + 1, True,
                          retryWait=1, timeout=delay_ic + 10))
    logger.debug('{} exhausted pending messages for future views'
                 .format(lagging_node))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2)

def test_missing_pp_before_starting_vc(tconf, txnPoolNodeSet, looper,
                                       sdk_pool_handle, sdk_wallet_steward):
    '''
    - all nodes delay PrePrepares for viewNo=1 with ppSeqNo<4
    - all nodes go to view=1
    - 10 batches are sent (they will not be ordered because of the missing
      (delayed) PrePrepares)
    - all nodes start a view change to view=2
    - all nodes finish the view change successfully and can continue ordering
    '''
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # 1. delay PrePrepares with ppSeqNo<4
    with delay_rules(all_stashers, delay_3pc(view_no=1, before=4,
                                             msgs=PrePrepare)):
        # 2. do view change for view=1
        ensure_view_change(looper, txnPoolNodeSet)
        looper.run(eventually(check_not_in_view_change, txnPoolNodeSet))

        # 3. send requests
        sdk_send_random_requests(looper, sdk_pool_handle,
                                 sdk_wallet_steward, 10)

        # 4. do view change for view=2
        ensure_view_change(looper, txnPoolNodeSet)

    # 5. ensure everything is fine
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_steward, sdk_pool_handle)

def test_revert_during_view_change_all_nodes_set_fees(
        tconf, nodeSetWithIntegratedTokenPlugin, fees_set, helpers, looper):
    """
    Check that a SET_FEES transaction is written after a view change
    when the PREPARE quorum for it has been reached
    """
    nodes = nodeSetWithIntegratedTokenPlugin
    node_set = [n.nodeIbStasher for n in nodeSetWithIntegratedTokenPlugin]
    _old_pp_seq_no = get_ppseqno_from_all_nodes(
        nodeSetWithIntegratedTokenPlugin)
    helpers.general.set_fees_without_waiting({ATTRIB_FEES_ALIAS: 3})
    assert _old_pp_seq_no == get_ppseqno_from_all_nodes(
        nodeSetWithIntegratedTokenPlugin)
    with delay_rules(node_set, cDelay()):
        # TODO: should be changed for auth rule
        helpers.general.set_fees_without_waiting({ATTRIB_FEES_ALIAS: 4})
        looper.run(eventually(functools.partial(
            check_batch_ordered, _old_pp_seq_no,
            nodeSetWithIntegratedTokenPlugin)))
        ensure_view_change(looper, nodes)
    ensureElectionsDone(looper=looper, nodes=nodes)
    ensure_all_nodes_have_same_data(looper, nodes)
    for n in nodes:
        looper.run(eventually(lambda: assertExp(n.mode == Mode.participating)))
    for n in nodes:
        looper.run(eventually(check_state, n, True, retryWait=0.2,
                              timeout=15))
    fees = helpers.general.do_get_fees()
    assert fees[FEES][ATTRIB_FEES_ALIAS] == 4

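# Many of these tests wrap assertions in looper.run(eventually(...)), which
# retries a check until it passes or a timeout expires. A minimal synchronous
# sketch of that retry loop (illustrative; the real plenum `eventually` is
# asynchronous and integrates with the looper):
import time


def eventually_sketch(check, *args, retry_wait=1.0, timeout=15.0):
    # Re-run `check` until it stops raising AssertionError or until the
    # timeout is exhausted, in which case the last failure propagates.
    deadline = time.monotonic() + timeout
    while True:
        try:
            return check(*args)
        except AssertionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(retry_wait)
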
def test_no_propagate_request_on_different_last_ordered_on_master_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    Send a random request and do a view change; fast_nodes (1, 4 - without
    the primary after the next view change) have already ordered the
    transaction on master, while slow_nodes have not. Check ordering on
    slow_nodes.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    master_instance = txnPoolNodeSet[0].master_replica.instId
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, cDelay()):
        # send one request
        requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                            sdk_wallet_client, 1)
        last_ordered_for_slow = slow_nodes[0].master_replica.last_ordered_3pc
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered,
                       fast_nodes,
                       master_instance,
                       (old_view_no, old_last_ordered[1] + 1)))

        # trigger view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)
        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    replies = sdk_get_replies(looper, requests)
    for reply in replies:
        sdk_check_reply(reply)

    check_last_ordered(slow_nodes,
                       master_instance,
                       (old_view_no, last_ordered_for_slow[1] + 1))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)

def test_nodes_removes_request_keys_for_ordered(setup, looper, txnPoolNodeSet,
                                                client1, wallet1):
    """
    A node does not order requests since it is missing some 3PC messages;
    it gets them from catchup and then clears them from its request queues
    """
    slow_node, fast_nodes = setup
    reqs = send_reqs_batches_and_get_suff_replies(
        looper, wallet1, client1, 10, 5)
    ensure_all_nodes_have_same_data(looper, fast_nodes)
    assert slow_node.master_replica.last_ordered_3pc != \
        fast_nodes[0].master_replica.last_ordered_3pc

    def chk(key, nodes, present):
        for node in nodes:
            assert (key in node.master_replica.requestQueues[
                DOMAIN_LEDGER_ID]) == present

    for req in reqs:
        chk(req.key, fast_nodes, False)
        chk(req.key, [slow_node], True)

    # Reset the catchup reply delay so that catchup can complete
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        CatchupRep.typename)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    for req in reqs:
        chk(req.key, txnPoolNodeSet, False)

    # Needed for the next run due to the parametrised fixture
    slow_node.reset_delays_and_process_delayeds()

def test_no_propagated_future_view_change_while_view_change(txnPoolNodeSet,
                                                            looper):
    # the last node is a lagging one, which will receive ViewChangeDone
    # messages for a future view
    viewNo = checkViewNoForNodes(txnPoolNodeSet)
    lagged_node = txnPoolNodeSet[-1]
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    # emulate a view change in progress
    lagged_node.view_changer.view_change_in_progress = True

    old_view_no = checkViewNoForNodes([lagged_node])

    initial_vhdc = lagged_node.view_changer.spylog.count(
        lagged_node.view_changer.process_future_view_vchd_msg.__name__)

    # delay INSTANCE_CHANGE on the lagged node, so all nodes except the
    # lagging one finish the view change
    with delay_rules(lagged_node.nodeIbStasher, icDelay()):
        # make sure that the view change happened on all nodes but the
        # lagging one
        ensure_view_change(looper, other_nodes)
        checkProtocolInstanceSetup(looper=looper, nodes=other_nodes,
                                   numInstances=2)
        ensure_all_nodes_have_same_data(looper, nodes=other_nodes)

        # check that the lagged node received 3 future VCD messages but
        # didn't start a new view change
        assert len(other_nodes) + initial_vhdc == \
            lagged_node.view_changer.spylog.count(
                lagged_node.view_changer.process_future_view_vchd_msg.__name__)
        assert old_view_no == checkViewNoForNodes([lagged_node])

def test_revert_works_for_fees_after_view_change(looper, helpers,
                                                 nodeSetWithIntegratedTokenPlugin,
                                                 sdk_pool_handle,
                                                 fees_set, mint_tokens,
                                                 addresses, fees):
    node_set = nodeSetWithIntegratedTokenPlugin
    current_amount = get_amount_from_token_txn(mint_tokens)
    seq_no = get_seq_no(mint_tokens)
    reverted_node = nodeSetWithIntegratedTokenPlugin[-1]

    current_amount, seq_no, _ = send_and_check_nym_with_fees(
        helpers, fees_set, seq_no, looper, addresses, current_amount)
    current_amount, seq_no, _ = send_and_check_transfer(
        helpers, addresses, fees, looper, current_amount, seq_no)

    with delay_rules_without_processing(reverted_node.nodeIbStasher,
                                        delay_3pc(view_no=0, msgs=Commit)):
        len_batches_before = len(
            reverted_node.master_replica._ordering_service.batches)
        current_amount, seq_no, _ = send_and_check_transfer(
            helpers, addresses, fees, looper, current_amount, seq_no)
        current_amount, seq_no, _ = send_and_check_nym_with_fees(
            helpers, fees_set, seq_no, looper, addresses, current_amount)
        looper.runFor(waits.expectedPrePrepareTime(
            len(nodeSetWithIntegratedTokenPlugin)))
        len_batches_after = len(
            reverted_node.master_replica._ordering_service.batches)

        # Check that we have 2 new batches
        assert len_batches_after - len_batches_before == 2

        for n in node_set:
            n.view_changer.on_master_degradation()
        ensure_view_change(looper, nodeSetWithIntegratedTokenPlugin)

    looper.run(eventually(
        lambda: assertExp(reverted_node.mode == Mode.participating)))
    ensure_all_nodes_have_same_data(looper, node_set)

    send_and_check_nym_with_fees(helpers, fees_set, seq_no, looper,
                                 addresses, current_amount)
    ensure_all_nodes_have_same_data(looper, node_set)

def test_new_primary_lagging_behind(looper, txnPoolNodeSet, sdk_wallet_client,
                                    sdk_pool_handle, tconf):
    initial_view_no = checkViewNoForNodes(txnPoolNodeSet)
    next_primary_name = get_next_primary_name(txnPoolNodeSet,
                                              initial_view_no + 1)
    next_primary = [n for n in txnPoolNodeSet
                    if n.name == next_primary_name][0]
    other_nodes = [n for n in txnPoolNodeSet if n != next_primary]
    expected_primary_name = get_next_primary_name(txnPoolNodeSet,
                                                  initial_view_no + 2)

    # The next primary cannot stabilize 1 checkpoint
    with delay_rules(next_primary.nodeIbStasher, cDelay(), pDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, CHK_FREQ)
        ensure_view_change(looper, txnPoolNodeSet)
        looper.run(eventually(check_not_in_view_change, txnPoolNodeSet,
                              timeout=2 * tconf.NEW_VIEW_TIMEOUT))
        ensureElectionsDone(looper=looper, nodes=other_nodes,
                            customTimeout=2 * tconf.NEW_VIEW_TIMEOUT,
                            instances_list=[0, 1])

    assert next_primary_name != expected_primary_name
    assert checkViewNoForNodes(txnPoolNodeSet) == initial_view_no + 2

    # Send CHK_FREQ requests so that the slow node starts catchup
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, CHK_FREQ)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet,
                                    custom_timeout=30)

def test_vc_by_current_state(txnPoolNodeSet, looper, tdir, tconf,
                             allPluginsPath):
    node_to_stop = txnPoolNodeSet[-1]
    old_view_no = node_to_stop.view_changer.last_completed_view_no
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_stop,
                                            stopNode=True)
    looper.removeProdable(node_to_stop)
    ensure_view_change(looper, txnPoolNodeSet[:-1])
    ensureElectionsDone(looper, txnPoolNodeSet[:-1],
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    new_view_no = txnPoolNodeSet[0].view_changer.last_completed_view_no
    assert new_view_no > old_view_no
    node_to_stop = start_stopped_node(node_to_stop, looper, tconf,
                                      tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_stop
    ensureElectionsDone(looper, txnPoolNodeSet,
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    assert node_to_stop.view_changer.last_completed_view_no == new_view_no

def test_new_node_accepts_timestamp(tconf, looper, txnPoolNodeSet,
                                    nodeSetWithNodeAddedAfterSomeTxns,
                                    client1, wallet1, client1Connected):
    """
    A new node joins the pool and is able to function properly without
    raising any timestamp suspicions
    """
    _, new_node, _, _, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    old_susp_count = get_timestamp_suspicion_count(new_node)
    # Don't wait for the node to catch up, start sending requests
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 10)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    # All nodes should reply
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)
    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    suspicions = {node.name: get_timestamp_suspicion_count(node)
                  for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1, client1,
                                              Max3PCBatchSize * 3)
    for node in txnPoolNodeSet:
        assert suspicions[node.name] == get_timestamp_suspicion_count(node)

def test_backup_can_order_after_catchup(txnPoolNodeSet, looper,
                                        sdk_pool_handle, sdk_wallet_client):
    # We expect that after the view change Gamma will be the primary on the
    # backup instance
    delayed_node = txnPoolNodeSet[-2]
    with delay_rules_without_processing(
            delayed_node.nodeIbStasher,
            pDelay(instId=MASTER_REPLICA_INDEX),
            cDelay(instId=MASTER_REPLICA_INDEX),
            ppDelay(instId=MASTER_REPLICA_INDEX)):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, REQUEST_COUNT)
        with delay_rules_without_processing(
                [n.nodeIbStasher for n in txnPoolNodeSet],
                old_view_pp_request_delay()):
            ensure_view_change(looper, txnPoolNodeSet)
            ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
            assert delayed_node.replicas._replicas[BACKUP_INST_ID].isPrimary

            # Check that the backup cannot order
            sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client,
                                      REQUEST_COUNT)
            for n in txnPoolNodeSet:
                assert n.replicas._replicas[
                    BACKUP_INST_ID].last_ordered_3pc[1] == 0

            # Force catchup
            delayed_node.start_catchup()
            ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

            # Check that the backup can order after catchup
            b_pp_seq_no_before = delayed_node.replicas._replicas[
                BACKUP_INST_ID].last_ordered_3pc[1]
            sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client,
                                      REQUEST_COUNT)
            assert delayed_node.replicas._replicas[
                BACKUP_INST_ID].last_ordered_3pc[1] == \
                b_pp_seq_no_before + REQUEST_COUNT

def test_no_propagate_request_on_different_prepares_on_backup_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    1. Send a random request
    2. Make two nodes slow in getting Prepares on the backup instance
    3. Send a random request
    4. Do a view change
    5. Reset the delays
    => we expect that all nodes and all instances have the same last ordered
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, pDelay(instId=slow_instance)):
        with delay_rules(nodes_stashers, ppDelay(instId=slow_instance)):
            # send one request
            sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client, 1)
            old_view_no = txnPoolNodeSet[0].viewNo
            looper.run(
                eventually(is_prepared, fast_nodes, 2, slow_instance))

            # trigger view change on all nodes
            ensure_view_change(looper, txnPoolNodeSet)
            # wait for view change done on all nodes
            ensureElectionsDone(looper, txnPoolNodeSet)

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    check_last_ordered(non_primaries,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1))

    # The backup primary replica must not advance last_ordered_3pc
    # up to the master's value
    check_last_ordered([primary],
                       slow_instance,
                       (old_view_no, old_last_ordered[1]))

    check_last_ordered(txnPoolNodeSet,
                       txnPoolNodeSet[0].master_replica.instId,
                       (old_last_ordered[0], old_last_ordered[1] + 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.run(
        eventually(check_last_ordered,
                   txnPoolNodeSet,
                   slow_instance,
                   (txnPoolNodeSet[0].viewNo, 1)))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)

def test_backup_stabilized_checkpoint_on_view_change(looper, txnPoolNodeSet,
                                                     sdk_wallet_client,
                                                     sdk_pool_handle):
    # Delta:1
    backup = txnPoolNodeSet[-1].replicas[1]
    count_of_replicas = len(txnPoolNodeSet[0].replicas)
    with delay_rules_without_processing(
            [n.nodeIbStasher for n in txnPoolNodeSet], ppDelay(instId=0)):
        sdk_send_random_requests(looper, sdk_pool_handle,
                                 sdk_wallet_client, REQ_COUNT)
        looper.run(
            eventually(
                lambda r: assertExp(r.last_ordered_3pc == (0, REQ_COUNT)),
                backup))

        # assert that all requests are propagated
        for n in txnPoolNodeSet:
            for req in n.requests.values():
                assert req.forwardedTo == count_of_replicas

        ensure_view_change(looper, txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)

        # check that all requests were freed on the backups
        for n in txnPoolNodeSet:
            for req in n.requests.values():
                assert req.forwardedTo == count_of_replicas - 1

def test_view_change_after_some_txns(txnPoolNodesLooper, txnPoolNodeSet,
                                     some_txns_done, testNodeClass,
                                     viewNo,  # noqa
                                     sdk_pool_handle, sdk_wallet_client,
                                     node_config_helper_class, tconf, tdir,
                                     allPluginsPath, tmpdir_factory):
    """
    Check that a view change is done after processing some txns
    """
    ensure_view_change(txnPoolNodesLooper, txnPoolNodeSet)
    ensureElectionsDone(looper=txnPoolNodesLooper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, nodes=txnPoolNodeSet)

    sdk_send_random_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 10)
    ensure_all_nodes_have_same_data(txnPoolNodesLooper, txnPoolNodeSet)

    for node in txnPoolNodeSet:
        txnPoolNodesLooper.removeProdable(node)
        node.stop()

    config = getConfigOnce()
    reload_modules_for_replay(tconf)

    replayable_node_class, basedirpath = get_replayable_node_class(
        tmpdir_factory, tdir, testNodeClass, config)

    print('-------------Replaying now---------------------')

    for node in txnPoolNodeSet:
        create_replayable_node_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                                         node, replayable_node_class,
                                         node_config_helper_class, tconf,
                                         basedirpath, allPluginsPath)

def test_master_primary_different_from_previous(txnPoolNodeSet, looper,
                                                client1, wallet1,
                                                client1Connected):
    """
    After a view change, the primary of the master instance must be different
    from the previous one; it does not matter for the other instances. The
    primary is benign and does not vote for itself.
    """
    pr = slow_primary(txnPoolNodeSet, 0, delay=10)
    old_pr_node_name = pr.node.name

    # View change happens
    ensure_view_change(looper, txnPoolNodeSet)
    logger.debug("VIEW HAS BEEN CHANGED!")

    # Elections done
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # New primary is not the same as the old primary
    assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node_name

    pr.outBoxTestStasher.resetDelays()

    # The new primary can still process requests
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)

def test_caught_up_for_current_view_check(looper, txnPoolNodeSet,
                                          client1, wallet1,
                                          client1Connected):
    """
    One of the nodes experiences a poor network and loses 3PC messages. It
    has to do multiple rounds of catchup to be caught up.
    """
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # The bad node does not process any 3-phase messages, equivalent to the
    # messages being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return a ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad node needs to do catchup again.
    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                    compare_val_to=True))

    def caught_up_for_current_view_count():
        return len(getAllReturnVals(bad_node,
                                    bad_node.caught_up_for_current_view,
                                    compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = caught_up_for_current_view_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    # The bad_node caught up due to receiving sufficient ViewChangeDone
    # messages
    assert caught_up_for_current_view_count() > old_count_2

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        orig_method, bad_node.master_replica)

def test_no_propagate_request_on_different_last_ordered_on_backup_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    1. Send a random request
    2. Make three nodes slow in getting Commits on the backup instance
    3. Send a random request
    4. Do a view change
    5. Reset the delays
    => we expect that all nodes and all instances have the same last ordered
    '''
    global batches_count
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:4]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].replicas[
        slow_instance].last_ordered_3pc
    batches_count = old_last_ordered[1]
    with delay_rules(nodes_stashers, cDelay(instId=slow_instance)):
        # send one request
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        batches_count += 1
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered,
                       fast_nodes,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1)))
        check_last_ordered(slow_nodes, slow_instance, old_last_ordered)

        # trigger view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)
        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)
        batches_count += 1

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    looper.run(
        eventually(check_last_ordered,
                   non_primaries,
                   slow_instance,
                   (old_view_no + 1, batches_count)))

    # The backup primary replica sets new_view and seq_no == 1 because of
    # the primary batch
    looper.run(
        eventually(check_last_ordered,
                   [primary],
                   slow_instance,
                   (old_view_no + 1, batches_count)))

    looper.run(
        eventually(check_last_ordered,
                   txnPoolNodeSet,
                   txnPoolNodeSet[0].master_replica.instId,
                   (old_last_ordered[0] + 1, batches_count)))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    batches_count += 1

    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)

def test_all_replicas_hold_request_keys(
        perf_chk_patched, looper, txnPoolNodeSet,
        sdk_wallet_client, sdk_pool_handle):
    """
    All replicas, whether primary or non-primary, hold the request keys of
    forwarded requests. Once requests are ordered, their request keys are
    removed from the replica.
    """
    tconf = perf_chk_patched
    delay_3pc = 2
    delay_3pc_messages(txnPoolNodeSet, 0, delay_3pc)
    delay_3pc_messages(txnPoolNodeSet, 1, delay_3pc)

    def chk(count):
        # All replicas have the same number of forwarded request keys and
        # all keys are finalised.
        for node in txnPoolNodeSet:
            for r in node.replicas.values():
                if r.isPrimary is False:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == count
                    for i in range(count):
                        k = r.requestQueues[DOMAIN_LEDGER_ID][i]
                        assert r.requests[k].finalised
                elif r.isPrimary is True:
                    assert len(r.requestQueues[DOMAIN_LEDGER_ID]) == 0

    reqs = sdk_signed_random_requests(looper, sdk_wallet_client,
                                      tconf.Max3PCBatchSize - 1)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    # Only non-primary replicas should have all request keys with them
    looper.run(eventually(chk, tconf.Max3PCBatchSize - 1))
    sdk_get_replies(looper, req_resps,
                    timeout=sdk_eval_timeout(
                        tconf.Max3PCBatchSize - 1,
                        len(txnPoolNodeSet),
                        add_delay_to_timeout=delay_3pc))
    # Replicas should have no request keys with them since the requests are
    # ordered. Need to wait since one node might not have processed it yet.
    looper.run(eventually(chk, 0))

    delay = 1
    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(nom_delay(delay))

    ensure_view_change(looper, txnPoolNodeSet)
    reqs = sdk_signed_random_requests(looper, sdk_wallet_client,
                                      2 * tconf.Max3PCBatchSize)
    req_resps = sdk_send_signed_requests(sdk_pool_handle, reqs)
    looper.run(eventually(chk, 2 * tconf.Max3PCBatchSize))

    # Since each nomination is delayed and there will be multiple
    # nominations, add some extra time
    timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet)) + \
        len(txnPoolNodeSet) * delay
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    sdk_get_replies(looper, req_resps, timeout=timeout)
    looper.run(eventually(chk, 0))

def all_nodes_view_change(looper,
                          txnPoolNodeSet,
                          sdk_pool_handle,
                          sdk_wallet_client):
    for _ in range(5):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 2)
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_view_change_done_delayed(txnPoolNodeSet, looper, sdk_pool_handle,
                                  sdk_wallet_client):
    """
    A node is slow, so it is behind the other nodes. After a view change it
    catches up, but it also receives the ViewChangeDone messages delayed.
    A node should start participating only when it is caught up and the
    ViewChangeDone quorum has been received.
    """
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    slow_node = nprs[-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 10
    delay_vcd = 25
    delay_3pc_messages([slow_node], 0, delay_3pc)
    slow_node.nodeIbStasher.delay(vcd_delay(delay_vcd))

    def chk(node):
        assert node.view_changer.has_acceptable_view_change_quorum
        assert node.view_changer._primary_verified
        assert node.isParticipating
        assert None not in {r.isPrimary for r in node.replicas.values()}

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         5 * 4, 4)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After the view change, the slow node successfully completes catchup
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Other nodes complete the view change, select a primary and participate
    for node in other_nodes:
        looper.run(eventually(chk, node, retryWait=1))

    # Since `ViewChangeDone` messages are delayed, slow_node is not able to
    # select a primary and participate
    assert not slow_node.view_changer.has_acceptable_view_change_quorum
    assert not slow_node.view_changer._primary_verified
    assert not slow_node.isParticipating
    assert {r.isPrimary for r in slow_node.replicas.values()} == {None}

    # Send requests to make sure the pool is functional
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Repair the network
    slow_node.reset_delays_and_process_delayeds()

    # `slow_node` selects a primary and participates
    looper.run(eventually(chk, slow_node, retryWait=1))

    # Processes requests received during the lack of a primary
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # Send more requests and compare data of all nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

def test_no_catchup_if_got_from_3pc(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client):
    """
    A node is slow to receive COMMIT messages, so after a view change it
    starts catchup. But before it can start requesting txns, the COMMIT
    messages are received and ordered. The node should not request any
    transactions.
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]

    delay_cm = 30
    delay_cp = 100
    slow_node.nodeIbStasher.delay(cDelay(delay_cm))
    # The slow node receives consistency proofs after some delay; this delay
    # gives the opportunity to deliver all 3PC messages
    slow_node.nodeIbStasher.delay(cpDelay(delay_cp))

    # Count the calls to `getCatchupReqs`, which is used to construct the
    # `CatchupReq`s to be sent
    def domain_cr_count():
        return sum(1 for entry in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.getCatchupReqs)
            if entry.params['consProof'].ledgerId == DOMAIN_LEDGER_ID)

    old_count = domain_cr_count()
    sent_batches = 10
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle,
                                           sdk_wallet_client,
                                           2 * sent_batches, sent_batches)
    ensure_view_change(looper, nodes=txnPoolNodeSet)

    # After the view change, the `slow_node` is behind
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Unstash only COMMIT messages
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(Commit.typename)

    looper.runFor(2)

    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        ConsistencyProof.typename)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    # No `CatchupReq`s were constructed, hence no `CatchupReq`s could have
    # been sent
    assert domain_cr_count() == old_count
    # Some stashed ordered requests have been processed
    rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
    assert sent_batches in rv

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)

def test_selection_f_plus_one_quorum(looper, txnPoolNodeSet, allPluginsPath,
                                     tdir, tconf, sdk_pool_handle,
                                     sdk_wallet_client):
    """
    Check that the quorum f + 1 is used for primary selection when it is
    initiated by CurrentState messages.

    Assumes that the view change quorum is n - f.
    Assumes that primaries are selected in a round-robin fashion.
    """
    # Ensure that we have 4 nodes in total
    all_nodes = list(txnPoolNodeSet)
    assert 4 == len(all_nodes)
    alpha, beta, delta, gamma = all_nodes
    initial_view_no = alpha.viewNo

    # Make one node lag by switching it off for some time
    lagging_node = gamma
    non_lagging_nodes = [alpha, beta, delta]
    disconnect_node_and_ensure_disconnected(looper,
                                            all_nodes,
                                            lagging_node,
                                            stopNode=True)
    looper.removeProdable(lagging_node)

    # Make the nodes perform a view change
    ensure_view_change(looper, non_lagging_nodes)
    ensureElectionsDone(looper=looper,
                        nodes=non_lagging_nodes,
                        instances_list=range(2))
    ensure_all_nodes_have_same_data(looper, nodes=non_lagging_nodes)

    # Stop two more of the active nodes
    # (but not the primary, which is Beta, because of round-robin selection)
    stopped_nodes = [alpha]  # TODO: add one more here
    for stopped_node in stopped_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                stopped_node,
                                                stopNode=True)
        looper.removeProdable(stopped_node)

    # Start the lagging node back
    restarted_node = start_stopped_node(lagging_node, looper, tconf, tdir,
                                        allPluginsPath)
    active_nodes = [beta, delta, restarted_node]

    # Check that a primary is selected
    expected_view_no = initial_view_no + 1
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        instances_list=range(2), customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

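# The round-robin assumption above means the master primary for a view can
# be predicted from node ranks. A tiny sketch of that rule (illustrative;
# plenum derives node ranks from the order of NODE txns in the pool ledger):
def expected_master_primary(node_names, view_no):
    # Nodes are ranked by pool-ledger order; the master primary for a view
    # is the node at rank view_no modulo the number of nodes.
    return node_names[view_no % len(node_names)]


# After a view change from view 0 to view 1 in a 4-node pool, Beta is the
# expected master primary, matching the comment in the test above.
assert expected_master_primary(['Alpha', 'Beta', 'Gamma', 'Delta'], 1) == \
    'Beta'
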
def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper,
                                              sdk_pool_handle,
                                              sdk_wallet_client):
    """
    The node should do only a fixed number of catchup rounds. To test this,
    delay Prepares and Commits for 2 non-primary nodes by a large amount,
    which is equivalent to the loss of Prepares and Commits. Make sure the 2
    nodes have a last prepared certificate different from the other two.
    Then do a view change, make sure the view change completes and the pool
    does not process the requests that were prepared by only a subset of the
    nodes.
    """
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         2 * 3, 3)

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].ledger_summary

    slow_nodes = [r.node for r in
                  getNonPrimaryReplicas(txnPoolNodeSet, 0)[-2:]]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make nodes slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        lst = [n.master_replica.last_prepared_certificate_in_view()
               for n in nodes]
        # All nodes have the same last prepared
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check that `slow_nodes` and `fast_nodes` have a different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change completes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure the pool is functional
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         10, 2)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)

def test_view_change_after_max_catchup_rounds(txnPoolNodeSet, looper, wallet1,
                                              client1, client1Connected):
    """
    The node should do only a fixed number of catchup rounds. To test this,
    delay Prepares and Commits for 2 non-primary nodes by a large amount,
    which is equivalent to the loss of Prepares and Commits. Make sure the 2
    nodes have a last prepared certificate different from the other two.
    Then do a view change, make sure the view change completes and the pool
    does not process the requests that were prepared by only a subset of the
    nodes.
    """
    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    ledger_summary = txnPoolNodeSet[0].elector.ledger_summary

    slow_nodes = [r.node for r in
                  getNonPrimaryReplicas(txnPoolNodeSet, 0)[-2:]]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]

    # Make nodes slow to process Prepares and Commits
    for node in slow_nodes:
        node.nodeIbStasher.delay(pDelay(120, 0))
        node.nodeIbStasher.delay(cDelay(120, 0))

    sendRandomRequests(wallet1, client1, 5)
    looper.runFor(3)

    ensure_view_change(looper, nodes=txnPoolNodeSet)

    def last_prepared(nodes):
        lst = [n.master_replica.last_prepared_certificate_in_view()
               for n in nodes]
        # All nodes have the same last prepared
        assert check_if_all_equal_in_list(lst)
        return lst[0]

    last_prepared_slow = last_prepared(slow_nodes)
    last_prepared_fast = last_prepared(fast_nodes)

    # Check that `slow_nodes` and `fast_nodes` have a different last_prepared
    assert last_prepared_fast != last_prepared_slow

    # View change completes
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # The requests which were prepared by only a subset of the nodes were
    # not ordered
    assert txnPoolNodeSet[0].elector.ledger_summary == ledger_summary

    for node in slow_nodes:
        node.nodeIbStasher.reset_delays_and_process_delayeds()

    # Make sure the pool is functional
    ensure_pool_functional(looper, txnPoolNodeSet, wallet1, client1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    last_prepared(txnPoolNodeSet)

def test_node_notified_about_primary_election_result(txnPoolNodeSet, looper):
    old_counts = {node.name: get_count(node, node.primary_selected)
                  for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert get_count(node, node.primary_selected) > old_counts[node.name]

def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf,
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including the primary) so that the node that does not receive enough
    3PC messages is behind but learns of the view change quickly and starts
    catchup. The other nodes learn of the view change late and thus keep on
    processing requests.
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        return sum([1 for e in slow_node.ledgerManager.spylog.getAll(
            slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID])

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client,
                                        10 * Max3PCBatchSize)

    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    e = start_count()
    assert e - s >= 2

    looper.run(eventually(checkViewNoForNodes, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)

def test_view_change_after_some_txns(looper, txnPoolNodeSet, viewNo,
                                     sdk_pool_handle, sdk_wallet_client):
    """
    Check that a view change is done after processing some txns
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

def test_propagate_primary_after_primary_restart_view_1(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward,
        tdir, allPluginsPath):
    """
    Delay InstanceChange msgs to prevent a view change during the primary
    restart, in order to test propagate primary for the primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary
    restores all indices correctly in the case viewNo > 0
    """
    ensure_view_change(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=1)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert old_ppseqno > 0

    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)

    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_primary, stopNode=True)

    looper.removeProdable(old_primary)

    logger.info("Restart node {}".format(old_primary))

    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet)
           if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode
    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert new_viewNo == old_viewNo

    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert new_primary.name == old_primary.name

    # check that ppSeqNo is the same
    _get_ppseqno(txnPoolNodeSet)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)

    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert new_ppseqno > old_ppseqno

def test_no_propagate_request_on_different_last_ordered_on_backup_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    Send a random request and do a view change; fast_nodes (1, 4 - without
    primary backup replicas) have already ordered the transaction on master
    and on some backup replica, while slow_nodes have not on the backup
    replica. Wait for ordering on slow_nodes.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].replicas[
        slow_instance].last_ordered_3pc
    with delay_rules(nodes_stashers, cDelay(instId=slow_instance)):
        # send one request
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered,
                       fast_nodes,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1)))
        check_last_ordered(slow_nodes, slow_instance, old_last_ordered)

        # trigger view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)
        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    check_last_ordered(non_primaries,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1))

    # The backup primary replica must not advance last_ordered_3pc
    # up to the master's value
    check_last_ordered([primary],
                       slow_instance,
                       (old_view_no, old_last_ordered[1]))

    check_last_ordered(txnPoolNodeSet,
                       txnPoolNodeSet[0].master_replica.instId,
                       (old_last_ordered[0], old_last_ordered[1] + 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)

def test_selection_f_plus_one_quorum(looper, txnPoolNodeSet, allPluginsPath,
                                     tdir, tconf, sdk_pool_handle,
                                     sdk_wallet_client):
    """
    Check that the quorum f + 1 is used for primary selection when it is
    initiated by CurrentState messages.

    Assumes that the view change quorum is n - f.
    Assumes that primaries are selected in a round-robin fashion.
    """
    # Ensure that we have 4 nodes in total
    all_nodes = list(txnPoolNodeSet)
    assert 4 == len(all_nodes)
    alpha, beta, delta, gamma = all_nodes
    initial_view_no = alpha.viewNo

    # Make one node lag by switching it off for some time
    lagging_node = gamma
    non_lagging_nodes = [alpha, beta, delta]
    disconnect_node_and_ensure_disconnected(looper, all_nodes, lagging_node,
                                            stopNode=True)
    looper.removeProdable(lagging_node)

    # Make the remaining nodes perform a view change
    ensure_view_change(looper, non_lagging_nodes)
    ensureElectionsDone(looper=looper, nodes=non_lagging_nodes,
                        instances_list=range(2))
    ensure_all_nodes_have_same_data(looper, nodes=non_lagging_nodes)

    # Stop two more of the active nodes
    # (but not the primary, which is Beta because of round-robin selection)
    stopped_nodes = [alpha]  # TODO: add one more here
    for stopped_node in stopped_nodes:
        disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                                stopped_node, stopNode=True)
        looper.removeProdable(stopped_node)

    # Start the lagging node back up
    restarted_node = start_stopped_node(lagging_node, looper, tconf, tdir,
                                        allPluginsPath)
    active_nodes = [beta, delta, restarted_node]

    # Check that a primary is selected
    expected_view_no = initial_view_no + 1
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        instances_list=range(2), customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
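# The quorum arithmetic the test above relies on, sketched with plenum's
# Quorums class (assuming it exposes `f` and `view_change_done`, as used in
# test_complete_with_delayed_view_change below): for n = 4 we have f = 1, so
# a regular view change needs n - f = 3 nodes, while CurrentState-driven
# primary selection needs only f + 1 = 2.
def _show_quorum_sizes():
    from plenum.server.quorums import Quorums
    quorums = Quorums(4)
    assert quorums.view_change_done.value == 4 - quorums.f  # 3 nodes
    assert quorums.f + 1 == 2  # CurrentState-driven selection quorum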
def test_send_more_after_view_change(looper, txnPoolNodeSet,
                                     sdk_pool_handle, sdk_wallet_client):
    """
    Check that we can send more requests after a view change
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 4)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)
def test_repeated_request_not_processed_after_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    delta = txnPoolNodeSet[3]
    initial_ledger_size = delta.domainLedger.size

    one_req = sdk_signed_random_requests(looper, sdk_wallet_client, 1)
    sdk_send_and_check(one_req, looper, txnPoolNodeSet, sdk_pool_handle)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    # Re-send the very same signed request after the view change
    sdk_send_signed_requests(sdk_pool_handle, one_req)
    looper.runFor(waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)))

    # The repeated request must not be executed again, so the ledger has
    # grown by exactly one transaction
    for node in txnPoolNodeSet:
        assert node.domainLedger.size - initial_ledger_size == 1
def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet,
                                          sdk_node_set_with_node_added_after_some_txns,
                                          sdk_new_node_caught_up,
                                          allPluginsPath, sdk_wallet_client):
    """
    Node discards 3-phase or ViewChangeDone messages from view numbers that
    it does not know of (view numbers from before it joined the pool)
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    viewNo = new_node.viewNo

    # Force two view changes: a node discards messages whose viewNo is at
    # least two less than its own, since the current protocol implementation
    # needs to hold messages from the previous view as well as from the
    # current view.
    for i in range(2):
        ensure_view_change(looper, txnPoolNodeSet)
        waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
        checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sender = txnPoolNodeSet[0]
    rid_x_node = sender.nodestack.getRemote(new_node.name).uid
    messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime()

    # A 3PC message (PrePrepare) from the old view needs to be discarded
    _, did = sdk_wallet_client
    primaryRepl = getPrimaryReplica(txnPoolNodeSet)
    three_pc = PrePrepare(
        0,                                            # instId
        viewNo,                                       # viewNo (old view)
        10,                                           # ppSeqNo
        get_utc_epoch(),                              # ppTime
        ["random request digest"],                    # reqIdr
        init_discarded(),                             # discarded
        "random digest",                              # digest
        DOMAIN_LEDGER_ID,                             # ledgerId
        primaryRepl.stateRootHash(DOMAIN_LEDGER_ID),  # stateRootHash
        primaryRepl.txnRootHash(DOMAIN_LEDGER_ID),    # txnRootHash
        0,                                            # sub_seq_no
        True                                          # final
    )
    sender.send(three_pc, rid_x_node)
    looper.run(eventually(checkDiscardMsg, [new_node, ], three_pc,
                          'un-acceptable viewNo',
                          retryWait=1, timeout=messageTimeout))
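# A sketch of the discard rule exercised above (a hedged assumption about the
# implementation, not the node's actual code): after two view changes the new
# node is in view viewNo + 2, and a message from `viewNo` fails the
# "previous or current view" check, so it is discarded with an
# 'un-acceptable viewNo' reason.
def _is_from_unknown_view(msg_view_no, node_view_no):
    return msg_view_no < node_view_no - 1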
def test_checkpoint_across_views(sent_batches, chkFreqPatched, looper,
                                 txnPoolNodeSet, sdk_pool_handle,
                                 sdk_wallet_client):
    """
    Test checkpointing across views. This test checks that checkpointing and
    garbage collection work correctly regardless of whether the view change
    happens before or after a checkpoint.
    """
    batch_size = chkFreqPatched.Max3PCBatchSize
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         batch_size * sent_batches,
                                         sent_batches)

    # Check that correct garbage collection happens
    non_gced_batch_count = (sent_batches - CHK_FREQ
                            if sent_batches >= CHK_FREQ
                            else sent_batches)
    looper.run(eventually(checkRequestCounts, txnPoolNodeSet,
                          batch_size * non_gced_batch_count,
                          non_gced_batch_count,
                          retryWait=1))

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    # Check that after the view change, proper clean up is done
    for node in txnPoolNodeSet:
        for r in node.replicas.values():
            assert not r.checkpoints
            # No stashed checkpoints for the previous view
            assert not [view_no for view_no in r.stashedRecvdCheckpoints
                        if view_no < r.viewNo]
            assert r._h == 0
            assert r._lastPrePrepareSeqNo == 0
            assert r.h == 0
            assert r.H == r._h + chkFreqPatched.LOG_SIZE

    checkRequestCounts(txnPoolNodeSet, 0, 0)

    # Even after the view change, checkpointing works
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         batch_size * sent_batches,
                                         sent_batches)
    looper.run(eventually(checkRequestCounts, txnPoolNodeSet,
                          batch_size * non_gced_batch_count,
                          non_gced_batch_count,
                          retryWait=1))

    # Send more batches so that one more checkpoint happens. This is done so
    # that when this test finishes, all requests are garbage collected and
    # the next run of this test (with the next param) has correct calculations
    more = CHK_FREQ - non_gced_batch_count
    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         batch_size * more, more)
    looper.run(eventually(checkRequestCounts, txnPoolNodeSet, 0, 0,
                          retryWait=1))
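# `checkRequestCounts` is assumed to verify how many requests and 3PC-batches
# are still kept in memory after garbage collection; a plausible sketch (the
# project's actual helper may check more than this):
def checkRequestCounts(nodes, req_count, batch_count):
    for node in nodes:
        # Requests not yet garbage-collected on the node
        assert len(node.requests) == req_count
        # 3PC-batches still tracked by the master replica
        assert len(node.master_replica.batches) == batch_count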
def test_belated_propagate_not_processed_after_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    delta = txnPoolNodeSet[3]
    initial_ledger_size = delta.domainLedger.size
    # Delay PROPAGATE messages from Gamma to Delta
    delta.nodeIbStasher.delay(ppgDelay(300, 'Gamma'))

    one_req = sdk_signed_random_requests(looper, sdk_wallet_client, 1)
    sdk_send_and_check(one_req, looper, txnPoolNodeSet, sdk_pool_handle)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    # Release the belated PROPAGATE after the view change; it must not lead
    # to the request being executed again
    delta.nodeIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)))

    for node in txnPoolNodeSet:
        assert node.domainLedger.size - initial_ledger_size == 1
def test_set_H_greater_then_last_ppseqno(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         tdir, tconf, allPluginsPath):
    # Send LOG_SIZE requests so that a stable checkpoint moves the watermarks
    # on all replicas away from the default (0, LOG_SIZE)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, LOG_SIZE)
    # Check that all nodes have set up watermarks greater than the default,
    # so a ppSeqNo of LOG_SIZE + 1 would fall outside the default window
    for n in txnPoolNodeSet:
        for r in n.replicas._replicas.values():
            assert r.h >= LOG_SIZE
            assert r.H >= LOG_SIZE + LOG_SIZE

    # Add a new node to schedule the propagate primary procedure
    new_node = add_new_node(looper, txnPoolNodeSet, sdk_pool_handle,
                            sdk_wallet_steward, tdir, tconf, allPluginsPath)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Check that replica.h is set from last_ordered_3pc and that replica.H
    # is set to maxsize on backup replicas
    for r in new_node.replicas.values():
        assert r.h == r.last_ordered_3pc[1]
        if r.isMaster:
            assert r.H == r.last_ordered_3pc[1] + LOG_SIZE
        else:
            assert r.H == sys.maxsize

    # Send a request and check that the backup replicas do not stash it as
    # being outside the watermarks
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 1)
    for r in new_node.replicas.values():
        assert len(r.stashingWhileOutsideWaterMarks) == 0

    # Force a view change and check that all backup replicas set H back to
    # the default (not via the propagate primary logic). This ensures that
    # the next view change does not break the watermark setting logic
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    for r in new_node.replicas.values():
        if not r.isMaster:
            assert r.h == 0
            assert r.H == LOG_SIZE
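# The watermark rules asserted above, condensed into a sketch (assumptions
# about the intended behaviour, not the actual replica code): the window
# [h, H] normally spans LOG_SIZE batches, but while a propagate-primary is
# pending, backup replicas open H up to maxsize so they cannot stash
# messages as "outside watermarks".
import sys

def _expected_watermarks(last_pp_seq_no, is_master, propagating_primary,
                         log_size=300):  # 300 is plenum's default LOG_SIZE
    h = last_pp_seq_no
    if propagating_primary and not is_master:
        return h, sys.maxsize
    return h, h + log_size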
def test_complete_with_delayed_view_change(looper, txnPoolNodeSet,
                                           sdk_wallet_steward,
                                           sdk_pool_handle):
    def chk_len_stashed_msgs():
        assert len(stashed_vc_done_msgs) >= \
               slow_node.quorums.view_change_done.value - 1

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, REQ_COUNT)
    slow_node = txnPoolNodeSet[-1]
    not_processing_view_change_done(slow_node)

    ensure_view_change(looper, txnPoolNodeSet[:-1])
    looper.run(eventually(chk_len_stashed_msgs))

    # Put the stashed messages back into the slow node's inbox and restore
    # its normal inbox processing
    while stashed_ic_msgs:
        slow_node.nodeInBox.append(stashed_ic_msgs.popleft())
    while stashed_vc_done_msgs:
        slow_node.nodeInBox.append(stashed_vc_done_msgs.popleft())
    slow_node.processNodeInBox = functools.partial(TestNode.processNodeInBox,
                                                   slow_node)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
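# The module-level pieces used above (`REQ_COUNT`, the stash deques and
# `not_processing_view_change_done`) are not shown in this file; a hedged
# sketch of how they could work, assuming TestNode.processNodeInBox is the
# coroutine the test restores at the end:
from collections import deque
import functools

REQ_COUNT = 10  # assumed value
stashed_ic_msgs = deque()
stashed_vc_done_msgs = deque()


def not_processing_view_change_done(node):
    # Patch the node's inbox processing so that InstanceChange and
    # ViewChangeDone messages are stashed instead of handled; everything
    # else goes through TestNode.processNodeInBox as usual.
    async def stashing_process_node_inbox(self):
        for item in list(self.nodeInBox):
            msg = item[0]
            if isinstance(msg, InstanceChange):
                self.nodeInBox.remove(item)
                stashed_ic_msgs.append(item)
            elif isinstance(msg, ViewChangeDone):
                self.nodeInBox.remove(item)
                stashed_vc_done_msgs.append(item)
        await TestNode.processNodeInBox(self)

    node.processNodeInBox = functools.partial(stashing_process_node_inbox,
                                              node)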
def test_nodes_removes_request_keys_for_ordered(setup, looper, txnPoolNodeSet,
                                                sdk_pool_handle,
                                                sdk_wallet_client):
    """
    A node misses some 3PC messages and hence cannot order the requests, so
    it gets them through catchup. It then clears them from its request queues
    """
    slow_node, fast_nodes = setup
    reqs = sdk_json_couples_to_request_list(
        send_reqs_batches_and_get_suff_replies(
            looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
            10, 5))
    ensure_all_nodes_have_same_data(looper, fast_nodes)
    assert slow_node.master_replica.last_ordered_3pc != \
           fast_nodes[0].master_replica.last_ordered_3pc

    def chk(key, nodes, present):
        for node in nodes:
            assert (key in node.master_replica.requestQueues[
                DOMAIN_LEDGER_ID]) == present

    for req in reqs:
        chk(req.digest, fast_nodes, False)
        chk(req.digest, [slow_node], True)

    # Reset the catchup reply delay so that catchup can complete
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds(
        CatchupRep.typename)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    for req in reqs:
        chk(req.digest, txnPoolNodeSet, False)

    # Needed for the next run, since the fixture is parametrised
    slow_node.reset_delays_and_process_delayeds()
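# The `setup` fixture above is assumed to pick a slow node and delay its
# incoming commit and CatchupRep messages so that it falls behind; a sketch
# (the fixture name and delays are assumptions, not the project's code):
@pytest.fixture(scope="function")
def setup(txnPoolNodeSet):
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    fast_nodes = [n for n in txnPoolNodeSet if n is not slow_node]
    # Delay commits and catchup replies so the slow node can neither order
    # nor immediately catch up
    slow_node.nodeIbStasher.delay(cDelay(300))
    slow_node.nodeIbStasher.delay(cr_delay(300))
    return slow_node, fast_nodes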
def test_old_non_primary_restart_after_view_change(new_node_in_correct_view,
                                                   looper, txnPoolNodeSet,
                                                   tdir, allPluginsPath,
                                                   tconf, sdk_pool_handle,
                                                   sdk_wallet_client):
    """
    An existing non-primary node crashes, then a view change happens, and
    the crashed node comes back up after the view change
    """
    node_to_stop = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    old_view_no = node_to_stop.viewNo

    # Stop the non-primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_stop, stopNode=True)
    looper.removeProdable(node_to_stop)
    remaining_nodes = list(set(txnPoolNodeSet) - {node_to_stop})

    # Send some requests before the view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_view_change(looper, remaining_nodes,
                       custom_timeout=tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper, remaining_nodes)
    # Send some requests after the view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    restarted_node = start_stopped_node(node_to_stop, looper, tconf, tdir,
                                        allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [restarted_node]
    looper.run(eventually(checkViewNoForNodes,
                          txnPoolNodeSet, old_view_no + 1, timeout=30))

    # The restarted node must have successfully started a view change on its
    # own at least once to catch up with the pool's view
    assert len(getAllReturnVals(
        restarted_node.view_changer,
        restarted_node.view_changer._start_view_change_if_possible,
        compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    assert not restarted_node.view_changer._next_view_indications
def test_new_node_accepts_timestamp(tconf, looper, txnPoolNodeSet,
                                    sdk_node_created_after_some_txns,
                                    sdk_wallet_client, sdk_pool_handle):
    """
    A new node joins the pool and is able to function properly without
    raising any timestamp-related suspicions
    """
    _, new_node, _, _ = sdk_node_created_after_some_txns
    old_susp_count = get_timestamp_suspicion_count(new_node)

    # Don't wait for the node to catch up; start sending requests
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=10)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    # All nodes should reply
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=Max3PCBatchSize * 3)
    # No suspicions were raised by new_node
    assert get_timestamp_suspicion_count(new_node) == old_susp_count

    suspicions = {node.name: get_timestamp_suspicion_count(node)
                  for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=Max3PCBatchSize * 3)
    for node in txnPoolNodeSet:
        assert suspicions[node.name] == get_timestamp_suspicion_count(node)
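# `get_timestamp_suspicion_count` is assumed to count how many messages a
# node discarded because of a wrong PrePrepare timestamp; a sketch built on
# plenum's countDiscarded helper and the PPR_TIME_WRONG suspicion (the actual
# helper may differ):
def get_timestamp_suspicion_count(node):
    return countDiscarded(node, Suspicions.PPR_TIME_WRONG.reason)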
def test_no_propagate_request_on_different_last_ordered_on_master_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Send a random request and do a view change, so that the fast nodes
    (1 and 4, the ones that are not primary after the next view change) have
    already ordered the transaction on the master instance, while the slow
    nodes have not. Then check ordering on the slow nodes.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    master_instance = txnPoolNodeSet[0].master_replica.instId
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, cDelay()):
        # Send one request
        requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                            sdk_wallet_client, 1)
        last_ordered_for_slow = slow_nodes[0].master_replica.last_ordered_3pc
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered,
                       fast_nodes,
                       master_instance,
                       (old_view_no, old_last_ordered[1] + 1)))

        # Trigger a view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)
        # Wait for the view change to be done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    replies = sdk_get_replies(looper, requests)
    for reply in replies:
        sdk_check_reply(reply)

    # The slow nodes must have ordered the request without asking their
    # peers for PROPAGATEs
    check_last_ordered(slow_nodes, master_instance,
                       (old_view_no, last_ordered_for_slow[1] + 1))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def test_node_erases_last_sent_pp_key_on_view_change(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf):
    # Get the node hosting the primary replica of a backup instance
    replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id)
    node = replica.node

    # Send some 3PC-batches and wait until the replica orders the 3PC-batches
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=3, num_batches=3,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 3)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Ensure that there is a stored last sent PrePrepare key on the node
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB

    # Make the pool perform a view change
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    # Verify that the node has erased the stored last sent PrePrepare key
    assert LAST_SENT_PRE_PREPARE not in node.nodeStatusDB

    # Send a 3PC-batch and ensure that the replica orders it
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=1, num_batches=1,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (1, 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))
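# Names the test above takes from module level, with assumed values for
# illustration only: the backup instance under test and the pool size used
# for timeout calculation. LAST_SENT_PRE_PREPARE itself is assumed to be the
# node status DB key imported from plenum.common.constants.
backup_inst_id = 1
nodeCount = 4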