def test_handle_delayed_preprepares(looper, txnPoolNodeSet, sdk_wallet_client,
                                    sdk_pool_handle, teardown):
    """
    Make a node send PREPREPARE again after the slow node has ordered
    """
    slow_node, other_nodes, primary_node, other_non_primary_nodes = \
        split_nodes(txnPoolNodeSet)

    # This node will send PRE-PREPARE again
    orig_method = primary_node.handlers[PREPREPARE].serve
    last_pp = None

    def patched_method(self, msg):
        nonlocal last_pp
        last_pp = orig_method(msg)
        return last_pp

    primary_node.handlers[PREPREPARE].serve = types.MethodType(
        patched_method, primary_node.handlers[PREPREPARE])

    # Delay PRE-PREPAREs by a large amount to simulate loss
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=10, num_batches=5)
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    slow_master_replica = slow_node.master_replica
    count_pr_req = get_count(slow_master_replica,
                             slow_master_replica.process_requested_pre_prepare)
    count_pr_tpc = get_count(slow_master_replica,
                             slow_master_replica.processThreePhaseMsg)

    primary_node.sendToNodes(MessageRep(**{
        f.MSG_TYPE.nm: PREPREPARE,
        f.PARAMS.nm: {
            f.INST_ID.nm: last_pp.instId,
            f.VIEW_NO.nm: last_pp.viewNo,
            f.PP_SEQ_NO.nm: last_pp.ppSeqNo
        },
        f.MSG.nm: last_pp
    }), names=[slow_node.name, ])

    def chk():
        # `process_requested_pre_prepare` is called but
        # `processThreePhaseMsg` is not called
        assert get_count(
            slow_master_replica,
            slow_master_replica.process_requested_pre_prepare) > count_pr_req
        assert get_count(
            slow_master_replica,
            slow_master_replica.processThreePhaseMsg) == count_pr_tpc

    looper.run(eventually(chk, retryWait=1))
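# All of these tests rely on plenum's test spy: methods on spyable test
# classes record each invocation in a `spylog`, and `get_count(obj, method)`
# reports how many recorded calls exist for that method. Below is a minimal
# sketch of the same counting idea; `spy` and `call_count` are hypothetical
# names for illustration, not plenum's actual API.
import functools


def spy(method):
    """Wrap a callable so that every call is recorded on the wrapper."""
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        wrapper.calls.append((args, kwargs))
        return method(*args, **kwargs)
    wrapper.calls = []
    return wrapper


def call_count(wrapped):
    # Analogue of `get_count(obj, obj.method)`: number of recorded calls
    return len(wrapped.calls)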
def test_node_notified_about_primary_election_result(txnPoolNodeSet, looper):
    old_counts = {node.name: get_count(node, node.primary_selected)
                  for node in txnPoolNodeSet}
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert get_count(node, node.primary_selected) > old_counts[node.name]
def test_no_reauth(looper, txnPoolNodeSet, sdk_wallet_steward,
                   sdk_pool_handle):
    auth_obj = txnPoolNodeSet[0].authNr(0).core_authenticator
    auth_count_before = get_count(auth_obj, auth_obj.authenticate)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, REQ_COUNT)
    auth_count_after = get_count(auth_obj, auth_obj.authenticate)
    assert auth_count_after - auth_count_before == REQ_COUNT
def testPropagateRecvdBeforeRequest(setup, looper, txnPoolNodeSet, sent1):
    A, B, C, D = txnPoolNodeSet

    def x():
        # A should not have received a request from the client
        assert len(recvdRequest(A)) == 0
        # A should have received only one PROPAGATE
        assert len(recvdPropagate(A)) == 1
        # A should have sent only one PROPAGATE
        assert len(sentPropagate(A)) == 1

    timeout = waits.expectedNodeToNodeMessageDeliveryTime() + delaySec - 2
    looper.run(eventually(x, retryWait=.5, timeout=timeout))

    def y():
        # A should have received a request from the client
        assert len(recvdRequest(A)) == 1
        # A should still have sent only one PROPAGATE
        assert len(sentPropagate(A)) == 1

    timeout = waits.expectedNodeToNodeMessageDeliveryTime() + delaySec + 2
    looper.run(eventually(y, retryWait=.5, timeout=timeout))

    def chk():
        # A should have forwarded the request
        assertLength(forwardedRequest(A), 1)

    timeout = waits.expectedClientRequestPropagationTime(
        len(txnPoolNodeSet)) + delaySec
    looper.run(eventually(chk, retryWait=1, timeout=timeout))

    auth_obj = A.authNr(0).core_authenticator
    auth_calling_count = get_count(auth_obj, auth_obj.authenticate)
    assert auth_calling_count == reqCount
def testPropagateRecvdAfterRequest(setup, looper, txnPoolNodeSet):
    A, B, C, D = txnPoolNodeSet  # type: TestNode
    sent1 = sdk_json_to_request_object(setup[0][0])

    def x():
        # A should have received a request from the client
        assert len(recvdRequest(A)) == 1
        # A should not have received a PROPAGATE
        assert len(recvdPropagate(A)) == 0
        # A should have sent a PROPAGATE
        assert len(sentPropagate(A)) == 1

    timeout = howlong - 2
    looper.run(eventually(x, retryWait=.5, timeout=timeout))

    for n in txnPoolNodeSet:
        n.nodeIbStasher.resetDelays()

    def y():
        # A should have received 3 PROPAGATEs
        assert len(recvdPropagate(A)) == 3
        # A should have a total of 4 PROPAGATEs (3 from other nodes and 1
        # from itself)
        key = sent1.digest
        assert key in A.requests
        assert len(A.requests[key].propagates) == 4
        # A should still have sent only one PROPAGATE
        assert len(sentPropagate(A)) == 1

    timeout = howlong + 2
    looper.run(eventually(y, retryWait=.5, timeout=timeout))

    auth_obj = A.authNr(0).core_authenticator
    auth_calling_count = get_count(auth_obj, auth_obj.authenticate)
    assert auth_calling_count == reqCount
def test_node_request_propagates(looper, setup, txnPoolNodeSet,
                                 sdk_wallet_client, sdk_pool_handle):
    """
    One of the nodes lacks sufficient PROPAGATEs
    """
    faulty_node, recv_client_requests = setup

    old_count_recv_ppg = get_count(faulty_node, faulty_node.processPropagate)
    old_count_recv_req = get_count(faulty_node, faulty_node.processRequest)
    old_count_request_propagates = get_count(
        faulty_node, faulty_node.request_propagates)

    def sum_of_sent_batches():
        return faulty_node.replicas[0].lastPrePrepareSeqNo + \
            faulty_node.replicas[1].lastPrePrepareSeqNo

    old_sum_of_sent_batches = sum_of_sent_batches()
    sent_reqs = 5
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, sent_reqs)

    assert get_count(
        faulty_node, faulty_node.processPropagate) > old_count_recv_ppg
    if recv_client_requests:
        assert get_count(
            faulty_node, faulty_node.processRequest) > old_count_recv_req
    else:
        assert get_count(
            faulty_node, faulty_node.processRequest) == old_count_recv_req

    # PROPAGATEs were requested as many times as batches were sent by the
    # two replicas combined, since each replica independently requests
    # PROPAGATEs
    assert get_count(faulty_node, faulty_node.request_propagates) - \
        old_count_request_propagates == (sum_of_sent_batches() -
                                         old_sum_of_sent_batches)

    requested_propagate_counts = getAllReturnVals(
        faulty_node, faulty_node.request_propagates)

    # Return values are ordered most recent first: the last attempt to
    # request PROPAGATEs was not successful
    assert requested_propagate_counts[0] == 0
    # The first attempt to request PROPAGATEs was successful as PROPAGATEs
    # were requested for all requests
    assert requested_propagate_counts[1] == sent_reqs

    faulty_node.nodeIbStasher.reset_delays_and_process_delayeds()
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle, num_reqs=4)
def test_node_requests_missing_preprepare(looper, txnPoolNodeSet,
                                          sdk_wallet_client, sdk_pool_handle,
                                          teardown):
    """
    A node has a bad network connection to the primary and thus loses a
    PRE-PREPARE; it requests the PRE-PREPARE from the primary once it has
    sufficient PREPAREs
    """
    slow_node, other_nodes, primary_node, other_non_primary_nodes = \
        split_nodes(txnPoolNodeSet)

    # Delay PRE-PREPAREs by a large amount to simulate loss
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))
    old_count_pp = get_count(slow_node.master_replica,
                             slow_node.master_replica.processPrePrepare)
    old_count_mrq = {n.name: get_count(n, n.process_message_req)
                     for n in other_nodes}
    old_count_mrp = get_count(slow_node, slow_node.process_message_rep)

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=15, num_batches=5)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    assert not slow_node.master_replica.requested_pre_prepares

    # `slow_node` processed PRE-PREPARE
    assert get_count(slow_node.master_replica,
                     slow_node.master_replica.processPrePrepare) > old_count_pp

    # `slow_node` did receive `MessageRep`
    assert get_count(slow_node, slow_node.process_message_rep) > old_count_mrp

    # The primary node should have received `MessageReq` and the other
    # nodes should not have
    recv_reqs = set()
    for n in other_non_primary_nodes:
        if get_count(n, n.process_message_req) > old_count_mrq[n.name]:
            recv_reqs.add(n.name)

    assert get_count(primary_node, primary_node.process_message_req) > \
        old_count_mrq[primary_node.name]
    assert len(recv_reqs) == 0

    # All nodes including the `slow_node` ordered the same requests
    assert check_if_all_equal_in_list([n.master_replica.ordered
                                       for n in txnPoolNodeSet])
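# The `nodeIbStasher.delay(...)` calls in these tests use plenum's stasher
# machinery: a delay rule is a predicate that, given an incoming message,
# returns how long to hold it back, and helpers like `ppDelay(300, 0)` build
# such rules (here: stash PRE-PREPAREs for instance 0 for 300 seconds). A
# minimal sketch of the idea follows; the names are illustrative assumptions,
# not plenum's real classes.
import time
from collections import deque


class StasherSketch:
    """Holds back messages that match a delay rule, delivering them later."""

    def __init__(self):
        self.rules = []          # each rule: msg -> seconds to delay, or None
        self.stashed = deque()   # (release_time, msg) pairs, FIFO

    def delay(self, rule):
        self.rules.append(rule)

    def process(self, msg, deliver):
        # Stash the message if any rule matches, otherwise deliver it now
        for rule in self.rules:
            secs = rule(msg)
            if secs:
                self.stashed.append((time.time() + secs, msg))
                return
        deliver(msg)

    def release_due(self, deliver):
        # Deliver stashed messages whose delay has elapsed (simplified FIFO
        # release; plenum's real stasher is more careful than this sketch)
        while self.stashed and self.stashed[0][0] <= time.time():
            _, msg = self.stashed.popleft()
            deliver(msg)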
def test_node_catchup_after_restart_no_txns(
        sdk_new_node_caught_up,
        txnPoolNodeSet,
        tdir,
        tconf,
        sdk_node_set_with_node_added_after_some_txns,
        tdirWithPoolTxns,
        allPluginsPath):
    """
    A node restarts but no transactions have happened while it was down.
    It would then use the `LedgerStatus` to catch up.
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Stopping node {} with pool ledger size {}".
                 format(new_node, new_node.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, new_node)
    looper.removeProdable(name=new_node.name)

    logger.debug("Starting the stopped node, {}".format(new_node))
    nodeHa, nodeCHa = HA(*new_node.nodestack.ha), HA(*new_node.clientstack.ha)
    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    new_node = TestNode(
        new_node.name,
        config_helper=config_helper,
        config=tconf,
        ha=nodeHa,
        cliha=nodeCHa,
        pluginPaths=allPluginsPath)
    looper.add(new_node)
    txnPoolNodeSet[-1] = new_node

    looper.run(checkNodesConnected(txnPoolNodeSet))

    def chk():
        for node in txnPoolNodeSet[:-1]:
            check_last_ordered_3pc(new_node, node)

    looper.run(eventually(chk, retryWait=1))

    # sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # Did not receive any consistency proofs
    assert get_count(new_node.ledgerManager,
                     new_node.ledgerManager.processConsistencyProof) == 0
def view_change_completed_count(node):
    vc_service = node.master_replica._view_change_service
    return get_count(vc_service, vc_service._finish_view_change)
def _check_view_change_completed_count(node):
    return get_count(node, node._check_view_change_completed)
def test_send_to_observers_each_reply_no_observers(observable,
                                                   fake_msg_batch_committed):
    assert 0 == get_count(observable, observable.send_to_observers)
    observable.process_new_batch(fake_msg_batch_committed, "sender1")
    assert 0 == get_count(observable, observable.send_to_observers)
    assert 0 == len(observable._outbox)
def test_send_to_observers_each_reply_with_observers(observable,
                                                     fake_msg_batch_committed):
    observable.add_observer("observer1", ObserverSyncPolicyType.EACH_BATCH)
    assert 0 == get_count(observable, observable.send_to_observers)
    observable.process_new_batch(fake_msg_batch_committed, "sender1")
    assert 1 == get_count(observable, observable.send_to_observers)
    assert 1 == len(observable._outbox)
def count_requested_preprepare_req(node):
    # Returns the number of times an attempt was made to request PRE-PREPARE
    sr = node.master_replica
    return get_count(sr, sr._request_pre_prepare_for_prepare)
def test_handle_delayed_preprepares(looper, txnPoolNodeSet, sdk_wallet_client,
                                    sdk_pool_handle, teardown):
    """
    Make a node send PREPREPARE again after the slow node has ordered
    """
    slow_node, other_nodes, primary_node, other_non_primary_nodes = \
        split_nodes(txnPoolNodeSet)

    # This node will send PRE-PREPARE again
    confused_node = other_non_primary_nodes[0]
    orig_method = confused_node.handlers[PREPREPARE].serve
    last_pp = None

    def patched_method(self, msg):
        nonlocal last_pp
        last_pp = orig_method(msg)
        return last_pp

    confused_node.handlers[PREPREPARE].serve = types.MethodType(
        patched_method, confused_node.handlers[PREPREPARE])

    # Delay PRE-PREPAREs by a large amount to simulate loss
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=10, num_batches=5)
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    slow_master_replica = slow_node.master_replica
    count_pr_req = get_count(slow_master_replica,
                             slow_master_replica.process_requested_pre_prepare)
    count_pr_tpc = get_count(slow_master_replica,
                             slow_master_replica.processThreePhaseMsg)

    confused_node.sendToNodes(MessageRep(**{
        f.MSG_TYPE.nm: PREPREPARE,
        f.PARAMS.nm: {
            f.INST_ID.nm: last_pp.instId,
            f.VIEW_NO.nm: last_pp.viewNo,
            f.PP_SEQ_NO.nm: last_pp.ppSeqNo
        },
        f.MSG.nm: last_pp
    }), names=[slow_node.name, ])

    def chk():
        # `process_requested_pre_prepare` is called but
        # `processThreePhaseMsg` is not called
        assert get_count(
            slow_master_replica,
            slow_master_replica.process_requested_pre_prepare) > count_pr_req
        assert get_count(
            slow_master_replica,
            slow_master_replica.processThreePhaseMsg) == count_pr_tpc

    looper.run(eventually(chk, retryWait=1))
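# The `types.MethodType(patched_method, obj)` pattern used above (and in the
# other monkey-patching tests in this collection) rebinds the patched
# function as a bound method of the target object, so `self` inside
# `patched_method` refers to that object while the original behaviour is
# preserved through the captured `orig_method`.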
def count_requested_preprepare_req(node):
    # Returns the number of times an attempt was made to request PRE-PREPARE
    sr = node.master_replica
    return get_count(sr._ordering_service,
                     sr._ordering_service._request_pre_prepare_for_prepare)
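# Note: the `count_requested_preprepare_req` variants in this collection
# appear to target different plenum versions. One spies
# `_request_pre_prepare_for_prepare` directly on the replica, this one looks
# it up on the extracted `replica._ordering_service`, and an older variant
# further below spies `_request_pre_prepare_if_possible`; all three count
# attempts to request a missing PRE-PREPARE.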
def test_view_change_timeout(nodeSet, looper, up, wallet1, client1):
    """
    Check that the view change is restarted if it is not completed in time
    """
    m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    initial_view_no = waitForViewChange(looper, nodeSet)

    # Setting view change timeout to a low value to make the test pass quicker
    for node in nodeSet:
        node._view_change_timeout = 5

    # Delaying view change messages to make the first view change fail
    # due to timeout
    for node in nodeSet:
        node.nodeIbStasher.delay(vcd_delay(delay=50))

    # Delaying pre-prepare messages from nodes and
    # sending request to force view change
    # for i in range(3):
    #     delayNonPrimaries(nodeSet, instId=i, delay=10)
    # sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    times = {}
    for node in nodeSet:
        times[node.name] = {
            'called': get_count(node, node._check_view_change_completed),
            'returned_true': len(getAllReturnVals(
                node, node._check_view_change_completed,
                compare_val_to=True))
        }

    for node in nodeSet:
        node.startViewChange(initial_view_no + 1)

    # The first view change should fail because of the delayed instance
    # change messages. This then leads to the new view change that we need.
    with pytest.raises(AssertionError):
        ensureElectionsDone(looper=looper, nodes=nodeSet, customTimeout=10)

    # Resetting delays to let the second view change go well
    reset_delays_and_process_delayeds(nodeSet)

    # This view change should be completed with no problems
    ensureElectionsDone(looper=looper, nodes=nodeSet)
    ensure_all_nodes_have_same_data(looper, nodes=nodeSet)
    new_m_primary_node = get_master_primary_node(list(nodeSet.nodes.values()))
    assert m_primary_node.name != new_m_primary_node.name

    # The timeout method has been called at least once
    for node in nodeSet:
        assert get_count(
            node,
            node._check_view_change_completed) > times[node.name]['called']
        assert len(getAllReturnVals(
            node, node._check_view_change_completed,
            compare_val_to=True)) > times[node.name]['returned_true']

    # Multiple view changes have been initiated
    for node in nodeSet:
        assert (node.viewNo - initial_view_no) > 1

    ensure_pool_functional(looper, nodeSet, wallet1, client1)
def test_node_detecting_lag_from_view_change_done_messages(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client, tconf):
    """
    A node is slow, and after a view change starts it marks its
    `last_prepared` as less than the others'. After catchup it does not get
    any txns from the others and finds it has already ordered its
    `last_prepared`, but when it gets ViewChangeDone messages it starts
    catchup again and this time gets the txns. To achieve this, delay all
    3PC messages to one node so that before the view change it has a
    different `last_prepared` from the others. Also delay processing of
    COMMITs and INSTANCE_CHANGEs by the other nodes.
    """
    send_reqs_batches_and_get_suff_replies(looper, txnPoolNodeSet,
                                           sdk_pool_handle, sdk_wallet_client,
                                           2 * 3, 3)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica
    fast_master_replicas = [n.master_replica for n in fast_nodes]

    delay_3pc = 50
    delay_ic = tconf.PerfCheckFreq + 5
    delay_commit = delay_ic + 10
    delay_3pc_messages([slow_node], 0, delay_3pc)
    for n in fast_nodes:
        n.nodeIbStasher.delay(icDelay(delay_ic))
        n.nodeIbStasher.delay(cDelay(delay_commit))

    reqs = []
    for i in range(10):
        # fix if unskip: `sdk_send_random_requests` needs the looper, pool
        # and wallet handles; the request count here is a guess
        reqs = reqs + sdk_send_random_requests(
            looper, sdk_pool_handle, sdk_wallet_client, 1)
        looper.runFor(.2)

    def chk1():
        for rep in fast_master_replicas:
            assert compare_3PC_keys(
                slow_master_replica.last_prepared_certificate_in_view(),
                rep.last_prepared_certificate_in_view()) > 0
            assert slow_master_replica.last_ordered_3pc == rep.last_ordered_3pc

    looper.run(eventually(chk1))

    no_more_catchup_count = get_count(slow_node,
                                      slow_node.no_more_catchups_needed)

    # Track last prepared for the master replica of each node
    prepareds = {}
    orig_methods = {}
    for node in txnPoolNodeSet:
        orig_methods[node.name] = node.master_replica.on_view_change_start

        def patched_on_view_change_start(self):
            orig_methods[self.node.name]()
            prepareds[self.node.name] = self.last_prepared_before_view_change

        node.master_replica.on_view_change_start = types.MethodType(
            patched_on_view_change_start, node.master_replica)

    ensure_view_change(looper, txnPoolNodeSet, exclude_from_check=fast_nodes)

    def chk2():
        # last_prepared of slow_node is less than that of fast_nodes
        for rep in fast_master_replicas:
            assert compare_3PC_keys(prepareds[slow_master_replica.node.name],
                                    prepareds[rep.node.name]) > 0

    looper.run(eventually(chk2, timeout=delay_ic + 5))

    last_start_catchup_call_at = None
    no_more_catchup_call_at = None

    def chk3():
        # no_more_catchups_needed was called since the node found no need
        # of catchup
        nonlocal last_start_catchup_call_at, no_more_catchup_call_at
        assert (get_count(slow_node, slow_node.no_more_catchups_needed) -
                no_more_catchup_count) > 0

        no_more_catchup_call_at = slow_node.spylog.getLast(
            slow_node.no_more_catchups_needed).starttime
        last_start_catchup_call_at = slow_node.spylog.getLast(
            slow_node.start_catchup).starttime

    looper.run(eventually(chk3, timeout=delay_commit))

    for n in fast_nodes:
        n.nodeIbStasher.reset_delays_and_process_delayeds()

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    assert slow_node.spylog.getLast(
        slow_node.start_catchup).starttime > no_more_catchup_call_at
    assert slow_node.spylog.getLast(
        slow_node.start_catchup).starttime > last_start_catchup_call_at
def checkpoint_check(nodes):
    for node in nodes:
        assert get_count(node.master_replica,
                         node.master_replica.markCheckPointStable) > 0
def checkpoint_check(nodes):
    for node in nodes:
        assert get_count(
            node.master_replica._checkpointer,
            node.master_replica._checkpointer._mark_checkpoint_stable) > 0
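# Likewise, the two `checkpoint_check` variants track the same event across
# a refactor: the older one spies `markCheckPointStable` on the replica
# itself, while the newer one spies `_mark_checkpoint_stable` on the
# replica's extracted checkpoint service (`master_replica._checkpointer`).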
def count_requested_preprepare_req(node):
    # Returns the number of times an attempt was made to request PRE-PREPARE
    sr = node.master_replica
    return get_count(sr, sr._request_pre_prepare_if_possible)
def test_append_input(node_observable, looper, txnPoolNodeSet,
                      sdk_wallet_client, sdk_pool_handle):
    assert 0 == get_count(node_observable, node_observable.append_input)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    assert 1 == get_count(node_observable, node_observable.append_input)