def test_slow_catchup_while_ordering(tdir, tconf, looper, txnPoolNodeSet,
                                     sdk_pool_handle, sdk_wallet_client):
    lagging_node = txnPoolNodeSet[-1]
    other_lagging_node = txnPoolNodeSet[-2]
    other_nodes = txnPoolNodeSet[:-1]
    other_stashers = [node.nodeIbStasher for node in other_nodes]

    def lagging_node_state() -> NodeLeecherService.State:
        return lagging_node.ledgerManager._node_leecher._state

    def check_lagging_node_is_not_syncing_audit():
        assert lagging_node_state() != NodeLeecherService.State.SyncingAudit

    # Prevent lagging node from ordering
    with delay_rules(lagging_node.nodeIbStasher, ppDelay(), pDelay(), cDelay()):
        # Order request on all nodes except lagging one
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, 1)

        # Prevent lagging node from catching up domain ledger (and finishing catchup)
        with delay_rules(other_stashers, delay_domain_ledger_catchup()):
            # Start catchup on lagging node
            lagging_node.ledgerManager.start_catchup()
            assert lagging_node_state() == NodeLeecherService.State.SyncingAudit

            # Ensure that audit ledger is caught up by lagging node
            looper.run(eventually(check_lagging_node_is_not_syncing_audit))
            assert lagging_node_state() != NodeLeecherService.State.Idle

            # Order one more request on all nodes except lagging one
            sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client, 1)

        # Now lagging node can catch up domain ledger, which contains more
        # transactions than it had when audit ledger was caught up

    # Now delayed 3PC messages reach lagging node, so any transactions missed during
    # catch up can be ordered, ensure that all nodes will have same data after that
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    # Ensure that even if we disable some other node the pool is still functional
    # (it won't be the case if the old lagging node is nonfunctional)
    with delay_rules(other_lagging_node.nodeIbStasher, ppDelay(), pDelay(), cDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, 1)

    # Ensure that all nodes will eventually have same data
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_node_handles_forced_upgrade_on_propagate(looper, nodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_trustee,
                                                  validUpgradeExpForceTrue):
    """
    Verifies that POOL_UPGRADE force=true request is handled immediately
    when the node receives it in a PROPAGATE from any other node
    """
    slow_node = getNonPrimaryReplicas(nodeSet, instId=0)[-1].node

    # Stash all except PROPAGATEs from Gamma
    slow_node.clientIbStasher.delay(req_delay())
    slow_node.nodeIbStasher.delay(ppgDelay(sender_filter='Alpha'))
    slow_node.nodeIbStasher.delay(ppgDelay(sender_filter='Beta'))
    slow_node.nodeIbStasher.delay(ppDelay())
    slow_node.nodeIbStasher.delay(pDelay())
    slow_node.nodeIbStasher.delay(cDelay())

    sdk_send_upgrade(looper, sdk_pool_handle, sdk_wallet_trustee,
                     validUpgradeExpForceTrue)

    looper.run(
        eventually(checkUpgradeScheduled,
                   [slow_node],
                   validUpgradeExpForceTrue[VERSION],
                   retryWait=1,
                   timeout=waits.expectedUpgradeScheduled()))
def test_view_change_with_different_prepare_certificate(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Check that a node without pre-prepare but with quorum of prepares
    wouldn't use this transaction as a last in prepare certificate
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_node = txnPoolNodeSet[-1]
    # delay preprepares and message response with preprepares.
    with delay_rules(slow_node.nodeIbStasher, ppDelay(delay=sys.maxsize)):
        with delay_rules(slow_node.nodeIbStasher,
                         msg_rep_delay(delay=sys.maxsize,
                                       types_to_delay=[PREPREPARE, ])):
            last_ordered = slow_node.master_replica.last_ordered_3pc
            sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
            looper.run(
                eventually(check_prepare_certificate,
                           txnPoolNodeSet[0:-1],
                           last_ordered[1] + 1))

            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            assert slow_node.master_replica.last_prepared_certificate_in_view() == \
                   (0, last_ordered[1])
    ensureElectionsDone(looper, txnPoolNodeSet)
def test_primary_recvs_3phase_message_outside_watermarks(perf_chk_patched,
                                                         chkFreqPatched, looper,
                                                         txnPoolNodeSet,
                                                         sdk_pool_handle,
                                                         sdk_wallet_client,
                                                         reqs_for_logsize):
    """
    One of the primaries starts getting lots of requests, more than its log
    size, and queues up requests since they will go beyond its watermarks.
    This happens since other nodes are slow in processing its PRE-PREPARE.
    Eventually this primary will send PRE-PREPAREs for all requests and those
    requests will complete
    """
    tconf = perf_chk_patched
    delay = 2
    instId = 0
    reqs_to_send = 2 * reqs_for_logsize + 1
    logger.debug('Will send {} requests'.format(reqs_to_send))

    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    orderedCount = pr.stats.get(TPCStat.OrderSent)

    for r in npr:
        r.node.nodeIbStasher.delay(ppDelay(delay, instId))
        r.node.nodeIbStasher.delay(pDelay(delay, instId))

    tm_exec_1_batch = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize)
    total_timeout = (tm_exec_1_batch + delay) * batch_count

    def chk():
        assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_to_send)
    looper.run(eventually(chk, retryWait=1, timeout=total_timeout))
def test_check_cdp_pp_storages(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_steward):
    def check_all_empty(replica, reverse=False):
        check_preprepared_empty(replica, reverse)
        check_prepared_empty(replica, reverse)

    def check_preprepared_empty(replica, reverse=False):
        statement_pp = bool(replica._consensus_data.preprepared)
        statement_pp ^= reverse
        assert not statement_pp

    def check_prepared_empty(replica, reverse=False):
        statement_p = bool(replica._consensus_data.prepared)
        statement_p ^= reverse
        assert not statement_p

    def operation_for_replicas(operation, node_set=txnPoolNodeSet, reverse=False):
        for node in node_set:
            operation(node.master_replica, reverse)

    node_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(node_stashers, cDelay()):
        with delay_rules(node_stashers, pDelay()):
            with delay_rules(node_stashers, ppDelay()):
                sdk_add_new_nym_without_waiting(looper, sdk_pool_handle,
                                                sdk_wallet_steward)
                looper.run(eventually(operation_for_replicas, check_all_empty,
                                      txnPoolNodeSet[1:]))
                looper.run(eventually(operation_for_replicas, check_preprepared_empty,
                                      txnPoolNodeSet[0:1], True))
            looper.run(eventually(operation_for_replicas, check_preprepared_empty,
                                  txnPoolNodeSet, True))
        looper.run(eventually(operation_for_replicas, check_prepared_empty,
                              txnPoolNodeSet, True))
    looper.run(eventually(operation_for_replicas, check_all_empty,
                          txnPoolNodeSet, True))
def testOrderingWhenPrePrepareNotReceived(looper, nodeSet, up, client1, wallet1):
    """
    Send commits and prepares but delay pre-prepare such that enough prepares
    and commits are received, now the request should not be ordered until
    pre-prepare is received and ordering should just happen once
    """
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    slowRep = nonPrimReps[0]
    slowNode = slowRep.node
    slowNode.nodeIbStasher.delay(ppDelay(10, 0))

    stash = []
    origMethod = slowRep.processReqDigest

    def patched(self, msg):
        stash.append(msg)

    patchedMethod = types.MethodType(patched, slowRep)
    slowRep.processReqDigest = patchedMethod

    def chk1():
        assert len(slowRep.commitsWaitingForPrepare) > 0

    sendRandomRequest(wallet1, client1)
    looper.run(eventually(chk1, timeout=4))

    for item in stash:
        origMethod(item)

    def chk2():
        assert len(slowRep.commitsWaitingForPrepare) == 0
        assert slowRep.spylog.count(slowRep.doOrder.__name__) == 1

    looper.run(eventually(chk2, timeout=12))
def test_view_change_with_different_prepare_certificate(looper, txnPoolNodeSet,
                                                        sdk_pool_handle,
                                                        sdk_wallet_client):
    """
    Check that a node without pre-prepare but with quorum of prepares
    wouldn't use this transaction as a last in prepare certificate
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_node = txnPoolNodeSet[-1]
    # delay preprepares and message response with preprepares.
    with delay_rules(slow_node.nodeIbStasher, ppDelay(delay=sys.maxsize)):
        with delay_rules(slow_node.nodeIbStasher,
                         msg_rep_delay(delay=sys.maxsize,
                                       types_to_delay=[PREPREPARE, ])):
            last_ordered = slow_node.master_replica.last_ordered_3pc
            sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
            looper.run(eventually(check_prepare_certificate,
                                  txnPoolNodeSet[0:-1],
                                  last_ordered[1] + 1))

            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            assert slow_node.master_replica.last_prepared_certificate_in_view() == \
                   (0, last_ordered[1])
    ensureElectionsDone(looper, txnPoolNodeSet)
def test_no_propagate_request_on_different_prepares_on_backup_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    1. Send random request
    2. Make two nodes slow in getting prepares on the backup instance
    3. Send random request
    4. Do view change
    5. Reset delays
    => we expect that all nodes and all instances have the same last ordered
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, pDelay(instId=slow_instance)):
        with delay_rules(nodes_stashers, ppDelay(instId=slow_instance)):
            # send one request
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, 1)
            old_view_no = txnPoolNodeSet[0].viewNo
            looper.run(
                eventually(is_prepared, fast_nodes, 2, slow_instance))

            # trigger view change on all nodes
            ensure_view_change(looper, txnPoolNodeSet)
            # wait for view change done on all nodes
            ensureElectionsDone(looper, txnPoolNodeSet)

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    check_last_ordered(non_primaries, slow_instance,
                       (old_view_no, old_last_ordered[1] + 1))
    # Backup primary replica must not advance last_ordered_3pc
    # up to the master's value
    check_last_ordered([primary], slow_instance,
                       (old_view_no, old_last_ordered[1]))

    check_last_ordered(txnPoolNodeSet, txnPoolNodeSet[0].master_replica.instId,
                       (old_last_ordered[0], old_last_ordered[1] + 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.run(
        eventually(check_last_ordered, txnPoolNodeSet, slow_instance,
                   (txnPoolNodeSet[0].viewNo, 1)))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def test_check_cdp_pp_storages(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_client):
    def check_all_empty(replica):
        assert not bool(replica._consensus_data.preprepared)
        assert not bool(replica._consensus_data.prepared)

    def check_preprepared_not_empty(replica):
        assert bool(replica._consensus_data.preprepared)

    def check_prepared_not_empty(replica):
        assert bool(replica._consensus_data.prepared)

    def operation_for_replicas(operation, node_set=txnPoolNodeSet):
        for node in node_set:
            operation(node.master_replica)

    node_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    with delay_rules(node_stashers, pDelay()):
        with delay_rules(node_stashers, ppDelay()):
            sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
            looper.run(
                eventually(operation_for_replicas, check_all_empty,
                           txnPoolNodeSet[1:]))
            looper.run(
                eventually(operation_for_replicas, check_preprepared_not_empty,
                           txnPoolNodeSet[0:1]))
        looper.run(
            eventually(operation_for_replicas, check_preprepared_not_empty,
                       txnPoolNodeSet))
    looper.run(
        eventually(operation_for_replicas, check_prepared_not_empty,
                   txnPoolNodeSet))
def test_backup_can_order_after_catchup(txnPoolNodeSet, looper,
                                        sdk_pool_handle, sdk_wallet_client):
    # We expect that after VC Gamma will be primary on backup
    delayed_node = txnPoolNodeSet[-2]
    with delay_rules_without_processing(delayed_node.nodeIbStasher,
                                        pDelay(instId=MASTER_REPLICA_INDEX),
                                        cDelay(instId=MASTER_REPLICA_INDEX),
                                        ppDelay(instId=MASTER_REPLICA_INDEX)):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, REQUEST_COUNT)
        with delay_rules_without_processing(
                [n.nodeIbStasher for n in txnPoolNodeSet],
                old_view_pp_request_delay()):
            ensure_view_change(looper, txnPoolNodeSet)
            ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
            assert delayed_node.replicas._replicas[BACKUP_INST_ID].isPrimary

            # Check that the backup cannot order
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, REQUEST_COUNT)
            for n in txnPoolNodeSet:
                assert n.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1] == 0

            # Forcing catchup
            delayed_node.start_catchup()
            ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

            # Check that the backup can order after catchup
            b_pp_seq_no_before = \
                delayed_node.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1]
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, REQUEST_COUNT)
            assert delayed_node.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1] == \
                   b_pp_seq_no_before + REQUEST_COUNT
def test_dequeue_and_validate_commits(looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 50
    with delay_rules(slow_node.nodeIbStasher,
                     pDelay(delay),
                     msg_rep_delay(delay, [PREPARE, PREPREPARE])):
        with delay_rules(slow_node.nodeIbStasher, ppDelay(delay)):
            sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                                 sdk_pool_handle,
                                                 sdk_wallet_client,
                                                 num_reqs=1, num_batches=1)

            assert not slow_node.master_replica._ordering_service.prePrepares
            assert not slow_node.master_replica._ordering_service.prepares
            assert not slow_node.master_replica._ordering_service.commits
            assert len(slow_node.master_replica._ordering_service.
                       commitsWaitingForPrepare) > 0

    waitNodeDataEquality(looper, slow_node, *other_nodes)
    assert check_if_all_equal_in_list(
        [n.master_replica._ordering_service.ordered for n in txnPoolNodeSet])

    assert slow_node.master_replica._ordering_service.prePrepares
    assert slow_node.master_replica._ordering_service.prepares
    assert slow_node.master_replica._ordering_service.commits
    assert not slow_node.master_replica._ordering_service.commitsWaitingForPrepare

    assert all(slow_node.master_replica.last_ordered_3pc ==
               n.master_replica.last_ordered_3pc
               for n in other_nodes)
def test_commits_recvd_first(looper, txnPoolNodeSet,
                             sdk_wallet_client, sdk_pool_handle):
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 50
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(delay, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=20, num_batches=4)

    assert not slow_node.master_replica.prePrepares
    assert not slow_node.master_replica.prepares
    assert not slow_node.master_replica.commits
    assert len(slow_node.master_replica.commitsWaitingForPrepare) > 0

    slow_node.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    assert check_if_all_equal_in_list([n.master_replica.ordered
                                       for n in txnPoolNodeSet])

    assert slow_node.master_replica.prePrepares
    assert slow_node.master_replica.prepares
    assert slow_node.master_replica.commits
    assert not slow_node.master_replica.commitsWaitingForPrepare
def test_primary_recvs_3phase_message_outside_watermarks(perf_chk_patched,
                                                         chkFreqPatched, looper,
                                                         txnPoolNodeSet,
                                                         sdk_pool_handle,
                                                         sdk_wallet_client,
                                                         reqs_for_logsize):
    """
    One of the primaries starts getting lots of requests, more than its log
    size, and queues up requests since they will go beyond its watermarks.
    This happens since other nodes are slow in processing its PRE-PREPARE.
    Eventually this primary will send PRE-PREPAREs for all requests and those
    requests will complete
    """
    tconf = perf_chk_patched
    delay = 5
    instId = 0
    reqs_to_send = 2 * reqs_for_logsize + 1
    logger.debug('Will send {} requests'.format(reqs_to_send))

    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    from plenum.server.replica import TPCStat
    orderedCount = pr.stats.get(TPCStat.OrderSent)

    for r in npr:
        r.node.nodeIbStasher.delay(ppDelay(delay, instId))
        r.node.nodeIbStasher.delay(pDelay(delay, instId))

    tm_exec_1_batch = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize)
    total_timeout = (tm_exec_1_batch + delay) * batch_count

    def chk():
        assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_to_send)
    looper.run(eventually(chk, retryWait=1, timeout=total_timeout))
def testOrderingWhenPrePrepareNotReceived(looper, nodeSet, up, client1, wallet1):
    """
    Send commits and prepares but delay pre-prepare such that enough prepares
    and commits are received, now the request should not be ordered until
    pre-prepare is received and ordering should just happen once
    """
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    slowRep = nonPrimReps[0]
    slowNode = slowRep.node
    slowNode.nodeIbStasher.delay(ppDelay(10, 0))
    sendRandomRequest(wallet1, client1)

    stash = []
    origMethod = slowRep.processReqDigest

    def patched(self, msg):
        stash.append(msg)

    patchedMethod = types.MethodType(patched, slowRep)
    slowRep.processReqDigest = patchedMethod

    def chk1():
        assert len(slowRep.commitsWaitingForPrepare) > 0

    looper.run(eventually(chk1, timeout=4))

    for item in stash:
        origMethod(item)

    def chk2():
        assert len(slowRep.commitsWaitingForPrepare) == 0
        assert slowRep.spylog.count(slowRep.doOrder.__name__) == 1

    looper.run(eventually(chk2, timeout=12))
def testElectionsAfterViewChange(delayedPerf, looper: Looper,
                                 nodeSet: TestNodeSet, up, wallet1, client1):
    """
    Test that a primary election does happen after a view change
    """
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls and view changes
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelay(10, 0))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    # Ensure view change happened for both node and its primary elector
    for node in nodeSet:
        looper.run(
            eventually(partial(checkViewChangeInitiatedForNode, node, 1),
                       retryWait=1, timeout=20))

    # Ensure elections are done again and pool is setup again with appropriate
    # protocol instances and each protocol instance is setup properly too
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=30)
def test_commits_recvd_first(looper, txnPoolNodeSet,
                             sdk_wallet_client, sdk_pool_handle):
    slow_node = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)][-1]
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    delay = 50
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(delay, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=20, num_batches=4)

    assert not slow_node.master_replica.prePrepares
    assert not slow_node.master_replica.prepares
    assert not slow_node.master_replica.commits
    assert len(slow_node.master_replica.commitsWaitingForPrepare) > 0

    slow_node.reset_delays_and_process_delayeds()
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    assert check_if_all_equal_in_list(
        [n.master_replica.ordered for n in txnPoolNodeSet])

    assert slow_node.master_replica.prePrepares
    assert slow_node.master_replica.prepares
    assert slow_node.master_replica.commits
    assert not slow_node.master_replica.commitsWaitingForPrepare
def test_stable_checkpoint_when_one_instance_slow(chkFreqPatched, tconf, looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client,
                                                  reqs_for_checkpoint):
    delay = 5
    pr = getPrimaryReplica(txnPoolNodeSet, 1)
    slowNode = pr.node
    otherNodes = [n for n in txnPoolNodeSet if n != slowNode]
    for n in otherNodes:
        n.nodeIbStasher.delay(ppDelay(delay, 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_for_checkpoint)
    timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + delay
    next_checkpoint = tconf.CHK_FREQ
    looper.run(
        eventually(check_for_nodes, txnPoolNodeSet, check_stable_checkpoint,
                   next_checkpoint, retryWait=1, timeout=timeout))
    check_for_nodes(txnPoolNodeSet, check_num_unstable_checkpoints, 0)
def test_pp_obsolescence_check_fail_for_delayed(tdir, tconf, looper,
                                                txnPoolNodeSet,
                                                sdk_pool_handle,
                                                sdk_wallet_client):
    delay = PATCHED_ACCEPTABLE_DEVIATION_PREPREPARE_SECS + 1
    lagging_node = txnPoolNodeSet[-1]

    # Prevent lagging node from ordering
    with delay_rules(lagging_node.nodeIbStasher, ppDelay(), pDelay(), cDelay()):
        # Order request on all nodes except lagging one
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        looper.run(asyncio.sleep(delay))

    # Now delayed 3PC messages reach lagging node, so any delayed transactions
    # can be processed (PrePrepare would be discarded but requested after that),
    # ensure that all nodes will have same data after that
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    pp_count = get_count(lagging_node.master_replica,
                         lagging_node.master_replica.processPrePrepare)

    assert pp_count > 0
    assert get_timestamp_suspicion_count(lagging_node) == pp_count
def test_handle_delayed_preprepares(looper, txnPoolNodeSet,
                                    sdk_wallet_client, sdk_pool_handle,
                                    teardown):
    """
    Make a node send PREPREPARE again after the slow node has ordered
    """
    slow_node, other_nodes, primary_node, other_non_primary_nodes = \
        split_nodes(txnPoolNodeSet)
    # This node will send PRE-PREPARE again
    orig_method = primary_node.handlers[PREPREPARE].serve
    last_pp = None

    def patched_method(self, msg):
        nonlocal last_pp
        last_pp = orig_method(msg)
        return last_pp

    primary_node.handlers[PREPREPARE].serve = types.MethodType(
        patched_method, primary_node.handlers[PREPREPARE])
    # Delay PRE-PREPAREs by large amount simulating loss
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=10, num_batches=5)
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    slow_master_replica = slow_node.master_replica
    count_pr_req = get_count(slow_master_replica,
                             slow_master_replica.process_requested_pre_prepare)
    count_pr_tpc = get_count(slow_master_replica,
                             slow_master_replica.processThreePhaseMsg)

    primary_node.sendToNodes(MessageRep(**{
        f.MSG_TYPE.nm: PREPREPARE,
        f.PARAMS.nm: {
            f.INST_ID.nm: last_pp.instId,
            f.VIEW_NO.nm: last_pp.viewNo,
            f.PP_SEQ_NO.nm: last_pp.ppSeqNo
        },
        f.MSG.nm: last_pp
    }), names=[slow_node.name, ])

    def chk():
        # `process_requested_pre_prepare` is called but
        # `processThreePhaseMsg` is not called
        assert get_count(
            slow_master_replica,
            slow_master_replica.process_requested_pre_prepare) > count_pr_req
        assert get_count(
            slow_master_replica,
            slow_master_replica.processThreePhaseMsg) == count_pr_tpc

    looper.run(eventually(chk, retryWait=1))
def test_deletion_non_forwarded_request(
        looper, chkFreqPatched, reqs_for_checkpoint,
        txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward,
        tconf, tdir, allPluginsPath):
    behind_node = txnPoolNodeSet[-1]
    [behind_node.replicas.values()[1].discard_req_key(1, key)
     for key in behind_node.requests]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    behind_node.quorums.propagate = Quorum(len(txnPoolNodeSet) + 1)

    with delay_rules(behind_node.nodeIbStasher,
                     ppDelay(delay=sys.maxsize),
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    # We clear caughtup requests
    assert len(behind_node.requests) == 0
    assert all([len(q) == 0 for r in behind_node.replicas.values()
                for q in r.requestQueues.values()])
    assert len(behind_node.clientAuthNr._verified_reqs) == 0
    assert len(behind_node.requestSender) == 0
def test_deletion_non_forwarded_request(
        looper, chkFreqPatched, reqs_for_checkpoint,
        txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward,
        tconf, tdir, allPluginsPath):
    behind_node = txnPoolNodeSet[-1]
    [behind_node.replicas.values()[1].discard_req_key(1, key)
     for key in behind_node.requests]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    behind_node.quorums.propagate = Quorum(len(txnPoolNodeSet) + 1)

    with delay_rules(behind_node.nodeIbStasher,
                     ppDelay(delay=sys.maxsize),
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    # We clear caughtup requests
    looper.run(eventually(lambda: assertExp(len(behind_node.requests) == 0)))
    assert all([len(q) == 0 for r in behind_node.replicas.values()
                for q in r._ordering_service.requestQueues.values()])
    assert len(behind_node.clientAuthNr._verified_reqs) == 0
    assert len(behind_node.requestSender) == 0
def test_freeing_forwarded_not_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    behind_node = txnPoolNodeSet[-1]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    with delay_rules(
            behind_node.nodeIbStasher,
            chk_delay(delay=sys.maxsize,
                      instId=behind_node.replicas.values()[-1])):
        with delay_rules(behind_node.nodeIbStasher,
                         ppDelay(delay=sys.maxsize),
                         pDelay(delay=sys.maxsize),
                         cDelay(delay=sys.maxsize)):
            count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
            sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                       sdk_pool_handle, sdk_wallet_steward,
                                       req_num, req_num)
            looper.run(
                eventually(node_caughtup, behind_node, count, retryWait=1))
            looper.run(
                eventually(
                    lambda: assertExp(len(behind_node.requests) == req_num)))

    # We execute caughtup requests
    looper.run(
        eventually(lambda: assertExp(len(behind_node.requests) == req_num)))
    assert all(r.executed for r in behind_node.requests.values()
               if behind_node.seqNoDB.get(r.request.key)[1])
def testPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper,
                                                   txnPoolNodeSet, client1,
                                                   wallet1, client1Connected):
    """
    One of the primaries starts getting lots of requests, more than its log
    size, and queues up requests since they will go beyond its watermarks.
    This happens since other nodes are slow in processing its PRE-PREPARE.
    Eventually this primary will send PRE-PREPAREs for all requests and those
    requests will complete
    """
    instId = 1
    reqsToSend = 2 * chkFreqPatched.LOG_SIZE + 1
    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    from plenum.server.replica import TPCStat
    orderedCount = pr.stats.get(TPCStat.OrderSent)

    for r in npr:
        r.node.nodeIbStasher.delay(ppDelay(10, instId))

    def chk():
        assert orderedCount + reqsToSend == pr.stats.get(TPCStat.OrderSent)

    print('Sending {} requests'.format(reqsToSend))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, 1)
    looper.run(eventually(chk, retryWait=1, timeout=80))
def test_backup_stabilized_checkpoint_on_view_change(looper, txnPoolNodeSet,
                                                     sdk_wallet_client,
                                                     sdk_pool_handle):
    # Delta:1
    backup = txnPoolNodeSet[-1].replicas[1]
    count_of_replicas = len(txnPoolNodeSet[0].replicas)
    with delay_rules_without_processing(
            [n.nodeIbStasher for n in txnPoolNodeSet], ppDelay(instId=0)):
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                                 REQ_COUNT)
        looper.run(
            eventually(
                lambda r: assertExp(r.last_ordered_3pc == (0, REQ_COUNT)),
                backup))

        # assert that all requests are propagated
        for n in txnPoolNodeSet:
            for req in n.requests.values():
                assert req.forwardedTo == count_of_replicas

        ensure_view_change(looper, txnPoolNodeSet)
        ensureElectionsDone(looper, txnPoolNodeSet)

        # check that all requests were freed on backups
        for n in txnPoolNodeSet:
            for req in n.requests.values():
                assert req.forwardedTo == count_of_replicas - 1
def test_handle_delayed_preprepares(looper, txnPoolNodeSet,
                                    sdk_wallet_client, sdk_pool_handle,
                                    teardown, monkeypatch):
    """
    Make a node send PREPREPARE again after the slow node has ordered
    """
    slow_node, other_nodes, primary_node, other_non_primary_nodes = \
        split_nodes(txnPoolNodeSet)
    # This node will send PRE-PREPARE again
    orig_method = primary_node.master_replica._message_req_service.handlers[
        PREPREPARE].process_message_req
    handler = primary_node.master_replica._message_req_service.handlers[PREPREPARE]
    last_pp = None

    def patched_method(self, msg):
        nonlocal last_pp
        last_pp = orig_method(msg)
        return last_pp

    handler.process_message_req = types.MethodType(patched_method, handler)
    # Delay PRE-PREPAREs by large amount simulating loss
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=10, num_batches=5)
    waitNodeDataEquality(looper, slow_node, *other_nodes)

    slow_master_replica = slow_node.master_replica
    count_pr_req = get_count(
        slow_master_replica._message_req_service,
        slow_master_replica._message_req_service.process_message_rep)

    primary_node.sendToNodes(MessageRep(**{
        f.MSG_TYPE.nm: PREPREPARE,
        f.PARAMS.nm: {
            f.INST_ID.nm: last_pp.instId,
            f.VIEW_NO.nm: last_pp.viewNo,
            f.PP_SEQ_NO.nm: last_pp.ppSeqNo
        },
        f.MSG.nm: last_pp
    }), names=[slow_node.name, ])

    def chk():
        assert get_count(
            slow_master_replica._message_req_service,
            slow_master_replica._message_req_service.process_message_rep) > count_pr_req
        assert slow_master_replica._ordering_service.spylog.getLast(
            "_validate").result[0] == -1

    looper.run(eventually(chk, retryWait=1))
def setup(request, looper, txnPoolNodeSet, client1, wallet1, client1Connected):
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_node.nodeIbStasher.delay(pDelay(100, 0))
    slow_node.nodeIbStasher.delay(cDelay(100, 0))
    if request.param == 'all':
        slow_node.nodeIbStasher.delay(ppDelay(100, 0))
    return slow_node, fast_nodes
def test_node_requests_missing_preprepare_malicious(looper, txnPoolNodeSet,
                                                    sdk_wallet_client,
                                                    sdk_pool_handle,
                                                    malicious_setup, teardown):
    """
    A node has bad network with primary and thus loses PRE-PREPARE,
    it requests PRE-PREPARE from non-primaries once it has sufficient PREPAREs
    but one of the non-primaries does not send the PRE-PREPARE
    """
    # primary_node = get_master_primary_node(txnPoolNodeSet)
    # slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    # other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    # bad_node = [n for n in other_nodes if n != primary_node][0]
    # good_non_primary_node = [n for n in other_nodes if n != slow_node
    #                          and n != bad_node and n != primary_node][0]
    primary_node, bad_node, good_non_primary_node, slow_node, other_nodes, \
        bad_method, orig_method = malicious_setup

    slow_node.nodeIbStasher.delay(ppDelay(300, 0))

    def get_reply_count_frm(node):
        return sum([1 for entry in
                    slow_node.spylog.getAll(slow_node.process_message_rep)
                    if entry.params['msg'].msg_type == PREPREPARE and
                    entry.params['frm'] == node.name])

    old_reply_count_from_bad_node = get_reply_count_frm(bad_node)
    old_reply_count_from_good_node = get_reply_count_frm(good_non_primary_node)
    old_discarded = countDiscarded(slow_node.master_replica,
                                   'does not have expected state')

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=10, num_batches=2)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    assert check_if_all_equal_in_list(
        [n.master_replica.ordered for n in txnPoolNodeSet])
    assert not slow_node.master_replica.requested_pre_prepares

    if bad_method.__name__ == 'do_not_send':
        assert get_reply_count_frm(bad_node) == old_reply_count_from_bad_node
    else:
        assert countDiscarded(slow_node.master_replica,
                              'does not have expected state') > old_discarded

    assert get_reply_count_frm(good_non_primary_node) > \
        old_reply_count_from_good_node

    slow_node.reset_delays_and_process_delayeds()
    bad_node.nodeMsgRouter.routes[MessageReq] = orig_method
def test_no_propagate_request_on_different_prepares_on_backup_before_vc(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    '''
    Send a random request and do a view change. The fast nodes (which include
    the backup primary replica) will have a prepare or have sent a pre-prepare
    on their backup replicas, while the slow nodes will not, yet the
    transaction will be ordered on all master replicas.
    Check last_ordered after the view change and after one more request.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, pDelay(instId=slow_instance)):
        with delay_rules(nodes_stashers, ppDelay(instId=slow_instance)):
            # send one request
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, 1)
            old_view_no = txnPoolNodeSet[0].viewNo
            looper.run(
                eventually(is_prepared, fast_nodes, 2, slow_instance))

            # trigger view change on all nodes
            ensure_view_change(looper, txnPoolNodeSet)
            # wait for view change done on all nodes
            ensureElectionsDone(looper, txnPoolNodeSet)

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    check_last_ordered(non_primaries, slow_instance,
                       (old_view_no, old_last_ordered[1] + 1))
    # Backup primary replica must not advance last_ordered_3pc
    # up to the master's value
    check_last_ordered([primary], slow_instance,
                       (old_view_no, old_last_ordered[1]))

    check_last_ordered(txnPoolNodeSet, txnPoolNodeSet[0].master_replica.instId,
                       (old_last_ordered[0], old_last_ordered[1] + 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.run(
        eventually(check_last_ordered, txnPoolNodeSet, slow_instance,
                   (txnPoolNodeSet[0].viewNo, 1)))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def testNonPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper,
                                                      txnPoolNodeSet, client1,
                                                      wallet1, client1Connected,
                                                      reqs_for_logsize):
    """
    A node is slow in processing PRE-PREPAREs and PREPAREs such that lots of
    requests happen and the slow node has started getting 3 phase messages
    outside of its watermarks. Check that it queues up requests outside
    watermarks and once it has received a stable checkpoint it processes more
    requests. It sends other nodes 3 phase messages older than their stable
    checkpoint so they should discard them.
    """
    delay = 15
    instId = 1
    reqsToSend = reqs_for_logsize + 2
    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    slowReplica = npr[0]
    slowNode = slowReplica.node
    slowNode.nodeIbStasher.delay(ppDelay(delay, instId))
    slowNode.nodeIbStasher.delay(pDelay(delay, instId))

    def discardCounts(replicas, pat):
        counts = {}
        for r in replicas:
            counts[r.name] = countDiscarded(r, pat)
        return counts

    oldStashCount = slowReplica.spylog.count(
        TestReplica.stashOutsideWatermarks.__name__)
    oldDiscardCounts = discardCounts(
        [n.replicas[instId] for n in txnPoolNodeSet if n != slowNode],
        'achieved stable checkpoint')

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend, 1)
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    looper.run(
        eventually(checkNodeDataForEquality, slowNode,
                   *[_ for _ in txnPoolNodeSet if _ != slowNode],
                   retryWait=1, timeout=timeout))
    newStashCount = slowReplica.spylog.count(
        TestReplica.stashOutsideWatermarks.__name__)
    assert newStashCount > oldStashCount

    def chk():
        counts = discardCounts(
            [n.replicas[instId] for n in txnPoolNodeSet if n != slowNode],
            'achieved stable checkpoint')
        for nm, count in counts.items():
            assert count > oldDiscardCounts[nm]

    timeout = waits.expectedNodeToNodeMessageDeliveryTime() * \
        len(txnPoolNodeSet) + delay
    looper.run(eventually(chk, retryWait=1, timeout=timeout))
def test_node_request_preprepare(looper, txnPoolNodeSet, client1,
                                 wallet1, client1Connected, teardown):
    """
    Node requests PRE-PREPARE only once after getting PREPAREs.
    """
    slow_node, other_nodes, primary_node, \
        other_primary_nodes = split_nodes(txnPoolNodeSet)
    # Drop PrePrepares and Prepares
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))
    slow_node.nodeIbStasher.delay(pDelay(300, 0))

    send_reqs_batches_and_get_suff_replies(looper, wallet1, client1, 10, 5)
    slow_node.nodeIbStasher.drop_delayeds()
    slow_node.nodeIbStasher.resetDelays()

    old_count_req = count_requested_preprepare_req(slow_node)
    old_count_resp = count_requested_preprepare_resp(slow_node)

    def chk(increase=True):
        # Method is called
        assert count_requested_preprepare_req(slow_node) > old_count_req
        # Requesting Preprepare
        assert count_requested_preprepare_resp(
            slow_node) - old_count_resp == (1 if increase else 0)

    for pp in primary_node.master_replica.sentPrePrepares.values():
        for rep in [n.master_replica for n in other_primary_nodes]:
            prepare = Prepare(rep.instId,
                              pp.viewNo,
                              pp.ppSeqNo,
                              pp.ppTime,
                              pp.digest,
                              pp.stateRootHash,
                              pp.txnRootHash
                              )
            rep.send(prepare)

        looper.run(eventually(chk, True, retryWait=1))

        old_count_resp = count_requested_preprepare_resp(slow_node)

        prepare = Prepare(rep.instId,
                          pp.viewNo,
                          pp.ppSeqNo,
                          pp.ppTime,
                          pp.digest,
                          pp.stateRootHash,
                          pp.txnRootHash
                          )
        rep.send(prepare)

        looper.run(eventually(chk, False, retryWait=1))

        old_count_req = count_requested_preprepare_req(slow_node)
        old_count_resp = count_requested_preprepare_resp(slow_node)
def make_master_replica_lag(node):
    # class AbysmalBox(list):
    #     def append(self, object) -> None:
    #         pass
    #
    # node.replicas._master_replica.inBox = AbysmalBox()
    node.nodeIbStasher.delay(ppDelay(1200, 0))
    node.nodeIbStasher.delay(pDelay(1200, 0))
    node.nodeIbStasher.delay(cDelay(1200, 0))
def test_backups_dont_order_while_reordering(txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_client,
                                             looper):
    """
    This test needs to show that for now we stop ordering on backups until
    the master is in a reordering state after view_change
    Steps:
    1. Delay ordering on master replica for collecting requests to reorder after VC
    2. Make sure that master didn't order
    3. Delay old_view_pp_request and force VC
    4. Ensure that all backup replicas on all nodes cannot order because
       the primary is waiting for reordering on the master
    """

    def check_pp_count(node, expected_count, inst_id=0):
        assert node.replicas._replicas[inst_id].last_ordered_3pc[1] == expected_count, \
            "master last ordered: {}, backup_last_ordered: {}".format(
                node.master_replica._ordering_service.batches,
                node.replicas._replicas[1]._ordering_service.batches)

    # We expect that after VC Gamma will be primary on backup
    delayed_node = txnPoolNodeSet[-2]
    fast_nodes = [n for n in txnPoolNodeSet if n != delayed_node]
    master_pp_seq_no_before = delayed_node.master_replica.last_ordered_3pc[1]
    with delay_rules_without_processing(delayed_node.nodeIbStasher,
                                        pDelay(instId=MASTER_REPLICA_INDEX),
                                        cDelay(instId=MASTER_REPLICA_INDEX),
                                        msg_req_delay(),
                                        msg_rep_delay(),
                                        ppDelay(instId=MASTER_REPLICA_INDEX)):
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                                 REQS_FOR_REORDERING)
        looper.run(eventually(check_pp_count, delayed_node,
                              REQS_FOR_REORDERING, BACKUP_INST_ID))
        assert delayed_node.master_replica.last_ordered_3pc[1] == master_pp_seq_no_before
        with delay_rules([n.nodeIbStasher for n in txnPoolNodeSet],
                         old_view_pp_request_delay()):
            ensure_view_change(looper, txnPoolNodeSet)

            # check that view change is finished on all nodes
            looper.run(eventually(check_not_in_view_change, txnPoolNodeSet))

            # check that delayed node is selected on all fast nodes but not on delayed node
            def check_backup_primaries():
                assert delayed_node.replicas[BACKUP_INST_ID]._consensus_data.primary_name is None
                assert delayed_node.master_replica.last_ordered_3pc[1] == master_pp_seq_no_before
                assert all(
                    n.replicas[BACKUP_INST_ID]._consensus_data.primary_name ==
                    generateName(delayed_node.name, instId=BACKUP_INST_ID)
                    for n in fast_nodes
                )

            looper.run(eventually(check_backup_primaries))

            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, REQS_FOR_REORDERING)
            for node in txnPoolNodeSet:
                assert node.replicas._replicas[BACKUP_INST_ID].last_ordered_3pc[1] == 0
def setup(request, looper, txnPoolNodeSet):
    slow_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[1].node
    fast_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    # Delay catchup reply so that the test gets time to make the check,
    # this delay is reset after the check
    slow_node.nodeIbStasher.delay(cr_delay(100))
    slow_node.nodeIbStasher.delay(pDelay(100, 0))
    slow_node.nodeIbStasher.delay(cDelay(100, 0))
    if request.param == 'all':
        slow_node.nodeIbStasher.delay(ppDelay(100, 0))
    return slow_node, fast_nodes
def test_stabilize_checkpoint_while_unstashing_when_missing_pre_prepare(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_client):
    # Prepare nodes
    lagging_node = txnPoolNodeSet[-1]
    lagging_master_replica = lagging_node.master_replica
    rest_nodes = txnPoolNodeSet[:-1]

    # 1. send enough requests so that just 1 is left for checkpoint stabilization
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_for_checkpoint - 1)

    # 2. delay PrePrepare on 1 node so that prepares and commits will be stashed
    with delay_rules(lagging_node.nodeIbStasher, ppDelay()):
        with delay_rules(lagging_node.nodeIbStasher, msg_rep_delay()):
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, 1)

            # all good nodes stabilized checkpoint
            looper.run(
                eventually(check_for_nodes, rest_nodes,
                           check_stable_checkpoint, 5))

            # bad node received checkpoints from all nodes but didn't stabilize it
            looper.run(
                eventually(check_for_nodes, [lagging_node],
                           check_stable_checkpoint, 0))
            looper.run(
                eventually(check_for_nodes, [lagging_node],
                           check_received_checkpoint_votes, 5, len(rest_nodes)))

            # bad node has all commits and prepares for the last request stashed
            looper.run(
                eventually(lambda: assertExp(
                    (0, CHK_FREQ) in
                    lagging_master_replica._ordering_service.preparesWaitingForPrePrepare and
                    len(lagging_master_replica._ordering_service.preparesWaitingForPrePrepare[
                            (0, CHK_FREQ)]) == len(rest_nodes) - 1)))
            looper.run(
                eventually(lambda: assertExp(
                    (0, CHK_FREQ) in
                    lagging_master_replica._ordering_service.commitsWaitingForPrepare and
                    len(lagging_master_replica._ordering_service.commitsWaitingForPrepare[
                            (0, CHK_FREQ)]) == len(rest_nodes))))

    # 3. the delayed PrePrepare is processed, and stashed prepares and commits are unstashed
    # checkpoint will be stabilized during unstashing, and the request will be ordered
    looper.run(
        eventually(check_for_nodes, [lagging_node], check_stable_checkpoint, 5))
    waitNodeDataEquality(looper, *txnPoolNodeSet, customTimeout=5)
def test_stable_checkpoint_when_one_instance_slow(chkFreqPatched, looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client,
                                                  reqs_for_checkpoint):
    delay = 5
    pr = getPrimaryReplica(txnPoolNodeSet, 1)
    slowNode = pr.node
    otherNodes = [n for n in txnPoolNodeSet if n != slowNode]
    for n in otherNodes:
        n.nodeIbStasher.delay(ppDelay(delay, 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_for_checkpoint)
    timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + delay
    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0,
                          retryWait=1, timeout=timeout))
def test_node_requests_missing_preprepare(looper, txnPoolNodeSet,
                                          sdk_wallet_client, sdk_pool_handle,
                                          teardown):
    """
    A node has bad network with primary and thus loses PRE-PREPARE,
    it requests PRE-PREPARE from primary once it has sufficient PREPAREs
    """
    slow_node, other_nodes, primary_node, other_non_primary_nodes = split_nodes(
        txnPoolNodeSet)

    # Delay PRE-PREPAREs by large amount simulating loss
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))
    old_count_pp = get_count(
        slow_node.master_replica,
        slow_node.master_replica._ordering_service.process_preprepare)
    old_count_mrq = {n.name: get_count(n, n.process_message_req)
                     for n in other_nodes}
    old_count_mrp = get_count(slow_node, slow_node.process_message_rep)

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=15, num_batches=5)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    assert not slow_node.master_replica.requested_pre_prepares

    # `slow_node` processed PRE-PREPARE
    # assert get_count(slow_node.master_replica,
    #                  slow_node.master_replica._ordering_service.process_preprepare) > old_count_pp

    # `slow_node` did receive `MessageRep`
    assert get_count(slow_node, slow_node.process_message_rep) > old_count_mrp

    # Primary node should have received `MessageReq` and other nodes shouldn't
    recv_reqs = set()
    for n in other_non_primary_nodes:
        if get_count(n, n.process_message_req) > old_count_mrq[n.name]:
            recv_reqs.add(n.name)

    assert get_count(primary_node, primary_node.process_message_req) > \
        old_count_mrq[primary_node.name]
    assert len(recv_reqs) == 0

    # All nodes including the `slow_node` ordered the same requests
    assert check_if_all_equal_in_list(
        [n.master_replica._ordering_service.ordered for n in txnPoolNodeSet])
def testStableCheckpointWhenOneInstanceSlow(chkFreqPatched, looper,
                                            txnPoolNodeSet, client1,
                                            wallet1, client1Connected):
    pr = getPrimaryReplica(txnPoolNodeSet, 1)
    slowNode = pr.node
    otherNodes = [n for n in txnPoolNodeSet if n != slowNode]
    for n in otherNodes:
        n.nodeIbStasher.delay(ppDelay(5, 1))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, CHK_FREQ, 1)
    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0,
                          retryWait=1, timeout=15))
def testOrderingWhenPrePrepareNotReceived(looper, txnPoolNodeSet,
                                          sdk_wallet_client, sdk_pool_handle):
    """
    Send commits but delay pre-prepare and prepares such that enough commits
    are received, now the request should not be ordered until pre-prepare
    is received and ordering should just happen once
    """
    delay = 10
    non_prim_reps = getNonPrimaryReplicas(txnPoolNodeSet, 0)

    slow_rep = non_prim_reps[0]
    slow_node = slow_rep.node
    slow_node.nodeIbStasher.delay(ppDelay(delay, 0))
    slow_node.nodeIbStasher.delay(pDelay(delay, 0))

    stash_pp = []
    stash_p = []
    orig_pp_method = slow_rep.processPrePrepare
    orig_p_method = slow_rep.processPrepare

    def patched_pp(self, msg, sender):
        stash_pp.append((msg, sender))

    def patched_p(self, msg, sender):
        stash_p.append((msg, sender))

    slow_rep.processPrePrepare = types.MethodType(patched_pp, slow_rep)
    slow_rep.processPrepare = types.MethodType(patched_p, slow_rep)

    def chk1():
        assert len(slow_rep.commitsWaitingForPrepare) > 0

    sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)

    timeout = waits.expectedPrePrepareTime(len(txnPoolNodeSet)) + delay
    looper.run(eventually(chk1, retryWait=1, timeout=timeout))

    for m, s in stash_pp:
        orig_pp_method(m, s)

    for m, s in stash_p:
        orig_p_method(m, s)

    def chk2():
        assert len(slow_rep.commitsWaitingForPrepare) == 0
        assert slow_rep.spylog.count(slow_rep.doOrder.__name__) == 1

    timeout = waits.expectedOrderingTime(len(non_prim_reps) + 1) + 2 * delay
    looper.run(eventually(chk2, retryWait=1, timeout=timeout))
def test_node_requests_missing_preprepare(looper, txnPoolNodeSet,
                                          sdk_wallet_client, sdk_pool_handle,
                                          teardown):
    """
    A node has bad network with primary and thus loses PRE-PREPARE,
    it requests PRE-PREPARE from primary once it has sufficient PREPAREs
    """
    slow_node, other_nodes, primary_node, other_non_primary_nodes = split_nodes(
        txnPoolNodeSet)

    # Delay PRE-PREPAREs by large amount simulating loss
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))
    old_count_pp = get_count(slow_node.master_replica,
                             slow_node.master_replica.processPrePrepare)
    old_count_mrq = {n.name: get_count(n, n.process_message_req)
                     for n in other_nodes}
    old_count_mrp = get_count(slow_node, slow_node.process_message_rep)

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=15, num_batches=5)

    waitNodeDataEquality(looper, slow_node, *other_nodes)

    assert not slow_node.master_replica.requested_pre_prepares

    # `slow_node` processed PRE-PREPARE
    assert get_count(slow_node.master_replica,
                     slow_node.master_replica.processPrePrepare) > old_count_pp

    # `slow_node` did receive `MessageRep`
    assert get_count(slow_node, slow_node.process_message_rep) > old_count_mrp

    # Primary node should have received `MessageReq` and other nodes shouldn't
    recv_reqs = set()
    for n in other_non_primary_nodes:
        if get_count(n, n.process_message_req) > old_count_mrq[n.name]:
            recv_reqs.add(n.name)

    assert get_count(primary_node, primary_node.process_message_req) > \
        old_count_mrq[primary_node.name]
    assert len(recv_reqs) == 0

    # All nodes including the `slow_node` ordered the same requests
    assert check_if_all_equal_in_list(
        [n.master_replica.ordered for n in txnPoolNodeSet])
def testOneNodeAltersAClientRequest(looper, txnPoolNodeSet, evilAlpha,
                                    sdk_pool_handle, sdk_wallet_client):
    """Malicious Alpha node sends an incorrect propagate. This test checks that
    nodes raise InsufficientCorrectSignatures when validating this propagate"""
    alpha = txnPoolNodeSet[0]
    goodNodes = list(txnPoolNodeSet)
    goodNodes.remove(alpha)
    # delay incoming messages for good nodes
    # this gives Alpha a chance to send a propagate message
    for n in goodNodes:  # type: TestNode
        n.nodeIbStasher.delay(ppDelay(sys.maxsize))
        n.nodeIbStasher.delay(req_delay(sys.maxsize))
    pastNodes = []

    request_couple_json = sdk_send_random_requests(looper, sdk_pool_handle,
                                                   sdk_wallet_client, 1)
    sent1 = sdk_json_to_request_object(request_couple_json[0][0])
    checkPropagated(looper, txnPoolNodeSet, sent1, faultyNodes)

    def check():
        for node in goodNodes:
            if node not in pastNodes:
                # ensure the nodes are suspicious of Alpha
                params = node.spylog.getLastParams(TestNode.reportSuspiciousNode)
                frm = params["nodeName"]
                reason = params["reason"]
                assert frm == 'Alpha'
                assert reason == InsufficientCorrectSignatures.reason.format(0, 1)

                # ensure Alpha's propagates were ignored by the other nodes
                key = sent1.digest
                props = node.requests[key].propagates
                assert 'Alpha' not in props
                for good in goodNodes:
                    assert good.name in props
                pastNodes.append(node)

        for node in goodNodes:
            node.nodeIbStasher.resetDelays()

    timeout = waits.expectedClientRequestPropagationTime(len(txnPoolNodeSet))
    looper.run(eventually(check, retryWait=1, timeout=timeout))
def test_freeing_forwarded_not_preprepared_request(
        looper, chkFreqPatched, reqs_for_checkpoint, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_steward, tconf, tdir, allPluginsPath):
    behind_node = txnPoolNodeSet[-1]
    behind_node.requests.clear()

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_steward,
                                         CHK_FREQ, CHK_FREQ)
    with delay_rules(behind_node.nodeIbStasher,
                     ppDelay(delay=sys.maxsize),
                     pDelay(delay=sys.maxsize),
                     cDelay(delay=sys.maxsize)):
        count = behind_node.spylog.count(behind_node.allLedgersCaughtUp)
        sdk_send_batches_of_random(looper, txnPoolNodeSet,
                                   sdk_pool_handle, sdk_wallet_steward,
                                   req_num, req_num)
        looper.run(eventually(node_caughtup, behind_node, count, retryWait=1))

    # We execute caughtup requests
    assert len(behind_node.requests) == req_num
    assert all(r.executed for r in behind_node.requests.values()
               if behind_node.seqNoDB.get(r.request.key)[1])
def test_view_not_changed(looper, txnPoolNodeSet, sdk_pool_handle,
                          sdk_wallet_client):
    """
    Test that a view change is not done when the performance of master
    does not go down
    """
    """
    Send multiple requests to the client and delay some requests by all
    backup instances to ensure master instance is always faster than backup
    instances and there is no view change
    """
    # Delay PRE-PREPARE for all backup protocol instances so master performs
    # better
    for i in range(1, F + 1):
        nonPrimReps = getNonPrimaryReplicas(txnPoolNodeSet, i)  # type: Iterable[TestReplica]
        for r in nonPrimReps:
            r.node.nodeIbStasher.delay(ppDelay(10, i))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=0)
def testViewNotChanged(looper: Looper, nodeSet: TestNodeSet, up,
                       wallet1, client1):
    """
    Test that a view change is not done when the performance of master
    does not go down
    """
    """
    Send multiple requests to the client and delay some requests by all
    backup instances to ensure master instance is always faster than backup
    instances and there is no view change
    """
    # Delay PRE-PREPARE for all backup protocol instances so master performs
    # better
    for i in range(1, F + 1):
        nonPrimReps = getNonPrimaryReplicas(nodeSet, i)  # type: Iterable[TestReplica]
        for r in nonPrimReps:
            r.node.nodeIbStasher.delay(ppDelay(10, i))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)

    checkViewNoForNodes(nodeSet, 0)
def testElectionsAfterViewChange(delayedPerf, looper: Looper,
                                 nodeSet: TestNodeSet, up, wallet1, client1):
    """
    Test that a primary election does happen after a view change
    """
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls and view changes
    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelay(10, 0))
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4)

    # Ensure view change happened for both node and its primary elector
    for node in nodeSet:
        looper.run(eventually(partial(checkViewChangeInitiatedForNode, node, 1),
                              retryWait=1, timeout=20))

    # Ensure elections are done again and pool is setup again with appropriate
    # protocol instances and each protocol instance is setup properly too
    checkProtocolInstanceSetup(looper, nodeSet, retryWait=1, timeout=30)
def slow_non_primary(nodes, inst_id=0, delay=5):
    # make a non-primary replica slow to receive PRE-PREPAREs
    npr = getNonPrimaryReplicas(nodes, inst_id)[0]
    slow_node = npr.node
    slow_node.nodeIbStasher.delay(ppDelay(delay, inst_id))
    return npr
def test_node_request_preprepare(looper, txnPoolNodeSet,
                                 sdk_wallet_client, sdk_pool_handle,
                                 teardown):
    """
    Node requests PRE-PREPARE only once after getting PREPAREs.
    """
    slow_node, other_nodes, primary_node, \
        other_primary_nodes = split_nodes(txnPoolNodeSet)
    # Drop PrePrepares and Prepares
    slow_node.nodeIbStasher.delay(ppDelay(300, 0))
    slow_node.nodeIbStasher.delay(pDelay(300, 0))

    sdk_send_batches_of_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle, sdk_wallet_client,
                                         num_reqs=10, num_batches=5)
    slow_node.nodeIbStasher.drop_delayeds()
    slow_node.nodeIbStasher.resetDelays()

    old_count_req = count_requested_preprepare_req(slow_node)
    old_count_resp = count_requested_preprepare_resp(slow_node)

    def chk(increase=True):
        # Method is called
        assert count_requested_preprepare_req(slow_node) > old_count_req
        # Requesting Preprepare
        assert count_requested_preprepare_resp(
            slow_node) - old_count_resp == (1 if increase else 0)

    for pp in primary_node.master_replica.sentPrePrepares.values():
        for rep in [n.master_replica for n in other_primary_nodes]:
            prepare = Prepare(rep.instId,
                              pp.viewNo,
                              pp.ppSeqNo,
                              pp.ppTime,
                              pp.digest,
                              pp.stateRootHash,
                              pp.txnRootHash
                              )
            rep.send(prepare)

        looper.run(eventually(chk, True, retryWait=1))

        old_count_resp = count_requested_preprepare_resp(slow_node)

        prepare = Prepare(rep.instId,
                          pp.viewNo,
                          pp.ppSeqNo,
                          pp.ppTime,
                          pp.digest,
                          pp.stateRootHash,
                          pp.txnRootHash
                          )
        rep.send(prepare)

        looper.run(eventually(chk, False, retryWait=1))

        old_count_req = count_requested_preprepare_req(slow_node)
        old_count_resp = count_requested_preprepare_resp(slow_node)
def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up,
                                  wallet1, client1):
    """
    Test if every node queues 3 Phase requests (PRE-PREPARE, PREPARE and COMMIT)
    that come from a view which is greater than the current view
    """
    f = getMaxFailures(nodeCount)

    # Delay processing of instance change on a node
    nodeA = nodeSet.Alpha
    nodeA.nodeIbStasher.delay(icDelay(60))

    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's throughput falls and view changes
    ppDelayer = ppDelay(5, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelayer)

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 4,
                                        timeoutPerReq=5 * nodeCount)

    # Every node except Node A should have a view change
    for node in nodeSet:
        if node.name != nodeA.name:
            looper.run(eventually(
                partial(checkViewChangeInitiatedForNode, node, 1),
                retryWait=1, timeout=20))

    # Node A's view should not have changed yet
    with pytest.raises(AssertionError):
        looper.run(eventually(partial(
            checkViewChangeInitiatedForNode, nodeA, 1),
            retryWait=1, timeout=20))

    # NodeA should not have any pending 3 phase request for a later view
    for r in nodeA.replicas:  # type: TestReplica
        assert len(r.threePhaseMsgsForLaterView) == 0

    # Reset delays on incoming messages from all nodes
    for node in nodeSet:
        node.nodeIbStasher.nodelay(ppDelayer)

    # Send one more request
    sendRandomRequest(wallet1, client1)

    def checkPending3PhaseReqs():
        # Get all replicas that have their primary status decided
        reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None]
        # At least one replica should have its primary status decided
        assert len(reps) > 0
        for r in reps:  # type: TestReplica
            logger.debug("primary status for replica {} is {}"
                         .format(r, r.primaryNames))
            assert len(r.threePhaseMsgsForLaterView) > 0

    # NodeA should now have pending 3 phase requests for a later view
    looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=30))
def make_master_replica_lag(node):
    node.nodeIbStasher.delay(ppDelay(1200, 0))
    node.nodeIbStasher.delay(pDelay(1200, 0))
    node.nodeIbStasher.delay(cDelay(1200, 0))
def test_non_primary_recvs_3phase_message_outside_watermarks(
        chkFreqPatched, reqs_for_logsize, looper, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_client):
    """
    A node is slow in receiving PRE-PREPAREs and PREPAREs. A lot of requests
    are sent and the slow node has started receiving COMMITs outside of its
    watermarks and so stashes them. Also this node is slow in receiving
    CHECKPOINTs. So a catch-up does not occur on it.

    Then the slow node eventually receives the sent PRE-PREPAREs and PREPAREs
    and so orders the 3PC-batches between its watermarks. The other nodes
    discard the COMMITs from the slow node since they have already achieved
    stable checkpoints for these COMMITs.

    After that the slow node eventually receives the sent CHECKPOINTs from
    the other nodes and so stabilizes own completed checkpoints and updates
    its watermarks. A catch-up is not triggered because no received
    checkpoints are stashed. Since now the watermarks have been updated,
    the slow node processes 3PC-messages stashed earlier and its ledger
    becomes equal to the ledgers of the other nodes.
    """
    backupInstId = 1
    npr = getNonPrimaryReplicas(txnPoolNodeSet, backupInstId)

    slowReplica = npr[0]
    slowNode = slowReplica.node

    slowNode.nodeIbStasher.delay(ppDelay(300, backupInstId))
    slowNode.nodeIbStasher.delay(pDelay(300, backupInstId))
    slowNode.nodeIbStasher.delay(chk_delay(300))

    initialDomainLedgerSize = slowNode.domainLedger.size
    oldStashCount = slowReplica.spylog.count(
        TestReplica.stashOutsideWatermarks.__name__)

    # 1. Send requests more than fit between the watermarks on the slow node
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_for_logsize + 2)

    # Verify that the slow node stashes the batches outside of its watermarks
    newStashCount = slowReplica.spylog.count(
        TestReplica.stashOutsideWatermarks.__name__)
    assert newStashCount > oldStashCount

    oldDiscardCounts = discardCounts(
        [n.replicas[backupInstId] for n in txnPoolNodeSet if n != slowNode],
        'achieved stable checkpoint')

    # 2. Deliver the sent PREPREPAREs and PREPAREs to the slow node
    slowNode.nodeIbStasher.reset_delays_and_process_delayeds(PREPREPARE, PREPARE)

    # Verify that the slow node orders the 3PC-batches between its watermarks
    # but no more.
    looper.runFor(waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)))

    checkNodeDataForInequality(slowNode,
                               *[n for n in txnPoolNodeSet if n != slowNode])
    assert slowNode.domainLedger.size - initialDomainLedgerSize == reqs_for_logsize

    # Also verify that the other nodes discard the COMMITs from the slow node
    # since they have already achieved stable checkpoints for these COMMITs.
    counts = discardCounts(
        [n.replicas[backupInstId] for n in txnPoolNodeSet if n != slowNode],
        'achieved stable checkpoint')
    for nm, count in counts.items():
        assert count > oldDiscardCounts[nm]

    oldCatchupTimes = slowNode.spylog.count(Node.start_catchup)

    # 3. Deliver the sent CHECKPOINTs to the slow node
    slowNode.nodeIbStasher.reset_delays_and_process_delayeds(CHECKPOINT)

    # Verify that the slow node processes 3PC-messages stashed earlier and its
    # ledger becomes equal to the ledgers of the other nodes while a catch-up
    # is not made.
    waitNodeDataEquality(looper, slowNode,
                         *[n for n in txnPoolNodeSet if n != slowNode])
    assert slowNode.domainLedger.size - initialDomainLedgerSize == reqs_for_logsize + 2
    newCatchupTimes = slowNode.spylog.count(Node.start_catchup)
    assert newCatchupTimes == oldCatchupTimes
def delaysPrePrepareProcessing(node, delay: float = 30, instId: int = None):
    node.nodeIbStasher.delay(ppDelay(delay=delay, instId=instId))