def setup(looper, tconf, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    Prepare a pool whose master primary replica is slow to send PRE-PREPAREs.

    Orders a few requests, lowers the monitor thresholds on every node,
    delays the PRE-PREPAREs leaving the primary replica and then orders a
    few more requests under that delay.

    :return: adict holding the pool nodes (`nodes`) and the single view
        number all nodes shared before the delay was set (`old_view_no`)
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    primary_replica = getPrimaryReplica(txnPoolNodeSet)

    # LAMBDA is smaller than in the production config to make the test faster
    test_lambda = 10
    pp_delay = 2 * test_lambda

    # All the nodes are expected to be in one and the same view
    view_nos = {n.viewNo for n in txnPoolNodeSet}
    assert len(view_nos) == 1
    old_view_no = view_nos.pop()

    for node in txnPoolNodeSet:
        # `Delta` has to be small enough for the throughput check to pass
        node.monitor.Delta = .001
        node.monitor.Lambda = test_lambda
        for replica in node.replicas.values():
            replica.config.ACCEPTABLE_DEVIATION_PREPREPARE_SECS += pp_delay

    # Make the primary replica on master faulty, i.e. slow to send
    # PRE-PREPAREs
    def specificPrePrepare(msg):
        # Only PRE-PREPAREs are delayed (by just more than LAMBDA); for any
        # other message no delay is returned
        return pp_delay if isinstance(msg, PrePrepare) else None

    primary_replica.outBoxTestStasher.delay(specificPrePrepare)

    # TODO select or create a timeout for this case in 'waits'
    sdk_send_random_and_check(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 5,
        customTimeoutPerReq=tconf.TestRunningTimeLimitSec)

    return adict(nodes=txnPoolNodeSet, old_view_no=old_view_no)
def test_lagged_checkpoint_completion(chkFreqPatched, looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    A single node is late to order the last 3PC-batch of a checkpoint, so by
    the time it orders that batch and completes the checkpoint it has already
    received and stashed the matching checkpoint messages from all the other
    nodes. The test checks that the node then processes the stashed messages
    and stabilizes the checkpoint.
    """
    slow_node = txnPoolNodeSet[-1]
    cp_key = (1, 5)

    # Every node orders all the 3PC-batches of the checkpoint but the last
    # one normally. The last 3PC-batch is ordered by all the nodes except the
    # slow one, because Commits are delayed for it.
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 4)
    slow_node.nodeIbStasher.delay(cDelay())
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    # The other nodes complete the checkpoint and send out Checkpoint
    # messages. The slow node has not completed the checkpoint yet, so it
    # receives these messages and stashes them.
    def check():
        for rep in slow_node.replicas.values():
            assert len(rep.checkpoints) == 1
            assert cp_key in rep.checkpoints
            pending = rep.checkpoints[cp_key]
            assert pending.seqNo == 4
            assert pending.digest is None
            assert pending.isStable is False

            stashed = rep.stashedRecvdCheckpoints
            assert len(stashed) == 1
            assert 0 in stashed
            stashed_for_view = stashed[0]
            assert len(stashed_for_view) == 1
            assert cp_key in stashed_for_view
            assert len(stashed_for_view[cp_key]) == \
                len(txnPoolNodeSet) - 1

    node_count = len(txnPoolNodeSet)
    stabilization_timeout = waits.expectedTransactionExecutionTime(node_count)
    looper.run(eventually(check, timeout=stabilization_timeout))

    # Once the delay is lifted the slow node gets the Commits, orders the
    # last 3PC-batch of the checkpoint and so completes it, processes the
    # stashed checkpoint messages and stabilizes the checkpoint.
    slow_node.nodeIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(waits.expectedOrderingTime(len(txnPoolNodeSet)))

    for rep in slow_node.replicas.values():
        assert len(rep.checkpoints) == 1
        assert cp_key in rep.checkpoints
        completed = rep.checkpoints[cp_key]
        assert completed.seqNo == 5
        assert completed.digest is not None
        assert completed.isStable is True
        assert len(rep.stashedRecvdCheckpoints) == 0
def test_primary_receives_delayed_prepares(looper, txnPoolNodeSet,
                                           sdk_wallet_client,
                                           sdk_pool_handle):
    """
    PREPAREs coming to the master primary are delayed, so it gets its
    COMMITs before any PREPARE while requests are still ordered
    """
    prepare_delay = 50
    primary_node = get_master_primary_node(txnPoolNodeSet)
    non_primary_nodes = [node for node in txnPoolNodeSet
                         if node != primary_node]
    primary_node.nodeIbStasher.delay(pDelay(prepare_delay, 0))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=10)

    # Non-primary nodes went through all three phases
    for node in non_primary_nodes:
        replica = node.master_replica
        assert replica.prePrepares
        assert replica.prepares
        assert replica.commits

    # The primary has sent PRE-PREPAREs and has COMMITs but no PREPAREs
    primary_replica = primary_node.master_replica
    assert primary_replica.sentPrePrepares
    assert not primary_replica.prepares
    assert primary_replica.commits
def testNodeCatchupAfterDisconnect(sdk_new_node_caught_up, txnPoolNodeSet,
                                   sdk_node_set_with_node_added_after_some_txns):
    """
    A node that gets disconnected after some transactions should eventually
    receive the transactions that were ordered while it was disconnected
    """
    (looper, new_node,
     sdk_pool_handle, new_steward_wallet_handle) = \
        sdk_node_set_with_node_added_after_some_txns

    ledger_size = new_node.poolManager.txnSeqNo
    logger.debug("Disconnecting node {} with pool ledger size {}".
                 format(new_node, ledger_size))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, new_node,
                                            stopNode=False)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 5)

    # The disconnected node must have fallen behind the rest of the pool
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Connecting the stopped node, {}".format(new_node))
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)

    logger.debug("Waiting for the node to catch up, {}".format(new_node))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 10)
    checkNodeDataForEquality(new_node, *txnPoolNodeSet[:-1])
def test_non_primary_accepts_pre_prepare_time(looper, txnPoolNodeSet,
                                              sdk_wallet_client,
                                              sdk_pool_handle):
    """
    One of the non-primary replicas has an incorrect clock, so it considers
    the time of a PRE-PREPARE to be incorrect
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=2)

    # The last non-primary replica of instance 0 gets the bad clock
    confused_npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1]
    confused_node = confused_npr.node
    make_clock_faulty(confused_node)

    # Snapshot of the spied return values and suspicions before new requests
    old_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)
    old_susp_count = get_timestamp_suspicion_count(confused_node)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=2)

    # The faulty clock made the node raise at least one more suspicion
    assert get_timestamp_suspicion_count(confused_node) > old_susp_count

    new_acceptable_rvs = getAllReturnVals(
        confused_npr, confused_npr.is_pre_prepare_time_acceptable)

    # `is_pre_prepare_time_acceptable` first returned False then returned
    # True (presumably the spy lists the most recent return value first)
    expected_rvs = [True, False, *old_acceptable_rvs]
    assert expected_rvs == new_acceptable_rvs
def testNodeDoesNotParticipateUntilCaughtUp(txnPoolNodeSet,
                                            nodes_slow_to_process_catchup_reqs,
                                            sdk_node_created_after_some_txns):
    """
    A new node that joins after some transactions should stash new
    transactions until it has caught up
    """
    (looper, new_node,
     sdk_pool_handle, new_steward_wallet_handle) = \
        sdk_node_created_after_some_txns

    # The new node becomes the last member of the pool
    txnPoolNodeSet.append(new_node)
    old_nodes = txnPoolNodeSet[:-1]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 5)
    chk_commits_prepares_recvd(0, old_nodes, new_node)

    for old_node in old_nodes:
        old_node.reset_delays_and_process_delayeds()

    pool_size = len(txnPoolNodeSet)
    timeout = waits.expectedPoolCatchupTime(pool_size) + \
        catchup_delay + \
        waits.expectedPoolElectionTimeout(pool_size)
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=timeout)
    waitNodeDataEquality(looper, new_node, *old_nodes)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 2)

    # Commits and Prepares are received by all old nodes, so the same check
    # with a zero expected count has to fail now
    with pytest.raises(AssertionError):
        # Since nodes discard 3PC messages for already ordered requests.
        chk_commits_prepares_recvd(0, old_nodes, new_node)
    waitNodeDataEquality(looper, new_node, *old_nodes)
def test_not_check_if_no_new_requests(perf_chk_patched, looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    A node must not repeat the performance check while no new requests have
    arrived since the previous check
    """
    # Nodes have to be participating, otherwise they do no check at all
    for node in txnPoolNodeSet:
        assert node.isParticipating

    # The first performance check gives a result, the following ones do not
    for node in txnPoolNodeSet:
        assert node.checkPerformance() is not None
        for _ in range(3):
            assert node.checkPerformance() is None

    # A new request makes the nodes able to do the performance check again
    num_requests = 1
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, num_requests)
    for node in txnPoolNodeSet:
        assert node.checkPerformance() is not None
def test_ordering_with_nodes_have_not_bls_key_proofs(looper,
                                                     txnPoolNodeSet,
                                                     sdk_pool_handle,
                                                     sdk_wallet_stewards,
                                                     sdk_wallet_client,
                                                     monkeypatch,
                                                     validate_bls_signature_without_key_proof):
    '''
    BLS keys without BLS key proofs are set for all the nodes.
    With VALIDATE_BLS_SIGNATURE_WITHOUT_KEY_PROOF = False a node does not use
    a key sent without proof and a transaction can not be ordered.
    With VALIDATE_BLS_SIGNATURE_WITHOUT_KEY_PROOF = True a transaction is
    ordered successfully.
    '''
    # Static validation is bypassed only while the keys are being updated
    for pool_node in txnPoolNodeSet:
        monkeypatch.setattr(pool_node.poolManager.reqHandler,
                            'doStaticValidation',
                            lambda req: True)
    for node_index in range(len(txnPoolNodeSet)):
        update_bls_keys_no_proof(node_index, sdk_wallet_stewards,
                                 sdk_pool_handle, looper, txnPoolNodeSet)
    monkeypatch.undo()

    steward_wallet = sdk_wallet_stewards[3]
    if validate_bls_signature_without_key_proof:
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, steward_wallet, 1)
        return

    # Keys without proofs are not used, so the request times out
    with pytest.raises(PoolLedgerTimeoutException):
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, steward_wallet, 1)
def test_restart_majority_to_same_view(looper, txnPoolNodeSet, tconf, tdir,
                                       allPluginsPath, sdk_pool_handle,
                                       sdk_wallet_client):
    """
    Restart the first three nodes at once, check which nodes reported a
    possibly inconsistent 3PC state, then restart the remaining nodes and
    make sure the pool is still functional
    """
    def inconsistent_state_reports(node):
        # How many times the node was told its 3PC state may be inconsistent
        return node.spylog.count(node.on_inconsistent_3pc_state)

    # Add transaction to ledger
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    majority = txnPoolNodeSet[:3]
    minority = txnPoolNodeSet[3:]

    # Restart majority group
    tm = tconf.ToleratePrimaryDisconnection + \
        waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    majority_before_restart = list(majority)
    restart_nodes(looper, txnPoolNodeSet, majority, tconf, tdir,
                  allPluginsPath,
                  after_restart_timeout=tm,
                  start_one_by_one=False,
                  wait_for_elections=False)
    ensureElectionsDone(looper, majority, instances_list=range(2))

    # Nodes in minority group are aware that they might have inconsistent
    # 3PC state
    for node in minority:
        assert inconsistent_state_reports(node) == 1

    # Nodes in majority group didn't think they might have inconsistent
    # 3PC state before the restart
    for node in majority_before_restart:
        assert inconsistent_state_reports(node) == 0

    # Nodes in majority group don't think they might have inconsistent
    # 3PC state after the restart
    for node in majority:
        assert inconsistent_state_reports(node) == 0

    # Restart minority group
    restart_nodes(looper, txnPoolNodeSet, minority, tconf, tdir,
                  allPluginsPath,
                  after_restart_timeout=tm,
                  start_one_by_one=False)

    # Check that all nodes are still functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet,
                               sdk_wallet_client, sdk_pool_handle)
def test_primary_recvs_3phase_message_outside_watermarks(perf_chk_patched,
                                                         chkFreqPatched,
                                                         looper,
                                                         txnPoolNodeSet,
                                                         sdk_pool_handle,
                                                         sdk_wallet_client,
                                                         reqs_for_logsize):
    """
    The primary gets more requests than its log size and has to queue them,
    since they go beyond its watermarks. This happens because the other
    nodes are slow to process its PRE-PREPAREs. Eventually the primary sends
    PRE-PREPAREs for all the requests and the requests complete.
    """
    tconf = perf_chk_patched
    delay = 2
    instId = 0
    reqs_to_send = 2 * reqs_for_logsize + 1
    logger.debug('Will send {} requests'.format(reqs_to_send))

    non_primaries = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    primary = getPrimaryReplica(txnPoolNodeSet, instId)
    ordered_before = primary.stats.get(TPCStat.OrderSent)

    # Slow down PRE-PREPAREs and PREPAREs coming to non-primary nodes
    for replica in non_primaries:
        stasher = replica.node.nodeIbStasher
        stasher.delay(ppDelay(delay, instId))
        stasher.delay(pDelay(delay, instId))

    tm_exec_1_batch = \
        waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize)
    total_timeout = (tm_exec_1_batch + delay) * batch_count

    def chk():
        # The primary has ordered exactly `batch_count` more batches
        assert ordered_before + batch_count == \
            primary.stats.get(TPCStat.OrderSent)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_to_send)
    looper.run(eventually(chk, retryWait=1, timeout=total_timeout))
def test_view_change_with_different_prepare_certificate(looper, txnPoolNodeSet,
                                                        sdk_pool_handle,
                                                        sdk_wallet_client):
    """
    Check that a node without pre-prepare but with quorum of prepares wouldn't
    use this transaction as a last in prepare certificate
    """
    # Order one request normally so that every node has a last ordered 3PC key
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1)
    slow_node = txnPoolNodeSet[-1]
    # delay preprepares and message response with preprepares.
    # NOTE(review): the source was whitespace-collapsed; everything below is
    # assumed to run while both delay rules are active - confirm the scope.
    with delay_rules(slow_node.nodeIbStasher, ppDelay(delay=sys.maxsize)):
        with delay_rules(slow_node.nodeIbStasher,
                         msg_rep_delay(delay=sys.maxsize,
                                       types_to_delay=[PREPREPARE, ])):
            # Remember what the slow node ordered before the new request
            last_ordered = slow_node.master_replica.last_ordered_3pc
            sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
            # Wait until all nodes but the slow one pass
            # `check_prepare_certificate` for the next sequence number
            looper.run(eventually(check_prepare_certificate,
                                  txnPoolNodeSet[0:-1],
                                  last_ordered[1] + 1))

            # Report master degradation to the view changer of every node
            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            # The slow node never got the PRE-PREPARE, so its prepared
            # certificate must still point to the previously ordered batch
            assert slow_node.master_replica.last_prepared_certificate_in_view() == \
                (0, last_ordered[1])
            ensureElectionsDone(looper, txnPoolNodeSet)
def test_get_last_ordered_timestamp_after_catchup(looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward,
                                                  tconf,
                                                  tdir,
                                                  allPluginsPath):
    """
    After a node is restarted and has caught up, the last timestamp it reads
    from the domain state equals the time of the transaction ordered while
    it was down and differs from the time of the transaction ordered before
    """
    lagging_node = txnPoolNodeSet[-1]

    # Transaction ordered while the node is still a part of the pool
    replies = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                        sdk_pool_handle,
                                        sdk_wallet_steward, 1)
    reply_before = replies[0][1]
    looper.runFor(2)

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            lagging_node)
    looper.removeProdable(name=lagging_node.name)

    # Transaction ordered while the node is down
    replies = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                        sdk_pool_handle,
                                        sdk_wallet_steward, 1)
    reply = replies[0][1]

    lagging_node = start_stopped_node(lagging_node, looper, tconf,
                                      tdir, allPluginsPath)
    txnPoolNodeSet[-1] = lagging_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, lagging_node, *txnPoolNodeSet[:-1])

    master = lagging_node.master_replica
    ts_from_state = master._get_last_timestamp_from_state(DOMAIN_LEDGER_ID)
    assert ts_from_state == get_txn_time(reply['result'])
    assert ts_from_state != get_txn_time(reply_before['result'])
def test_delay_commits(txnPoolNodeSet, looper,
                       sdk_pool_handle,
                       sdk_wallet_client,
                       tconf):
    """
    #3
    Test case:

    disable normal view change to make tests deterministic
    delay commits for all nodes except node X
    send request
    check ordered transaction in node X
    start view_change
    check end of view change for all nodes
    switch off commits' delay
    get reply (means that request was ordered in all nodes)
    repeat
    Expected result with correct view change:
    transactions should be ordered normally
    Expected result with current view change:
    node X can't finish second transaction
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    # Node X is the last node; stashers of all the other nodes are collected
    node_x = txnPoolNodeSet[-1]
    nodes_stashers = [node.nodeIbStasher
                      for node in txnPoolNodeSet
                      if node != node_x]

    # The scenario is run twice in a row
    for _ in range(2):
        do_view_change_with_delayed_commits_on_all_but_one(
            txnPoolNodeSet,
            nodes_stashers,
            txnPoolNodeSet[-1],
            looper,
            sdk_pool_handle,
            sdk_wallet_client)
def test_new_node_accepts_chosen_primary(
        txnPoolNodeSet, sdk_node_set_with_node_added_after_some_txns):
    """
    A node added after some transactions accepts the primary already chosen
    by the pool: its view changer flags show that the propagated view change
    is completed, and the pool keeps ordering requests afterwards.
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns

    # The view number was passed to `format` but the message had no
    # placeholder for it, so it was silently dropped from the log
    logger.debug("Ensure nodes data equality, "
                 "viewNo: {}".format(txnPoolNodeSet[0].viewNo))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    # here we must have view_no = 4
    #  - current primary is Alpha (based on node registry before new node joined)
    #  - but new node expects itself as primary basing
    #    on updated node registry
    # -> new node doesn't verify current primary
    assert not new_node.view_changer._primary_verified
    # -> new node haven't received ViewChangeDone from the expected primary
    #    (self VCHD message is registered when node sends it, not the case
    #    for primary propagate logic)
    assert not new_node.view_changer.has_view_change_from_primary
    # -> BUT new node understands that no view change actually happens
    assert new_node.view_changer._is_propagated_view_change_completed

    logger.debug("Send requests to ensure that pool is working properly, "
                 "viewNo: {}".format(txnPoolNodeSet[0].viewNo))
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 3)

    logger.debug("Ensure nodes data equality, "
                 "viewNo: {}".format(txnPoolNodeSet[0].viewNo))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def test_resend_instance_change_messages(looper,
                                         txnPoolNodeSet,
                                         tconf,
                                         sdk_wallet_steward,
                                         sdk_pool_handle):
    """
    With incoming instance change messages delayed on every node, the
    disconnection of the primary makes the remaining nodes go through one
    more instance change round before they move to the next view
    """
    def instance_change_rounds():
        # Distinct instance change round counters over the current node set
        return {node.view_changer.instance_change_rounds
                for node in txnPoolNodeSet}

    ic_timeout = tconf.INSTANCE_CHANGE_TIMEOUT

    primary_node = txnPoolNodeSet[0]
    old_view_no = checkViewNoForNodes(txnPoolNodeSet, 0)
    assert primary_node.master_replica.isPrimary

    for node in txnPoolNodeSet:
        node.nodeIbStasher.delay(icDelay(3 * ic_timeout))
    assert instance_change_rounds() == {0}

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            primary_node, stopNode=False)
    txnPoolNodeSet.remove(primary_node)
    connected_check = partial(check_count_connected_node, txnPoolNodeSet, 4)
    looper.run(eventually(connected_check,
                          timeout=5,
                          acceptableExceptions=[AssertionError]))

    looper.runFor(2 * ic_timeout)
    assert instance_change_rounds() == {1}

    looper.runFor(ic_timeout)
    view_check = partial(checkViewNoForNodes, txnPoolNodeSet,
                         expectedViewNo=old_view_no + 1)
    looper.run(eventually(view_check, timeout=tconf.VIEW_CHANGE_TIMEOUT))

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_not_set_H_as_maxsize_for_backup_if_is_primary(looper,
                                                       txnPoolNodeSet,
                                                       sdk_pool_handle,
                                                       sdk_wallet_steward,
                                                       tconf,
                                                       tdir,
                                                       allPluginsPath):
    """
    The node holding the primary of the backup instance is restarted while
    the pool orders LOG_SIZE requests; after the restart its backup replica
    is primary again and its watermarks are (0, LOG_SIZE)
    """
    backup_inst_id = 1
    node_idx = 2

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)

    primary_on_backup = txnPoolNodeSet[node_idx]
    assert primary_on_backup.replicas._replicas[backup_inst_id].isPrimary

    # Stop the node and order requests without it
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            primary_on_backup, stopNode=True)
    looper.removeProdable(primary_on_backup)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, LOG_SIZE)

    restarted_node = start_stopped_node(primary_on_backup, looper, tconf,
                                        tdir, allPluginsPath)
    txnPoolNodeSet[node_idx] = restarted_node
    ensureElectionsDone(looper, txnPoolNodeSet,
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)

    backup_replica = restarted_node.replicas._replicas[backup_inst_id]
    assert backup_replica.isPrimary
    assert backup_replica.h == 0
    assert backup_replica.H == LOG_SIZE
def test_propagate_of_ordered_request_doesnt_stash_requests_in_authenticator(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    PROPAGATEs that get delivered after the request has been ordered must
    not leave verified requests in the client authenticator of any node
    """
    # Universal delayer: the same big delay for any message
    def stopAll(msg):
        return 100000

    def propagate_counts():
        return [node.spylog.count('processPropagate')
                for node in txnPoolNodeSet]

    def check_verified_req_list_is_empty():
        for node in txnPoolNodeSet:
            assert len(node.clientAuthNr._verified_reqs) == 0

    # Order one request while cutting off last node
    cut_off_node = txnPoolNodeSet[-1]
    with delay_rules(cut_off_node.nodeIbStasher, stopAll), \
            delay_rules(cut_off_node.clientIbStasher, stopAll):
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, 1)
        old_propagates = propagate_counts()

    def check_more_propagates_delivered():
        new_propagates = propagate_counts()
        assert all(before < after
                   for before, after in zip(old_propagates, new_propagates))

    # Wait until more propagates are delivered to all nodes
    looper.run(eventually(check_more_propagates_delivered))

    # Make sure that verified req list will be empty eventually
    looper.run(eventually(check_verified_req_list_is_empty))
def test_replicas_prepare_time(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Check that the time carried by the PREPAREs a replica received, the
    replica's last accepted PRE-PREPARE time and (for the master replica) the
    time stored with ledger transactions all match the PRE-PREPARE time.

    NOTE(review): the source was whitespace-collapsed; the nesting below was
    reconstructed from the variables each statement uses - confirm.
    """
    # Check that each replica's PREPARE time is same as the PRE-PREPARE time
    sent_batches = 5
    for i in range(sent_batches):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, count=2)
        looper.runFor(1)

    for node in txnPoolNodeSet:
        for r in node.replicas.values():
            # Group the received PREPAREs by their (viewNo, ppSeqNo) key
            rec_prps = defaultdict(list)
            for p in recvd_prepares(r):
                rec_prps[(p.viewNo, p.ppSeqNo)].append(p)
            # A primary keeps the PRE-PREPAREs it sent, a non-primary keeps
            # the ones it holds in `prePrepares`
            pp_coll = r.sentPrePrepares if r.isPrimary else r.prePrepares
            for key, pp in pp_coll.items():
                for p in rec_prps[key]:
                    assert pp.ppTime == p.ppTime

            # `last_accepted_pre_prepare_time` is the time of the last PRE-PREPARE
            assert r.last_accepted_pre_prepare_time == pp_coll.peekitem(-1)[
                1].ppTime

            # The ledger should store time for each txn and it should be same
            # as the time for that PRE-PREPARE
            if r.isMaster:
                for iv in node.txn_seq_range_to_3phase_key[DOMAIN_LEDGER_ID]:
                    # Each interval's data is used as the key into `pp_coll`
                    three_pc_key = iv.data
                    for seq_no in range(iv.begin, iv.end):
                        assert get_txn_time(node.domainLedger.getBySeqNo(seq_no))\
                            == pp_coll[three_pc_key].ppTime
def testPostingThroughput(postingStatsEnabled,
                          decreasedMonitoringTimeouts,
                          looper,
                          txnPoolNodeSet,
                          sdk_wallet_client, sdk_pool_handle):
    """
    After some requests are ordered every node's monitor reports a positive
    high resolution throughput and the right total request count, and after
    one more dashboard update period `sendThroughput` has been called on
    every monitor.
    """
    config = decreasedMonitoringTimeouts
    reqCount = 10
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              reqCount)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    looper.runFor(WIND_SIZE * MIN_CNT)

    for node in txnPoolNodeSet:
        assert node.monitor.highResThroughput > 0
        assert node.monitor.totalRequests == reqCount
        # TODO: Add implementation to actually call firebase plugin
        # and test if firebase plugin is sending total request count
        # if node is primary

    looper.runFor(config.DashboardUpdateFreq)

    for node in txnPoolNodeSet:
        # The comparison used to be a bare expression statement, so it was
        # evaluated and thrown away; it has to be asserted to check anything
        assert node.monitor.spylog.count(Monitor.sendThroughput.__name__) > 0
def test_restart_clientstack_before_reply_on_3_of_4_nodes(looper,
                                                          txnPoolNodeSet,
                                                          sdk_pool_handle,
                                                          sdk_wallet_steward):
    """
    The first three nodes restart their client stack right before sending a
    reply to the client; a request must still be ordered and replied to.
    """
    orig_send_reply = TestNode.sendReplyToClient

    def send_after_restart(self, reply, reqKey):
        # Restart the client stack first, then send the reply as usual
        self.restart_clientstack()
        orig_send_reply(self, reply, reqKey)

    def patch_sendReplyToClient():
        for node in txnPoolNodeSet[:3]:
            node.sendReplyToClient = types.MethodType(send_after_restart,
                                                      node)

    def revert_origin_back():
        for node in txnPoolNodeSet:
            node.sendReplyToClient = types.MethodType(orig_send_reply, node)

    patch_sendReplyToClient()
    try:
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_steward, 1)
    finally:
        # Restore the original method even when the check above fails, so
        # the patched nodes do not leak into whatever uses them next
        revert_origin_back()
def test_view_change_after_some_txns(txnPoolNodesLooper, txnPoolNodeSet,
                                     some_txns_done, testNodeClass, viewNo,  # noqa
                                     sdk_pool_handle, sdk_wallet_client,
                                     node_config_helper_class, tconf, tdir,
                                     allPluginsPath, tmpdir_factory):
    """
    Do a view change after some txns were processed, order more requests,
    then stop all the nodes and create and check a replayable node for each
    of them
    """
    pool_looper = txnPoolNodesLooper

    ensure_view_change(pool_looper, txnPoolNodeSet)
    ensureElectionsDone(looper=pool_looper, nodes=txnPoolNodeSet)
    ensure_all_nodes_have_same_data(pool_looper, nodes=txnPoolNodeSet)

    sdk_send_random_and_check(pool_looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 10)
    ensure_all_nodes_have_same_data(pool_looper, txnPoolNodeSet)

    # Take every node out of the looper and stop it before replaying
    for running_node in txnPoolNodeSet:
        pool_looper.removeProdable(running_node)
        running_node.stop()

    config = getConfigOnce()
    reload_modules_for_replay(tconf)
    replayable_node_class, basedirpath = get_replayable_node_class(
        tmpdir_factory, tdir, testNodeClass, config)

    print('-------------Replaying now---------------------')

    for stopped_node in txnPoolNodeSet:
        create_replayable_node_and_check(pool_looper, txnPoolNodeSet,
                                         stopped_node, replayable_node_class,
                                         node_config_helper_class, tconf,
                                         basedirpath, allPluginsPath)
def some_txns_done(tconf, txnPoolNodesLooper, txnPoolNodeSet, sdk_pool_handle,
                   sdk_wallet_steward):
    """
    Add ceil(TOTAL_TXNS / 2) new NYMs with random aliases and then send
    floor(TOTAL_TXNS / 2) groups of 5 random requests each
    """
    nym_count = math.ceil(TOTAL_TXNS / 2)
    for _ in range(nym_count):
        alias = 'testSteward' + randomString(100)
        sdk_add_new_nym(txnPoolNodesLooper, sdk_pool_handle,
                        sdk_wallet_steward, alias=alias)

    request_group_count = math.floor(TOTAL_TXNS / 2)
    for _ in range(request_group_count):
        sdk_send_random_and_check(txnPoolNodesLooper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_steward, 5)
def test3PCOverBatchWithThresholdReqs(tconf, looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    3 phase commit happens when exactly the threshold number of requests
    (`Max3PCBatchSize`) is received and propagated
    """
    threshold_reqs = tconf.Max3PCBatchSize
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, threshold_reqs)
def testInstChangeWithLowerRatioThanDelta(looper, step3, sdk_pool_handle,
                                          sdk_wallet_client):
    """
    Order some requests on the nodes prepared by `step3`, wait for the next
    performance check on every node, then provoke a view change and wait for
    view 1
    """
    pool_nodes = step3.nodes
    sdk_send_random_and_check(looper, pool_nodes, sdk_pool_handle,
                              sdk_wallet_client, 9)

    # wait for every node to run another checkPerformance
    waitForNextPerfCheck(looper, step3.nodes, step3.perfChecks)

    expected_view_no = 1
    provoke_and_wait_for_view_change(looper, step3.nodes, expected_view_no,
                                     sdk_pool_handle, sdk_wallet_client)
def test3PCOverBatchWithLessThanThresholdReqs(tconf, looper, txnPoolNodeSet,
                                              sdk_wallet_client,
                                              sdk_pool_handle):
    """
    3 phase commit happens when less than the threshold number of requests
    is received but the threshold time has passed
    """
    below_threshold_reqs = tconf.Max3PCBatchSize - 1
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, below_threshold_reqs)
def test_nodes_with_bad_clock(tconf, looper, txnPoolNodeSet,
                              sdk_wallet_client, sdk_pool_handle):
    """
    All nodes have bad clocks but they eventually get repaired, an example
    of nodes being cut off from NTP server for some time or NTP sync disabled
    then without node restart NTP sync enabled
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=Max3PCBatchSize * 3)

    # Per node snapshot taken before the clocks are broken
    ledger_sizes = {}
    susp_counts = {}
    for node in txnPoolNodeSet:
        ledger_sizes[node.name] = node.domainLedger.size
    for node in txnPoolNodeSet:
        susp_counts[node.name] = get_timestamp_suspicion_count(node)

    # Every clock is slow by more than the acceptable deviation
    for node in txnPoolNodeSet:
        slow_by = node.config.ACCEPTABLE_DEVIATION_PREPREPARE_SECS + \
            randint(5, 15)
        make_clock_faulty(node,
                          clock_slow_by_sec=slow_by,
                          ppr_always_wrong=False)

    for _ in range(5):
        sdk_send_random_request(looper, sdk_pool_handle, sdk_wallet_client)
        looper.runFor(.2)

    # Let some time pass
    looper.runFor(3)

    def chk():
        for node in txnPoolNodeSet:
            # Each node raises suspicion
            assert get_timestamp_suspicion_count(node) > \
                susp_counts[node.name]
            # Ledger does not change
            assert node.domainLedger.size == ledger_sizes[node.name]

    looper.run(eventually(chk, retryWait=1))

    # Fix clocks: every node gets a `utc_epoch` bound method that returns
    # `get_utc_epoch()`
    def utc_epoch(self) -> int:
        return get_utc_epoch()

    for node in txnPoolNodeSet:
        node.utc_epoch = types.MethodType(utc_epoch, node)

    # Let some more time pass
    looper.runFor(3)

    # All nodes reply
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, count=Max3PCBatchSize * 2)
def test_ledger_status_after_txn_ordered(looper, txnPoolNodeSet,
                                         sdk_wallet_client, sdk_pool_handle):
    """
    After one request is ordered the ledger statuses are checked against a
    last ordered 3PC key that is set for the Domain ledger only
    """
    # we expect last ordered 3PC is not None for Domain ledger only,
    # as there is a txn added to Domain ledger
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nothing_ordered = (None, None)
    domain_3pc = txnPoolNodeSet[0].master_last_ordered_3PC
    check_ledger_statuses(txnPoolNodeSet,
                          pool_last_ordered_3pc=nothing_ordered,
                          domain_last_ordered_3pc=domain_3pc,
                          config_last_ordered_3pc=nothing_ordered)
def test_node_request_propagates(looper, setup, txnPoolNodeSet,
                                 sdk_wallet_client, sdk_pool_handle):
    """
    One of the nodes lacks sufficient propagates
    """
    faulty_node, recv_client_requests = setup

    def sum_of_sent_batches():
        # Last PRE-PREPARE sequence numbers of replicas 0 and 1 added up
        return sum(faulty_node.replicas[inst_id].lastPrePrepareSeqNo
                   for inst_id in (0, 1))

    # Spy counters and batch numbers before any request is sent
    old_count_recv_ppg = get_count(faulty_node, faulty_node.processPropagate)
    old_count_recv_req = get_count(faulty_node, faulty_node.processRequest)
    old_count_request_propagates = get_count(
        faulty_node, faulty_node.request_propagates)
    old_sum_of_sent_batches = sum_of_sent_batches()

    sent_reqs = 5
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, sent_reqs)

    assert get_count(
        faulty_node, faulty_node.processPropagate) > old_count_recv_ppg

    new_count_recv_req = get_count(faulty_node, faulty_node.processRequest)
    if recv_client_requests:
        assert new_count_recv_req > old_count_recv_req
    else:
        assert new_count_recv_req == old_count_recv_req

    # Attempt to request PROPAGATEs was made as many number of times as the
    # number of sent batches in both replicas since both replicas
    # independently request PROPAGATEs
    request_attempts = get_count(faulty_node,
                                 faulty_node.request_propagates) - \
        old_count_request_propagates
    assert request_attempts == (sum_of_sent_batches() -
                                old_sum_of_sent_batches)

    requested_propagate_counts = getAllReturnVals(
        faulty_node, faulty_node.request_propagates)

    # The last attempt to request PROPAGATEs was not successful
    assert requested_propagate_counts[0] == 0
    # The first attempt to request PROPAGATEs was successful as PROPAGATEs
    # were requested for all nodes
    assert requested_propagate_counts[1] == sent_reqs

    faulty_node.nodeIbStasher.reset_delays_and_process_delayeds()
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle, num_reqs=4)
def changeNodeHa(looper, txnPoolNodeSet, tconf, shouldBePrimary, tdir,
                 sdk_pool_handle, sdk_wallet_stewards, sdk_wallet_client):
    """
    Change the HA of the node and client stacks of one pool node and check
    that the pool stays functional.

    The first node whose `has_master_primary` equals `shouldBePrimary` is
    chosen. Its new HAs are written to the pool by the steward with the same
    index, the node is stopped and started again on the new HAs, and finally
    the pool is checked to connect, elect primaries and order requests.
    The chosen slot of `txnPoolNodeSet` is replaced with the restarted node.

    :raises ValueError: if no node in `txnPoolNodeSet` matches
        `shouldBePrimary`
    """
    # Pick the node explicitly instead of relying on a loop variable leaking
    # out of a `for` loop, and fail clearly when nothing matches
    node_index = next(
        (idx for idx, n in enumerate(txnPoolNodeSet)
         if shouldBePrimary == n.has_master_primary),
        None)
    if node_index is None:
        raise ValueError(
            "no node with has_master_primary == {}".format(shouldBePrimary))
    subjectedNode = txnPoolNodeSet[node_index]

    # prepare new ha for node and client stack
    nodeStackNewHA, clientStackNewHA = genHa(2)
    logger.debug("change HA for node: {} to {}".format(
        subjectedNode.name, (nodeStackNewHA, clientStackNewHA)))

    # change HA
    sdk_wallet_steward = sdk_wallet_stewards[node_index]
    node_dest = hexToFriendly(subjectedNode.nodestack.verhex)
    sdk_send_update_node(looper, sdk_wallet_steward,
                         sdk_pool_handle,
                         node_dest, subjectedNode.name,
                         nodeStackNewHA[0], nodeStackNewHA[1],
                         clientStackNewHA[0], clientStackNewHA[1],
                         services=[VALIDATOR])

    # stop node for which HA will be changed
    subjectedNode.stop()
    looper.removeProdable(subjectedNode)

    # start node with new HA
    config_helper = PNodeConfigHelper(subjectedNode.name, tconf, chroot=tdir)
    restartedNode = TestNode(subjectedNode.name,
                             config_helper=config_helper,
                             config=tconf, ha=nodeStackNewHA,
                             cliha=clientStackNewHA)
    looper.add(restartedNode)
    txnPoolNodeSet[node_index] = restartedNode
    looper.run(checkNodesConnected(txnPoolNodeSet, customTimeout=70))

    electionTimeout = waits.expectedPoolElectionTimeout(
        nodeCount=len(txnPoolNodeSet),
        numOfReelections=3)
    ensureElectionsDone(looper,
                        txnPoolNodeSet,
                        retryWait=1,
                        customTimeout=electionTimeout)

    # The pool handle has to learn the new HA before requests are sent
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              8)
def test_slow_nodes_catchup_before_selecting_primary_in_new_view(
        tconf,
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client,
        one_node_added):
    """
    Delay 3PC messages to one node and view change messages to some others
    (including primary) so the node that does not receive enough 3PC messages is
    behind but learns of the view change quickly and starts catchup.
    Other nodes learn of the view change late and thus keep on processing
    requests
    """
    new_node = one_node_added
    nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)]
    primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node
    slow_node = nprs[-1]
    # nodes_slow_to_inst_chg = [primary_node] + nprs[:2]
    nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node]
    delay_3pc = 100
    delay_ic = 5

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    delay_3pc_messages([slow_node], 0, delay_3pc)

    for n in nodes_slow_to_inst_chg:
        n.nodeIbStasher.delay(icDelay(delay_ic))

    def start_count():
        # Number of catchups the slow node started for the domain ledger
        return sum(
            1 for e in slow_node.ledgerManager.spylog.getAll(
                slow_node.ledgerManager.startCatchUpProcess.__name__)
            if e.params['ledgerId'] == DOMAIN_LEDGER_ID)

    s = start_count()
    requests = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client,
                                        10 * Max3PCBatchSize)

    ensure_view_change(looper, nodes=txnPoolNodeSet,
                       exclude_from_check=nodes_slow_to_inst_chg)

    sdk_get_and_check_replies(looper, requests)

    waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1])

    # The slow node must have started catchup at least twice meanwhile
    e = start_count()
    assert e - s >= 2

    # `checkViewNoForNodes` takes the nodes first and the expected view
    # number second; the view number alone used to be passed as the nodes
    looper.run(eventually(checkViewNoForNodes,
                          txnPoolNodeSet, slow_node.viewNo))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)

    waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
def test_recover_stop_primaries_no_view_change(looper, checkpoint_size,
                                               txnPoolNodeSet,
                                               allPluginsPath, tdir, tconf,
                                               sdk_pool_handle,
                                               sdk_wallet_steward):
    """
    Test that we can recover after having more than f nodes disconnected:
    - send txns
    - stop current master primary
    - restart current master primary
    - send txns

    The view number is expected to stay at the value observed at the start
    of the test.
    """
    active_nodes = list(txnPoolNodeSet)
    assert 4 == len(active_nodes)
    initial_view_no = active_nodes[0].viewNo

    logger.info("send at least one checkpoint")
    assert nodes_do_not_have_checkpoints(*active_nodes)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2 * checkpoint_size)
    assert nodes_have_checkpoints(*active_nodes)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Stop first node (current Primary)")
    stopped_node, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Restart the primary node")
    restarted_node = start_stopped_node(stopped_node, looper, tconf, tdir,
                                        allPluginsPath)
    assert nodes_do_not_have_checkpoints(restarted_node)
    assert nodes_have_checkpoints(*active_nodes)
    active_nodes = active_nodes + [restarted_node]

    logger.info("Check that primary selected")
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        instances_list=range(2), customTimeout=30)
    # No view change is expected, so compare against the recorded initial
    # view instead of a hard-coded 0.
    waitForViewChange(looper, active_nodes, expectedViewNo=initial_view_no)
    ensure_all_nodes_have_same_data(
        looper, nodes=active_nodes,
        exclude_from_check=['check_last_ordered_3pc_backup'])

    logger.info("Check if the pool is able to process requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 10 * checkpoint_size)
    ensure_all_nodes_have_same_data(
        looper, nodes=active_nodes,
        exclude_from_check=['check_last_ordered_3pc_backup'])
    assert nodes_have_checkpoints(*active_nodes)
def test_catchup_with_lost_ledger_status(txnPoolNodeSet,
                                         looper,
                                         sdk_pool_handle,
                                         sdk_wallet_steward,
                                         tconf,
                                         tdir,
                                         allPluginsPath,
                                         monkeypatch,
                                         lost_count):
    """
    Skip processing of lost_count Message Responses with LEDGER STATUS
    in catchup; test makes sure that the node eventually finishes catchup
    """
    node_to_disconnect = txnPoolNodeSet[-1]

    # Per-test counter: keeps the test independent of module-level state
    # that could leak between parametrized runs.
    call_count = 0

    def unpatch_after_call(status, frm):
        # Replacement for processLedgerStatus: drops the message and counts it.
        nonlocal call_count
        call_count += 1
        if call_count >= lost_count:
            # unpatch processLedgerStatus after lost_count calls
            monkeypatch.undo()

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)

    # restart node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward,
                              2)

    nodeHa, nodeCHa = HA(*node_to_disconnect.nodestack.ha), HA(
        *node_to_disconnect.clientstack.ha)
    config_helper = PNodeConfigHelper(node_to_disconnect.name, tconf,
                                      chroot=tdir)
    node_to_disconnect = TestNode(node_to_disconnect.name,
                                  config_helper=config_helper,
                                  config=tconf,
                                  ha=nodeHa, cliha=nodeCHa,
                                  pluginPaths=allPluginsPath)
    # patch processLedgerStatus
    monkeypatch.setattr(node_to_disconnect.ledgerManager,
                        'processLedgerStatus',
                        unpatch_after_call)

    # add node_to_disconnect to pool
    looper.add(node_to_disconnect)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
def testPostingThroughput(postingStatsEnabled,
                          decreasedMonitoringTimeouts,
                          looper,
                          txnPoolNodeSet,
                          sdk_wallet_client, sdk_pool_handle):
    """
    Check the monitor's throughput figures around a burst of requests.

    Before any request is sent, throughput and `totalRequests` are zero.
    After `reqCount` requests are ordered they show up in
    `orderedRequestsInLast`, throughput is positive and `totalRequests`
    equals `reqCount`. Once another `ThroughputWindowSize` seconds have
    passed, the window is empty and throughput is zero again while
    `totalRequests` keeps its value.
    """
    config = decreasedMonitoringTimeouts
    window = config.ThroughputWindowSize
    reqCount = 10

    # Let one full window elapse so values stored earlier are cleared.
    looper.runFor(window)

    for node in txnPoolNodeSet:
        monitor = node.monitor
        assert monitor.highResThroughput == 0
        assert monitor.totalRequests == 0

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              reqCount)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    for node in txnPoolNodeSet:
        monitor = node.monitor
        assert len(monitor.orderedRequestsInLast) == reqCount
        assert monitor.highResThroughput > 0
        assert monitor.totalRequests == reqCount
        # TODO: Add implementation to actually call firebase plugin
        # and test if firebase plugin is sending total request count
        # if node is primary

    looper.runFor(config.DashboardUpdateFreq)

    for node in txnPoolNodeSet:
        # NOTE(review): this comparison has no `assert`, so its result is
        # discarded; confirm whether an assertion was intended here.
        node.monitor.spylog.count(Monitor.sendThroughput.__name__) > 0

    # Wait one more window so that `orderedRequestsInLast` becomes empty.
    looper.runFor(window)

    def chk():
        for node in txnPoolNodeSet:
            monitor = node.monitor
            assert len(monitor.orderedRequestsInLast) == 0
            assert monitor.highResThroughput == 0
            assert monitor.totalRequests == reqCount

    looper.run(eventually(chk, retryWait=1, timeout=window))
def test_catchup_with_lost_first_consistency_proofs(txnPoolNodeSet,
                                                    looper,
                                                    sdk_pool_handle,
                                                    sdk_wallet_steward,
                                                    tconf,
                                                    tdir,
                                                    allPluginsPath,
                                                    monkeypatch,
                                                    lost_count):
    """
    Skip processing of first lost_count CONSISTENCY_PROOFs in catchup. In
    this case catchup node has no quorum with f+1 CONSISTENCY_PROOFs for the
    longer transactions list. It needs to request CONSISTENCY_PROOFs again and
    finish catchup.
    Test makes sure that the node eventually finishes catchup
    """
    node_to_disconnect = txnPoolNodeSet[-1]

    # Per-test counter: keeps the test independent of module-level state
    # that could leak between parametrized runs.
    call_count = 0

    def unpatch_after_call(proof, frm):
        # Replacement for processConsistencyProof: drops the proof and
        # counts it.
        nonlocal call_count
        call_count += 1
        if call_count >= lost_count:
            # unpatch processConsistencyProof after lost_count calls
            monkeypatch.undo()

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)

    # restart node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward,
                              2)

    nodeHa, nodeCHa = HA(*node_to_disconnect.nodestack.ha), HA(
        *node_to_disconnect.clientstack.ha)
    config_helper = PNodeConfigHelper(node_to_disconnect.name, tconf,
                                      chroot=tdir)
    node_to_disconnect = TestNode(node_to_disconnect.name,
                                  config_helper=config_helper,
                                  config=tconf,
                                  ha=nodeHa, cliha=nodeCHa,
                                  pluginPaths=allPluginsPath)
    # patch processConsistencyProof
    monkeypatch.setattr(node_to_disconnect.ledgerManager,
                        'processConsistencyProof',
                        unpatch_after_call)

    # add node_to_disconnect to pool
    looper.add(node_to_disconnect)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
def test_process_three_phase_msg_and_stashed_future_view(
        txnPoolNodeSet, looper, tconf, sdk_pool_handle, sdk_wallet_steward):
    """
    1. Delay the messages matched by nv_delay() for the slow_node.
    2. Start view change on all nodes.
    3. Order a new request.
    4. Check that slow_node could not order this request and stashed all 3pc
       messages and other nodes ordered.
    5. Reset delays.
    6. Check that the last request is ordered on the slow_node and stashed
       messages were removed.
    """
    # The last node of the pool plays the slow node; all others are fast.
    slow_node = txnPoolNodeSet[-1]
    fast_nodes = txnPoolNodeSet[:-1]
    view_no = slow_node.viewNo
    # Stash sizes per replica instance before the scenario starts, used as
    # the baseline for the stash-size assertion below.
    old_stashed = {
        inst_id: r.stasher.stash_size(STASH_VIEW_3PC)
        for inst_id, r in slow_node.replicas.items()
    }
    # Outer rule: delay message replies for PREPREPARE/PREPARE/COMMIT on the
    # slow node for the whole scenario.
    with delay_rules([
            slow_node.nodeIbStasher,
    ], msg_rep_delay(types_to_delay=[PREPREPARE, PREPARE, COMMIT])):
        # Inner rule: additionally apply nv_delay() on the slow node while
        # the fast nodes change view and order one request.
        with delay_rules([
                slow_node.nodeIbStasher,
        ], nv_delay()):
            for n in txnPoolNodeSet:
                n.view_changer.on_master_degradation()
            waitForViewChange(looper,
                              fast_nodes,
                              expectedViewNo=view_no + 1,
                              customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)
            ensureElectionsDone(looper=looper,
                                nodes=fast_nodes,
                                instances_list=range(
                                    fast_nodes[0].requiredNumberOfInstances))
            sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_steward, 1)
            assert slow_node.view_change_in_progress
            # 1 - pre-prepare msg
            # (len(txnPoolNodeSet) - 2) - prepare msgs
            # (len(txnPoolNodeSet) - 1) - commit msgs
            stashed_master_messages = 2 * (1 + (len(txnPoolNodeSet) - 2) +
                                           (len(txnPoolNodeSet) - 1))
            assert slow_node.master_replica.stasher.stash_size(
                STASH_VIEW_3PC) == old_stashed[0] + stashed_master_messages

        def chk():
            # Every replica of the slow node ordered up to ppSeqNo 2 and has
            # nothing left in the STASH_VIEW_3PC stash.
            for inst_id, r in slow_node.replicas.items():
                assert r.last_ordered_3pc[1] == 2
                assert r.stasher.stash_size(STASH_VIEW_3PC) == 0

        looper.run(eventually(chk))
        waitNodeDataEquality(looper, slow_node, *fast_nodes)
def test_view_change_during_unstash(looper, txnPoolNodeSet, sdk_pool_handle,
                                    sdk_wallet_client, tconf):
    """
    Trigger a view change while the slow node (the last node in the pool)
    is still receiving 3PC messages that were delayed for it, then check
    that elections finish, all nodes end up with the same data and the pool
    remains functional.
    """
    slow_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]

    slow_stasher = slow_node.nodeIbStasher
    other_stashers = [n.nodeIbStasher for n in other_nodes]
    all_stashers = [n.nodeIbStasher for n in txnPoolNodeSet]

    # Preload nodes with some transactions
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    for node in txnPoolNodeSet:
        assert node.master_replica.last_ordered_3pc == (0, 1)

    # Prevent ordering of some requests
    start_delaying(all_stashers,
                   delay_3pc(view_no=0, after=7, msgs=(Prepare, Commit)))

    # Stop ordering on slow node and send requests
    slow_node_after_5 = start_delaying(
        slow_stasher, delay_3pc(view_no=0, after=5, msgs=Commit))
    slow_node_until_5 = start_delaying(slow_stasher,
                                       delay_3pc(view_no=0, after=0))
    reqs_view_0 = sdk_send_random_requests(looper, sdk_pool_handle,
                                           sdk_wallet_client, 8)

    # Make pool order first 2 batches and pause
    pool_after_3 = start_delaying(other_stashers,
                                  delay_3pc(view_no=0, after=3))
    looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 3))

    # Start catchup, continue ordering everywhere (except two last batches on slow node)
    with delay_rules(slow_stasher, cr_delay()):
        slow_node._do_start_catchup(just_started=False)
        looper.run(eventually(check_catchup_is_started, slow_node))
        stop_delaying_and_process(pool_after_3)
        looper.run(eventually(check_nodes_ordered_till, other_nodes, 0, 7))

    # Finish catchup and continue processing on slow node
    looper.run(eventually(check_catchup_is_finished, slow_node))
    stop_delaying_and_process(slow_node_until_5)
    looper.run(eventually(check_nodes_ordered_till, [slow_node], 0, 5))

    # Start view change and allow slow node to get remaining commits
    with delay_rules(all_stashers, icDelay()):
        trigger_view_change(txnPoolNodeSet)
        looper.runFor(0.1)
    stop_delaying_and_process(slow_node_after_5)

    # Ensure that expected number of requests was ordered
    replies = sdk_get_replies(looper, reqs_view_0)
    # Only the first 6 of the 8 replies are checked.
    for rep in replies[:6]:
        sdk_check_reply(rep)

    # Ensure that everything is ok
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_caught_up_for_current_view_check(looper, txnPoolNodeSet,
                                          sdk_pool_handle, sdk_wallet_client):
    """
    One of the node experiences poor network and loses 3PC messages. It has to
    do multiple rounds of catchup to be caught up
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)

    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    bad_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != bad_node]
    # Already a bound method of the master replica; kept for restoring below.
    orig_method = bad_node.master_replica.dispatchThreePhaseMsg

    # Bad node does not process any 3 phase messages, equivalent to messages
    # being lost
    def bad_method(self, m, s):
        pass

    bad_node.master_replica.dispatchThreePhaseMsg = types.MethodType(
        bad_method, bad_node.master_replica)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    waitNodeDataInequality(looper, bad_node, *other_nodes)

    # Patch all nodes to return ConsistencyProof of a smaller ledger to the
    # bad node but only once, so that the bad_node needs to do catchup again.
    make_a_node_catchup_twice(bad_node, other_nodes, DOMAIN_LEDGER_ID,
                              Max3PCBatchSize)

    def is_catchup_needed_count():
        return len(getAllReturnVals(bad_node, bad_node.is_catchup_needed,
                                    compare_val_to=True))

    def caught_up_for_current_view_count():
        return len(getAllReturnVals(bad_node,
                                    bad_node.caught_up_for_current_view,
                                    compare_val_to=True))

    old_count_1 = is_catchup_needed_count()
    old_count_2 = caught_up_for_current_view_count()
    ensure_view_change(looper, txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)

    assert is_catchup_needed_count() > old_count_1
    # The bad_node caught up due to receiving sufficient ViewChangeDone
    # messages
    assert caught_up_for_current_view_count() > old_count_2

    # Restore the saved bound method as-is. Re-wrapping it in
    # types.MethodType would bind the replica a second time, so every call
    # would receive one positional argument too many.
    bad_node.master_replica.dispatchThreePhaseMsg = orig_method
def test_receive_incorrect_catchup_request_with_catchuptill_greater_ledger_size(
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client):
    """
    A CatchupReq whose catchupTill (100) exceeds the ledger size is passed
    to the first node's ledger manager; the discard reason is then checked.
    """
    catchup_till = 100
    bad_request = CatchupReq(leger_id, 0, 10, catchup_till)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              4)

    manager = txnPoolNodeSet[0].ledgerManager
    manager.processCatchupReq(bad_request, "frm")

    # The ledger size is read after the request was processed.
    ledger_size = manager.getLedgerForMsg(bad_request).size
    reason_template = ("not able to service since "
                       "catchupTill = {} greater than "
                       "ledger size = {}")
    _check_call_discard(manager,
                        reason_template.format(catchup_till, ledger_size))
def test_nodes_maintain_master_txn_3PC_map(looper, txnPoolNodeSet, pre_check,
                                           sdk_node_created_after_some_txns):
    """
    After a new node joins and reaches data equality with the first four
    nodes, check its master last-3PC value against theirs, check the
    txn-to-3PC maps of those four nodes, and verify that requests are still
    processed.
    """
    (_,
     new_node,
     sdk_pool_handle,
     new_steward_wallet_handle) = sdk_node_created_after_some_txns
    txnPoolNodeSet.append(new_node)

    first_four = txnPoolNodeSet[:4]
    waitNodeDataEquality(looper, new_node, *first_four)

    # Check the new node has set same `last_3pc_ordered` for master as others
    check_last_3pc_master(new_node, txnPoolNodeSet[:4])
    chk_if_equal_txn_to_3pc(txnPoolNodeSet[:4])

    # Requests still processed
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              new_steward_wallet_handle,
                              2)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
def test_choose_ts_from_state(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward):
    """
    Store a timestamp 30 seconds ahead of the current epoch in the primary's
    ts_store for the current state head, clear the master replica's
    `last_accepted_pre_prepare_time`, and check that the next ordered txn
    carries a time within 3 seconds of the stored one.
    """
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 1)

    primary = get_master_primary_node(txnPoolNodeSet)
    expected_ts = get_utc_epoch() + 30

    nym_handler = primary.write_manager.request_handlers[NYM][0]
    head_hash = nym_handler.state.headHash
    nym_handler.database_manager.ts_store.set(expected_ts, head_hash)
    primary.master_replica.last_accepted_pre_prepare_time = None

    results = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                        sdk_pool_handle, sdk_wallet_steward,
                                        1)
    # Each result is indexed as a pair; the reply is its second item.
    reply = results[0][1]
    txn_time = int(get_txn_time(reply['result']))
    assert abs(expected_ts - txn_time) < 3
def test_receive_incorrect_catchup_request_with_end_greater_catchuptill(
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client):
    """
    A CatchupReq whose end (15) exceeds its catchupTill (10) is passed to
    the first node's ledger manager; the discard reason is then checked.
    """
    end = 15
    catchup_till = 10
    bad_request = CatchupReq(leger_id, 0, end, catchup_till)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              4)

    manager = txnPoolNodeSet[0].ledgerManager
    manager.processCatchupReq(bad_request, "frm")

    reason_template = ("not able to service since "
                       "end = {} greater than "
                       "catchupTill = {}")
    _check_call_discard(manager, reason_template.format(end, catchup_till))
def test_node_load(looper, txnPoolNodeSet, sdk_pool_handle,
                   sdk_wallet_client, capsys):
    """
    Send 150 client batches of 25 txns each and print how long every batch
    took, with output capturing disabled for the print.
    """
    client_batches = 150
    txns_per_batch = 25
    report = '{} executed {} client txns in {:.2f} seconds'

    for batch_no in range(1, client_batches + 1):
        started = perf_counter()
        sdk_send_random_and_check(looper,
                                  txnPoolNodeSet,
                                  sdk_pool_handle,
                                  sdk_wallet_client,
                                  txns_per_batch)
        with capsys.disabled():
            # Elapsed time is taken at print time, inside the context.
            print(report.format(batch_no, txns_per_batch,
                                perf_counter() - started))
def test_receive_incorrect_catchup_request_with_start_greater_end(
        looper,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client):
    """
    A CatchupReq whose start (10) exceeds its end (5) is passed to the first
    node's ledger manager; the discard reason is then checked.
    """
    start = 10
    end = 5
    bad_request = CatchupReq(leger_id, start, end, 11)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              4)

    manager = txnPoolNodeSet[0].ledgerManager
    manager.processCatchupReq(bad_request, "frm")

    reason_template = ("not able to service since "
                       "start = {} greater than "
                       "end = {}")
    _check_call_discard(manager, reason_template.format(start, end))
def test_no_requests_processed_during_view_change(looper, txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client):
    """
    With `view_change_in_progress` forced to True on every node, sending
    requests must raise RequestRejectedException with the expected reason,
    and the replica queues of every node must be empty afterwards.
    """
    for node in txnPoolNodeSet:
        node.view_change_in_progress = True

    with pytest.raises(RequestRejectedException) as e:
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, 10)
    # `e.value` is the public accessor for the raised exception instance.
    assert 'Can not process requests when view change is in progress' in \
           e.value.args[0]

    for node in txnPoolNodeSet:
        check_replica_queue_empty(node)
def test_revert_pp_from_malicious(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client):
    """
    Make dynamic validation fail on every node except the master primary
    and check that a request is rejected with "client request invalid".
    """
    def always_invalid(*args, **kwargs):
        # Stand-in for doDynamicValidation that rejects everything.
        raise InvalidClientMessageException(1, 2, 3)

    malicious_primary = getPrimaryReplica(txnPoolNodeSet).node
    honest_nodes = set(txnPoolNodeSet).difference({malicious_primary})
    for honest in honest_nodes:
        honest.doDynamicValidation = always_invalid

    with pytest.raises(RequestRejectedException,
                       match="client request invalid"):
        sdk_send_random_and_check(looper,
                                  txnPoolNodeSet,
                                  sdk_pool_handle,
                                  sdk_wallet_client,
                                  1)
def step1(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    stand up a pool of nodes and send 5 requests to client

    :return: adict with the master primary replica (``P``), the pool nodes
        (``nodes``) and the result of sending the 5 requests (``requests``)
    """
    startedNodes = txnPoolNodeSet
    # the master instance has a primary replica, call it P
    P = getPrimaryReplica(startedNodes)

    # Keep the result: it is returned below as `requests`.
    requests = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client, 5)

    return adict(P=P,
                 nodes=startedNodes,
                 requests=requests)
def test_view_change_triggered_after_ordering(looper, txnPoolNodeSet,
                                              sdk_pool_handle,
                                              sdk_wallet_client):
    """
    Order REQ_COUNT requests, trigger a view change and check that the pool
    stays functional and the view number grew by exactly one.
    """
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQ_COUNT)
    view_before = checkViewNoForNodes(txnPoolNodeSet)

    trigger_view_change(txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    sdk_ensure_pool_functional(looper,
                               txnPoolNodeSet,
                               sdk_wallet_client,
                               sdk_pool_handle)

    view_after = checkViewNoForNodes(txnPoolNodeSet)
    assert view_after == view_before + 1
def test_ledger_status_for_new_node(looper, txnPoolNodeSet,
                                    sdk_node_created_after_some_txns):
    """
    Order one more request after a new node was added, wait until all nodes
    (including the new one) hold the same data, then check ledger statuses
    on the existing nodes and on the new node.
    """
    (_,
     new_node,
     sdk_pool_handle,
     new_steward_wallet_handle) = sdk_node_created_after_some_txns

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              new_steward_wallet_handle,
                              1)

    every_node = txnPoolNodeSet + [new_node]
    ensure_all_nodes_have_same_data(
        looper, every_node,
        exclude_from_check=['check_last_ordered_3pc_backup'])

    # Ledger Status for Pool ledger should return not None 3PC key as
    # a new Node txn was ordered
    check_ledger_statuses(txnPoolNodeSet)

    # check Ledger Status on a new Node (it should contain the same last
    # ordered 3PC as on others)
    check_ledger_statuses_on_node(new_node)
def testRequestFullRoundTrip(restrictiveVerifier,
                             sdk_pool_handle,
                             sdk_wallet_client,
                             looper,
                             txnPoolNodeSet):
    """
    Sending a random request must raise RequestNackedException whose first
    argument contains the expected "amount too high" reason.
    """
    # Single definition of the expected reason; the original kept an unused
    # dict with a duplicate of this text.
    expected_reason = ('client request invalid: InvalidClientRequest() '
                       '[caused by amount too high\nassert 999 <= 100]')

    with pytest.raises(RequestNackedException) as e:
        sdk_send_random_and_check(looper, txnPoolNodeSet,
                                  sdk_pool_handle, sdk_wallet_client, 1)
    # `e.value` is the public accessor for the raised exception instance.
    assert expected_reason in e.value.args[0]
def simulate_slow_master(looper, txnPoolNodeSet, sdk_pool_handle,
                         sdk_wallet_steward, delay=10, num_reqs=4):
    """
    Slow down the master instance and push some requests through it.

    :param delay: value passed to delayNonPrimaries for instance 0
    :param num_reqs: number of random requests sent and checked
    :return: the node that was master primary before the delay was applied
    """
    primary_before = get_master_primary_node(list(txnPoolNodeSet))

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(txnPoolNodeSet, 0, delay)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              num_reqs)
    return primary_before
def check_view_change_one_slow_node(looper,
                                    txnPoolNodeSet,
                                    sdk_pool_handle,
                                    sdk_wallet_client,
                                    vc_counts,
                                    slow_node_is_next_primary,
                                    delay_commit=True,
                                    delay_pre_prepare=True):
    """
    Run a view change while one non-primary node has selected 3PC messages
    delayed, then check that the pool is functional and consistent.

    :param vc_counts: how far the view number is expected to advance; when
        it equals 2 an extra NodeNeedViewChange is sent on every node
    :param slow_node_is_next_primary: if True the delayed node is the one
        named by get_next_primary_name for the expected view, otherwise it
        is the first non-primary node with a different name
    :param delay_commit: add cDelay() to the delayers
    :param delay_pre_prepare: add ppDelay() and a PREPREPARE message-reply
        delay to the delayers
    """
    current_view_no = checkViewNoForNodes(txnPoolNodeSet)
    expected_view_no = current_view_no + vc_counts
    next_primary = get_next_primary_name(txnPoolNodeSet, expected_view_no)
    # Candidate nodes for being delayed: nodes of non-primary replicas.
    pretenders = [
        r.node for r in getNonPrimaryReplicas(txnPoolNodeSet)
        if not r.isPrimary
    ]
    if slow_node_is_next_primary:
        delayed_node = [n for n in pretenders if n.name == next_primary][0]
    else:
        delayed_node = [n for n in pretenders if n.name != next_primary][0]
    fast_nodes = [node for node in txnPoolNodeSet if node != delayed_node]

    delayers = []
    if delay_pre_prepare:
        delayers.append(ppDelay())
        delayers.append(msg_rep_delay(types_to_delay=[PREPREPARE]))
    if delay_commit:
        delayers.append(cDelay())

    # delay OldViewPrePrepareReply so that slow node doesn't receive PrePrepares before ReOrdering phase finishes
    with delay_rules(delayed_node.nodeIbStasher, old_view_pp_reply_delay()):
        with delay_rules_without_processing(delayed_node.nodeIbStasher,
                                            *delayers):
            sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle, sdk_wallet_client, 1)
            trigger_view_change(txnPoolNodeSet)
            if vc_counts == 2:
                # Ask every node for one more view change on top of the
                # triggered one.
                for node in txnPoolNodeSet:
                    node.master_replica.internal_bus.send(
                        NodeNeedViewChange(current_view_no + 2))

        waitForViewChange(looper=looper,
                          txnPoolNodeSet=txnPoolNodeSet,
                          expectedViewNo=expected_view_no)
        ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)

        # wait till fast nodes finish re-ordering
        looper.run(eventually(check_has_commits, fast_nodes))

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def test_recover_stop_primaries(looper, checkpoint_size, txnPoolNodeSet,
                                allPluginsPath, tdir, tconf, sdk_pool_handle,
                                sdk_wallet_steward):
    """
    Test that we can recover after having more than f nodes disconnected:
    - stop current master primary (Alpha)
    - send txns
    - restart current master primary (Beta)
    - send txns
    """
    active_nodes = list(txnPoolNodeSet)
    assert 4 == len(active_nodes)
    initial_view_no = active_nodes[0].viewNo
    # Stopping the first primary is expected to move the pool one view on.
    expected_view_no = initial_view_no + 1

    logger.info("Stop first node (current Primary)")
    _, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Make sure view changed")
    waitForViewChange(looper, active_nodes,
                      expectedViewNo=expected_view_no)
    ensureElectionsDone(looper=looper, nodes=active_nodes, numInstances=2)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("send at least one checkpoint")
    assert nodes_do_not_have_checkpoints(*active_nodes)
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              2 * checkpoint_size)
    assert nodes_have_checkpoints(*active_nodes)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Stop second node (current Primary) so the primary looses his state")
    stopped_node, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Restart the primary node")
    restarted_node = start_stopped_node(stopped_node, looper, tconf, tdir,
                                        allPluginsPath)
    # The restarted node starts without checkpoints; the others keep theirs.
    assert nodes_do_not_have_checkpoints(restarted_node)
    assert nodes_have_checkpoints(*active_nodes)
    active_nodes = active_nodes + [restarted_node]

    logger.info("Check that primary selected")
    ensureElectionsDone(looper=looper,
                        nodes=active_nodes,
                        numInstances=2,
                        customTimeout=30)
    waitForViewChange(looper, active_nodes,
                      expectedViewNo=expected_view_no)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Check if the pool is able to process requests")
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              10 * checkpoint_size)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)
    assert nodes_have_checkpoints(*active_nodes)
def test_view_change_on_quorum_of_master_degraded(txnPoolNodeSet, looper,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward,
                                                  viewNo):
    """
    Node will change view even though it does not find the master to be
    degraded when a quorum of nodes agree that master performance degraded
    """
    old_primary_node = get_master_primary_node(list(txnPoolNodeSet))

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(txnPoolNodeSet, 0, 10)

    reluctant_node = getPrimaryReplica(txnPoolNodeSet, 0).node

    # Count sent instance changes of all nodes
    inst_chg_method = ViewChanger.sendInstanceChange.__name__
    sent_before = {
        node.name: node.view_changer.spylog.count(inst_chg_method)
        for node in txnPoolNodeSet
    }

    def never_degraded(monitor):
        return False

    # Node reluctant to change view, never says master is degraded
    reluctant_node.monitor.isMasterDegraded = types.MethodType(
        never_degraded, reluctant_node.monitor)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_steward,
                              4)

    for node in txnPoolNodeSet:
        node.checkPerformance()

    # Check that view change happened for all nodes
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)

    # All nodes except the reluctant node should have sent a view change and
    # thus must have called `sendInstanceChange`
    for node in txnPoolNodeSet:
        if node.name == reluctant_node.name:
            assert node.view_changer.spylog.count(inst_chg_method) == \
                sent_before.get(node.name, 0)
        else:
            assert node.view_changer.spylog.count(inst_chg_method) > \
                sent_before.get(node.name, 0)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    new_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert old_primary_node.name != new_primary_node.name
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet,
                           looper, tdir, tconf, allPluginsPath,
                           sdk_pool_handle, sdk_wallet_steward,
                           sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes does not reply and the newly joined node cannot
    complete the process till the timeout and then requests the missing
    transactions.
    """

    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the node does not process catchup request.
    bad_node = getNonPrimaryReplicas(txnPoolNodeSet, 0)[0].node
    bad_node.nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        ignoreCatchupReq, bad_node.ledgerManager)

    more_requests = 10
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              more_requests)

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    ready_timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    timeout = ready_timeout + \
        reduced_catchup_timeout_conf.CatchupTransactionsTimeout

    def wait_new_node_in_sync():
        # Compare the new node with every node that was in the pool before.
        waitNodeDataEquality(
            looper, new_node, *txnPoolNodeSet[:-1],
            customTimeout=timeout,
            exclude_from_check=['check_last_ordered_3pc_backup'])

    wait_new_node_in_sync()
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    wait_new_node_in_sync()
def test_re_order_pre_prepares(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle):
    """
    Simulate the start and the finish of a view change by sending internal
    bus messages directly, and check that a node which did not receive
    Prepares and Commits orders the re-ordered PrePrepares and ends up with
    the same data as the other nodes.
    """
    # 1. drop Prepares and Commits on 4thNode
    # Order a couple of requests on Nodes 1-3
    lagging_node = txnPoolNodeSet[-1]
    other_nodes = txnPoolNodeSet[:-1]
    with delay_rules_without_processing(lagging_node.nodeIbStasher, cDelay(),
                                        pDelay()):
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 3)
        assert all(n.master_last_ordered_3PC == (0, 3) for n in other_nodes)

    # 2. simulate view change start so that
    # all PrePrepares/Prepares/Commits are cleared
    # and uncommitted txns are reverted
    for n in txnPoolNodeSet:
        n.replicas.send_to_internal_bus(ViewChangeStarted(view_no=1))
        master_ordering_service = n.master_replica._ordering_service
        assert not master_ordering_service.prePrepares
        assert not master_ordering_service.prepares
        assert not master_ordering_service.commits
        # PrePrepares of the old view must have been kept for re-ordering.
        assert master_ordering_service.old_view_preprepares
        ledger = n.db_manager.ledgers[DOMAIN_LEDGER_ID]
        state = n.db_manager.states[DOMAIN_LEDGER_ID]
        assert len(ledger.uncommittedTxns) == 0
        assert ledger.uncommitted_root_hash == ledger.tree.root_hash
        assert state.committedHead == state.head

    # 3. Simulate View Change finish to re-order the same PrePrepare
    assert lagging_node.master_last_ordered_3PC == (0, 0)
    new_master = txnPoolNodeSet[1]
    # Batch ids are built from the old-view PrePrepares stored on the second
    # node of the pool.
    batches = sorted([
        preprepare_to_batch_id(pp) for _, pp in new_master.master_replica.
        _ordering_service.old_view_preprepares.items()
    ])
    new_view_msg = NewViewCheckpointsApplied(view_no=0,
                                             view_changes=[],
                                             checkpoint=None,
                                             batches=batches)
    for n in txnPoolNodeSet:
        n.master_replica._consensus_data.prev_view_prepare_cert = batches[
            -1].pp_seq_no
        n.master_replica._ordering_service._bus.send(new_view_msg)

    # 4. Make sure that the nodes 1-3 (that already ordered the requests) sent Prepares and Commits so that
    # the request was eventually ordered on Node4 as well
    waitNodeDataEquality(looper, lagging_node, *other_nodes)
    assert lagging_node.master_last_ordered_3PC == (0, 4)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_prepare_in_queue_before_vc(looper, txnPoolNodeSet,
                                    sdk_wallet_steward, sdk_pool_handle):
    """
    Test steps:
    1. Send N random requests.
    2. Patch the processNodeInBox method of the slow node (the last node in
       the pool). The patched method processes only non-Prepare messages and
       keeps Prepare messages in the nodeInBox queue.
    3. Send one request and check that all Prepares are stored in the
       nodeInBox queue and there is a quorum of them.
    4. Compare last_ordered_3pc_key and last_prepared_certificate.
       Last_prepared_certificate must be greater than last ordered.
    5. ppSeqNo in last_prepared_certificate must be at least the ppSeqNo of
       the queued Prepare messages in the nodeInBox queue.
    """

    def chk_quorumed_prepares_count(prepares, count):
        # Count the entries of `prepares` that reached the prepare quorum of
        # the slow node.
        pp_qourum = slow_node.quorums.prepare.value
        assert len([pp for key, pp in prepares.items() if prepares.hasQuorum(pp.msg, pp_qourum)]) == count

    def patched_start_view_change(self, *args, **kwargs):
        # Restore the regular processNodeInBox, run the real view change
        # start, then push the stashed messages back to the node stack.
        # NOTE(review): `stashed_msgs` is not defined in this function; it is
        # presumably a module-level deque filled by not_processing_prepare.
        self.node.processNodeInBox = functools.partial(TestNode.processNodeInBox, self.node)
        ViewChanger.start_view_change(self, *args, **kwargs)
        while stashed_msgs:
            self.node.nodestack.rxMsgs.append(stashed_msgs.popleft())

    """Send REQ_COUNT txns"""
    slow_node = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, REQ_COUNT)
    """Check that there is REQ_COUNT prepares with quorum in queue"""
    chk_quorumed_prepares_count(slow_node.master_replica.prepares, REQ_COUNT)
    """Patch processNodeInBox method for saving Prepares in nodeInBox queue"""
    not_processing_prepare(slow_node)

    """Send 1 txn"""
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, REQ_COUNT_AFTER_SLOW)

    # The number of Prepares with quorum must not have grown on the slow node.
    chk_quorumed_prepares_count(slow_node.master_replica.prepares, REQ_COUNT)

    """Get last ordered 3pc key (should be (0, REQ_COUNT))"""
    ordered_lpc = slow_node.master_replica.last_ordered_3pc
    """Delay view_change_done messages"""
    slow_node.nodeIbStasher.delay(vcd_delay(100))
    """Patch on_view_change_start method for reverting processNodeInBox method"""
    slow_node.view_changer.start_view_change = functools.partial(patched_start_view_change, slow_node.view_changer)
    """Initiate view change"""
    ensure_view_change(looper, txnPoolNodeSet)
    """Last prepared certificate should take into account Prepares in nodeInBox queue too"""
    expected_lpc = slow_node.master_replica.last_prepared_before_view_change
    assert expected_lpc == (0, 11)
    """Last ordered key should be less than last_prepared_before_view_change"""
    # NOTE(review): presumably compare_3PC_keys returns a positive value when
    # its second argument is the greater key - confirm against its definition.
    assert compare_3PC_keys(ordered_lpc, expected_lpc) > 0
def test_restart_majority_to_same_view(looper, txnPoolNodeSet, tconf, tdir,
                                       allPluginsPath, sdk_pool_handle,
                                       sdk_wallet_client):
    """
    Restart the first three nodes together, check which nodes recorded a
    call to `on_inconsistent_3pc_state`, then restart the remaining nodes
    and make sure the pool is still functional.
    """
    def inconsistent_state_calls(node):
        # How many times the node recorded on_inconsistent_3pc_state.
        return node.spylog.count(node.on_inconsistent_3pc_state)

    # Add transaction to ledger
    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              1)

    majority = txnPoolNodeSet[:3]
    minority = txnPoolNodeSet[3:]

    # Restart majority group
    disconnect_tolerance = tconf.ToleratePrimaryDisconnection
    election_timeout = waits.expectedPoolElectionTimeout(len(txnPoolNodeSet))
    tm = disconnect_tolerance + election_timeout
    majority_before_restart = majority.copy()
    restart_nodes(looper, txnPoolNodeSet, majority, tconf, tdir,
                  allPluginsPath,
                  after_restart_timeout=tm,
                  start_one_by_one=False,
                  wait_for_elections=False)
    ensureElectionsDone(looper, majority, numInstances=2)

    # Check that nodes in minority group are aware that they might have
    # inconsistent 3PC state
    for node in minority:
        assert inconsistent_state_calls(node) == 1

    # Check that nodes in majority group neither thought (before restart)
    # nor think (after restart) they might have inconsistent 3PC state
    for group in (majority_before_restart, majority):
        for node in group:
            assert inconsistent_state_calls(node) == 0

    # Restart minority group
    restart_nodes(looper, txnPoolNodeSet, minority, tconf, tdir,
                  allPluginsPath,
                  after_restart_timeout=tm,
                  start_one_by_one=False)

    # Check that all nodes are still functional
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
def test_catch_up_after_demoted(txnPoolNodeSet,
                                sdk_node_set_with_node_added_after_some_txns,
                                sdk_wallet_client):
    """
    Demote a newly added node, send requests while it is demoted, promote
    it again and check that its data becomes equal to the other nodes'.

    The fixture `sdk_node_set_with_node_added_after_some_txns` is unpacked
    into (looper, new node, pool handle, steward wallet handle).
    """
    logger.info(
        "1. add a new node after sending some txns and check that catch-up "
        "is done (the new node is up to date)")
    (looper, added_node, pool_handle,
     steward_wallet) = sdk_node_set_with_node_added_after_some_txns

    def other_nodes():
        # Every entry of the pool list except the last one, evaluated at
        # call time (presumably the last entry is the added node -- verify
        # against the fixture).
        return txnPoolNodeSet[:-1]

    def update_services(services):
        # Send a node update on behalf of the steward that changes only
        # the `services` value; the four positional Nones are passed as-is.
        sdk_send_update_node(looper, steward_wallet, pool_handle,
                             node_dest, added_node.name,
                             None, None, None, None,
                             services=services)

    # NOTE(review): this first comparison uses the fixed slice [:4] while
    # the later ones use [:-1]; they coincide only for a particular pool
    # size -- confirm this is intended.
    waitNodeDataEquality(looper, added_node, *txnPoolNodeSet[:4])

    logger.info("2. turn the new node off (demote)")
    node_dest = hexToFriendly(added_node.nodestack.verhex)
    update_services([])

    logger.info("3. send more requests, "
                "so that the new node's state is outdated")
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              pool_handle, sdk_wallet_client, 5)
    checkNodeDataForInequality(added_node, *other_nodes())

    logger.info("4. turn the new node on")
    update_services([VALIDATOR])

    logger.info("5. make sure catch-up is done "
                "(the new node is up to date again)")
    waitNodeDataEquality(looper, added_node, *other_nodes())

    logger.info("6. send more requests and make sure "
                "that the new node participates in processing them")
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              pool_handle, steward_wallet, 10)
    waitNodeDataEquality(looper, added_node, *other_nodes())
def testTreeRootsCorrectAfterEachBatch(tconf, looper, txnPoolNodeSet,
                                       sdk_pool_handle, sdk_wallet_client):
    """
    Verify that the nodes agree on their roots after each round of batches.

    Two rounds are run: the first sends `tconf.Max3PCBatchSize` requests
    (one batch), the second sends twice as many (two batches). After each
    round `checkNodesHaveSameRoots` is called on the whole pool.
    """
    for batches_in_round in (1, 2):
        # Enough requests to fill `batches_in_round` full 3PC-batches
        requests_to_send = batches_in_round * tconf.Max3PCBatchSize
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, requests_to_send)
        checkNodesHaveSameRoots(txnPoolNodeSet)
def test_lagged_checkpoint_completion(chkFreqPatched, looper, txnPoolNodeSet,
                                      sdk_wallet_client, sdk_pool_handle):
    """
    A single node falls behind on the final 3PC-batch of a checkpoint, so
    the Checkpoint messages from the rest of the pool reach it and are
    stashed before it has completed that checkpoint itself.

    After the delay is removed the node is expected to order the pending
    3PC-batch, complete the checkpoint, consume the stashed Checkpoint
    messages and end up with the checkpoint stabilized.
    """
    lagging_node = txnPoolNodeSet[-1]

    def send_requests(count):
        # Send `count` random requests through the pool and check them
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, count)

    # The first four 3PC-batches of the checkpoint are ordered normally by
    # everyone. Before the fifth (last) one, a delay is installed on the
    # lagging node's inbox so that it is late to receive Commits; the other
    # nodes still order that batch.
    send_requests(4)
    lagging_node.nodeIbStasher.delay(cDelay())
    send_requests(1)

    # The other nodes finish the checkpoint and broadcast Checkpoint
    # messages. The lagging node has not finished it yet, so it can only
    # stash what it receives.
    def check():
        votes_from_others = len(txnPoolNodeSet) - 1
        for replica in lagging_node.replicas.values():
            check_stable_checkpoint(replica, 0)
            check_num_unstable_checkpoints(replica, 0)
            check_num_received_checkpoints(replica, 1)
            check_received_checkpoint_votes(replica,
                                            pp_seq_no=5,
                                            num_votes=votes_from_others)

    wait_for_stash = waits.expectedTransactionExecutionTime(
        len(txnPoolNodeSet))
    looper.run(eventually(check, timeout=wait_for_stash))

    # With the delay lifted the lagging node gets the Commits, orders the
    # last 3PC-batch, completes the checkpoint, handles the stashed
    # Checkpoint messages and stabilizes the checkpoint.
    lagging_node.nodeIbStasher.reset_delays_and_process_delayeds()
    looper.runFor(waits.expectedOrderingTime(len(txnPoolNodeSet)))

    for replica in lagging_node.replicas.values():
        check_stable_checkpoint(replica, 5)
        check_num_unstable_checkpoints(replica, 0)
        check_num_received_checkpoints(replica, 0)