def setup(looper, tconf, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 5)
    P = getPrimaryReplica(txnPoolNodeSet)

    # set LAMBDA smaller than the production config to make the test faster
    testLambda = 10
    delay_by = 2 * testLambda

    old_view_nos = set([n.viewNo for n in txnPoolNodeSet])
    assert len(old_view_nos) == 1
    old_view_no = old_view_nos.pop()

    for node in txnPoolNodeSet:
        # Make `Delta` small enough so throughput check passes.
        node.monitor.Delta = .001
        node.monitor.Lambda = testLambda
        for r in node.replicas.values():
            r.config.ACCEPTABLE_DEVIATION_PREPREPARE_SECS += delay_by

    # make P (primary replica on master) faulty, i.e., slow to send
    # PRE-PREPAREs
    def specificPrePrepare(msg):
        if isinstance(msg, PrePrepare):
            return delay_by  # just more than LAMBDA

    P.outBoxTestStasher.delay(specificPrePrepare)
    # TODO select or create a timeout for this case in 'waits'
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 5,
                              customTimeoutPerReq=tconf.TestRunningTimeLimitSec)

    return adict(nodes=txnPoolNodeSet, old_view_no=old_view_no)
def testPrimarySelectionAfterViewChange(  # noqa
        looper, txnPoolNodeSet, primaryReplicas, catchup_complete_count):
    """
    Test that primary replica of a protocol instance shifts to a new node
    after a view change.
    """
    # TODO: This test can fail due to view change.
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    for n in txnPoolNodeSet:
        assert n.spylog.count(
            n.allLedgersCaughtUp) > catchup_complete_count[n.name]

    # Primary replicas before view change
    prBeforeVC = primaryReplicas

    # Primary replicas after view change
    instanceCount = getNoInstances(nodeCount)
    prAfterVC = [getPrimaryReplica(txnPoolNodeSet, i)
                 for i in range(instanceCount)]

    # Primary replicas have moved to the next node
    for br, ar in zip(prBeforeVC, prAfterVC):
        assert ar.node.rank - br.node.rank == 1

    check_rank_consistent_across_each_node(txnPoolNodeSet)
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
def setup(txnPoolNodeSet):
    primaryRep, nonPrimaryReps = getPrimaryReplica(txnPoolNodeSet, 0), \
        getNonPrimaryReplicas(txnPoolNodeSet, 0)

    faultyRep = nonPrimaryReps[0]
    makeNodeFaulty(faultyRep.node, partial(sendDuplicate3PhaseMsg,
                                           msgType=Commit, count=3,
                                           instId=0))

    # The node of the faulty replica above should not be blacklisted by any
    # other node since we are simulating multiple COMMIT messages and
    # want to check for a particular suspicion
    whitelistNode(faultyRep.node.name,
                  [node for node in txnPoolNodeSet if node != faultyRep.node],
                  Suspicions.DUPLICATE_CM_SENT.code)

    # If the request is ordered then COMMIT will be rejected much earlier
    for r in [primaryRep, *nonPrimaryReps]:
        def do_nothing(self, commit):
            pass

        r.doOrder = types.MethodType(do_nothing, r)

    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps,
                 faultyRep=faultyRep)
def g(instId):
    allReplicas = getAllReplicas(txnPoolNodeSet, instId)
    primaryReplica = getPrimaryReplica(txnPoolNodeSet, instId)

    def replicas_gets_correct_num_of_COMMITs():
        """
        The number of COMMIT messages received must be n (the number of
        nodes) with zero faults, and at least n - f with faults.
        """
        passes = 0
        numOfMsgsWithZFN = quorums.commit.value
        numOfMsgsWithFault = quorums.commit.value

        key = (primaryReplica.viewNo, primaryReplica.lastPrePrepareSeqNo)
        for r in allReplicas:
            if key in r.commits:
                rcvdCommitRqst = r.commits[key]
                actualMsgsReceived = len(rcvdCommitRqst.voters)
                passes += int(msgCountOK(nodeCount,
                                         faultyNodes,
                                         actualMsgsReceived,
                                         numOfMsgsWithZFN,
                                         numOfMsgsWithFault))

        assert passes >= min(len(allReplicas) - faultyNodes, numOfMsgsWithZFN)

    replicas_gets_correct_num_of_COMMITs()
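# The COMMIT-counting check above relies on a msgCountOK helper that is not
# shown in this section. The sketch below is an assumption reconstructed from
# its call sites (nodeCount, faultyNodes, actual count, expected count with
# zero faulty nodes, minimum acceptable count with faults); it is not the
# test library's implementation.
def msgCountOK_sketch(nodeCount, faultyNodes, actualMessagesReceived,
                      numOfMsgsWithZeroFaults, numOfSufficientMsgs):
    # With no faulty nodes every expected message should have arrived.
    if faultyNodes == 0:
        return actualMessagesReceived == numOfMsgsWithZeroFaults
    # With up to f faulty nodes a reduced, but still sufficient, number of
    # messages is acceptable.
    f = (nodeCount - 1) // 3
    if faultyNodes <= f:
        return actualMessagesReceived >= numOfSufficientMsgs
    # More faults than the protocol tolerates: no guarantee can be made.
    return True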
def send_commit(view_no, pp_seq_no, nodes):
    commit = Commit(
        0,
        view_no,
        pp_seq_no)
    primary_node = getPrimaryReplica(nodes).node
    sendMessageToAll(nodes, primary_node, commit)
def testPrePrepareWithHighSeqNo(looper, txnPoolNodeSet, propagated1): def chk(): for r in getNonPrimaryReplicas(txnPoolNodeSet, instId): nodeSuspicions = len(getNodeSuspicions( r.node, Suspicions.WRONG_PPSEQ_NO.code)) assert nodeSuspicions == 1 def checkPreprepare(replica, viewNo, ppSeqNo, req, numOfPrePrepares): assert (replica.prePrepares[viewNo, ppSeqNo][0]) == \ (req.identifier, req.reqId, req.digest) primary = getPrimaryReplica(txnPoolNodeSet, instId) nonPrimaryReplicas = getNonPrimaryReplicas(txnPoolNodeSet, instId) req = propagated1.reqDigest primary.doPrePrepare(req) timeout = waits.expectedPrePrepareTime(len(txnPoolNodeSet)) for np in nonPrimaryReplicas: looper.run( eventually(checkPreprepare, np, primary.viewNo, primary.lastPrePrepareSeqNo - 1, req, 1, retryWait=.5, timeout=timeout)) newReqDigest = (req.identifier, req.reqId + 1, req.digest) incorrectPrePrepareReq = PrePrepare(instId, primary.viewNo, primary.lastPrePrepareSeqNo + 2, *newReqDigest, get_utc_epoch()) primary.send(incorrectPrePrepareReq, TPCStat.PrePrepareSent) timeout = waits.expectedPrePrepareTime(len(txnPoolNodeSet)) looper.run(eventually(chk, retryWait=1, timeout=timeout))
def test_primary_recvs_3phase_message_outside_watermarks(perf_chk_patched,
                                                         chkFreqPatched,
                                                         looper,
                                                         txnPoolNodeSet,
                                                         sdk_pool_handle,
                                                         sdk_wallet_client,
                                                         reqs_for_logsize):
    """
    A primary starts receiving more requests than fit within its log size and
    queues them up since they would go beyond its watermarks. This happens
    because the other nodes are slow in processing its PRE-PREPAREs.
    Eventually the primary sends a PRE-PREPARE for every request and all
    those requests complete.
    """
    tconf = perf_chk_patched
    delay = 2
    instId = 0
    reqs_to_send = 2 * reqs_for_logsize + 1
    logger.debug('Will send {} requests'.format(reqs_to_send))

    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    orderedCount = pr.stats.get(TPCStat.OrderSent)

    for r in npr:
        r.node.nodeIbStasher.delay(ppDelay(delay, instId))
        r.node.nodeIbStasher.delay(pDelay(delay, instId))

    tm_exec_1_batch = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet))
    batch_count = math.ceil(reqs_to_send / tconf.Max3PCBatchSize)
    total_timeout = (tm_exec_1_batch + delay) * batch_count

    def chk():
        assert orderedCount + batch_count == pr.stats.get(TPCStat.OrderSent)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client,
                              reqs_to_send)
    looper.run(eventually(chk, retryWait=1, timeout=total_timeout))
def testOrderingCase2(looper, txnPoolNodeSet, sdk_pool_handle,
                      sdk_wallet_client):
    """
    Scenario -> A client sends requests; some nodes delay COMMITs to a few
    specific nodes so that those nodes achieve commit quorum for the requests
    later than the other nodes do. Still, all nodes `ORDER` the requests in
    the same order of ppSeqNos.
    https://www.pivotaltracker.com/n/projects/1889887/stories/133655009
    """
    pr, replicas = getPrimaryReplica(txnPoolNodeSet, instId=0), \
        getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    assert len(replicas) == 6

    rep0 = pr
    rep1 = replicas[0]
    rep2 = replicas[1]
    rep3 = replicas[2]
    rep4 = replicas[3]
    rep5 = replicas[4]
    rep6 = replicas[5]

    node0 = rep0.node
    node1 = rep1.node
    node2 = rep2.node
    node3 = rep3.node
    node4 = rep4.node
    node5 = rep5.node
    node6 = rep6.node

    ppSeqsToDelay = 5
    commitDelay = 3  # delay each COMMIT by this number of seconds
    delayedPpSeqNos = set()

    requestCount = 10

    def specificCommits(wrappedMsg):
        nonlocal node3, node4, node5
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare):
            if len(delayedPpSeqNos) < ppSeqsToDelay:
                delayedPpSeqNos.add(msg.ppSeqNo)
                logger.debug('ppSeqNo {} would be delayed'.format(msg.ppSeqNo))
        if isinstance(msg, Commit) and msg.instId == 0 and \
                sender in (n.name for n in (node3, node4, node5)) and \
                msg.ppSeqNo in delayedPpSeqNos:
            return commitDelay

    for node in (node1, node2):
        logger.debug('{} would be delaying commits'.format(node))
        node.nodeIbStasher.delay(specificCommits)

    sdk_reqs = sdk_send_random_requests(looper,
                                        sdk_pool_handle,
                                        sdk_wallet_client,
                                        requestCount)

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet,
                                    custom_timeout=timeout)
    sdk_get_and_check_replies(looper, sdk_reqs)
def test_no_propagate_request_on_different_prepares_on_backup_before_vc(looper, txnPoolNodeSet,
                                                                         sdk_pool_handle, sdk_wallet_client):
    '''
    Send a random request and do a view change. The fast_nodes (2, 3 - the
    ones with the backup primary replica) will have a prepare or have sent a
    pre-prepare on their backup replicas, the slow_nodes will not, and the
    transaction will be ordered on all master replicas. Check last ordered
    after the view change and after one more request.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc
    with delay_rules(nodes_stashers, pDelay(instId=slow_instance)):
        with delay_rules(nodes_stashers, ppDelay(instId=slow_instance)):
            # send one request
            sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_client, 1)
            old_view_no = txnPoolNodeSet[0].viewNo
            looper.run(
                eventually(is_prepared,
                           fast_nodes,
                           2,
                           slow_instance))

            # trigger view change on all nodes
            ensure_view_change(looper, txnPoolNodeSet)
            # wait for view change done on all nodes
            ensureElectionsDone(looper, txnPoolNodeSet)

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    check_last_ordered(non_primaries,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1))

    # Backup primary replica must not advance last_ordered_3pc
    # up to the master's value
    check_last_ordered([primary],
                       slow_instance,
                       (old_view_no, old_last_ordered[1]))

    check_last_ordered(txnPoolNodeSet,
                       txnPoolNodeSet[0].master_replica.instId,
                       (old_last_ordered[0], old_last_ordered[1] + 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    looper.run(
        eventually(check_last_ordered,
                   txnPoolNodeSet,
                   slow_instance,
                   (txnPoolNodeSet[0].viewNo, 1)))
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
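# The tests above and below call a check_last_ordered helper that is not part
# of this section. A minimal sketch of what it is assumed to assert, based on
# its call sites (a list of nodes, an instance id, and an expected
# (view_no, pp_seq_no) pair), is given below; the real helper may differ.
def check_last_ordered_sketch(nodes, inst_id, expected_3pc):
    # Every given node's replica for this instance should have ordered
    # exactly up to the expected (view_no, pp_seq_no).
    for node in nodes:
        replica = node.replicas[inst_id]
        assert replica.last_ordered_3pc == expected_3pc, \
            '{} has last_ordered_3pc {}'.format(replica,
                                                replica.last_ordered_3pc)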
def setup(txnPoolNodeSet):
    def dontSendPrePrepareRequest(self, pp_req: PrePrepare):
        logger.debug("EVIL: {} not sending pre-prepare message for request {}".
                     format(self.name, pp_req))
        return

    pr = getPrimaryReplica(txnPoolNodeSet, instId)
    evilMethod = types.MethodType(dontSendPrePrepareRequest, pr)
    pr.sendPrePrepare = evilMethod
def slow_primary(nodes, inst_id=0, delay=5):
    # make primary replica slow to send PRE-PREPAREs
    def ifPrePrepare(msg):
        if isinstance(msg, PrePrepare):
            return delay

    pr = getPrimaryReplica(nodes, inst_id)
    pr.outBoxTestStasher.delay(ifPrePrepare)
    return pr
def setup(txnPoolNodeSet):
    primaryRep, nonPrimaryReps = getPrimaryReplica(txnPoolNodeSet, 0), \
        getNonPrimaryReplicas(txnPoolNodeSet, 0)

    # The primary replica would send PRE-PREPARE messages with incorrect digest
    makeNodeFaulty(primaryRep.node, partial(send3PhaseMsgWithIncorrectDigest,
                                            msgType=PrePrepare))

    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps)
def test_slow_nodes_catchup_before_selecting_primary_in_new_view( tconf, looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, one_node_added): """ Delay 3PC messages to one node and view change messages to some others (including primary) so the node that does not receive enough 3PC messages is behind but learns of the view change quickly and starts catchup. Other nodes learn of the view change late and thus keep on processing requests """ new_node = one_node_added nprs = [r.node for r in getNonPrimaryReplicas(txnPoolNodeSet, 0)] primary_node = getPrimaryReplica(txnPoolNodeSet, 0).node slow_node = nprs[-1] # nodes_slow_to_inst_chg = [primary_node] + nprs[:2] nodes_slow_to_inst_chg = [n for n in txnPoolNodeSet if n != slow_node] delay_3pc = 100 delay_ic = 5 sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2 * Max3PCBatchSize) delay_3pc_messages([slow_node], 0, delay_3pc) for n in nodes_slow_to_inst_chg: n.nodeIbStasher.delay(icDelay(delay_ic)) def start_count(): return sum([1 for e in slow_node.ledgerManager.spylog.getAll( slow_node.ledgerManager.startCatchUpProcess.__name__) if e.params['ledgerId'] == DOMAIN_LEDGER_ID]) s = start_count() requests = sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 10 * Max3PCBatchSize) ensure_view_change(looper, nodes=txnPoolNodeSet, exclude_from_check=nodes_slow_to_inst_chg) sdk_get_and_check_replies(looper, requests) waitNodeDataEquality(looper, slow_node, *txnPoolNodeSet[:-1]) e = start_count() assert e - s >= 2 looper.run(eventually(checkViewNoForNodes, slow_node.viewNo)) checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 2 * Max3PCBatchSize) waitNodeDataEquality(looper, new_node, *nodes_slow_to_inst_chg)
def test_can_restore_last_sent_pp_seq_no_if_relevant(
        txnPoolNodeSet, view_no_set, setup):
    replica = getPrimaryReplica(txnPoolNodeSet, instId=1)
    node = replica.node
    assert node.viewNo == 2

    can = node.last_sent_pp_store_helper._can_restore_last_sent_pp_seq_no(
        PrePrepareKey(inst_id=1, view_no=2, pp_seq_no=5))

    assert can is True
def test_cannot_restore_last_sent_pp_seq_no_if_replica_is_master(
        txnPoolNodeSet, view_no_set, setup):
    replica = getPrimaryReplica(txnPoolNodeSet, instId=0)
    node = replica.node
    assert node.viewNo == 2

    can = node.last_sent_pp_store_helper._can_restore_last_sent_pp_seq_no(
        PrePrepareKey(inst_id=0, view_no=2, pp_seq_no=5))

    assert can is False
def test_stable_checkpoint_when_one_instance_slow(chkFreqPatched,
                                                  looper,
                                                  txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_client,
                                                  reqs_for_checkpoint):
    delay = 5
    pr = getPrimaryReplica(txnPoolNodeSet, 1)
    slowNode = pr.node
    otherNodes = [n for n in txnPoolNodeSet if n != slowNode]
    for n in otherNodes:
        n.nodeIbStasher.delay(ppDelay(delay, 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, reqs_for_checkpoint)
    timeout = waits.expectedTransactionExecutionTime(len(txnPoolNodeSet)) + delay
    looper.run(eventually(chkChkpoints, txnPoolNodeSet, 1, 0, retryWait=1,
                          timeout=timeout))
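# The checkpoint test above uses a chkChkpoints helper that is not shown in
# this section. The following sketch is an assumption based on its call
# signature here (nodes, total number of checkpoints, index of the stable
# one); the actual test utility may check more than this.
def chkChkpoints_sketch(nodes, total, stable_index=None):
    for node in nodes:
        for replica in node.replicas.values():
            # Each replica should hold exactly `total` checkpoint entries ...
            assert len(replica.checkpoints) == total, \
                '{} has {} checkpoints'.format(replica,
                                               len(replica.checkpoints))
            # ... and the one at `stable_index` should have been stabilised.
            if stable_index is not None:
                checkpoint = list(replica.checkpoints.values())[stable_index]
                assert checkpoint.isStable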
def setup(txnPoolNodeSet):
    primaryRep, nonPrimaryReps = getPrimaryReplica(txnPoolNodeSet, 0), \
        getNonPrimaryReplicas(txnPoolNodeSet, 0)

    # A non primary replica sends PREPARE messages with incorrect digest
    faultyRep = nonPrimaryReps[0]
    makeNodeFaulty(faultyRep.node, partial(send3PhaseMsgWithIncorrectDigest,
                                           msgType=Prepare, instId=0))

    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps,
                 faultyRep=faultyRep)
def send_prepare(view_no, pp_seq_no, nodes, state_root=None, txn_root=None):
    prepare = Prepare(
        0,
        view_no,
        pp_seq_no,
        get_utc_epoch(),
        "random digest",
        state_root or '0' * 44,
        txn_root or '0' * 44
    )
    primary_node = getPrimaryReplica(nodes).node
    sendMessageToAll(nodes, primary_node, prepare)
def test_try_restore_last_sent_pp_seq_no_if_none_stored(
        tconf, txnPoolNodeSet, view_no_set, setup):
    replica = getPrimaryReplica(txnPoolNodeSet, instId=1)
    node = replica.node
    assert node.viewNo == 2

    node.last_sent_pp_store_helper.try_restore_last_sent_pp_seq_no()

    assert replica.lastPrePrepareSeqNo == 0
    assert replica.last_ordered_3pc == (2, 0)
    assert replica.h == 0
    assert replica.H == 0 + tconf.LOG_SIZE
def test_no_propagate_request_on_different_last_ordered_on_backup_before_vc(looper, txnPoolNodeSet,
                                                                             sdk_pool_handle, sdk_wallet_client):
    '''
    Send a random request and do a view change. The fast_nodes (1, 4 - the
    ones without the backup primary replica) have already ordered the
    transaction on the master and on a backup replica, while the slow_nodes
    have not ordered it on the backup replica. Wait for ordering on the
    slow_nodes.
    '''
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    slow_instance = 1
    slow_nodes = txnPoolNodeSet[1:3]
    fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes]
    nodes_stashers = [n.nodeIbStasher for n in slow_nodes]
    old_last_ordered = txnPoolNodeSet[0].replicas[slow_instance].last_ordered_3pc
    with delay_rules(nodes_stashers, cDelay(instId=slow_instance)):
        # send one request
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, 1)
        old_view_no = txnPoolNodeSet[0].viewNo
        looper.run(
            eventually(check_last_ordered,
                       fast_nodes,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1)))
        check_last_ordered(slow_nodes,
                           slow_instance,
                           old_last_ordered)

        # trigger view change on all nodes
        ensure_view_change(looper, txnPoolNodeSet)
        # wait for view change done on all nodes
        ensureElectionsDone(looper, txnPoolNodeSet)

    primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node
    non_primaries = [n for n in txnPoolNodeSet if n is not primary]

    check_last_ordered(non_primaries,
                       slow_instance,
                       (old_view_no, old_last_ordered[1] + 1))

    # Backup primary replica must not advance last_ordered_3pc
    # up to the master's value
    check_last_ordered([primary],
                       slow_instance,
                       (old_view_no, old_last_ordered[1]))

    check_last_ordered(txnPoolNodeSet,
                       txnPoolNodeSet[0].master_replica.instId,
                       (old_last_ordered[0], old_last_ordered[1] + 1))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    assert all(0 == node.spylog.count(node.request_propagates)
               for node in txnPoolNodeSet)
def step1(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    startedNodes = txnPoolNodeSet
    """
    stand up a pool of nodes and send 5 requests to client
    """
    # the master instance has a primary replica, call it P
    P = getPrimaryReplica(startedNodes)
    # keep the request/reply pairs so later steps can refer to them
    requests = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client, 5)
    # profile_this(sendReqsToNodesAndVerifySuffReplies, looper, client1, 5)

    return adict(P=P,
                 nodes=startedNodes,
                 requests=requests)
def setup(txnPoolNodeSet):
    primaryRep, nonPrimaryReps = getPrimaryReplica(txnPoolNodeSet, 0), \
        getNonPrimaryReplicas(txnPoolNodeSet, 0)

    # The primary replica would send 3 duplicate PRE-PREPARE requests to
    # non primary replicas
    makeNodeFaulty(primaryRep.node, partial(sendDuplicate3PhaseMsg,
                                            msgType=PrePrepare, count=3))

    # The node of the primary replica above should not be blacklisted by any
    # other node since we are simulating multiple PRE-PREPARE messages and
    # want to check for a particular suspicion
    return adict(primaryRep=primaryRep, nonPrimaryReps=nonPrimaryReps)
def one_replica_and_others_in_backup_instance(
        request, txnPoolNodeSet, view_change_done):
    # NOTICE: This parametrized fixture triggers view change as pre-condition
    backup_inst_id = 1
    primary = getPrimaryReplica(txnPoolNodeSet, backup_inst_id)
    non_primaries = getNonPrimaryReplicas(txnPoolNodeSet, backup_inst_id)
    if request.param == 'primary':
        return primary, non_primaries
    else:
        return non_primaries[0], [primary] + non_primaries[1:]
def testNodeDiscardMessageFromUnknownView(txnPoolNodeSet, sdk_node_set_with_node_added_after_some_txns, sdk_new_node_caught_up, allPluginsPath, sdk_wallet_client): """ Node discards 3-phase or ViewChangeDone messages from view nos that it does not know of (view nos before it joined the pool) :return: """ looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \ sdk_node_set_with_node_added_after_some_txns viewNo = new_node.viewNo # Force two view changes: node discards msgs which have viewNo # at least two less than node's. Current protocol implementation # needs to hold messages from the previous view as well as # from the current view. for i in range(2): ensure_view_change(looper, txnPoolNodeSet) waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1]) checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1) sender = txnPoolNodeSet[0] rid_x_node = sender.nodestack.getRemote(new_node.name).uid messageTimeout = waits.expectedNodeToNodeMessageDeliveryTime() # 3 pc msg (PrePrepare) needs to be discarded _, did = sdk_wallet_client primaryRepl = getPrimaryReplica(txnPoolNodeSet) three_pc = PrePrepare( 0, viewNo, 10, get_utc_epoch(), ["random request digest"], init_discarded(), "random digest", DOMAIN_LEDGER_ID, primaryRepl.stateRootHash(DOMAIN_LEDGER_ID), primaryRepl.txnRootHash(DOMAIN_LEDGER_ID), 0, True ) sender.send(three_pc, rid_x_node) looper.run(eventually(checkDiscardMsg, [new_node, ], three_pc, 'un-acceptable viewNo', retryWait=1, timeout=messageTimeout))
def test_view_change_on_quorum_of_master_degraded(txnPoolNodeSet, looper,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward,
                                                  viewNo):
    """
    Node will change view even though it does not find the master to be
    degraded when a quorum of nodes agree that master performance degraded
    """
    m_primary_node = get_master_primary_node(list(txnPoolNodeSet))

    # Delay processing of PRE-PREPARE from all non primary replicas of master
    # so master's performance falls and view changes
    delayNonPrimaries(txnPoolNodeSet, 0, 10)

    pr = getPrimaryReplica(txnPoolNodeSet, 0)
    reluctantNode = pr.node

    # Count sent instance changes of all nodes
    sentInstChanges = {}
    instChngMethodName = ViewChanger.sendInstanceChange.__name__
    for n in txnPoolNodeSet:
        sentInstChanges[n.name] = n.view_changer.spylog.count(instChngMethodName)

    # Node reluctant to change view, never says master is degraded
    reluctantNode.monitor.isMasterDegraded = types.MethodType(
        lambda x: False, reluctantNode.monitor)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 4)

    # Check that view change happened for all nodes
    waitForViewChange(looper, txnPoolNodeSet, expectedViewNo=viewNo + 1)

    # All nodes except the reluctant node should have sent a view change and
    # thus must have called `sendInstanceChange`
    for n in txnPoolNodeSet:
        if n.name != reluctantNode.name:
            assert n.view_changer.spylog.count(instChngMethodName) > \
                sentInstChanges.get(n.name, 0)
        else:
            assert n.view_changer.spylog.count(instChngMethodName) == \
                sentInstChanges.get(n.name, 0)

    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    new_m_primary_node = get_master_primary_node(list(txnPoolNodeSet))
    assert m_primary_node.name != new_m_primary_node.name
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_try_restore_last_sent_pp_seq_no_if_invalid_stored(
        tconf, txnPoolNodeSet, view_no_set, setup):
    replica = getPrimaryReplica(txnPoolNodeSet, instId=1)
    node = replica.node
    assert node.viewNo == 2

    node.nodeStatusDB.put(LAST_SENT_PRE_PREPARE,
                          pack_pp_key(PrePrepareKey(inst_id=1,
                                                    view_no=2,
                                                    pp_seq_no=5))[:-1])

    node.last_sent_pp_store_helper.try_restore_last_sent_pp_seq_no()

    assert replica.lastPrePrepareSeqNo == 0
    assert replica.last_ordered_3pc == (2, 0)
    assert replica.h == 0
    assert replica.H == 0 + tconf.LOG_SIZE
def test_master_primary_different_from_previous_view_for_itself(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client):
    """
    After a view change the primary must be different from the previous
    primary for the master instance; it does not matter for other instances.
    Break it into 2 tests, one where the primary is malign and votes for
    itself but is still not made primary in the next view.
    """
    old_view_no = checkViewNoForNodes(txnPoolNodeSet)
    pr = slow_primary(txnPoolNodeSet, 0, delay=10)
    old_pr_node = pr.node

    def _get_undecided_inst_id(self):
        undecideds = [i for i, r in self.replicas
                      if r.isPrimary is None]
        # Try to nominate for the master instance
        return undecideds, 0

    # Patching old primary's elector's method to nominate itself
    # again for the new view
    old_pr_node.elector._get_undecided_inst_id = types.MethodType(
        _get_undecided_inst_id, old_pr_node.elector)

    # View change happens
    provoke_and_wait_for_view_change(looper,
                                     txnPoolNodeSet,
                                     old_view_no + 1,
                                     sdk_pool_handle,
                                     sdk_wallet_client)

    # Elections done
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    # New primary is not same as old primary
    assert getPrimaryReplica(txnPoolNodeSet, 0).node.name != old_pr_node.name

    # All other nodes discarded the nomination by the old primary
    for node in txnPoolNodeSet:
        if node != old_pr_node:
            assert countDiscarded(node.elector,
                                  'of master in previous view too') == 1

    # The new primary can still process requests
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 5)
def test_checkpoints_not_removed_on_backup_primary_replica_after_catchup( chkFreqPatched, txnPoolNodeSet, view_setup, clear_checkpoints): replica = getPrimaryReplica(txnPoolNodeSet, 1) others = set(getAllReplicas(txnPoolNodeSet, 1)) - {replica} node = replica.node node.master_replica.last_ordered_3pc = (2, 12) replica.checkpoints[(11, 15)] = CheckpointState(seqNo=15, digests=[], digest='digest-11-15', receivedDigests={r.name: 'digest-11-15' for r in others}, isStable=True) replica.checkpoints[(16, 20)] = CheckpointState(seqNo=19, digests=['digest-16', 'digest-17', 'digest-18', 'digest-19'], digest=None, receivedDigests={}, isStable=False) replica.stashedRecvdCheckpoints[2] = {} replica.stashedRecvdCheckpoints[2][(16, 20)] = {} replica.stashedRecvdCheckpoints[2][(16, 20)][next(iter(others)).name] = \ Checkpoint(instId=1, viewNo=2, seqNoStart=16, seqNoEnd=20, digest='digest-16-20') # Simulate catch-up completion node.ledgerManager.last_caught_up_3PC = (2, 20) node.allLedgersCaughtUp() assert len(replica.checkpoints) == 2 assert (11, 15) in replica.checkpoints assert (16, 20) in replica.checkpoints assert len(replica.stashedRecvdCheckpoints) == 1 assert 2 in replica.stashedRecvdCheckpoints assert len(replica.stashedRecvdCheckpoints[2]) == 1 assert (16, 20) in replica.stashedRecvdCheckpoints[2] assert len(replica.stashedRecvdCheckpoints[2][(16, 20)]) == 1
def test_no_new_view_3pc_messages_processed_during_view_change(
        looper, txnPoolNodeSet):
    for node in txnPoolNodeSet:
        node.view_change_in_progress = True

    new_view_no = getPrimaryReplica(txnPoolNodeSet).node.viewNo + 1
    pp_seq_no = 1

    send_pre_prepare(new_view_no, pp_seq_no, txnPoolNodeSet)
    looper.runFor(1)
    check_all_replica_queue_empty(txnPoolNodeSet)

    send_prepare(new_view_no, pp_seq_no, txnPoolNodeSet)
    looper.runFor(1)
    check_all_replica_queue_empty(txnPoolNodeSet)

    send_commit(new_view_no, pp_seq_no, txnPoolNodeSet)
    looper.runFor(1)
    check_all_replica_queue_empty(txnPoolNodeSet)
def testPrePrepareProcessedInOrder(perf_chk_patched,
                                   looper,
                                   txnPoolNodeSet,
                                   sdk_pool_handle,
                                   sdk_wallet_client):
    """
    A non-primary receives PRE-PREPAREs out of order: it receives the one
    with ppSeqNo 2 earlier than the one with ppSeqNo 1, but it stashes the
    one with ppSeqNo 2 and only unstashes it for processing once it has
    processed the PRE-PREPARE with ppSeqNo 1
    :return:
    """
    tconf = perf_chk_patched
    pr, otherR = getPrimaryReplica(txnPoolNodeSet, instId=0), \
        getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    otherNodes = [r.node for r in otherR]
    ppsToDelay = 2
    # NOTE: the value below is an assumption; in the original module
    # `pp_delay` is defined outside this function.
    pp_delay = 10
    delayeds = 0
    expectedDelayeds = (len(txnPoolNodeSet) - 1) * ppsToDelay
    delayedPpSeqNos = set()

    def specificPrePrepares(wrappedMsg):
        nonlocal delayeds
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare) and delayeds < expectedDelayeds:
            delayeds += 1
            delayedPpSeqNos.add(msg.ppSeqNo)
            logger.debug('ppSeqNo {} would be delayed'.format(msg.ppSeqNo))
            return pp_delay

    for node in otherNodes:
        logger.debug('{} would be delaying reception of some pre-prepares'.
                     format(node))
        node.nodeIbStasher.delay(specificPrePrepares)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client,
                              (ppsToDelay + 1) * tconf.Max3PCBatchSize)

    checkNodesHaveSameRoots(txnPoolNodeSet)

    for r in otherR:
        seqNos = [a['pp'].ppSeqNo for a in getAllArgs(r, r.addToPrePrepares)]
        seqNos.reverse()
        assert sorted(seqNos) == seqNos
def test_no_propagate_request_on_different_prepares_on_backup_before_vc(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client): ''' 1. Send random request 2. Make 3 node on backup instance slow in getting prepares 3. Send random request 4. do view change 5. reset delays => we expect that all nodes and all instances have the same last ordered ''' sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) slow_instance = 1 slow_nodes = txnPoolNodeSet[1:3] fast_nodes = [n for n in txnPoolNodeSet if n not in slow_nodes] nodes_stashers = [n.nodeIbStasher for n in slow_nodes] old_last_ordered = txnPoolNodeSet[0].replicas[slow_instance].last_ordered_3pc batches_count = old_last_ordered[1] with delay_rules(nodes_stashers, pDelay(instId=slow_instance)): with delay_rules(nodes_stashers, ppDelay(instId=slow_instance)): # send one request sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) batches_count += 1 old_view_no = txnPoolNodeSet[0].viewNo looper.run( eventually(is_prepared, fast_nodes, batches_count, slow_instance)) # trigger view change on all nodes ensure_view_change(looper, txnPoolNodeSet) # wait for view change done on all nodes ensureElectionsDone(looper, txnPoolNodeSet) batches_count += 1 primary = getPrimaryReplica(txnPoolNodeSet, slow_instance).node non_primaries = [n for n in txnPoolNodeSet if n is not primary] looper.run(eventually(check_last_ordered, non_primaries, slow_instance, (old_view_no + 1, 1))) # Backup primary replica set new_view and seq_no == 1, because of primary batch looper.run(eventually(check_last_ordered, [primary], slow_instance, (old_view_no + 1, 1))) # +2 because 2 batches will be reordered after view_change looper.run(eventually(check_last_ordered, txnPoolNodeSet, txnPoolNodeSet[0].master_replica.instId, (old_last_ordered[0] + 1, batches_count + 2))) sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, 1) batches_count += 1 looper.run( eventually(check_last_ordered, txnPoolNodeSet, slow_instance, (txnPoolNodeSet[0].viewNo, 2))) assert all(0 == node.spylog.count(node.request_propagates) for node in txnPoolNodeSet)
def send_prepare(view_no, pp_seq_no, nodes, state_root=None, txn_root=None):
    prepare = Prepare(0,
                      view_no,
                      pp_seq_no,
                      get_utc_epoch(),
                      "random digest",
                      state_root or '0' * 44,
                      txn_root or '0' * 44)
    primary_node = getPrimaryReplica(nodes).node
    sendMessageToAll(nodes, primary_node, prepare)
def primaryReplicas(txnPoolNodeSet):
    instanceCount = getNoInstances(nodeCount)
    return [getPrimaryReplica(txnPoolNodeSet, i) for i in range(instanceCount)]
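# Every snippet in this section leans on getPrimaryReplica itself. As a
# reference, the sketch below is an assumption of what it does, inferred from
# how it is used here (pick the single replica of the given instance whose
# isPrimary flag is set); the real helper in the test utilities may differ in
# details such as error handling.
def getPrimaryReplica_sketch(nodes, instId=0):
    preplicas = [node.replicas[instId] for node in nodes
                 if node.replicas[instId].isPrimary]
    # Exactly one replica per protocol instance may be primary at a time.
    if len(preplicas) != 1:
        raise RuntimeError('Expected exactly one primary replica for '
                           'instance {}, found {}'.format(instId,
                                                          len(preplicas)))
    return preplicas[0]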
def send_commit(view_no, pp_seq_no, nodes):
    commit = Commit(0,
                    view_no,
                    pp_seq_no)
    primary_node = getPrimaryReplica(nodes).node
    sendMessageToAll(nodes, primary_node, commit)
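# The view-change test above also calls send_pre_prepare, which is not part
# of this section. The sketch below is an assumption that mirrors the
# send_prepare/send_commit helpers here and reuses the PrePrepare field
# layout from testNodeDiscardMessageFromUnknownView; the real helper may
# build the message differently.
def send_pre_prepare_sketch(view_no, pp_seq_no, nodes,
                            state_root=None, txn_root=None):
    pre_prepare = PrePrepare(
        0,
        view_no,
        pp_seq_no,
        get_utc_epoch(),
        ["random request digest"],
        init_discarded(),
        "random digest",
        DOMAIN_LEDGER_ID,
        state_root or '0' * 44,
        txn_root or '0' * 44,
        0,
        True)
    primary_node = getPrimaryReplica(nodes).node
    sendMessageToAll(nodes, primary_node, pre_prepare)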
def g(instId): primary = getPrimaryReplica(txnPoolNodeSet, instId) nonPrimaryReplicas = getNonPrimaryReplicas(txnPoolNodeSet, instId) def primarySeesCorrectNumberOfPREPREPAREs(): """ no of PRE-PREPARE as seen by processPrePrepare method for primary must be 0 with or without faults in system """ l1 = len([ param for param in getAllArgs(primary, primary.processPrePrepare) ]) assert l1 == 0, 'Primary {} sees no pre-prepare'.format(primary) def nonPrimarySeesCorrectNumberOfPREPREPAREs(): """ 1. no of PRE-PREPARE as seen by processPrePrepare method for non-primaries must be 1; whn zero faulty nodes in system. 2. no of PRE-PREPARE as seen by processPrePrepare method for non-primaries must be greater than or equal to 0; with faults in system. """ expectedPrePrepareRequest = PrePrepare( instId, primary.viewNo, primary.lastPrePrepareSeqNo, get_utc_epoch(), [[propagated1.identifier, propagated1.reqId]], 1, Replica.batchDigest([ propagated1, ]), DOMAIN_LEDGER_ID, primary.stateRootHash(DOMAIN_LEDGER_ID), primary.txnRootHash(DOMAIN_LEDGER_ID), ) passes = 0 for npr in nonPrimaryReplicas: actualMsgs = len([ param for param in getAllArgs(npr, npr.processPrePrepare) if (param['pre_prepare'][0:3] + param['pre_prepare'][4:], param['sender']) == (expectedPrePrepareRequest[0:3] + expectedPrePrepareRequest[4:], primary.name) ]) numOfMsgsWithZFN = 1 numOfMsgsWithFaults = 0 passes += int( msgCountOK(nodesSize, faultyNodes, actualMsgs, numOfMsgsWithZFN, numOfMsgsWithFaults)) assert passes >= len(nonPrimaryReplicas) - faultyNodes, \ 'Non-primary sees correct number pre-prepares - {}'.format(passes) def primarySentsCorrectNumberOfPREPREPAREs(): """ 1. no of PRE-PREPARE sent by primary is 1 with or without fault in system but, when primary is faulty no of sent PRE_PREPARE will be zero and primary must be marked as malicious. """ actualMsgs = len([ param for param in getAllArgs(primary, primary.sendPrePrepare) if (param['ppReq'].reqIdr[0][0], param['ppReq'].reqIdr[0][1], param['ppReq'].digest) == (propagated1.identifier, propagated1.reqId, primary.batchDigest([ propagated1, ])) ]) numOfMsgsWithZFN = 1 # TODO: Considering, Primary is not faulty and will always send # PRE-PREPARE. Write separate test for testing when Primary # is faulty assert msgCountOK(nodesSize, faultyNodes, actualMsgs, numOfMsgsWithZFN, numOfMsgsWithZFN ), 'Primary sends correct number of per-prepare' def nonPrimaryReceivesCorrectNumberOfPREPREPAREs(): """ 1. no of PRE-PREPARE received by non-primaries must be 1 with zero faults in system, and 0 faults in system. """ passes = 0 for npr in nonPrimaryReplicas: l4 = len([ param for param in getAllArgs(npr, npr.addToPrePrepares) if (param['pp'].reqIdr[0][0], param['pp'].reqIdr[0][1], param['pp'].digest) == (propagated1.identifier, propagated1.reqId, primary.batchDigest([ propagated1, ])) ]) numOfMsgsWithZFN = 1 numOfMsgsWithFaults = 0 passes += msgCountOK(nodesSize, faultyNodes, l4, numOfMsgsWithZFN, numOfMsgsWithFaults) assert passes >= len(nonPrimaryReplicas) - faultyNodes, \ 'Non-primary receives correct number of pre-prepare -- {}'.format(passes) primarySeesCorrectNumberOfPREPREPAREs() nonPrimarySeesCorrectNumberOfPREPREPAREs() primarySentsCorrectNumberOfPREPREPAREs() nonPrimaryReceivesCorrectNumberOfPREPREPAREs()
def g(instId): allReplicas = getAllReplicas(txnPoolNodeSet, instId) primary = getPrimaryReplica(txnPoolNodeSet, instId) nonPrimaryReplicas = getNonPrimaryReplicas(txnPoolNodeSet, instId) def primaryDontSendAnyPREPAREs(): """ 1. no of PREPARE sent by primary should be 0 """ for r in allReplicas: for param in getAllArgs(r, Replica.processPrepare): sender = param['sender'] assert sender != primary.name def allReplicasSeeCorrectNumberOfPREPAREs(): """ 1. no of PREPARE received by replicas must be n - 1; n = num of nodes without fault, and greater than or equal to n-f-1 with faults. """ passes = 0 numOfMsgsWithZFN = nodeCount - 1 numOfMsgsWithFaults = quorums.prepare.value for replica in allReplicas: key = primary.viewNo, primary.lastPrePrepareSeqNo if key in replica.prepares: actualMsgs = len(replica.prepares[key].voters) passes += int( msgCountOK(nodeCount, faultyNodes, actualMsgs, numOfMsgsWithZFN, numOfMsgsWithFaults)) assert passes >= len(allReplicas) - faultyNodes def primaryReceivesCorrectNumberOfPREPAREs(): """ num of PREPARE seen by primary replica is n - 1; n = num of nodes without fault, and greater than or equal to n-f-1 with faults. """ actualMsgs = len([ param for param in getAllArgs(primary, primary.processPrepare) if (param['prepare'].instId, param['prepare'].viewNo, param['prepare'].ppSeqNo) == (primary.instId, primary.viewNo, primary.lastPrePrepareSeqNo) and param['sender'] != primary.name ]) numOfMsgsWithZFN = nodeCount - 1 numOfMsgsWithFaults = quorums.prepare.value assert msgCountOK(nodeCount, faultyNodes, actualMsgs, numOfMsgsWithZFN, numOfMsgsWithFaults) # TODO what if the primary is faulty? def nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs(): """ num of PREPARE seen by Non primary replica is n - 2 without faults and n-f-2 with faults. """ passes = 0 numOfMsgsWithZFN = nodeCount - 2 numOfMsgsWithFaults = quorums.prepare.value - 1 for npr in nonPrimaryReplicas: actualMsgs = len([ param for param in getAllArgs(npr, npr.processPrepare) if (param['prepare'].instId, param['prepare'].viewNo, param['prepare'].ppSeqNo) == ( primary.instId, primary.viewNo, primary.lastPrePrepareSeqNo) ]) passes += int( msgCountOK(nodeCount, faultyNodes, actualMsgs, numOfMsgsWithZFN, numOfMsgsWithFaults)) assert passes >= len(nonPrimaryReplicas) - faultyNodes # TODO how do we know if one of the faulty nodes is a primary or # not? primaryDontSendAnyPREPAREs() allReplicasSeeCorrectNumberOfPREPAREs() primaryReceivesCorrectNumberOfPREPAREs() nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs()
def test_node_erases_last_sent_pp_key_on_pool_restart(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf, tdir, allPluginsPath, chkFreqPatched): # Get a node with a backup primary replica and the rest of the nodes replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id) node = replica.node # Send some 3PC-batches and wait until the replica orders the 3PC-batches sdk_send_batches_of_random(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, num_reqs=7, num_batches=7, timeout=tconf.Max3PCBatchWait) looper.run( eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 7)), retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount))) # Check view no of the node and lastPrePrepareSeqNo of the replica assert node.viewNo == 0 assert replica.lastPrePrepareSeqNo == 7 assert replica.h == 6 assert replica.H == 6 + LOG_SIZE # Ensure that there is a stored last sent PrePrepare key on the node assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB # Restart all the nodes in the pool and wait for primary elections done all_nodes = copy(txnPoolNodeSet) for n in all_nodes: disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, n.name, timeout=nodeCount, stopNode=True) looper.removeProdable(n) txnPoolNodeSet.remove(n) for n in all_nodes: txnPoolNodeSet.append( start_stopped_node(n, looper, tconf, tdir, allPluginsPath)) looper.run(checkNodesConnected(txnPoolNodeSet)) ensureElectionsDone(looper, txnPoolNodeSet) node = nodeByName(txnPoolNodeSet, node.name) replica = node.replicas[backup_inst_id] # Verify that the node has erased the stored last sent PrePrepare key assert LAST_SENT_PRE_PREPARE not in node.nodeStatusDB # Verify correspondingly that after the pool restart the replica # (which must again be the primary in its instance) has not restored # lastPrePrepareSeqNo, not adjusted last_ordered_3pc and not shifted # the watermarks assert node.viewNo == 0 assert replica.isPrimary assert replica.lastPrePrepareSeqNo == 0 assert replica.last_ordered_3pc == (0, 0) assert replica.h == 0 assert replica.H == 0 + LOG_SIZE # Send a 3PC-batch and ensure that the replica orders it sdk_send_batches_of_random(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, num_reqs=1, num_batches=1, timeout=tconf.Max3PCBatchWait) looper.run( eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 1)), retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount)))
def test_backup_primary_restores_pp_seq_no_if_view_is_same( looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, tconf, tdir, allPluginsPath, chkFreqPatched, view_no): # Get a node with a backup primary replica replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id) node = replica.node # Send some 3PC-batches and wait until the replica orders the 3PC-batches sdk_send_batches_of_random(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, num_reqs=7, num_batches=7, timeout=tconf.Max3PCBatchWait) seq_no = 7 if view_no == 0 else 8 looper.run( eventually(lambda r: assertExp(r.last_ordered_3pc == (view_no, seq_no)), replica, retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount))) # Check view no of the node and lastPrePrepareSeqNo of the replica assert node.viewNo == view_no assert replica.lastPrePrepareSeqNo == seq_no # Ensure that the node has stored the last sent PrePrepare key assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB last_sent_pre_prepare_key = \ node_status_db_serializer.deserialize( node.nodeStatusDB.get(LAST_SENT_PRE_PREPARE)) assert last_sent_pre_prepare_key == { str(backup_inst_id): [view_no, seq_no] } # Restart the node containing the replica disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node.name, stopNode=True) looper.removeProdable(node) txnPoolNodeSet.remove(node) node = start_stopped_node(node, looper, tconf, tdir, allPluginsPath) txnPoolNodeSet.append(node) looper.run(checkNodesConnected(txnPoolNodeSet)) ensureElectionsDone(looper, txnPoolNodeSet) replica = node.replicas[backup_inst_id] # Verify that after the successful propagate primary procedure the replica # (which must still be the primary in its instance) has restored # lastPrePrepareSeqNo and adjusted last_ordered_3pc and shifted # the watermarks correspondingly assert node.viewNo == view_no assert replica.isPrimary assert replica.lastPrePrepareSeqNo == seq_no assert replica.last_ordered_3pc == (view_no, seq_no) assert replica.h == seq_no assert replica.H == seq_no + LOG_SIZE # Verify also that the stored last sent PrePrepare key has not been erased assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB # Send a 3PC-batch and ensure that the replica orders it sdk_send_batches_of_random(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, num_reqs=1, num_batches=1, timeout=tconf.Max3PCBatchWait) seq_no = 8 if view_no == 0 else 9 looper.run( eventually(lambda: assertExp(replica.last_ordered_3pc == (view_no, seq_no)), retryWait=1, timeout=waits.expectedTransactionExecutionTime(nodeCount)))
def test_successive_batch_do_no_change_state(looper, tdirWithPoolTxns, tdirWithDomainTxnsUpdated, tconf, nodeSet, trustee, trusteeWallet, monkeypatch): """ Send 2 NYM txns in different batches such that the second batch does not change state so that state root remains same, but keep the identifier and reqId different. Make sure the first request is not ordered by the primary before PRE-PREPARE for the second is sent. Also check reject and commit :return: """ prim_node = getPrimaryReplica(nodeSet, 0).node all_reqs = [] # Delay only first PRE-PREPARE pp_seq_no_to_delay = 1 delay_pp_duration = 5 delay_cm_duration = 10 def delay_commits(wrappedMsg): msg, sender = wrappedMsg if isinstance(msg, Commit) and msg.instId == 0: return delay_cm_duration def new_identity(): wallet = Wallet(randomString(5)) signer = DidSigner() new_idr, _ = wallet.addIdentifier(signer=signer) verkey = wallet.getVerkey(new_idr) idy = Identity(identifier=new_idr, verkey=verkey, role=None) return idy, wallet def submit_id_req(idy, wallet=None, client=None): nonlocal all_reqs wallet = wallet if wallet is not None else trusteeWallet client = client if client is not None else trustee wallet.updateTrustAnchoredIdentity(idy) reqs = wallet.preparePending() all_reqs.extend(reqs) client.submitReqs(*reqs) return reqs def submit_id_req_and_wait(idy, wallet=None, client=None): reqs = submit_id_req(idy, wallet=wallet, client=client) looper.runFor(.2) return reqs def check_verkey(i, vk): for node in nodeSet: data = node.idrCache.getNym(i, isCommitted=True) assert data[VERKEY] == vk def check_uncommitted(count): for node in nodeSet: assert len(node.idrCache.un_committed) == count for node in nodeSet: for rpl in node.replicas: monkeypatch.setattr(rpl, '_request_missing_three_phase_messages', lambda *x, **y: None) idy, new_wallet = new_identity() new_idr = idy.identifier verkey = idy.verkey submit_id_req(idy) waitForSufficientRepliesForRequests(looper, trustee, requests=all_reqs[-1:], add_delay_to_timeout=delay_cm_duration) for node in nodeSet: node.nodeIbStasher.delay(delay_commits) new_client, _ = genTestClient(nodeSet, tmpdir=tdirWithPoolTxns, usePoolLedger=True) looper.add(new_client) looper.run(new_client.ensureConnectedToNodes(count=len(nodeSet))) new_client.registerObserver(new_wallet.handleIncomingReply, name='temp') idy.seqNo = None # Setting the same verkey thrice but in different batches with different # request ids for _ in range(3): req, = submit_id_req_and_wait(idy, wallet=new_wallet, client=new_client) logger.debug('{} sent request {} to change verkey'.format( new_client, req)) waitForSufficientRepliesForRequests(looper, new_client, requests=all_reqs[-3:], add_delay_to_timeout=delay_cm_duration) # Number of uncommitted entries is 0 looper.run(eventually(check_uncommitted, 0)) check_verkey(new_idr, verkey) pp_seq_no_to_delay = 4 new_client.deregisterObserver(name='temp') # Setting the verkey to `x`, then `y` and then back to `x` but in different # batches with different request ids. 
The idea is to change # state root to `t` then `t'` and then back to `t` and observe that no # errors are encountered idy, new_wallet = new_identity() submit_id_req(idy) waitForSufficientRepliesForRequests(looper, trustee, requests=all_reqs[-1:], add_delay_to_timeout=delay_cm_duration) new_client.registerObserver(new_wallet.handleIncomingReply) idy.seqNo = None x_signer = SimpleSigner(identifier=idy.identifier) idy.verkey = x_signer.verkey req, = submit_id_req_and_wait(idy, wallet=new_wallet, client=new_client) new_wallet.updateSigner(idy.identifier, x_signer) logger.debug('{} sent request {} to change verkey'.format(new_client, req)) y_signer = SimpleSigner(identifier=idy.identifier) idy.verkey = y_signer.verkey req, = submit_id_req_and_wait(idy, wallet=new_wallet, client=new_client) new_wallet.updateSigner(idy.identifier, y_signer) logger.debug('{} sent request {} to change verkey'.format(new_client, req)) idy.verkey = x_signer.verkey req, = submit_id_req_and_wait(idy, wallet=new_wallet, client=new_client) new_wallet.updateSigner(idy.identifier, x_signer) logger.debug('{} sent request {} to change verkey'.format(new_client, req)) waitForSufficientRepliesForRequests(looper, new_client, requests=all_reqs[-3:], add_delay_to_timeout=delay_cm_duration) # Number of uncommitted entries is 0 looper.run(eventually(check_uncommitted, 0)) check_verkey(new_idr, verkey) monkeypatch.undo() # Delay COMMITs so that IdrCache can be checked for correct # number of entries uncommitteds = {} methods = {} for node in nodeSet: cache = node.idrCache uncommitteds[cache._name] = [] cre = cache.currentBatchCreated com = cache.onBatchCommitted methods[cache._name] = (cre, com) # Patch methods to record and check roots after commit def patched_cre(self, stateRoot): uncommitteds[self._name].append(stateRoot) return methods[self._name][0](stateRoot) def patched_com(self, stateRoot): assert uncommitteds[self._name][0] == stateRoot rv = methods[self._name][1](stateRoot) uncommitteds[self._name] = uncommitteds[self._name][1:] return rv cache.currentBatchCreated = types.MethodType(patched_cre, cache) cache.onBatchCommitted = types.MethodType(patched_com, cache) # Set verkey of multiple identities more = 5 keys = {} for _ in range(more): idy, _ = new_identity() keys[idy.identifier] = idy.verkey submit_id_req(idy) looper.runFor(.01) # Correct number of uncommitted entries looper.run(eventually(check_uncommitted, more, retryWait=1)) waitForSufficientRepliesForRequests(looper, trustee, requests=all_reqs[-more:], add_delay_to_timeout=delay_cm_duration) # Number of uncommitted entries is 0 looper.run(eventually(check_uncommitted, 0)) # The verkeys are correct for i, v in keys.items(): check_verkey(i, v) waitNodeDataEquality(looper, nodeSet[0], *nodeSet[1:]) keys = {} for _ in range(3): idy, _ = new_identity() keys[idy.identifier] = idy.verkey submit_id_req(idy) looper.runFor(.01) # Correct number of uncommitted entries looper.run(eventually(check_uncommitted, 3, retryWait=1)) # Check batch reject for node in nodeSet: cache = node.idrCache initial = cache.un_committed cache.batchRejected() # After reject, last entry is removed assert cache.un_committed == initial[:-1] root = cache.un_committed[0][0] cache.onBatchCommitted(root) # Calling commit with same root results in Assertion error with pytest.raises(AssertionError): cache.onBatchCommitted(root)
def test_successive_batch_do_no_change_state(looper, tdirWithPoolTxns, tdirWithDomainTxnsUpdated, tconf, nodeSet, trustee, trusteeWallet): """ Send 2 NYM txns in different batches such that the second batch does not change state so that state root remains same, but keep the identifier and reqId different. Make sure the first request is not ordered by the primary before PRE-PREPARE for the second is sent. Also check reject and commit :return: """ prim_node = getPrimaryReplica(nodeSet, 0).node other_nodes = [n for n in nodeSet if n != prim_node] # Delay only first PRE-PREPARE pp_seq_no_to_delay = 1 def specific_pre_prepare(wrappedMsg): nonlocal pp_seq_no_to_delay msg, sender = wrappedMsg if isinstance(msg, PrePrepare) and \ msg.instId == 0 and \ msg.ppSeqNo == pp_seq_no_to_delay: return 5 def delay_commits(wrappedMsg): msg, sender = wrappedMsg if isinstance(msg, Commit) and msg.instId == 0: return 10 def new_identity(): wallet = Wallet(randomString(5)) signer = DidSigner() new_idr, _ = wallet.addIdentifier(signer=signer) verkey = wallet.getVerkey(new_idr) idy = Identity(identifier=new_idr, verkey=verkey, role=None) return idy def submit_id_req(idy): nonlocal all_reqs trusteeWallet.updateTrustAnchoredIdentity(idy) reqs = trusteeWallet.preparePending() all_reqs.extend(reqs) trustee.submitReqs(*reqs) def check_verkey(i, vk): for node in nodeSet: data = node.reqHandler.idrCache.getNym(i, isCommitted=True) assert data[VERKEY] == vk def check_uncommitted(count): for node in nodeSet: assert len(node.reqHandler.idrCache.unCommitted) == count for node in other_nodes: node.nodeIbStasher.delay(specific_pre_prepare) idy = new_identity() new_idr = idy.identifier verkey = idy.verkey all_reqs = [] # Setting the same verkey twice but in different batches with different # request ids for _ in range(3): submit_id_req(idy) looper.runFor(.2) waitForSufficientRepliesForRequests(looper, trustee, requests=all_reqs, add_delay_to_timeout=5) # Number of uncommitted entries is 0 looper.run(eventually(check_uncommitted, 0)) check_verkey(new_idr, verkey) pp_seq_no_to_delay = 4 for node in other_nodes: node.nodeIbStasher.delay(specific_pre_prepare) # Setting the verkey to `x`, then `y` and then back to `x` but in different # batches with different with different request ids. 
The idea is to change # state root to `t` then `t'` and then back to `t` and observe that no # errors are encountered idy = new_identity() new_idr = idy.identifier verkey = idy.verkey submit_id_req(idy) looper.runFor(.2) new_verkey = SimpleSigner().verkey idy.verkey = new_verkey submit_id_req(idy) looper.runFor(.2) idy.verkey = verkey submit_id_req(idy) looper.runFor(.2) waitForSufficientRepliesForRequests(looper, trustee, requests=all_reqs, add_delay_to_timeout=5) # Number of uncommitted entries is 0 looper.run(eventually(check_uncommitted, 0)) check_verkey(new_idr, verkey) # Dleay COMMITs so that IdrCache can be checked for correct # number of entries uncommitteds = {} methods = {} for node in nodeSet: node.nodeIbStasher.delay(delay_commits) cache = node.reqHandler.idrCache uncommitteds[cache._name] = [] cre = cache.currentBatchCreated com = cache.onBatchCommitted methods[cache._name] = (cre, com) # Patch methods to record and check roots after commit def patched_cre(self, stateRoot): uncommitteds[self._name].append(stateRoot) return methods[self._name][0](stateRoot) def patched_com(self, stateRoot): assert uncommitteds[self._name][0] == stateRoot rv = methods[self._name][1](stateRoot) uncommitteds[self._name] = uncommitteds[self._name][1:] return rv cache.currentBatchCreated = types.MethodType(patched_cre, cache) cache.onBatchCommitted = types.MethodType(patched_com, cache) # Set verkey of multiple identities more = 5 keys = {} for _ in range(more): idy = new_identity() keys[idy.identifier] = idy.verkey submit_id_req(idy) looper.runFor(.01) # Correct number of uncommitted entries looper.run(eventually(check_uncommitted, more, retryWait=1)) waitForSufficientRepliesForRequests(looper, trustee, requests=all_reqs, add_delay_to_timeout=10) # Number of uncommitted entries is 0 looper.run(eventually(check_uncommitted, 0)) # The verkeys are correct for i, v in keys.items(): check_verkey(i, v) waitNodeDataEquality(looper, nodeSet[0], *nodeSet[1:]) keys = {} for _ in range(3): idy = new_identity() keys[idy.identifier] = idy.verkey submit_id_req(idy) looper.runFor(.01) # Correct number of uncommitted entries looper.run(eventually(check_uncommitted, 3, retryWait=1)) # Check batch reject for node in nodeSet: cache = node.reqHandler.idrCache initial = cache.unCommitted cache.batchRejected() # After reject, last entry is removed assert cache.unCommitted == initial[:-1] root = cache.unCommitted[0][0] cache.onBatchCommitted(root) # Calling commit with same root results in Assertion error with pytest.raises(AssertionError): cache.onBatchCommitted(root)
def testOrderingCase2(looper, nodeSet, up, client1, wallet1): """ Scenario -> A client sends requests, some nodes delay COMMITs to few specific nodes such some nodes achieve commit quorum later for those requests compared to other nodes. But all nodes `ORDER` request in the same order of ppSeqNos https://www.pivotaltracker.com/n/projects/1889887/stories/133655009 """ pr, replicas = getPrimaryReplica(nodeSet, instId=0), \ getNonPrimaryReplicas(nodeSet, instId=0) assert len(replicas) == 6 rep0 = pr rep1 = replicas[0] rep2 = replicas[1] rep3 = replicas[2] rep4 = replicas[3] rep5 = replicas[4] rep6 = replicas[5] node0 = rep0.node node1 = rep1.node node2 = rep2.node node3 = rep3.node node4 = rep4.node node5 = rep5.node node6 = rep6.node ppSeqsToDelay = 5 commitDelay = 3 # delay each COMMIT by this number of seconds delayedPpSeqNos = set() requestCount = 10 def specificCommits(wrappedMsg): nonlocal node3, node4, node5 msg, sender = wrappedMsg if isinstance(msg, PrePrepare): if len(delayedPpSeqNos) < ppSeqsToDelay: delayedPpSeqNos.add(msg.ppSeqNo) logger.debug('ppSeqNo {} be delayed'.format(msg.ppSeqNo)) if isinstance(msg, Commit) and msg.instId == 0 and \ sender in (n.name for n in (node3, node4, node5)) and \ msg.ppSeqNo in delayedPpSeqNos: return commitDelay for node in (node1, node2): logger.debug('{} would be delaying commits'.format(node)) node.nodeIbStasher.delay(specificCommits) requests = sendRandomRequests(wallet1, client1, requestCount) waitForSufficientRepliesForRequests(looper, client1, requests=requests) def ensureSlowNodesHaveAllTxns(): nonlocal node1, node2 for node in node1, node2: assert len(node.domainLedger) == requestCount timeout = waits.expectedPoolGetReadyTimeout(len(nodeSet)) looper.run( eventually(ensureSlowNodesHaveAllTxns, retryWait=1, timeout=timeout)) checkAllLedgersEqual( (n.domainLedger for n in (node0, node3, node4, node5, node6))) for node in (node1, node2): for n in nodeSet: if n != node: checkLedgerEquality(node.domainLedger, n.domainLedger) checkAllLedgersEqual((n.domainLedger for n in nodeSet))
def g(instId): primary = getPrimaryReplica(nodeSet, instId) nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, instId) def primarySeesCorrectNumberOfPREPREPAREs(): """ no of PRE-PREPARE as seen by processPrePrepare method for primary must be 0 with or without faults in system """ l1 = len([ param for param in getAllArgs(primary, primary.processPrePrepare) ]) assert l1 == 0 def nonPrimarySeesCorrectNumberOfPREPREPAREs(): """ 1. no of PRE-PREPARE as seen by processPrePrepare method for non-primaries must be 1; whn zero faulty nodes in system. 2. no of PRE-PREPARE as seen by processPrePrepare method for non-primaries must be greater than or equal to 0; with faults in system. """ expectedPrePrepareRequest = PrePrepare(instId, primary.viewNo, primary.lastPrePrepareSeqNo, propagated1.identifier, propagated1.reqId, propagated1.digest, time.time()) passes = 0 for npr in nonPrimaryReplicas: actualMsgs = len([ param for param in getAllArgs(npr, npr.processPrePrepare) if (param['pp'][:-1], param['sender']) == (expectedPrePrepareRequest[:-1], primary.name) ]) numOfMsgsWithZFN = 1 numOfMsgsWithFaults = 0 passes += int( msgCountOK(nodesSize, faultyNodes, actualMsgs, numOfMsgsWithZFN, numOfMsgsWithFaults)) assert passes >= len(nonPrimaryReplicas) - faultyNodes def primarySentsCorrectNumberOfPREPREPAREs(): """ 1. no of PRE-PREPARE sent by primary is 1 with or without fault in system but, when primary is faulty no of sent PRE_PREPARE will be zero and primary must be marked as malicious. """ actualMsgs = len([ param for param in getAllArgs(primary, primary.doPrePrepare) if (param['reqDigest'].identifier, param['reqDigest'].reqId, param['reqDigest'].digest) == (propagated1.identifier, propagated1.reqId, propagated1.digest) ]) numOfMsgsWithZFN = 1 # TODO: Considering, Primary is not faulty and will always send # PRE-PREPARE. Write separate test for testing when Primary # is faulty assert msgCountOK(nodesSize, faultyNodes, actualMsgs, numOfMsgsWithZFN, numOfMsgsWithZFN) def nonPrimaryReceivesCorrectNumberOfPREPREPAREs(): """ 1. no of PRE-PREPARE received by non-primaries must be 1 with zero faults in system, and 0 faults in system. """ passes = 0 for npr in nonPrimaryReplicas: l4 = len([ param for param in getAllArgs(npr, npr.addToPrePrepares) if (param['pp'].identifier, param['pp'].reqId, param['pp'].digest) == (propagated1.identifier, propagated1.reqId, propagated1.digest) ]) numOfMsgsWithZFN = 1 numOfMsgsWithFaults = 0 passes += msgCountOK(nodesSize, faultyNodes, l4, numOfMsgsWithZFN, numOfMsgsWithFaults) assert passes >= len(nonPrimaryReplicas) - faultyNodes primarySeesCorrectNumberOfPREPREPAREs() nonPrimarySeesCorrectNumberOfPREPREPAREs() primarySentsCorrectNumberOfPREPREPAREs() nonPrimaryReceivesCorrectNumberOfPREPREPAREs()
def g(instId):
    primary = getPrimaryReplica(txnPoolNodeSet, instId)
    nonPrimaryReplicas = getNonPrimaryReplicas(txnPoolNodeSet, instId)

    def primarySeesCorrectNumberOfPREPREPAREs():
        """
        The number of PRE-PREPAREs seen by the process_preprepare method of
        the primary must be 0, with or without faults in the system.
        """
        l1 = len([param for param in
                  getAllArgs(primary._ordering_service,
                             primary._ordering_service.process_preprepare)])
        assert l1 == 0, 'Primary {} sees no pre-prepare'.format(primary)

    def nonPrimarySeesCorrectNumberOfPREPREPAREs():
        """
        1. The number of PRE-PREPAREs seen by the process_preprepare method of
        non-primaries must be 1 when there are zero faulty nodes in the system.
        2. The number of PRE-PREPAREs seen by the process_preprepare method of
        non-primaries must be greater than or equal to 0 with faults in the
        system.
        """
        tm = get_utc_epoch()
        expectedPrePrepareRequest = PrePrepare(
            instId,
            primary.viewNo,
            primary.lastPrePrepareSeqNo,
            tm,
            [propagated1.digest],
            init_discarded(),
            primary._ordering_service.generate_pp_digest(
                [propagated1.digest], primary.viewNo, tm),
            DOMAIN_LEDGER_ID,
            primary._ordering_service.get_state_root_hash(DOMAIN_LEDGER_ID),
            primary._ordering_service.get_txn_root_hash(DOMAIN_LEDGER_ID),
            0,
            True,
            primary._ordering_service.get_state_root_hash(POOL_LEDGER_ID),
            primary._ordering_service.get_txn_root_hash(AUDIT_LEDGER_ID),
        )

        passes = 0
        for npr in nonPrimaryReplicas:
            actualMsgs = len([
                param for param in getAllArgs(
                    npr._ordering_service,
                    npr._ordering_service.process_preprepare)
                if (param['pre_prepare'][0:3] +
                    param['pre_prepare'][4:6] +
                    param['pre_prepare'][7:],
                    param['sender']) == (
                    expectedPrePrepareRequest[0:3] +
                    expectedPrePrepareRequest[4:6] +
                    param['pre_prepare'][7:],
                    primary.name)
            ])

            numOfMsgsWithZFN = 1
            numOfMsgsWithFaults = 0

            passes += int(msgCountOK(nodesSize,
                                     faultyNodes,
                                     actualMsgs,
                                     numOfMsgsWithZFN,
                                     numOfMsgsWithFaults))
        assert passes >= len(nonPrimaryReplicas) - faultyNodes, \
            '1Non-primary sees correct number pre-prepares - {}'.format(passes)

    def primarySentsCorrectNumberOfPREPREPAREs():
        """
        1. The number of PRE-PREPAREs sent by the primary is 1, with or
        without faults in the system; but when the primary is faulty the
        number of sent PRE-PREPAREs will be zero and the primary must be
        marked as malicious.
        """
        actualMsgs = len([
            param for param in getAllArgs(
                primary._ordering_service,
                primary._ordering_service.send_pre_prepare)
            if param['ppReq'].reqIdr[0] == propagated1.digest and
            param['ppReq'].digest ==
            primary._ordering_service.generate_pp_digest(
                [propagated1.digest],
                get_original_viewno(param['ppReq']),
                param['ppReq'].ppTime)
        ])

        numOfMsgsWithZFN = 1

        # TODO: Considering, Primary is not faulty and will always send
        # PRE-PREPARE. Write separate test for testing when Primary
        # is faulty
        assert msgCountOK(nodesSize,
                          faultyNodes,
                          actualMsgs,
                          numOfMsgsWithZFN,
                          numOfMsgsWithZFN), \
            'Primary sends correct number of pre-prepares'

    def nonPrimaryReceivesCorrectNumberOfPREPREPAREs():
        """
        1. The number of PRE-PREPAREs received by non-primaries must be 1
        with zero faults in the system, and 0 with faults in the system.
        """
        passes = 0
        for npr in nonPrimaryReplicas:
            l4 = len([
                param for param in getAllArgs(
                    npr._ordering_service,
                    npr._ordering_service._add_to_pre_prepares)
                if param['pp'].reqIdr[0] == propagated1.digest and
                param['pp'].digest == OrderingService.generate_pp_digest(
                    [propagated1.digest, ],
                    get_original_viewno(param['pp']),
                    param['pp'].ppTime)
            ])

            numOfMsgsWithZFN = 1
            numOfMsgsWithFaults = 0

            passes += msgCountOK(nodesSize,
                                 faultyNodes,
                                 l4,
                                 numOfMsgsWithZFN,
                                 numOfMsgsWithFaults)

        assert passes >= len(nonPrimaryReplicas) - faultyNodes, \
            '2Non-primary receives correct number of pre-prepare -- {}'.format(passes)

    primarySeesCorrectNumberOfPREPREPAREs()
    nonPrimarySeesCorrectNumberOfPREPREPAREs()
    primarySentsCorrectNumberOfPREPREPAREs()
    nonPrimaryReceivesCorrectNumberOfPREPREPAREs()
def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1):
    """
    Replicas should not accept a PRE-PREPARE for view "v" and prepare
    sequence number "n" if they have already accepted a request with view
    number "v" and sequence number "n"
    """
    numOfNodes = 4
    fValue = getMaxFailures(numOfNodes)
    primaryRepl = getPrimaryReplica(nodeSet, 1)
    logger.debug("Primary Replica: {}".format(primaryRepl))
    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, 1)
    logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas))

    # Delay COMMITs so the request is not ordered and the checks can be made
    c_delay = 10
    for node in nodeSet:
        node.nodeIbStasher.delay(cDelay(delay=c_delay, instId=1))

    request1 = sendRandomRequest(wallet1, client1)
    for npr in nonPrimaryReplicas:
        looper.run(eventually(checkPrepareReqSent,
                              npr,
                              request1.identifier,
                              request1.reqId,
                              primaryRepl.viewNo,
                              retryWait=1))
    prePrepareReq = primaryRepl.sentPrePrepares[primaryRepl.viewNo,
                                                primaryRepl.lastPrePrepareSeqNo]
    looper.run(eventually(checkPrePrepareReqRecvd,
                          nonPrimaryReplicas,
                          prePrepareReq,
                          retryWait=1))

    # logger.debug("Patching the primary replica's pre-prepare sending method ")
    # orig_method = primaryRepl.sendPrePrepare

    # def patched(self, ppReq):
    #     self.sentPrePrepares[ppReq.viewNo, ppReq.ppSeqNo] = ppReq
    #     ppReq = updateNamedTuple(ppReq, **{f.PP_SEQ_NO.nm: 1})
    #     self.send(ppReq, TPCStat.PrePrepareSent)
    #
    # primaryRepl.sendPrePrepare = types.MethodType(patched, primaryRepl)
    logger.debug(
        "Decrementing the primary replica's pre-prepare sequence number by "
        "one...")
    primaryRepl._lastPrePrepareSeqNo -= 1
    view_no = primaryRepl.viewNo
    request2 = sendRandomRequest(wallet1, client1)
    timeout = waits.expectedPrePrepareTime(len(nodeSet))
    looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2,
                          retryWait=1, timeout=timeout))

    # Since the node is malicious, it will not be able to process requests due
    # to conflicts in PRE-PREPARE
    primaryRepl.node.stop()
    looper.removeProdable(primaryRepl.node)

    reqIdr = [(request2.identifier, request2.reqId)]
    prePrepareReq = PrePrepare(
        primaryRepl.instId,
        view_no,
        primaryRepl.lastPrePrepareSeqNo,
        get_utc_epoch(),
        reqIdr,
        1,
        primaryRepl.batchDigest([request2]),
        DOMAIN_LEDGER_ID,
        primaryRepl.stateRootHash(DOMAIN_LEDGER_ID),
        primaryRepl.txnRootHash(DOMAIN_LEDGER_ID))

    logger.debug("Checking whether all the non-primary replicas have received "
                 "the pre-prepare request with the same sequence number")
    timeout = waits.expectedPrePrepareTime(len(nodeSet))
    looper.run(eventually(checkPrePrepareReqRecvd,
                          nonPrimaryReplicas,
                          prePrepareReq,
                          retryWait=1,
                          timeout=timeout))
    logger.debug("Check that none of the non-primary replicas sent a PREPARE "
                 "message in response to the pre-prepare message")
    timeout = waits.expectedPrepareTime(len(nodeSet))
    looper.runFor(timeout)  # expect prepare processing timeout

    # check that prepares have not been sent
    for npr in nonPrimaryReplicas:
        with pytest.raises(AssertionError):
            looper.run(eventually(checkPrepareReqSent,
                                  npr,
                                  request2.identifier,
                                  request2.reqId,
                                  view_no,
                                  retryWait=1,
                                  timeout=timeout))

    timeout = waits.expectedTransactionExecutionTime(len(nodeSet)) + c_delay
    result1 = looper.run(
        eventually(check_sufficient_replies_received,
                   client1,
                   request1.identifier,
                   request1.reqId,
                   retryWait=1,
                   timeout=timeout))
    logger.debug("request {} gives result {}".format(request1, result1))
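# Editorial aside: a hypothetical sketch of the rejection rule this test
# exercises (not the plenum implementation) -- a replica keyed on
# (viewNo, ppSeqNo) must discard a second PRE-PREPARE for a key it has
# already accepted, which is why decrementing the primary's sequence number
# above produces a conflicting message that no replica prepares.
def should_accept_pre_prepare_sketch(accepted_pre_prepares, view_no, pp_seq_no):
    # accepted_pre_prepares: dict mapping (viewNo, ppSeqNo) -> PrePrepare
    return (view_no, pp_seq_no) not in accepted_pre_prepares

accepted = {(0, 1): 'first PRE-PREPARE for view 0, seq 1'}
assert should_accept_pre_prepare_sketch(accepted, 0, 2)      # new seq no: accept
assert not should_accept_pre_prepare_sketch(accepted, 0, 1)  # duplicate key: reject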
def test_view_change_add_one_node_uncommitted_by_next_primary(
        looper, tdir, tconf, allPluginsPath, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_client, sdk_wallet_steward):
    # 1. Pre-requisites: viewNo=2, Primary is Node3
    for viewNo in range(1, 3):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, viewNo)
        ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=30)

    # 2. Add Steward for the new Node
    new_steward_wallet_handle = sdk_add_new_nym(looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                alias="testClientSteward" + randomString(3),
                                                role=STEWARD_STRING)

    # 3. Send txn to add Node5
    # It will not be proposed and ordered by the current Primary, but will be
    # proposed by the next one in the new view.
    # Make sure that the request is propagated by the next Primary.
    old_state_root_hash = txnPoolNodeSet[0].stateRootHash(
        ledgerId=POOL_LEDGER_ID, isCommitted=False)
    primary_node = getPrimaryReplica(txnPoolNodeSet).node
    next_primary = txnPoolNodeSet[-1]
    with delay_rules_without_processing(primary_node.nodeIbStasher, ppgDelay()):
        sdk_add_new_node(looper,
                         sdk_pool_handle,
                         new_steward_wallet_handle,
                         new_node_name="Psi",
                         tdir=tdir,
                         tconf=tconf,
                         allPluginsPath=allPluginsPath,
                         autoStart=True,
                         nodeClass=TestNode,
                         do_post_node_creation=None,
                         services=[VALIDATOR],
                         wait_till_added=False)
        looper.run(eventually(check_node_txn_propagated, [next_primary]))
        check_node_txn_not_applied(txnPoolNodeSet, old_state_root_hash)

    # 4. Trigger view change to the next view
    # Make sure that only the next Primary (Node4) finishes View Change to view=3
    slow_nodes = txnPoolNodeSet[:3]
    fast_nodes = [next_primary]
    slow_stashers = [slow_node.nodeIbStasher for slow_node in slow_nodes]
    with delay_rules_without_processing(slow_stashers,
                                        nv_delay(),
                                        msg_rep_delay(types_to_delay=[NEW_VIEW])):
        trigger_view_change(txnPoolNodeSet)
        waitForViewChange(looper, txnPoolNodeSet, 3)

        # view change is finished on Node4 only
        looper.run(eventually(check_view_change_done, fast_nodes, 3))
        for n in slow_nodes:
            assert n.master_replica._consensus_data.waiting_for_new_view

        # wait till fast nodes apply the Node txn in the new View
        # (Node4 creates a new batch with it)
        looper.run(eventually(check_node_txn_applied, fast_nodes,
                              old_state_root_hash))
        check_node_txn_not_applied(slow_nodes, old_state_root_hash)

    # 5. Trigger view change to view=4, and make sure it's finished properly
    trigger_view_change(txnPoolNodeSet)
    waitForViewChange(looper, txnPoolNodeSet, 4)
    ensureElectionsDone(looper, txnPoolNodeSet, customTimeout=35)

    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_client,
                               sdk_pool_handle)
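# Editorial aside: a hypothetical helper mirroring the assertions above (not
# part of the test suite) -- a node has finished a view change once it is on
# the expected view and its master replica is no longer waiting for the
# NEW_VIEW message. It only reuses attributes already referenced in the test.
def view_change_finished_sketch(node, expected_view_no):
    replica = node.master_replica
    return (node.viewNo == expected_view_no and
            not replica._consensus_data.waiting_for_new_view)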