def poolTxnReceived(self, msg: PoolLedgerTxns, frm):
    global t
    logger.debug("{} received pool txn {} from {}".format(self, msg, frm))
    txn = getattr(msg, t)
    seqNo = get_seq_no(txn)
    if seqNo not in self.tempNodeTxns:
        self.tempNodeTxns[seqNo] = {}
    self.tempNodeTxns[seqNo][frm] = txn
    # If this is the next sequence number that should go into the ledger, then
    # check if there are enough identical transactions from different nodes
    if (seqNo - self.ledger.size) == 1:
        f = getMaxFailures(len(self.nodeReg))
        if len(self.tempNodeTxns[seqNo]) > f:
            # TODO: Shouldn't this use `checkIfMoreThanFSameItems`?
            txns = [item for item, count in collections.Counter(
                [json.dumps(_t, sort_keys=True)
                 for _t in self.tempNodeTxns[seqNo].values()]
            ).items() if count > f]
            if len(txns) > 0:
                txn = json.loads(txns[0])
                self.addToLedger(txn)
                self.tempNodeTxns.pop(seqNo)
            else:
                logger.error("{} has not got enough similar node "
                             "transactions".format(self))
def checkRequest(cli, looper, operation):
    cName = "Joe"
    cli.enterCmd("new client {}".format(cName))
    # Let client connect to the nodes
    looper.runFor(3)
    # Send request to all nodes
    cli.enterCmd('client {} send {}'.format(cName, operation))
    client = cli.clients[cName]
    f = getMaxFailures(len(cli.nodes))
    # Ensure client gets back the replies
    looper.run(eventually(
        checkSufficientRepliesRecvd, client.inBox,
        client.lastReqId, f,
        retryWait=2, timeout=30))
    txn, status = client.getReply(client.lastReqId)

    # Ensure the cli shows appropriate output
    cli.enterCmd('client {} show {}'.format(cName, client.lastReqId))
    printeds = cli.printeds
    printedReply = printeds[1]
    printedStatus = printeds[0]
    assert printedReply['msg'] == "Reply for the request: {{'txnId': '{}" \
                                  "'}}".format(txn['txnId'])
    assert printedStatus['msg'] == "Status: {}".format(status)
def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf,
                     steward1, stewardWallet, allPluginsPath):
    """
    Add 2 new nodes to trigger replica addition and primary election
    """
    for nodeName in ("Zeta", "Eta"):
        newStewardName = "testClientSteward" + randomString(3)
        newSteward, newStewardWallet, newNode = addNewStewardAndNode(
            looper, steward1, stewardWallet, newStewardName, nodeName,
            tdirWithPoolTxns, tconf, allPluginsPath)
        txnPoolNodeSet.append(newNode)
        looper.run(checkNodesConnected(txnPoolNodeSet))
        logger.debug("{} connected to the pool".format(newNode))
        looper.run(eventually(checkNodeLedgersForEquality, newNode,
                              *txnPoolNodeSet[:-1], retryWait=1, timeout=7))

    f = getMaxFailures(len(txnPoolNodeSet))

    def checkFValue():
        for node in txnPoolNodeSet:
            assert node.f == f
            assert len(node.replicas) == (f + 1)

    looper.run(eventually(checkFValue, retryWait=1, timeout=5))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1, timeout=5)
def checkRequest(cli, looper, operation):
    cName = "Joe"
    cli.enterCmd("new client {}".format(cName))
    # Let client connect to the nodes
    looper.runFor(3)
    # Send request to all nodes
    cli.enterCmd('client {} send {}'.format(cName, operation))
    client = cli.clients[cName]
    f = getMaxFailures(len(cli.nodes))
    # Ensure client gets back the replies
    looper.run(
        eventually(checkSufficientRepliesRecvd, client.inBox,
                   client.lastReqId, f,
                   retryWait=2, timeout=30))
    txn, status = client.getReply(client.lastReqId)

    # Ensure the cli shows appropriate output
    cli.enterCmd('client {} show {}'.format(cName, client.lastReqId))
    printeds = cli.printeds
    printedReply = printeds[1]
    printedStatus = printeds[0]
    txnTimePattern = r"'txnTime': \d+\.*\d*"
    txnIdPattern = r"'txnId': '" + txn['txnId'] + "'"
    # txnPattern1 = "Reply for the request: \{" + timePattern + ", " + txnIdPattern + "\}"
    # txnPattern2 = "Reply for the request: \{" + txnIdPattern + ", " + timePattern + "\}"
    # assert re.match(txnPattern1, printedReply['msg']) or \
    #     re.match(txnPattern2, printedReply['msg'])
    assert re.search(txnIdPattern, printedReply['msg'])
    assert re.search(txnTimePattern, printedReply['msg'])
    assert printedStatus['msg'] == "Status: {}".format(status)
def testAdd2NewNodes(looper, txnPoolNodeSet, tdirWithPoolTxns, tconf,
                     steward1, stewardWallet, allPluginsPath):
    """
    Add 2 new nodes to trigger replica addition and primary election
    """
    for nodeName in ("Zeta", "Eta"):
        newStewardName = "testClientSteward" + randomString(3)
        newSteward, newStewardWallet, newNode = addNewStewardAndNode(
            looper, steward1, stewardWallet, newStewardName, nodeName,
            tdirWithPoolTxns, tconf, allPluginsPath)
        txnPoolNodeSet.append(newNode)
        looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=30))
        logger.debug("{} connected to the pool".format(newNode))
        looper.run(
            eventually(checkNodeLedgersForEquality, newNode,
                       *txnPoolNodeSet[:-1], retryWait=1, timeout=7))

    f = getMaxFailures(len(txnPoolNodeSet))

    def checkFValue():
        for node in txnPoolNodeSet:
            assert node.f == f
            assert len(node.replicas) == (f + 1)

    looper.run(eventually(checkFValue, retryWait=1, timeout=5))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1, timeout=5)
def testLoggingTxnStateForValidRequest(
        looper, steward1, stewardWallet, logsearch):
    logsPropagate, _ = logsearch(
        levels=['INFO'], files=['propagator.py'],
        funcs=['propagate'], msgs=['propagating.*request.*from client'])

    logsOrdered, _ = logsearch(
        levels=['INFO'], files=['replica.py'],
        funcs=['order_3pc_key'], msgs=['ordered batch request'])

    logsCommited, _ = logsearch(
        levels=['INFO'], files=['node.py'],
        funcs=['executeBatch'], msgs=['committed batch request'])

    req, wallet = sendAddNewClient(None, "name", steward1, stewardWallet)
    nNodes = len(steward1.inBox)
    timeout = waits.expectedTransactionExecutionTime(nNodes)
    looper.run(
        eventually(checkSufficientRepliesReceived,
                   steward1.inBox, req.reqId, getMaxFailures(nNodes),
                   retryWait=1, timeout=timeout))

    reqId = str(req.reqId)
    assert any(reqId in record.getMessage() for record in logsPropagate)
    assert any(reqId in record.getMessage() for record in logsOrdered)
    assert any(reqId in record.getMessage() for record in logsCommited)
def ensureConnectedToTestEnv(cli):
    if not cli.activeEnv:
        cli.enterCmd("connect test")
        timeout = waits.expectedClientConnectionTimeout(
            util.getMaxFailures(len(cli.nodeReg)))
        cli.looper.run(
            eventually(checkConnectedToEnv, cli, retryWait=1, timeout=timeout))
def test_start_vc_by_quorum_of_vc_msgs(view_change_service_builder,
                                       internal_bus, external_bus,
                                       validators, is_master):
    svc_queue = []

    def svc_handler(msg: StartViewChange):
        svc_queue.append(msg)

    internal_bus.subscribe(StartViewChange, svc_handler)

    # Quorum for ViewChange messages is N - f
    service = view_change_service_builder(validators[0])
    proposed_view_no = 10
    f = getMaxFailures(len(validators))

    # Append N - f - 1 ViewChange msgs to view_change_votes
    for validator in validators[1:-f]:
        msg = ViewChange(proposed_view_no, 0, [], [], [])
        service.process_view_change_message(msg, validator)

    # N - f - 1 msgs are not enough to trigger a view change
    assert not svc_queue

    # Process one more message
    service.process_view_change_message(
        ViewChange(proposed_view_no, 0, [], [], []), validators[-1])

    if is_master:
        assert svc_queue
        assert isinstance(svc_queue[0], StartViewChange)
        assert svc_queue[0].view_no == proposed_view_no
    else:
        # ViewChange messages aren't processed on backups
        assert not svc_queue
def checkRequest(cli, looper, operation):
    cName = "Joe"
    cli.enterCmd("new client {}".format(cName))
    # Let client connect to the nodes
    looper.runFor(3)
    # Send request to all nodes
    cli.enterCmd('client {} send {}'.format(cName, operation))
    client = cli.clients[cName]
    f = getMaxFailures(len(cli.nodes))
    # Ensure client gets back the replies
    looper.run(eventually(
        checkSufficientRepliesRecvd, client.inBox,
        client.lastReqId, f,
        retryWait=2, timeout=30))
    txn, status = client.getReply(client.lastReqId)

    # Ensure the cli shows appropriate output
    cli.enterCmd('client {} show {}'.format(cName, client.lastReqId))
    printeds = cli.printeds
    printedReply = printeds[1]
    printedStatus = printeds[0]
    txnTimePattern = r"'txnTime': \d+\.*\d*"
    txnIdPattern = r"'txnId': '" + txn['txnId'] + "'"
    # txnPattern1 = "Reply for the request: \{" + timePattern + ", " + txnIdPattern + "\}"
    # txnPattern2 = "Reply for the request: \{" + txnIdPattern + ", " + timePattern + "\}"
    # assert re.match(txnPattern1, printedReply['msg']) or \
    #     re.match(txnPattern2, printedReply['msg'])
    assert re.search(txnIdPattern, printedReply['msg'])
    assert re.search(txnTimePattern, printedReply['msg'])
    assert printedStatus['msg'] == "Status: {}".format(status)
def canStartCatchUpProcess(self, ledgerType: int):
    recvdConsProof = self.recvdConsistencyProofs[ledgerType]
    # Consider an f value when this node was not connected
    adjustedF = getMaxFailures(self.owner.totalNodes - 1)
    if len(recvdConsProof) == (adjustedF + 1):
        self.consistencyProofsTimers[ledgerType] = time.perf_counter()
        self._schedule(partial(self.checkIfCPsNeeded, ledgerType),
                       self.config.ConsistencyProofsTimeout * (
                           self.owner.totalNodes - 1))
    if len(recvdConsProof) > 2 * adjustedF:
        logger.debug("{} deciding on the basis of CPs {} and f {}".
                     format(self, recvdConsProof, adjustedF))
        grpdPrf, nullProofs = self._groupConsistencyProofs(recvdConsProof)

        # If more than f nodes were found to be at the same state then this
        # node's state is good too
        if nullProofs > adjustedF:
            return True, None

        result = self._latestReliableProof(grpdPrf,
                                           self.ledgers[ledgerType]["ledger"])
        return bool(result), (None if not result else ConsistencyProof(
            ledgerType, *result))

    logger.debug("{} cannot start catchup since received only {} "
                 "consistency proofs but need at least {}".
                 format(self, len(recvdConsProof), 2 * adjustedF + 1))
    return False, None
def testStatusAfterAllNodesUp(cli, validNodeNames, createAllNodes):
    # Checking the output after command `status`. Testing the pool status here
    cli.enterCmd("status")
    printeds = cli.printeds
    clientStatus = printeds[4]
    fValue = printeds[3]['msg']
    assert clientStatus['msg'] == "Clients: No clients are running. Try " \
                                  "typing " \
                                  "'new client <name>'."
    assert fValue == "f-value (number of possible faulty nodes): {}".format(
        getMaxFailures(len(validNodeNames)))

    for name in validNodeNames:
        # Checking the output after command `status node <name>`. Testing
        # the node status here
        cli.enterCmd("status node {}".format(name))
        cli.looper.runFor(1)
        otherNodeNames = (set(validNodeNames) - {name, })
        node = cli.nodes[name]
        cliLogs = list(cli.printeds)
        if node.hasPrimary:
            checkPrimaryLogs(node, cliLogs)
        else:
            checkNonPrimaryLogs(node, cliLogs)
        checkForNamedTokens(cli.printedTokens[1], otherNodeNames)
        if cli.clients:
            checkForNamedTokens(cli.printedTokens[1], cli.voidMsg)
def checkRequest(cli, operation):
    cName = "Joe"
    cli.enterCmd("new client {}".format(cName))
    # Let client connect to the nodes
    cli.looper.run(eventually(checkClientConnected, cli,
                              list(cli.nodes.keys()), cName,
                              retryWait=1, timeout=5))
    # Send request to all nodes
    createNewKeyring(cName, cli)
    cli.enterCmd("new key {}".format("testkey1"))
    assert "Key created in keyring {}".format(cName) in cli.lastCmdOutput
    cli.enterCmd("client {} send {}".format(cName, operation))
    client = cli.clients[cName]
    wallet = cli.wallets[cName]  # type: Wallet
    f = getMaxFailures(len(cli.nodes))
    # Ensure client gets back the replies
    lastReqId = wallet._getIdData().lastReqId
    cli.looper.run(eventually(checkSufficientRepliesRecvd,
                              client.inBox,
                              lastReqId,
                              f,
                              retryWait=2,
                              timeout=10))
    txn, status = client.getReply(wallet.defaultId, lastReqId)

    # Ensure the cli shows appropriate output
    cli.enterCmd("client {} show {}".format(cName, lastReqId))
    printeds = cli.printeds
    printedReply = printeds[1]
    printedStatus = printeds[0]
    # txnTimePattern = "'txnTime', \d+\.*\d*"
    # txnIdPattern = "'txnId', '" + txn['txnId'] + "'"
    txnTimePattern = r"'txnTime': \d+\.*\d*"
    txnIdPattern = r"'txnId': '" + txn["txnId"] + "'"
    assert re.search(txnIdPattern, printedReply["msg"])
    assert re.search(txnTimePattern, printedReply["msg"])
    assert printedStatus["msg"] == "Status: {}".format(status)
    return client, wallet
def poolTxnReceived(self, msg: PoolLedgerTxns, frm):
    logger.debug("{} received pool txn {} from {}".format(self, msg, frm))
    txn = getattr(msg, t)
    seqNo = txn.pop(F.seqNo.name)
    if seqNo not in self.tempNodeTxns:
        self.tempNodeTxns[seqNo] = {}
    self.tempNodeTxns[seqNo][frm] = txn
    # If this is the next sequence number that should go into the ledger, then
    # check if there are enough identical transactions from different nodes
    if (seqNo - self.ledger.size) == 1:
        f = getMaxFailures(len(self.nodeReg))
        if len(self.tempNodeTxns[seqNo]) > f:
            # TODO: Shouldn't this use `checkIfMoreThanFSameItems`?
            txns = [item for item, count in collections.Counter(
                [json.dumps(t, sort_keys=True)
                 for t in self.tempNodeTxns[seqNo].values()]
            ).items() if count > f]
            if len(txns) > 0:
                txn = json.loads(txns[0])
                self.addToLedger(txn)
                if self.config.UPDATE_GENESIS_POOL_TXN_FILE:
                    # Adding the sequence number field since it is needed for
                    # safely updating the genesis file
                    txn[F.seqNo.name] = len(self.ledger)
                    updateGenesisPoolTxnFile(self.config.baseDir,
                                             self.config.poolTransactionsFile,
                                             txn)
                self.tempNodeTxns.pop(seqNo)
            else:
                logger.error("{} has not got enough similar node "
                             "transactions".format(self))
def testStatusAfterAllNodesUp(cli, validNodeNames, createAllNodes):
    # Checking the output after command `status`. Testing the pool status here
    # Waiting for 5 seconds so that, after creating a node, the whole output
    # is printed first.
    cli.looper.runFor(5)
    cli.enterCmd("status")
    cli.looper.runFor(1)
    printeds = cli.printeds
    clientStatus = printeds[4]
    fValue = printeds[3]['msg']
    assert clientStatus['msg'] == "Clients: No clients are running. Try " \
                                  "typing " \
                                  "'new client <name>'."
    assert fValue == "f-value (number of possible faulty nodes): {}".format(
        getMaxFailures(len(validNodeNames)))

    for name in validNodeNames:
        # Checking the output after command `status node <name>`. Testing
        # the node status here
        cli.enterCmd("status node {}".format(name))
        cli.looper.runFor(1)
        otherNodeNames = (set(validNodeNames) - {name, })
        node = cli.nodes[name]
        cliLogs = list(cli.printeds)
        if node.hasPrimary:
            checkPrimaryLogs(node, cliLogs)
        else:
            checkNonPrimaryLogs(node, cliLogs)
        checkForNamedTokens(cli.printedTokens[1], otherNodeNames)
        if cli.clients:
            checkForNamedTokens(cli.printedTokens[1], cli.voidMsg)
def getStatus(self):
    self.print('Nodes: ', newline=False)
    if not self.nodes:
        self.print("No nodes are running. Try typing 'new node <name>'.")
    else:
        self.printNames(self.nodes, newline=True)
    if not self.clients:
        clients = "No clients are running. Try typing 'new client <name>'."
    else:
        clients = ",".join(self.clients.keys())
    self.print("Clients: " + clients)
    f = getMaxFailures(len(self.nodes))
    self.print("f-value (number of possible faulty nodes): {}".format(f))
    if f != 0 and len(self.nodes) >= 2 * f + 1:
        node = list(self.nodes.values())[0]
        mPrimary = node.replicas[node.instances.masterId].primaryName
        bPrimary = node.replicas[node.instances.backupIds[0]].primaryName
        self.print("Instances: {}".format(f + 1))
        self.print("  Master (primary is on {})".
                   format(Replica.getNodeName(mPrimary)))
        self.print("  Backup (primary is on {})".
                   format(Replica.getNodeName(bPrimary)))
    else:
        self.print("Instances: "
                   "Not enough nodes to create protocol instances")
def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up, client1):
    """
    Test that every node queues 3-phase requests (PRE-PREPARE, PREPARE and
    COMMIT) that come from a view which is greater than the current view
    """
    f = getMaxFailures(nodeCount)

    # Delay processing of instance change on a node
    nodeA = nodeSet.Alpha
    nodeA.nodeIbStasher.delay(icDelay(60))

    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    # Delay processing of PRE-PREPARE from all non-primary replicas of master
    # so master's throughput falls and the view changes
    ppDelayer = ppDelay(5, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelayer)

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 4,
                                        timeout=5 * nodeCount)

    # Every node except Node A should have a view change
    for node in nodeSet:
        if node.name != nodeA.name:
            looper.run(eventually(
                partial(checkViewChangeInitiatedForNode, node, 0),
                retryWait=1, timeout=20))

    # Node A's view should not have changed yet
    with pytest.raises(AssertionError):
        looper.run(eventually(partial(
            checkViewChangeInitiatedForNode, nodeA, 0),
            retryWait=1, timeout=20))

    # NodeA should not have any pending 3-phase requests for a later view
    for r in nodeA.replicas:  # type: TestReplica
        assert len(r.threePhaseMsgsForLaterView) == 0

    # Reset delays on incoming messages from all nodes
    for node in nodeSet:
        node.nodeIbStasher.nodelay(ppDelayer)

    # Send one more request
    sendRandomRequest(client1)

    def checkPending3PhaseReqs():
        # Get all replicas that have their primary status decided
        reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None]
        # At least one replica should have its primary status decided
        assert len(reps) > 0
        for r in reps:  # type: TestReplica
            logging.debug("primary status for replica {} is {}"
                          .format(r, r.primaryNames))
            assert len(r.threePhaseMsgsForLaterView) > 0

    # NodeA should now have pending 3-phase requests for a later view
    looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=30))
def canStartCatchUpProcess(self, ledgerType: int):
    recvdConsProof = self.recvdConsistencyProofs[ledgerType]
    # Consider an f value when this node was not connected
    adjustedF = getMaxFailures(self.owner.totalNodes - 1)
    if len(recvdConsProof) == (adjustedF + 1):
        self.consistencyProofsTimers[ledgerType] = time.perf_counter()
        self._schedule(
            partial(self.checkIfCPsNeeded, ledgerType),
            self.config.ConsistencyProofsTimeout * (self.owner.totalNodes - 1))
    if len(recvdConsProof) > 2 * adjustedF:
        logger.debug("{} deciding on the basis of CPs {} and f {}".format(
            self, recvdConsProof, adjustedF))
        grpdPrf, nullProofs = self._groupConsistencyProofs(recvdConsProof)

        # If more than f nodes were found to be at the same state then this
        # node's state is good too
        if nullProofs > adjustedF:
            return True, None

        result = self._latestReliableProof(
            grpdPrf, self.ledgers[ledgerType]["ledger"])
        return bool(result), (None if not result else ConsistencyProof(
            ledgerType, *result))

    logger.debug("{} cannot start catchup since received only {} "
                 "consistency proofs but need at least {}".format(
                     self, len(recvdConsProof), 2 * adjustedF + 1))
    return False, None
def poolTxnReceived(self, msg: PoolLedgerTxns, frm):
    logger.debug("{} received pool txn {} from {}".format(self, msg, frm))
    txn = getattr(msg, t)
    seqNo = txn.pop(F.seqNo.name)
    if seqNo not in self.tempNodeTxns:
        self.tempNodeTxns[seqNo] = {}
    self.tempNodeTxns[seqNo][frm] = txn
    # If this is the next sequence number that should go into the ledger, then
    # check if there are enough identical transactions from different nodes
    if (seqNo - self.ledger.size) == 1:
        f = getMaxFailures(len(self.nodeReg))
        if len(self.tempNodeTxns[seqNo]) > f:
            # TODO: Shouldn't this use `checkIfMoreThanFSameItems`?
            txns = [
                item
                for item, count in collections.Counter([
                    json.dumps(t, sort_keys=True)
                    for t in self.tempNodeTxns[seqNo].values()
                ]).items() if count > f
            ]
            if len(txns) > 0:
                txn = json.loads(txns[0])
                self.addToLedger(txn)
                if self.config.UpdateGenesisPoolTxnFile:
                    # Adding the sequence number field since it is needed for
                    # safely updating the genesis file
                    txn[F.seqNo.name] = len(self.ledger)
                    updateGenesisPoolTxnFile(
                        self.config.baseDir,
                        self.config.poolTransactionsFile, txn)
                self.tempNodeTxns.pop(seqNo)
            else:
                logger.error("{} has not got enough similar node "
                             "transactions".format(self))
def test_process_checkpoint(checkpoint_service, checkpoint, pre_prepare,
                            tconf, ordered, validators, is_master):
    global caught_msg
    caught_msg = None
    checkpoint_service._bus.subscribe(Cleanup, catch_msg)
    quorum = checkpoint_service._data.quorums.checkpoint.value
    n = len(validators)
    assert quorum == n - getMaxFailures(n) - 1
    senders = ["sender{}".format(i) for i in range(quorum + 1)]
    key = (1, tconf.CHK_FREQ)
    old_key = (-1, 0)

    checkpoint_service._stash_checkpoint(
        Checkpoint(1, checkpoint.viewNo, 1, 1, "1"), "frm")
    checkpoint_service._stash_checkpoint(
        Checkpoint(1, checkpoint.viewNo + 1, 1, 1, "1"), "frm")
    checkpoint_service._checkpoint_state[old_key] = CheckpointState(
        1, ["digest"] * (tconf.CHK_FREQ - 1), None, {}, False)
    checkpoint_service._checkpoint_state[key] = CheckpointState(
        key[1] - 1, ["digest"] * (tconf.CHK_FREQ - 1), None, {}, False)

    pre_prepare.ppSeqNo = key[1]
    ordered.ppSeqNo = pre_prepare.ppSeqNo
    checkpoint_service._data.preprepared.append(pre_prepare)
    checkpoint_service.process_ordered(ordered)
    _check_checkpoint(checkpoint_service, key[0], key[1], pre_prepare,
                      check_shared_data=True)

    state = updateNamedTuple(checkpoint_service._checkpoint_state[key],
                             digest=checkpoint.digest)
    checkpoint_service._checkpoint_state[key] = state

    for sender in senders[:quorum - 1]:
        assert checkpoint_service.process_checkpoint(checkpoint, sender)
        assert checkpoint_service._checkpoint_state[key].receivedDigests[
            sender] == checkpoint.digest

    assert not checkpoint_service._checkpoint_state[key].isStable
    # Send the last checkpoint to make it stable
    assert checkpoint_service.process_checkpoint(checkpoint,
                                                 senders[quorum - 1])
    assert checkpoint_service._checkpoint_state[key].isStable

    # check _remove_stashed_checkpoints()
    assert checkpoint.viewNo not in \
        checkpoint_service._stashed_recvd_checkpoints
    assert checkpoint.viewNo + 1 in \
        checkpoint_service._stashed_recvd_checkpoints

    # check watermarks
    assert checkpoint_service._data.low_watermark == checkpoint.seqNoEnd

    # check that a Cleanup msg has been sent
    assert isinstance(caught_msg, Cleanup)
    assert caught_msg.cleanup_till_3pc == (checkpoint.viewNo,
                                           checkpoint.seqNoEnd)

    # check that the old checkpoint state has been removed
    assert old_key not in checkpoint_service._checkpoint_state
def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1):
    """
    Replicas should not accept a PRE-PREPARE for view "v" and prepare sequence
    number "n" if they have already accepted a request with view number "v"
    and sequence number "n"
    """
    numOfNodes = 4
    fValue = getMaxFailures(numOfNodes)
    request1 = sendRandomRequest(wallet1, client1)
    result1 = looper.run(
        eventually(checkSufficientRepliesRecvd, client1.inBox,
                   request1.reqId, fValue,
                   retryWait=1, timeout=5))
    logger.debug("request {} gives result {}".format(request1, result1))
    primaryRepl = getPrimaryReplica(nodeSet)
    logger.debug("Primary Replica: {}".format(primaryRepl))
    logger.debug(
        "Decrementing the primary replica's pre-prepare sequence number by "
        "one...")
    primaryRepl.lastPrePrepareSeqNo -= 1
    request2 = sendRandomRequest(wallet1, client1)
    looper.run(
        eventually(checkPrePrepareReqSent, primaryRepl, request2,
                   retryWait=1, timeout=10))

    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet)
    logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas))
    prePrepareReq = PrePrepare(primaryRepl.instId,
                               primaryRepl.viewNo,
                               primaryRepl.lastPrePrepareSeqNo,
                               wallet1.defaultId,
                               request2.reqId,
                               request2.digest,
                               time.time())

    logger.debug("""Checking whether all the non primary replicas have received
                the pre-prepare request with the same sequence number""")
    looper.run(
        eventually(checkPrePrepareReqRecvd, nonPrimaryReplicas,
                   prePrepareReq, retryWait=1, timeout=10))
    logger.debug("""Check that none of the non primary replicas sent any
                prepare message in response to the pre-prepare message""")
    for npr in nonPrimaryReplicas:
        with pytest.raises(AssertionError):
            looper.run(
                eventually(checkPrepareReqSent, npr, wallet1.defaultId,
                           request2.reqId, retryWait=1, timeout=10))
def ensureConnectedToTestEnv(cli):
    if not cli.activeEnv:
        cli.enterCmd("connect test")
        timeout = waits.expectedClientConnectionTimeout(
            util.getMaxFailures(len(cli.nodeReg))
        )
        cli.looper.run(
            eventually(checkConnectedToEnv, cli, retryWait=1, timeout=timeout))
def testCorrectNumOfProtocolInstances(pool):
    fValue = getMaxFailures(len(pool.nodeset))
    for node in pool.nodeset:
        # num of protocol instances running on a node must be f + 1
        assert len(getProtocolInstanceNums(node)) == fValue + 1
        # There should be one running and up master instance
        assert node.instances.masterId is not None
        # There should be exactly f non-master instances
        assert len(node.instances.backupIds) == fValue
def hasConsensus(self, reqId: int):
    result = self.store.client.command("select hasConsensus from {} where "
                                       "{} = {}".format(REQ_DATA,
                                                        f.REQ_ID.nm,
                                                        reqId))
    if result and result[0].oRecordData.get('hasConsensus'):
        replies = self.getReplies(reqId).values()
        fVal = getMaxFailures(len(list(replies)))
        return checkIfMoreThanFSameItems(replies, fVal)
    else:
        return False
def setPoolParams(self):
    nodeCount = len(self.nodeReg)
    self.f = getMaxFailures(nodeCount)
    self.minNodesToConnect = self.f + 1
    self.totalNodes = nodeCount
    self.quorums = Quorums(nodeCount)
    logger.info("{} updated its pool parameters: f {}, totalNodes {}, "
                "minNodesToConnect {}, quorums {}".format(
                    self.alias, self.f, self.totalNodes,
                    self.minNodesToConnect, self.quorums))
def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnData,
                 poolTxnStewardNames, tconf, shouldBePrimary):
    # prepare new ha for node and client stack
    subjectedNode = None
    stewardName = None
    stewardsSeed = None

    for nodeIndex, n in enumerate(txnPoolNodeSet):
        if (shouldBePrimary and n.primaryReplicaNo == 0) or \
                (not shouldBePrimary and n.primaryReplicaNo != 0):
            subjectedNode = n
            stewardName = poolTxnStewardNames[nodeIndex]
            stewardsSeed = poolTxnData["seeds"][stewardName].encode()
            break

    nodeStackNewHA, clientStackNewHA = genHa(2)
    logger.debug("change HA for node: {} to {}".
                 format(subjectedNode.name,
                        (nodeStackNewHA, clientStackNewHA)))

    nodeSeed = poolTxnData["seeds"][subjectedNode.name].encode()

    # change HA
    stewardClient, req = changeHA(looper, tconf, subjectedNode.name, nodeSeed,
                                  nodeStackNewHA, stewardName, stewardsSeed)
    f = getMaxFailures(len(stewardClient.nodeReg))
    looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox,
                          req.reqId, f, retryWait=1, timeout=20))

    # stop node for which HA will be changed
    subjectedNode.stop()
    looper.removeProdable(subjectedNode)

    # start node with new HA
    restartedNode = TestNode(subjectedNode.name,
                             basedirpath=tdirWithPoolTxns, config=tconf,
                             ha=nodeStackNewHA, cliha=clientStackNewHA)
    looper.add(restartedNode)
    txnPoolNodeSet[nodeIndex] = restartedNode
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=70))
    ensureElectionsDone(looper, txnPoolNodeSet, retryWait=1, timeout=10)

    # start client and check the node HA
    anotherClient, _ = genTestClient(tmpdir=tdirWithPoolTxns,
                                     usePoolLedger=True)
    looper.add(anotherClient)
    looper.run(eventually(anotherClient.ensureConnectedToNodes))
    stewardWallet = Wallet(stewardName)
    stewardWallet.addIdentifier(signer=SimpleSigner(seed=stewardsSeed))
    sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, stewardClient, 8)
    looper.run(eventually(checkIfGenesisPoolTxnFileUpdated, *txnPoolNodeSet,
                          stewardClient, anotherClient, retryWait=1,
                          timeout=10))
    looper.removeProdable(stewardClient)
def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1, wallet1):
    """
    Replicas should not accept a PRE-PREPARE for view "v" and prepare sequence
    number "n" if they have already accepted a request with view number "v"
    and sequence number "n"
    """
    numOfNodes = 4
    fValue = getMaxFailures(numOfNodes)
    request1 = sendRandomRequest(wallet1, client1)
    result1 = looper.run(
        eventually(checkSufficientRepliesRecvd, client1.inBox,
                   request1.reqId, fValue,
                   retryWait=1, timeout=5))
    logger.debug("request {} gives result {}".format(request1, result1))
    primaryRepl = getPrimaryReplica(nodeSet)
    logger.debug("Primary Replica: {}".format(primaryRepl))
    logger.debug(
        "Decrementing the primary replica's pre-prepare sequence number by "
        "one...")
    primaryRepl.lastPrePrepareSeqNo -= 1
    request2 = sendRandomRequest(wallet1, client1)
    looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2,
                          retryWait=1, timeout=10))

    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet)
    logger.debug("Non Primary Replicas: " + str(nonPrimaryReplicas))
    prePrepareReq = PrePrepare(
        primaryRepl.instId,
        primaryRepl.viewNo,
        primaryRepl.lastPrePrepareSeqNo,
        wallet1.defaultId,
        request2.reqId,
        request2.digest,
        time.time()
    )

    logger.debug("""Checking whether all the non primary replicas have received
                the pre-prepare request with the same sequence number""")
    looper.run(eventually(checkPrePrepareReqRecvd, nonPrimaryReplicas,
                          prePrepareReq, retryWait=1, timeout=10))
    logger.debug("""Check that none of the non primary replicas sent any
                prepare message in response to the pre-prepare message""")
    for npr in nonPrimaryReplicas:
        with pytest.raises(AssertionError):
            looper.run(eventually(checkPrepareReqSent, npr,
                                  wallet1.defaultId, request2.reqId,
                                  retryWait=1, timeout=10))
def _reliableProofs(self, groupedProofs):
    adjustedF = getMaxFailures(self.owner.totalNodes - 1)
    result = {}
    for (start, end), val in groupedProofs.items():
        for (oldRoot, newRoot, hashes), count in val.items():
            if count > adjustedF:
                result[(start, end)] = (oldRoot, newRoot, hashes)
                # There would be only one correct proof for a range of
                # sequence numbers
                break
    return result
def isRequestSuccessful(self, identifier, reqId):
    acks = self.reqRepStore.getAcks(identifier, reqId)
    nacks = self.reqRepStore.getNacks(identifier, reqId)
    f = getMaxFailures(len(self.nodeReg))
    if len(acks) > f:
        return True, "Done"
    elif len(nacks) > f:
        # TODO: What if the nacks were different from each node?
        return False, list(nacks.values())[0]
    else:
        return None
def test_process_checkpoint(checkpoint_service, checkpoint, pre_prepare,
                            tconf, ordered, validators, is_master):
    checkpoint_stabilized_handler = Mock()
    checkpoint_service._bus.subscribe(CheckpointStabilized,
                                      checkpoint_stabilized_handler)
    quorum = checkpoint_service._data.quorums.checkpoint.value
    n = len(validators)
    assert quorum == n - getMaxFailures(n) - 1
    senders = ["sender{}".format(i) for i in range(quorum + 1)]
    till_seq_no = tconf.CHK_FREQ

    checkpoint_service._received_checkpoints[
        cp_key(checkpoint.viewNo, 1)] = {"frm"}
    # For now, in the checkpoint stabilization phase all checkpoints
    # with ppSeqNo less than stable_checkpoint will be removed
    checkpoint_service._received_checkpoints[
        cp_key(checkpoint.viewNo + 1, till_seq_no + 100)] = {"frm"}

    pre_prepare.ppSeqNo = till_seq_no
    pre_prepare.auditTxnRootHash = cp_digest(till_seq_no)
    ordered.ppSeqNo = pre_prepare.ppSeqNo
    ordered.auditTxnRootHash = pre_prepare.auditTxnRootHash
    checkpoint_service._data.preprepared.append(
        preprepare_to_batch_id(pre_prepare))
    checkpoint_service.process_ordered(ordered)
    _check_checkpoint(checkpoint_service, till_seq_no, pre_prepare,
                      check_shared_data=True)

    for sender in senders[:quorum - 1]:
        checkpoint_service.process_checkpoint(checkpoint, sender)
    assert checkpoint_service._data.stable_checkpoint < till_seq_no

    # Send the last checkpoint to make it stable
    checkpoint_service.process_checkpoint(checkpoint, senders[quorum - 1])
    assert checkpoint_service._data.stable_checkpoint == till_seq_no

    # check _remove_stashed_checkpoints()
    assert sum(1 for cp in checkpoint_service._received_checkpoints
               if cp.view_no == checkpoint.viewNo) == 0
    assert sum(1 for cp in checkpoint_service._received_checkpoints
               if cp.view_no == checkpoint.viewNo + 1) > 0

    # check watermarks
    assert checkpoint_service._data.low_watermark == checkpoint.seqNoEnd

    # check that a Cleanup msg has been sent
    checkpoint_stabilized_handler.assert_called_once_with(
        CheckpointStabilized(inst_id=checkpoint_service._data.inst_id,
                             last_stable_3pc=(checkpoint.viewNo,
                                              checkpoint.seqNoEnd)))
def isRequestSuccessful(self, reqId):
    acks = self.reqRepStore.getAcks(reqId)
    nacks = self.reqRepStore.getNacks(reqId)
    f = getMaxFailures(len(self.nodeReg))
    if len(acks) > f:
        return True, "Done"
    elif len(nacks) > f:
        # TODO: What if the nacks were different from each node?
        return False, list(nacks.values())[0]
    else:
        return None
def changeNodeHa(looper, txnPoolNodeSet, tdirWithPoolTxns, poolTxnData,
                 poolTxnStewardNames, tconf, shouldBePrimary):
    # prepare new ha for node and client stack
    subjectedNode = None
    stewardName = None
    stewardsSeed = None

    for nodeIndex, n in enumerate(txnPoolNodeSet):
        if (shouldBePrimary and n.primaryReplicaNo == 0) or \
                (not shouldBePrimary and n.primaryReplicaNo != 0):
            subjectedNode = n
            stewardName = poolTxnStewardNames[nodeIndex]
            stewardsSeed = poolTxnData["seeds"][stewardName].encode()
            break

    nodeStackNewHA, clientStackNewHA = genHa(2)
    logger.debug("change HA for node: {} to {}".
                 format(subjectedNode.name,
                        (nodeStackNewHA, clientStackNewHA)))

    nodeSeed = poolTxnData["seeds"][subjectedNode.name].encode()

    # change HA
    stewardClient, req = changeHA(looper, tconf, subjectedNode.name, nodeSeed,
                                  nodeStackNewHA, stewardName, stewardsSeed)
    f = getMaxFailures(len(stewardClient.nodeReg))
    looper.run(eventually(checkSufficientRepliesRecvd, stewardClient.inBox,
                          req.reqId, f, retryWait=1, timeout=20))

    # stop node for which HA will be changed
    subjectedNode.stop()
    looper.removeProdable(subjectedNode)

    # start node with new HA
    restartedNode = TestNode(subjectedNode.name,
                             basedirpath=tdirWithPoolTxns, config=tconf,
                             ha=nodeStackNewHA, cliha=clientStackNewHA)
    looper.add(restartedNode)
    txnPoolNodeSet[nodeIndex] = restartedNode
    looper.run(checkNodesConnected(txnPoolNodeSet, overrideTimeout=70))
    ensureElectionsDone(looper, txnPoolNodeSet, retryWait=1, timeout=10)

    # start client and check the node HA
    anotherClient, _ = genTestClient(tmpdir=tdirWithPoolTxns,
                                     usePoolLedger=True)
    looper.add(anotherClient)
    looper.run(eventually(anotherClient.ensureConnectedToNodes))
    stewardWallet = Wallet(stewardName)
    stewardWallet.addIdentifier(signer=SimpleSigner(seed=stewardsSeed))
    sendReqsToNodesAndVerifySuffReplies(looper, stewardWallet, stewardClient, 8)
    looper.run(eventually(checkIfGenesisPoolTxnFileUpdated, *txnPoolNodeSet,
                          stewardClient, anotherClient, retryWait=1,
                          timeout=10))
def checkSufficientRepliesForRequests(looper, client, requests, fVal=None,
                                      timeoutPerReq=None):
    nodeCount = len(client.nodeReg)
    fVal = fVal or getMaxFailures(nodeCount)
    timeoutPerReq = timeoutPerReq or 5 * nodeCount

    coros = []
    for request in requests:
        coros.append(partial(checkSufficientRepliesRecvd, client.inBox,
                             request.reqId, fVal))
    looper.run(eventuallyAll(*coros, retryWait=1,
                             totalTimeout=timeoutPerReq * len(requests)))
def msgCountOK(nodesSize,
               faultyNodes,
               actualMessagesReceived,
               numOfMsgsWithZNF,
               numOfSufficientMsgs):
    if faultyNodes == 0:
        return actualMessagesReceived == numOfMsgsWithZNF
    elif faultyNodes <= getMaxFailures(nodesSize):
        return actualMessagesReceived >= numOfSufficientMsgs
    else:
        # Less than or equal to `numOfSufficientMsgs` since the faults may
        # not reduce the number of correct messages
        return actualMessagesReceived <= numOfSufficientMsgs
def test_start_catchup_on_quorum_of_stashed_checkpoints(
        checkpoint_service, checkpoint, pre_prepare, tconf, ordered,
        validators, is_master):
    master_catchup_handler = Mock()
    backup_catchup_handler = Mock()
    checkpoint_service._bus.subscribe(NeedMasterCatchup,
                                      master_catchup_handler)
    checkpoint_service._bus.subscribe(NeedBackupCatchup,
                                      backup_catchup_handler)

    def check_catchup_not_started():
        master_catchup_handler.assert_not_called()
        backup_catchup_handler.assert_not_called()

    def check_catchup_started(till_seq_no: int):
        if is_master:
            master_catchup_handler.assert_called_once_with(NeedMasterCatchup())
            backup_catchup_handler.assert_not_called()
        else:
            master_catchup_handler.assert_not_called()
            backup_catchup_handler.assert_called_once_with(
                NeedBackupCatchup(
                    inst_id=checkpoint_service._data.inst_id,
                    caught_up_till_3pc=(checkpoint_service.view_no,
                                        till_seq_no)))

    quorum = checkpoint_service._data.quorums.checkpoint.value
    n = len(validators)
    assert quorum == n - getMaxFailures(n) - 1
    senders = ["sender{}".format(i) for i in range(quorum + 1)]
    till_seq_no = 2 * tconf.CHK_FREQ

    new_checkpoint = Checkpoint(instId=ordered.instId,
                                viewNo=ordered.viewNo,
                                seqNoStart=0,
                                seqNoEnd=till_seq_no,
                                digest=cp_digest(till_seq_no))

    key = checkpoint_service._checkpoint_key(checkpoint)
    for sender in senders[:quorum]:
        checkpoint_service.process_checkpoint(checkpoint, sender)
        assert sender in checkpoint_service._received_checkpoints[key]
    check_catchup_not_started()

    new_key = checkpoint_service._checkpoint_key(new_checkpoint)
    for sender in senders[:quorum - 1]:
        checkpoint_service.process_checkpoint(new_checkpoint, sender)
        assert sender in checkpoint_service._received_checkpoints[new_key]
    check_catchup_not_started()

    checkpoint_service.process_checkpoint(new_checkpoint, senders[quorum - 1])
    check_catchup_started(till_seq_no)
def testClientSendingSameRequestAgainBeforeFirstIsProcessed(looper, nodeSet,
                                                            up, wallet1,
                                                            client1):
    size = len(client1.inBox)
    req = sendRandomRequest(wallet1, client1)
    client1.submitReqs(req)
    f = getMaxFailures(len(nodeSet))
    looper.run(eventually(
        checkSufficientRepliesRecvd, client1.inBox,
        req.reqId, f, retryWait=1, timeout=3 * len(nodeSet)))
    # Only REQACK will be sent twice by the node but not REPLY
    assert len(client1.inBox) == size + 12
def sendReqsToNodesAndVerifySuffReplies(looper: Looper,
                                        wallet: Wallet,
                                        client: TestClient,
                                        numReqs: int,
                                        fVal: int = None,
                                        timeoutPerReq: float = None):
    nodeCount = len(client.nodeReg)
    fVal = fVal or getMaxFailures(nodeCount)
    timeoutPerReq = timeoutPerReq or 5 * nodeCount

    requests = sendRandomRequests(wallet, client, numReqs)
    checkSufficientRepliesForRequests(looper, client, requests, fVal,
                                      timeoutPerReq)
    return requests
def newNodeAdded(be, do, poolNodesStarted, philCli, newStewardCli,
                 connectedToTest):
    be(philCli)
    if not philCli._isConnectedToAnyEnv():
        do('connect test', within=3, expect=connectedToTest)

    be(newStewardCli)
    doNodeCmd(do)
    newNodeData = vals["newNodeData"]

    def checkClientConnected(client):
        name = newNodeData[ALIAS] + CLIENT_STACK_SUFFIX
        assert name in client.nodeReg

    def checkNodeConnected(nodes):
        for node in nodes:
            name = newNodeData[ALIAS]
            assert name in node.nodeReg

    timeout = waits.expectedClientToPoolConnectionTimeout(
        util.getMaxFailures(len(philCli.nodeReg)))
    newStewardCli.looper.run(eventually(checkClientConnected,
                                        newStewardCli.activeClient,
                                        timeout=timeout))

    timeout = waits.expectedClientToPoolConnectionTimeout(
        util.getMaxFailures(len(philCli.nodeReg)))
    philCli.looper.run(eventually(checkClientConnected,
                                  philCli.activeClient,
                                  timeout=timeout))

    timeout = waits.expectedClientToPoolConnectionTimeout(
        util.getMaxFailures(len(philCli.nodeReg)))
    poolNodesStarted.looper.run(
        eventually(checkNodeConnected,
                   list(poolNodesStarted.nodes.values()),
                   timeout=timeout))
    return vals
def post_batch_applied(self, three_pc_batch: ThreePcBatch,
                       prev_handler_result=None):
    node_reg = list(self.node.nodeReg.keys())
    number_of_inst = getMaxFailures(len(node_reg)) + 1
    view_no = self.node.viewNo if three_pc_batch.original_view_no is None \
        else three_pc_batch.original_view_no
    validators = TxnPoolManager.calc_node_names_ordered_by_rank(
        node_reg, copy.deepcopy(self.node.nodeIds))
    three_pc_batch.primaries = self.node.primaries_selector.select_primaries(
        view_no=view_no,
        instance_count=number_of_inst,
        validators=validators)
    return three_pc_batch.primaries
def sendReqsToNodesAndVerifySuffReplies(looper: Looper,
                                        client: TestClient,
                                        numReqs: int,
                                        fVal: int = None,
                                        timeout: float = None):
    nodeCount = len(client.nodeReg)
    fVal = fVal or getMaxFailures(nodeCount)
    timeout = timeout or 3 * nodeCount

    requests = sendRandomRequests(client, numReqs)
    for request in requests:
        looper.run(eventually(checkSufficientRepliesRecvd, client.inBox,
                              request.reqId, fVal,
                              retryWait=1, timeout=timeout))
    return requests
def waitForSufficientRepliesForRequests(looper,
                                        client,
                                        *,  # To force usage of names
                                        requests=None,
                                        requestIds=None,
                                        fVal=None,
                                        customTimeoutPerReq=None,
                                        add_delay_to_timeout: float = 0,
                                        override_timeout_limit=False,
                                        total_timeout=None):
    """
    Checks the number of replies for the given requests of a specific client
    and raises an exception if the quorum is not reached for at least one of
    them.

    :requests: list of requests; mutually exclusive with 'requestIds'
    :requestIds: list of request ids; mutually exclusive with 'requests'
    :returns: nothing
    """
    if requests is not None and requestIds is not None:
        raise ValueError("Args 'requests' and 'requestIds' are "
                         "mutually exclusive")
    requestIds = requestIds or [request.reqId for request in requests]

    nodeCount = len(client.nodeReg)
    fVal = fVal or getMaxFailures(nodeCount)

    if not total_timeout:
        timeoutPerRequest = customTimeoutPerReq or \
            waits.expectedTransactionExecutionTime(nodeCount)
        timeoutPerRequest += add_delay_to_timeout
        # Here we try to take into account the timeout for executing
        # N requests - total_timeout should satisfy
        # timeoutPerRequest < total_timeout < timeoutPerRequest * N.
        # We cannot just take (timeoutPerRequest * N) because it is too large
        # (for timeoutPerRequest=5 and N=10, total_timeout=50 sec),
        # so let's start with a simple formula:
        total_timeout = (1 + len(requestIds) / 10) * timeoutPerRequest

    coros = []
    for requestId in requestIds:
        coros.append(partial(checkSufficientRepliesReceived,
                             client.inBox,
                             requestId,
                             fVal))
    chk_all_funcs(looper, coros, retry_wait=1, timeout=total_timeout,
                  override_eventually_timeout=override_timeout_limit)
def __init__(self, n):
    f = getMaxFailures(n)
    self.f = f
    self.propagate = Quorum(f + 1)
    self.prepare = Quorum(n - f - 1)
    self.commit = Quorum(n - f)
    self.reply = Quorum(f + 1)
    self.view_change = Quorum(n - f)
    self.election = Quorum(n - f)
    self.view_change_done = Quorum(n - f)
    self.same_consistency_proof = Quorum(f + 1)
    self.consistency_proof = Quorum(f + 1)
    self.ledger_status = Quorum(f + 1)
    self.checkpoint = Quorum(2 * f)
def __init__(self, name: str, validators: List[str], primary_name: str,
             timer: TimerService, bus: InternalBus, network: ExternalBus,
             write_manager: WriteRequestManager,
             bls_bft_replica: BlsBftReplica = None):
    # ToDo: Maybe ConsensusSharedData should be initiated before and passed already prepared?
    self._data = ConsensusSharedData(name, validators, 0)
    self._data.primary_name = generateName(primary_name, self._data.inst_id)
    self.config = getConfig()
    self.stasher = StashingRouter(self.config.REPLICA_STASH_LIMIT,
                                  buses=[bus, network])
    self._write_manager = write_manager
    self._orderer = OrderingService(
        data=self._data,
        timer=timer,
        bus=bus,
        network=network,
        write_manager=self._write_manager,
        bls_bft_replica=bls_bft_replica,
        freshness_checker=FreshnessChecker(
            freshness_timeout=self.config.STATE_FRESHNESS_UPDATE_INTERVAL),
        stasher=self.stasher)
    self._orderer._validator = OrderingServiceMsgValidator(self._orderer._data)
    self._checkpointer = CheckpointService(self._data, bus, network,
                                           self.stasher,
                                           write_manager.database_manager)
    self._view_changer = ViewChangeService(self._data, timer, bus, network,
                                           self.stasher)
    self._message_requestor = MessageReqService(self._data, bus, network)
    self._add_ledgers()

    # TODO: This is just for testing purposes only
    self._data.checkpoints.append(
        Checkpoint(instId=0, viewNo=0, seqNoStart=0, seqNoEnd=0,
                   digest='4F7BsTMVPKFshM1MwLf6y23cid6fL3xMpazVoF9krzUw'))

    # ToDo: it should be done in Zero-view stage.
    self._data.primaries = self._view_changer._primaries_selector.select_primaries(
        self._data.view_no, getMaxFailures(len(validators)) + 1, validators)
def __init__(self,
             clientId: str,
             nodeReg: Dict[str, HA] = None,
             ha: Union[HA, Tuple[str, int]] = None,
             lastReqId: int = 0,
             signer: Signer = None,
             basedirpath: str = None):
    """
    Creates a new client.

    :param clientId: unique identifier for the client
    :param nodeReg: names and host addresses of all nodes in the pool
    :param lastReqId: Request Id of the last request sent by client
    :param stack: node stack or dictionary of node constructor kwargs
    :param signer: Helper for signer (defines sign method)
    """
    self.clientId = clientId
    self.lastReqId = lastReqId
    self._clientStack = None
    self.minimumNodes = getMaxFailures(len(nodeReg)) + 1

    cha = ha if isinstance(ha, HA) else HA(*ha)
    stackargs = dict(name=clientId,
                     ha=cha,
                     main=False,  # stops incoming vacuous joins
                     auto=AutoMode.always)
    if basedirpath:
        stackargs['basedirpath'] = basedirpath

    self.created = time.perf_counter()
    NodeStacked.__init__(self, stackParams=stackargs, nodeReg=nodeReg)
    logger.info("Client initialized with the following node registry:")
    lengths = [max(x) for x in zip(*[
        (len(name), len(host), len(str(port)))
        for name, (host, port) in nodeReg.items()])]
    fmt = "  {{:<{}}} listens at {{:<{}}} on port {{:>{}}}".format(*lengths)
    for name, (host, port) in nodeReg.items():
        logger.info(fmt.format(name, host, port))
    Motor.__init__(self)
    self.inBox = deque()
    self.signer = signer if signer else SimpleSigner(self.clientId)
    self.connectNicelyUntil = 0  # don't need to connect nicely as a client
def testClientSendingSameRequestAgainBeforeFirstIsProcessed(
        looper, nodeSet, up, wallet1, client1):
    size = len(client1.inBox)
    req = sendRandomRequest(wallet1, client1)
    client1.submitReqs(req)
    f = getMaxFailures(len(nodeSet))
    looper.run(
        eventually(checkSufficientRepliesRecvd, client1.inBox,
                   req.reqId, f, retryWait=1, timeout=3 * len(nodeSet)))
    # Only REQACK will be sent twice by the node but not REPLY
    assert len(client1.inBox) == size + 12
def test_start_catchup_on_quorum_of_stashed_checkpoints(
        checkpoint_service, checkpoint, pre_prepare, tconf, ordered,
        validators, is_master):
    master_catchup_handler = Mock()
    backup_catchup_handler = Mock()
    checkpoint_service._bus.subscribe(NeedMasterCatchup,
                                      master_catchup_handler)
    checkpoint_service._bus.subscribe(NeedBackupCatchup,
                                      backup_catchup_handler)

    quorum = checkpoint_service._data.quorums.checkpoint.value
    print(quorum)
    n = len(validators)
    assert quorum == n - getMaxFailures(n) - 1
    senders = ["sender{}".format(i) for i in range(quorum + 1)]
    old_key = (1, tconf.CHK_FREQ)
    key = (old_key[1] + 1, old_key[1] + tconf.CHK_FREQ)
    new_checkpoint = Checkpoint(instId=ordered.instId,
                                viewNo=ordered.viewNo,
                                seqNoStart=key[0],
                                seqNoEnd=key[1],
                                digest=cp_digest(1, 1))

    for sender in senders[:quorum]:
        assert not checkpoint_service._do_process_checkpoint(
            checkpoint, sender)
        assert checkpoint_service._stashed_recvd_checkpoints[
            checkpoint.viewNo][old_key][sender] == checkpoint

    for sender in senders[:quorum - 1]:
        assert not checkpoint_service._do_process_checkpoint(
            new_checkpoint, sender)
        assert checkpoint_service._stashed_recvd_checkpoints[
            checkpoint.viewNo][key][sender] == new_checkpoint

    assert not checkpoint_service._do_process_checkpoint(
        new_checkpoint, senders[quorum - 1])

    if is_master:
        assert checkpoint_service._data.low_watermark == key[1]
        master_catchup_handler.assert_called_once_with(NeedMasterCatchup())
    else:
        backup_catchup_handler.assert_called_once_with(
            NeedBackupCatchup(inst_id=checkpoint_service._data.inst_id,
                              caught_up_till_3pc=(checkpoint_service.view_no,
                                                  key[1])))
def testCorrectNumOfReplicas(pool):
    fValue = getMaxFailures(len(pool.nodeset))
    for node in pool.nodeset:
        # num of replicas running on a single node must be f + 1
        assert len(node.replicas) == fValue + 1
        # num of primary replicas on a node is <= 1
        numberOfPrimary = len([r for r in node.replicas if r.isPrimary])
        assert numberOfPrimary <= 1
        for instId in getProtocolInstanceNums(node):
            # num of replicas for an instance on a node must be 1
            assert len([node.replicas[instId]]) == 1 and \
                node.replicas[instId].instId == instId
            # num of primaries on every protocol instance is 1
            numberOfPrimary = len([node for node in pool.nodeset
                                   if node.replicas[instId].isPrimary])
            assert numberOfPrimary == 1
def replied1(looper, nodeSet, client1, committed1, wallet1, faultyNodes):
    def checkOrderedCount():
        instances = getNoInstances(len(nodeSet))
        resp = [requestReturnedToNode(node, wallet1.defaultId,
                                      committed1.reqId, instId)
                for node in nodeSet for instId in range(instances)]
        assert resp.count(True) >= (len(nodeSet) - faultyNodes) * instances

    looper.run(eventually(checkOrderedCount, retryWait=1, timeout=30))
    looper.run(eventually(
        checkSufficientRepliesRecvd, client1.inBox, committed1.reqId,
        getMaxFailures(len(nodeSet)), retryWait=2, timeout=30))
    return committed1
def checkIfCPsNeeded(self, ledgerType):
    if self.consistencyProofsTimers[ledgerType] is not None:
        logger.debug("{} requesting consistency proofs after timeout".
                     format(self))
        adjustedF = getMaxFailures(self.owner.totalNodes - 1)
        recvdConsProof = self.recvdConsistencyProofs[ledgerType]
        grpdPrf, nullProofs = self._groupConsistencyProofs(recvdConsProof)
        if nullProofs > adjustedF:
            return
        result = self._latestReliableProof(grpdPrf,
                                           self.ledgers[ledgerType]["ledger"])
        if not result:
            cpReq = self.getConsistencyProofRequest(ledgerType, grpdPrf)
            logger.debug("{} sending consistency proof request: {}".
                         format(self, cpReq))
            self.send(cpReq)

        self.recvdConsistencyProofs[ledgerType] = {}
        self.consistencyProofsTimers[ledgerType] = None
async def checkReplies(self, reqs, org=None, retryWait=.25, timeout=None,
                       ratchetSteps=10):
    org = org if org else self.actor
    if not isinstance(reqs, Iterable):
        reqs = [reqs]

    nodeCount = sum(1 for _ in self.nodes)
    f = getMaxFailures(nodeCount)
    corogen = (eventually(waitForSufficientRepliesForRequests,
                          org.client.inBox,
                          r.reqId,
                          f,
                          retryWait=retryWait,
                          timeout=timeout,
                          ratchetSteps=ratchetSteps) for r in reqs)
    return await runall(corogen)
def hasConsensus(self, reqId: int) -> Optional[str]:
    """
    Accepts a request ID and returns the agreed-upon result if consensus was
    reached for the request, else False.

    :param reqId: Request ID
    """
    replies = self.getRepliesFromAllNodes(reqId)
    if not replies:
        raise KeyError(reqId)  # NOT_FOUND
    # Check whether at least f + 1 replies have been received.
    f = getMaxFailures(len(self.nodeReg))
    if f + 1 > len(replies):
        return False  # UNCONFIRMED
    else:
        onlyResults = {frm: reply['result'] for frm, reply in replies.items()}
        resultsList = list(onlyResults.values())
        # If all the elements in resultsList are equal, consensus is reached.
        if all(result == resultsList[0] for result in resultsList):
            return resultsList[0]  # CONFIRMED
        else:
            logging.error(
                "Received a different result from at least one of the nodes.")
            # Now we need to know the counts of the different results.
            jsonResults = [json.dumps(result, sort_keys=True)
                           for result in resultsList]
            # counts dictionary for calculating the count of different results
            counts = {}
            for jresult in jsonResults:
                counts[jresult] = counts.get(jresult, 0) + 1
            if counts[max(counts, key=counts.get)] > f + 1:
                # CONFIRMED, as more than f + 1 matching results were found
                return json.loads(max(counts, key=counts.get))
            else:
                # UNCONFIRMED, as more than f + 1 matching results were not found
                return False
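# A minimal sketch (not from the original codebase) of the reply counting done
# inline in hasConsensus above, expressed with collections.Counter: a result is
# accepted only if more than f nodes returned exactly the same JSON-serialised
# value. The helper name `more_than_f_same` is hypothetical.
import collections
import json


def more_than_f_same(results, f):
    # Serialise results so that structurally equal dicts compare equal.
    serialised = [json.dumps(r, sort_keys=True) for r in results]
    value, count = collections.Counter(serialised).most_common(1)[0]
    return json.loads(value) if count > f else False


# Example: with f = 1, three matching replies out of four reach consensus.
assert more_than_f_same([{'x': 1}, {'x': 1}, {'x': 1}, {'x': 2}], 1) == {'x': 1}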
def newNodeAdded(be, do, poolNodesStarted, philCli, newStewardCli,
                 newNodeVals):
    be(philCli)
    if not philCli._isConnectedToAnyEnv():
        connect_and_check_output(do, philCli.txn_dir)

    be(newStewardCli)
    doSendNodeCmd(do, newNodeVals)
    newNodeData = newNodeVals["newNodeData"]

    def checkClientConnected(client):
        name = newNodeData[ALIAS] + CLIENT_STACK_SUFFIX
        assert name in client.nodeReg

    def checkNodeConnected(nodes):
        for node in nodes:
            name = newNodeData[ALIAS]
            assert name in node.nodeReg

    timeout = waits.expectedClientToPoolConnectionTimeout(
        util.getMaxFailures(len(philCli.nodeReg))
    )
    newStewardCli.looper.run(eventually(checkClientConnected,
                                        newStewardCli.activeClient,
                                        timeout=timeout))
    philCli.looper.run(eventually(checkClientConnected,
                                  philCli.activeClient,
                                  timeout=timeout))
    poolNodesStarted.looper.run(
        eventually(checkNodeConnected,
                   list(poolNodesStarted.nodes.values()),
                   timeout=timeout))
    return newNodeVals
def __init__(self, n):
    f = getMaxFailures(n)
    self.f = f
    self.weak = Quorum(f + 1)
    self.strong = Quorum(n - f)
    self.propagate = Quorum(f + 1)
    self.prepare = Quorum(n - f - 1)
    self.commit = Quorum(n - f)
    self.reply = Quorum(f + 1)
    self.view_change = Quorum(n - f)
    self.election = Quorum(n - f)
    self.view_change_done = Quorum(n - f)
    self.propagate_primary = Quorum(f + 1)
    self.same_consistency_proof = Quorum(f + 1)
    self.consistency_proof = Quorum(f + 1)
    self.ledger_status = Quorum(n - f - 1)
    self.ledger_status_last_3PC = Quorum(f + 1)
    self.checkpoint = Quorum(n - f - 1)
    self.timestamp = Quorum(f + 1)
    self.bls_signatures = Quorum(n - f)
    self.observer_data = Quorum(f + 1)
    self.backup_instance_faulty = Quorum(f + 1)
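# A minimal sketch (not part of the original module) illustrating the assumed
# behaviour of getMaxFailures: for a BFT pool of n nodes it is taken to return
# f = (n - 1) // 3, which is what makes the strong quorum n - f and the weak
# quorum f + 1 used above safe. The function name `max_failures` is
# hypothetical and only stands in for the real helper.
def max_failures(n: int) -> int:
    # Largest number of faulty nodes a pool of n nodes can tolerate,
    # assuming the standard BFT bound n >= 3f + 1.
    return (n - 1) // 3


# Example: a 4-node pool tolerates 1 fault, so the strong quorum is 3 (n - f)
# and the weak quorum is 2 (f + 1); a 7-node pool tolerates 2 faults.
assert max_failures(4) == 1
assert max_failures(7) == 2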
def testAdd2NewNodes(looper, txnPoolNodeSet, sdk_pool_handle,
                     sdk_wallet_steward, tdir, tconf, allPluginsPath):
    """
    Add 2 new nodes to trigger replica addition and primary election
    """
    new_nodes = sdk_add_2_nodes(looper, txnPoolNodeSet, sdk_pool_handle,
                                sdk_wallet_steward, tdir, tconf,
                                allPluginsPath)

    for n in new_nodes:
        logger.debug("{} connected to the pool".format(n))

    f = getMaxFailures(len(txnPoolNodeSet))

    def checkFValue():
        for node in txnPoolNodeSet:
            assert node.f == f
            assert len(node.replicas) == (f + 1)

    timeout = waits.expectedClientToPoolConnectionTimeout(len(txnPoolNodeSet))
    looper.run(eventually(checkFValue, retryWait=1, timeout=timeout))
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    sdk_pool_refresh(looper, sdk_pool_handle)
def testLogFiltering(cli, validNodeNames, createAllNodes):
    msg = '{"Hello": "There"}'
    client, wallet = checkRequest(cli, msg)

    x = client.handleOneNodeMsg

    def handleOneNodeMsg(self, wrappedMsg, excludeFromCli=None):
        return x(wrappedMsg, excludeFromCli=True)

    client.handleOneNodeMsg = types.MethodType(handleOneNodeMsg, client)
    client.nodestack.msgHandler = client.handleOneNodeMsg

    msg = '{"Hello": "Where"}'
    cli.enterCmd("client {} send {}".format(client.name, msg))
    cli.looper.run(
        eventually(
            checkSufficientRepliesRecvd,
            client.inBox,
            wallet._getIdData().lastReqId,
            getMaxFailures(len(cli.nodes)),
            retryWait=2,
            timeout=10,
        )
    )
    assert "got msg from node" not in cli.lastCmdOutput
def f(self):
    return getMaxFailures(len(self.nodes))
def getRequiredInstances(nodeCount: int) -> int:
    f_value = getMaxFailures(nodeCount)
    return f_value + 1