def msgCountOK(nodesSize,
               faultyNodes,
               actualMessagesReceived,
               numOfMsgsWithZFN,
               numOfSufficientMsgs):
    if faultyNodes == 0:
        logging.info("With zero faulty nodes: "
                     "Actual Messages: {}, withZeroFaultRequired: {}, "
                     "sufficientRequired: {}".format(actualMessagesReceived,
                                                     numOfMsgsWithZFN,
                                                     numOfSufficientMsgs))
        return actualMessagesReceived == numOfMsgsWithZFN
    elif faultyNodes <= getMaxFailures(nodesSize):
        logging.info("With tolerable faults: "
                     "Actual Messages: {}, withZeroFaultRequired: {}, "
                     "sufficientRequired: {}".format(actualMessagesReceived,
                                                     numOfMsgsWithZFN,
                                                     numOfSufficientMsgs))
        return actualMessagesReceived >= numOfSufficientMsgs
    else:
        logging.info("With more faults than the system can tolerate: "
                     "Actual Messages: {}, withZeroFaultRequired: {}, "
                     "sufficientRequired: {}".format(actualMessagesReceived,
                                                     numOfMsgsWithZFN,
                                                     numOfSufficientMsgs))
        # At most `numOfSufficientMsgs`, since the faults may not actually
        # reduce the number of correct messages
        return actualMessagesReceived <= numOfSufficientMsgs
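# A hedged usage sketch (not part of the suite): the figures below assume a
# 7-node pool and that getMaxFailures(7) returns 2, i.e. the usual
# floor((n - 1) / 3) BFT bound; the expectations 6 and 4 mirror the
# n - 1 / 2f PREPARE thresholds used in checkPrepared further down.
assert msgCountOK(7, 0, 6, 6, 4)   # zero faults: must match the zero-fault count exactly
assert msgCountOK(7, 2, 5, 6, 4)   # faults <= f: at least the sufficient count
assert msgCountOK(7, 3, 3, 6, 4)   # faults > f: at most the sufficient count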
def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up, client1):
    """
    Test that every node queues 3-phase requests (PRE-PREPARE, PREPARE and
    COMMIT) that come from a view which is greater than the current view
    """
    f = getMaxFailures(nodeCount)

    # Delay processing of instance change on a node
    nodeA = nodeSet.Alpha
    nodeA.nodeIbStasher.delay(icDelay(60))

    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    # Delay processing of PRE-PREPARE from all non primary replicas of the
    # master so the master's throughput falls and a view change is initiated
    ppDelayer = ppDelay(5, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelayer)

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 4,
                                        timeout=5 * nodeCount)

    # Every node except Node A should have a view change
    for node in nodeSet:
        if node.name != nodeA.name:
            looper.run(eventually(
                partial(checkViewChangeInitiatedForNode, node, 0),
                retryWait=1,
                timeout=20))

    # Node A's view should not have changed yet
    with pytest.raises(AssertionError):
        looper.run(eventually(partial(
            checkViewChangeInitiatedForNode, nodeA, 0),
            retryWait=1,
            timeout=20))

    # NodeA should not have any pending 3-phase request for a later view
    for r in nodeA.replicas:  # type: TestReplica
        assert len(r.threePhaseMsgsForLaterView) == 0

    # Reset delays on incoming messages from all nodes
    for node in nodeSet:
        node.nodeIbStasher.nodelay(ppDelayer)

    # Send one more request
    sendRandomRequest(client1)

    def checkPending3PhaseReqs():
        # Get all replicas that have their primary status decided
        reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None]
        # At least one replica should have its primary status decided
        assert len(reps) > 0
        for r in reps:  # type: TestReplica
            logging.debug("primary status for replica {} is {}"
                          .format(r, r.primaryNames))
            assert len(r.threePhaseMsgsForLaterView) > 0

    # NodeA should now have a pending 3-phase request for a later view
    looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=30))
def testCorrectNumOfProtocolInstances(pool):
    fValue = getMaxFailures(len(pool.nodeSet))
    for node in pool.nodeSet:
        # the number of protocol instances running on a node must be f + 1
        assert len(getProtocolInstanceNums(node)) == fValue + 1

        # there should be one master instance running and up
        assert node.masterInst is not None

        # there should be exactly f non master instances
        assert len(node.nonMasterInsts) == fValue
def sendReqsToNodesAndVerifySuffReplies(looper: Looper,
                                        client: TestClient,
                                        numReqs: int,
                                        timeout: float = None):
    nodeCount = len(client.nodeReg)
    f = getMaxFailures(nodeCount)
    # Default the per-request timeout if the caller did not supply one
    if timeout is None:
        timeout = 3 * nodeCount
    requests = [sendRandomRequest(client) for _ in range(numReqs)]
    for request in requests:
        looper.run(eventually(checkSufficientRepliesRecvd, client.inBox,
                              request.reqId, f,
                              retryWait=1, timeout=timeout))
    return requests
def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1):
    """
    Replicas should not accept a PRE-PREPARE for view "v" and pre-prepare
    sequence number "n" if they have already accepted a request with view
    number "v" and sequence number "n"
    """
    numOfNodes = 4
    fValue = getMaxFailures(numOfNodes)
    request1 = sendRandomRequest(client1)
    result1 = looper.run(
        eventually(checkSufficientRepliesRecvd, client1.inBox,
                   request1.reqId, fValue,
                   retryWait=1, timeout=5))
    logging.debug("request {} gives result {}".format(request1, result1))
    primaryRepl = getPrimaryReplica(nodeSet)
    logging.debug("Primary Replica: {}".format(primaryRepl))
    logging.debug(
        "Decrementing the primary replica's pre-prepare sequence number by "
        "one...")
    primaryRepl.prePrepareSeqNo -= 1
    request2 = sendRandomRequest(client1)
    looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2,
                          retryWait=1, timeout=10))

    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet)
    logging.debug("Non Primary Replicas: " + str(nonPrimaryReplicas))
    prePrepareReq = PrePrepare(
        primaryRepl.instId,
        primaryRepl.viewNo,
        primaryRepl.prePrepareSeqNo,
        client1.clientId,
        request2.reqId,
        request2.digest)

    logging.debug("Checking whether all the non primary replicas have "
                  "received the pre-prepare request with the same sequence "
                  "number")
    looper.run(eventually(checkPrePrepareReqRecvd,
                          nonPrimaryReplicas,
                          prePrepareReq,
                          retryWait=1,
                          timeout=10))
    logging.debug("Checking that none of the non primary replicas sent a "
                  "PREPARE in response to the pre-prepare message")
    for npr in nonPrimaryReplicas:
        with pytest.raises(AssertionError):
            looper.run(eventually(checkPrepareReqSent,
                                  npr,
                                  client1.clientId,
                                  request2.reqId,
                                  retryWait=1,
                                  timeout=10))
def testCorrectNumOfReplicas(pool):
    fValue = getMaxFailures(len(pool.nodeSet))
    for node in pool.nodeSet:
        # the number of replicas running on a single node must be f + 1
        assert len(node.replicas) == fValue + 1

        # at most one replica on a node is primary
        numberOfPrimary = len([r for r in node.replicas if r.isPrimary])
        assert numberOfPrimary <= 1

        for instId in getProtocolInstanceNums(node):
            # the replica at index `instId` must belong to that protocol
            # instance
            assert node.replicas[instId].instId == instId

            # exactly one replica is primary on every protocol instance
            numberOfPrimary = len([n for n in pool.nodeSet
                                   if n.replicas[instId].isPrimary])
            assert numberOfPrimary == 1
def hasConsensus(self, reqId: int) -> Optional[str]:
    """
    Accepts a request ID and returns the agreed-upon result if consensus
    was reached for the request, otherwise False.

    :param reqId: Request ID
    :return: the result on consensus, otherwise False
    """
    replies = self.getRepliesFromAllNodes(reqId)
    if not replies:
        raise KeyError(reqId)  # NOT_FOUND
    # Check whether at least f + 1 replies have been received
    f = getMaxFailures(len(self.nodeReg))
    if f + 1 > len(replies):
        return False  # UNCONFIRMED
    else:
        onlyResults = {frm: reply['result'] for frm, reply in
                       replies.items()}
        resultsList = list(onlyResults.values())
        # if all the elements in resultsList are equal, consensus is reached
        if all(result == resultsList[0] for result in resultsList):
            return resultsList[0]  # CONFIRMED
        else:
            logging.error(
                "Received a different result from at least one of the nodes")
            # Now we need to know the counts of the different results
            jsonResults = [json.dumps(result, sort_keys=True)
                           for result in resultsList]
            # counts dictionary for tallying the different results
            counts = {}
            for jresult in jsonResults:
                counts[jresult] = counts.get(jresult, 0) + 1
            if counts[max(counts, key=counts.get)] >= f + 1:
                # CONFIRMED, as f + 1 matching results found
                return json.loads(max(counts, key=counts.get))
            else:
                # UNCONFIRMED, as f + 1 matching results are not found
                return False
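# A minimal, self-contained sketch of the tally performed in the final
# branch of hasConsensus (illustration only, not part of the client). It
# assumes f = 1, i.e. a 4-node pool, and uses made-up reply results.
import json
from collections import Counter

f = 1
results = [{'txnId': 'abc'}, {'txnId': 'abc'}, {'txnId': 'xyz'}]
counts = Counter(json.dumps(r, sort_keys=True) for r in results)
winner, votes = counts.most_common(1)[0]
# f + 1 == 2 matching results are enough to confirm
confirmed = json.loads(winner) if votes >= f + 1 else False
assert confirmed == {'txnId': 'abc'}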
def checkCommited(looper, nodeSet, prepared1, instIds, faultyNodes=0):
    nodeCount = len(list(nodeSet))
    f = getMaxFailures(nodeCount)

    def g(instId):
        allReplicas = getAllReplicas(nodeSet, instId)
        primaryReplica = getPrimaryReplica(nodeSet, instId)

        def replicasSeesCorrectNumOfCOMMITs():
            """
            The number of COMMIT messages tracked by a replica must equal n
            (the number of nodes) with zero faults, and be at least 2f + 1
            with faults.
            """
            passes = 0
            numOfMsgsWithZFN = nodeCount
            numOfMsgsWithFault = (2 * f) + 1

            key = (primaryReplica.viewNo, primaryReplica.prePrepareSeqNo)
            for r in allReplicas:
                if key in r.commits:
                    rcvdCommitRqst = r.commits[key]
                    assert rcvdCommitRqst[0] == prepared1.digest
                    actualMsgsReceived = len(rcvdCommitRqst[1])

                    passes += int(msgCountOK(nodeCount,
                                             faultyNodes,
                                             actualMsgsReceived,
                                             numOfMsgsWithZFN,
                                             numOfMsgsWithFault))

            assert passes >= len(allReplicas) - faultyNodes

        def replicasReceivesCorrectNumberOfCOMMITs():
            """
            The number of COMMIT messages processed by a replica must equal
            n - 1 with zero faults, and be at least 2f with faults.
            """
            passes = 0
            numOfMsgsWithZFN = nodeCount - 1
            numOfMsgsWithFault = 2 * f

            for r in allReplicas:
                args = getAllArgs(r, r.processCommit)
                actualMsgsReceived = len(args)

                passes += int(msgCountOK(nodeCount,
                                         faultyNodes,
                                         actualMsgsReceived,
                                         numOfMsgsWithZFN,
                                         numOfMsgsWithFault))

                for arg in args:
                    assert arg['commit'].viewNo == primaryReplica.viewNo and \
                        arg['commit'].ppSeqNo == primaryReplica.prePrepareSeqNo and \
                        arg['commit'].digest == prepared1.digest
                    assert r.name != arg['sender']

            assert passes >= len(allReplicas) - faultyNodes

        replicasReceivesCorrectNumberOfCOMMITs()
        replicasSeesCorrectNumOfCOMMITs()

    coros = [partial(g, instId) for instId in instIds]
    looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=60))
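# For orientation, the COMMIT thresholds used above for a 7-node pool,
# assuming getMaxFailures(7) == 2 (the floor((n - 1) / 3) bound):
n, f = 7, 2
assert (n, 2 * f + 1) == (7, 5)      # COMMITs tracked per replica (zero faults / with faults)
assert (n - 1, 2 * f) == (6, 4)      # COMMITs processed per replica (zero faults / with faults)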
def checkPrepared(looper, nodeSet, preprepared1, instIds, faultyNodes=0):
    nodeCount = len(list(nodeSet.nodes))
    f = getMaxFailures(nodeCount)

    def g(instId):
        allReplicas = getAllReplicas(nodeSet, instId)
        primary = getPrimaryReplica(nodeSet, instId)
        nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, instId)

        def primaryDontSendAnyPREPAREs():
            """
            The primary must not send any PREPARE messages.
            """
            for r in allReplicas:
                for param in getAllArgs(r, Replica.processPrepare):
                    sender = param['sender']
                    assert sender != primary.name

        def allReplicasSeeCorrectNumberOfPREPAREs():
            """
            The number of PREPARE messages received by a replica must be
            n - 1 (n = number of nodes) without faults, and at least 2f
            with faults.
            """
            passes = 0
            numOfMsgsWithZFN = nodeCount - 1
            numOfMsgsWithFaults = 2 * f

            for replica in allReplicas:
                key = primary.viewNo, primary.prePrepareSeqNo
                if key in replica.prepares:
                    actualMsgs = len(replica.prepares[key].voters)

                    passes += int(msgCountOK(nodeCount,
                                             faultyNodes,
                                             actualMsgs,
                                             numOfMsgsWithZFN,
                                             numOfMsgsWithFaults))
            assert passes >= len(allReplicas) - faultyNodes

        def primaryReceivesCorrectNumberOfPREPAREs():
            """
            The number of PREPARE messages processed by the primary replica
            must be n - 1 without faults, and at least 2f - 1 with faults.
            """
            actualMsgs = len([param for param in
                              getAllArgs(primary, primary.processPrepare)
                              if (param['prepare'].instId,
                                  param['prepare'].viewNo,
                                  param['prepare'].ppSeqNo) == (
                                  primary.instId, primary.viewNo,
                                  primary.prePrepareSeqNo) and
                              param['sender'] != primary.name])

            numOfMsgsWithZFN = nodeCount - 1
            numOfMsgsWithFaults = 2 * f - 1

            assert msgCountOK(nodeCount,
                              faultyNodes,
                              actualMsgs,
                              numOfMsgsWithZFN,
                              numOfMsgsWithFaults)
            # TODO what if the primary is faulty?

        def nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs():
            """
            The number of PREPARE messages processed by a non primary
            replica must be n - 2 without faults, and at least 2f - 1 with
            faults.
            """
            passes = 0
            numOfMsgsWithZFN = nodeCount - 2
            numOfMsgsWithFaults = (2 * f) - 1

            for npr in nonPrimaryReplicas:
                actualMsgs = len([param for param in
                                  getAllArgs(npr, npr.processPrepare)
                                  if (param['prepare'].instId,
                                      param['prepare'].viewNo,
                                      param['prepare'].ppSeqNo) == (
                                      primary.instId, primary.viewNo,
                                      primary.prePrepareSeqNo)])

                passes += int(msgCountOK(nodeCount,
                                         faultyNodes,
                                         actualMsgs,
                                         numOfMsgsWithZFN,
                                         numOfMsgsWithFaults))

            assert passes >= len(nonPrimaryReplicas) - faultyNodes
            # TODO how do we know if one of the faulty nodes is a primary or
            # not?

        primaryDontSendAnyPREPAREs()
        allReplicasSeeCorrectNumberOfPREPAREs()
        primaryReceivesCorrectNumberOfPREPAREs()
        nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs()

    coros = [partial(g, instId) for instId in instIds]
    looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=30))
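# A quick sanity check of the PREPARE thresholds above, again assuming a
# 7-node pool with getMaxFailures(7) == 2:
n, f = 7, 2
assert (n - 1, 2 * f) == (6, 4)          # any replica: PREPAREs tracked (zero faults / with faults)
assert (n - 1, 2 * f - 1) == (6, 3)      # primary: PREPAREs processed
assert (n - 2, 2 * f - 1) == (5, 3)      # non primary: PREPAREs processed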
def getRequiredInstances(nodeCount: int) -> int:
    f_value = getMaxFailures(nodeCount)
    return f_value + 1
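# Illustrative values, under the assumption that getMaxFailures(n) is the
# standard floor((n - 1) / 3) BFT bound used throughout RBFT:
#   getRequiredInstances(4) -> 2   (f = 1: one master plus one backup instance)
#   getRequiredInstances(7) -> 3   (f = 2)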
def __init__(self, name: str, nodeRegistry: Dict[str, HA],
             clientAuthNr: ClientAuthNr = None,
             ha: HA = None,
             cliname: str = None,
             cliha: HA = None,
             basedirpath: str = None,
             primaryDecider: PrimaryDecider = None):
    """
    Create a new node.

    :param nodeRegistry: names and host addresses of all nodes in the pool
    :param clientAuthNr: client authenticator implementation to be used
    :param basedirpath: path to the base directory used by `nstack` and
        `cstack`
    :param primaryDecider: the mechanism to be used to decide the primary
        of a protocol instance
    """
    self.primaryDecider = primaryDecider
    me = nodeRegistry[name]
    self.allNodeNames = list(nodeRegistry.keys())
    if isinstance(me, NodeDetail):
        sha = me.ha
        scliname = me.cliname
        scliha = me.cliha
        nodeReg = {k: v.ha for k, v in nodeRegistry.items()}
    else:
        sha = me if isinstance(me, HA) else HA(*me)
        scliname = None
        scliha = None
        nodeReg = {k: HA(*v) for k, v in nodeRegistry.items()}
    if not ha:  # pull it from the registry
        ha = sha
    if not cliname:  # default to the name plus the suffix
        cliname = scliname if scliname else name + CLIENT_STACK_SUFFIX
    if not cliha:  # default to same ip, port + 1
        cliha = scliha if scliha else HA(ha[0], ha[1] + 1)

    nstack = dict(name=name,
                  ha=ha,
                  main=True,
                  auto=AutoMode.never)

    cstack = dict(name=cliname,
                  ha=cliha,
                  main=True,
                  auto=AutoMode.always)

    if basedirpath:
        nstack['basedirpath'] = basedirpath
        cstack['basedirpath'] = basedirpath

    self.clientAuthNr = clientAuthNr or SimpleAuthNr()

    self.nodeInBox = deque()
    self.clientInBox = deque()
    self.created = time.perf_counter()

    HasActionQueue.__init__(self)
    NodeStacked.__init__(self, nstack, nodeReg)
    ClientStacked.__init__(self, cstack)
    Motor.__init__(self)
    Propagator.__init__(self)

    self.totalNodes = len(nodeRegistry)
    self.f = getMaxFailures(self.totalNodes)
    self.requiredNumberOfInstances = self.f + 1  # per RBFT
    self.minimumNodes = (2 * self.f) + 1  # minimum for a functional pool

    self.txnStore = TransactionStore()

    # Stores which protocol instance is master
    self._masterInst = None  # type: Optional[int]

    self.replicas = []  # type: List[replica.Replica]

    self.instanceChanges = InstanceChanges()

    self.viewNo = 0  # type: int

    self.rank = self.getRank(self.name, nodeRegistry)

    self.elector = None  # type: PrimaryDecider

    self.forwardedRequests = set()  # type: Set[Tuple[str, int]]

    self.monitor = Monitor(.9, 60, 5)

    # Requests that are to be given to the replicas by the node. Each
    # element of the list is a deque for the replica with number equal to
    # its index in the list and each element of the deque is a named tuple
    self.msgsToReplicas = []  # type: List[deque]

    # Requests that are to be given to the elector by the node
    self.msgsToElector = deque()

    nodeRoutes = [(Propagate, self.processPropagate),
                  (InstanceChange, self.processInstanceChange)]

    nodeRoutes.extend((msgTyp, self.sendToElector) for msgTyp in
                      [Nomination, Primary, Reelection])

    nodeRoutes.extend((msgTyp, self.sendToReplica) for msgTyp in
                      [PrePrepare, Prepare, Commit])

    self.nodeMsgRouter = Router(*nodeRoutes)

    self.clientMsgRouter = Router((Request, self.processRequest))

    self.perfCheckFreq = 10

    self._schedule(self.checkPerformance, self.perfCheckFreq)

    self.clientBlacklister = SimpleBlacklister(
        self.name + CLIENT_BLACKLISTER_SUFFIX)  # type: Blacklister

    self.nodeBlacklister = SimpleBlacklister(
        self.name + NODE_BLACKLISTER_SUFFIX)  # type: Blacklister

    # BE CAREFUL HERE
    # This controls which message types are excluded from signature
    # verification. These are still subject to RAET's signature verification
    # but client signatures will not be checked on these. ClientRequest and
    # Propagation are expressly prohibited from being in this list, as both
    # require client signature verification.
    self.authnWhitelist = (Nomination, Primary, Reelection,
                           Batch,
                           PrePrepare, Prepare,
                           Commit, InstanceChange)

    self.addReplicas()
from raet.raeting import AutoMode

from zeno.test.eventually import eventually
from zeno.common.util import getMaxFailures
from zeno.server.node import Node
from zeno.test.helper import NotConnectedToAny
from zeno.test.helper import TestNodeSet, randomOperation, \
    checkLastClientReqForNode, \
    getRepliesFromClientInbox
from zeno.test.helper import checkResponseCorrectnessFromNodes
from zeno.test.helper import sendRandomRequest, genTestClient, \
    checkSufficientRepliesRecvd, assertLength

nodeCount = 7
F = getMaxFailures(nodeCount)

whitelist = ['signer not configured so not signing',
             'for EmptySignature',
             'discarding message']  # warnings


# noinspection PyIncorrectDocstring
def testGeneratedRequestSequencing(tdir_for_func):
    """
    Request ids must be generated in increasing order
    """
    with TestNodeSet(count=4, tmpdir=tdir_for_func) as nodeSet:
        cli = genTestClient(nodeSet, tmpdir=tdir_for_func)
        operation = randomOperation()