Example #1
def msgCountOK(nodesSize,
               faultyNodes,
               actualMessagesReceived,
               numOfMsgsWithZNF,
               numOfSufficientMsgs):
    if faultyNodes == 0:
        logging.info("With ZFN: "
                     "Actual Messages: {}, withZeroFaultRequired: {}, "
                     "sufficientRequired: {}".format(actualMessagesReceived,
                                                     numOfMsgsWithZNF,
                                                     numOfSufficientMsgs))
        return actualMessagesReceived == numOfMsgsWithZNF

    elif faultyNodes <= getMaxFailures(nodesSize):
        logging.info("With faults: "
                     "Actual Messages: {}, withZeroFaultRequired: {}, "
                     "sufficientRequired: {}".format(actualMessagesReceived,
                                                     numOfMsgsWithZNF,
                                                     numOfSufficientMsgs))
        return actualMessagesReceived >= numOfSufficientMsgs
    else:
        logging.info("With faults greater than system can tolerate: "
                     "Actual Messages: {}, withZeroFaultRequired: {}, "
                     "sufficientRequired: {}".format(actualMessagesReceived,
                                                     numOfMsgsWithZNF,
                                                     numOfSufficientMsgs))

        # At most `numOfSufficientMsgs`, since the faults may not have
        # reduced the number of correct messages received
        return actualMessagesReceived <= numOfSufficientMsgs
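
getMaxFailures itself is not shown on this page. Below is a minimal sketch of the usual BFT bound it is assumed to implement (the real zeno.common.util helper may differ), together with the derived counts that the branches above compare against:

def getMaxFailures(nodeCount: int) -> int:
    # Sketch of the assumed definition: the largest number of faulty nodes a
    # BFT pool of `nodeCount` nodes can tolerate (nodeCount >= 3f + 1)
    return (nodeCount - 1) // 3


for n in (4, 7, 10):
    f = getMaxFailures(n)
    # pool size, f, sufficient-message quorum, required protocol instances
    print(n, f, 2 * f + 1, f + 1)   # 4 -> (1, 3, 2), 7 -> (2, 5, 3), 10 -> (3, 7, 4)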
Example #2
def testQueueingReqFromFutureView(delayedPerf, looper, nodeSet, up, client1):
    """
    Test if every node queues 3 Phase requests(PRE-PREPARE, PREPARE and COMMIT)
    that come from a view which is greater than the current view
    """

    f = getMaxFailures(nodeCount)

    # Delay processing of instance change on a node
    nodeA = nodeSet.Alpha
    nodeA.nodeIbStasher.delay(icDelay(60))

    nonPrimReps = getNonPrimaryReplicas(nodeSet, 0)
    # Delay processing of PRE-PREPARE messages at all non-primary replicas of
    # the master instance so the master's throughput falls and a view change
    # starts
    ppDelayer = ppDelay(5, 0)
    for r in nonPrimReps:
        r.node.nodeIbStasher.delay(ppDelayer)

    sendReqsToNodesAndVerifySuffReplies(looper, client1, 4, timeout=5 * nodeCount)

    # Every node except Node A should have a view change
    for node in nodeSet:
        if node.name != nodeA.name:
            looper.run(eventually(
                partial(checkViewChangeInitiatedForNode, node, 0),
                retryWait=1,
                timeout=20))

    # Node A's view should not have changed yet
    with pytest.raises(AssertionError):
        looper.run(eventually(partial(
            checkViewChangeInitiatedForNode, nodeA, 0),
            retryWait=1,
            timeout=20))

    # NodeA should not have any pending 3 phase request for a later view
    for r in nodeA.replicas:  # type: TestReplica
        assert len(r.threePhaseMsgsForLaterView) == 0

    # Reset delays on incoming messages from all nodes
    for node in nodeSet:
        node.nodeIbStasher.nodelay(ppDelayer)

    # Send one more request
    sendRandomRequest(client1)

    def checkPending3PhaseReqs():
        # Get all replicas that have their primary status decided
        reps = [rep for rep in nodeA.replicas if rep.isPrimary is not None]
        # At least one replica should have its primary status decided
        assert len(reps) > 0
        for r in reps:  # type: TestReplica
            logging.debug("primary status for replica {} is {}"
                          .format(r, r.primaryNames))
            assert len(r.threePhaseMsgsForLaterView) > 0

    # NodeA should now have pending 3 phase request for a later view
    looper.run(eventually(checkPending3PhaseReqs, retryWait=1, timeout=30))
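
A note on the pattern above: eventually(...) retries a check until it passes or a timeout expires, so wrapping it in pytest.raises(AssertionError) asserts that the condition never becomes true within the timeout. The real zeno.test.eventually helper is coroutine based and driven by the looper; the following is only a rough, synchronous sketch of the retry idea:

import time

def eventually_sync(check, retryWait: float = 1.0, timeout: float = 20.0):
    # Keep calling `check` until it stops raising AssertionError or the
    # timeout elapses; re-raise the last failure once time runs out
    deadline = time.monotonic() + timeout
    while True:
        try:
            return check()
        except AssertionError:
            if time.monotonic() >= deadline:
                raise
            time.sleep(retryWait)

# e.g. eventually_sync(lambda: checkViewChangeInitiatedForNode(nodeA, 0))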
Example #3
def testCorrectNumOfProtocolInstances(pool):
    fValue = getMaxFailures(len(pool.nodeSet))
    for node in pool.nodeSet:
        # num of protocol instances running on a node must be f + 1
        assert len(getProtocolInstanceNums(node)) == fValue + 1
        # There should be one master instance running
        assert node.masterInst is not None
        # There should be exactly f non-master instances
        assert len(node.nonMasterInsts) == fValue
Example #4
def sendReqsToNodesAndVerifySuffReplies(looper: Looper, client: TestClient,
                                        numReqs: int, timeout: float = None):
    nodeCount = len(client.nodeReg)
    f = getMaxFailures(nodeCount)
    requests = [sendRandomRequest(client) for _ in range(numReqs)]
    for request in requests:
        looper.run(eventually(checkSufficientRepliesRecvd, client.inBox,
                              request.reqId, f,
                              retryWait=1,
                              timeout=timeout or 3 * nodeCount))
    return requests
Example #5
def testReplicasRejectSamePrePrepareMsg(looper, nodeSet, client1):
    """
    Replicas should not accept PRE-PREPARE for view "v" and prepare sequence
    number "n" if it has already accepted a request with view number "v" and
    sequence number "n"

    """
    numOfNodes = 4
    fValue = getMaxFailures(numOfNodes)
    request1 = sendRandomRequest(client1)
    result1 = looper.run(
        eventually(checkSufficientRepliesRecvd, client1.inBox,
                   request1.reqId, fValue,
                   retryWait=1, timeout=5))
    logging.debug("request {} gives result {}".format(request1, result1))
    primaryRepl = getPrimaryReplica(nodeSet)
    logging.debug("Primary Replica: {}".format(primaryRepl))
    logging.debug(
        "Decrementing the primary replica's pre-prepare sequence number by one...")
    primaryRepl.prePrepareSeqNo -= 1
    request2 = sendRandomRequest(client1)
    looper.run(eventually(checkPrePrepareReqSent, primaryRepl, request2,
                          retryWait=1, timeout=10))

    nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet)
    logging.debug("Non Primary Replicas: " + str(nonPrimaryReplicas))
    prePrepareReq = PrePrepare(
            primaryRepl.instId,
            primaryRepl.viewNo,
            primaryRepl.prePrepareSeqNo,
            client1.clientId,
            request2.reqId,
            request2.digest)

    logging.debug("""Checking whether all the non primary replicas have received
                the pre-prepare request with same sequence number""")
    looper.run(eventually(checkPrePrepareReqRecvd,
                          nonPrimaryReplicas,
                          prePrepareReq,
                          retryWait=1,
                          timeout=10))
    logging.debug("""Check that none of the non primary replicas didn't send any prepare message "
                             in response to the pre-prepare message""")
    for npr in nonPrimaryReplicas:
        with pytest.raises(AssertionError):
            looper.run(eventually(checkPrepareReqSent,
                                  npr,
                                  client1.clientId,
                                  request2.reqId,
                                  retryWait=1,
                                  timeout=10))
Example #6
def testCorrectNumOfReplicas(pool):
    fValue = getMaxFailures(len(pool.nodeSet))
    for node in pool.nodeSet:
        # num of replicas running on a single node must be f + 1
        assert len(node.replicas) == fValue + 1
        # num of primary replicas on a node must be <= 1
        numberOfPrimary = len([r for r in node.replicas if r.isPrimary])
        assert numberOfPrimary <= 1
        for instId in getProtocolInstanceNums(node):
            # num of replicas for an instance on a node must be 1
            assert len([r for r in node.replicas
                        if r.instId == instId]) == 1 and \
                   node.replicas[instId].instId == instId
            # num of primaries for each protocol instance across the pool is 1
            numberOfPrimary = len([n for n in pool.nodeSet
                                   if n.replicas[instId].isPrimary])
            assert numberOfPrimary == 1
Example #7
    def hasConsensus(self, reqId: int) -> Optional[str]:
        """
        Accepts a request ID and returns True if consensus was reached
        for the request or else False

        :param reqId: Request ID
        :return: bool
        """
        replies = self.getRepliesFromAllNodes(reqId)
        if not replies:
            raise KeyError(reqId)  # NOT_FOUND
        # Check whether at least f + 1 replies have been received.
        f = getMaxFailures(len(self.nodeReg))
        if f + 1 > len(replies):
            return False  # UNCONFIRMED
        else:
            onlyResults = {frm: reply['result'] for frm, reply in
                           replies.items()}
            resultsList = list(onlyResults.values())
            # if all the elements in the resultList are equal - consensus
            # is reached.
            if all(result == resultsList[0] for result in resultsList):
                return resultsList[0]  # CONFIRMED
            else:
                logging.error(
                    "Received a different result from at least one of the nodes")
                # Count how many nodes reported each distinct result by
                # comparing their canonical JSON serialisations
                jsonResults = [json.dumps(result, sort_keys=True) for result in
                               resultsList]
                counts = {}
                for jresult in jsonResults:
                    counts[jresult] = counts.get(jresult, 0) + 1
                mostCommon = max(counts, key=counts.get)
                if counts[mostCommon] >= f + 1:
                    # CONFIRMED, as at least f + 1 matching results were found
                    return json.loads(mostCommon)
                else:
                    # UNCONFIRMED, as fewer than f + 1 matching results were found
                    return False
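
To make the counting branch above concrete, here is a small self-contained sketch with hypothetical replies from a 7-node pool (f = 2); the node names and results are invented for illustration. Consensus is declared once at least f + 1 = 3 identical results are found:

import json

f = 2  # getMaxFailures(7), assuming the usual (n - 1) // 3 bound
replies = {
    'Alpha': {'txnId': 'abc'},
    'Beta': {'txnId': 'abc'},
    'Gamma': {'txnId': 'abc'},
    'Delta': {'txnId': 'xyz'},
}

jsonResults = [json.dumps(result, sort_keys=True) for result in replies.values()]
counts = {}
for jresult in jsonResults:
    counts[jresult] = counts.get(jresult, 0) + 1
mostCommon = max(counts, key=counts.get)

# Three matching results >= f + 1, so this case would be CONFIRMED
assert counts[mostCommon] >= f + 1
assert json.loads(mostCommon) == {'txnId': 'abc'}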
Example #8
def checkCommited(looper, nodeSet, prepared1, instIds, faultyNodes=0):
    nodeCount = len(list(nodeSet))
    f = getMaxFailures(nodeCount)

    def g(instId):
        allReplicas = getAllReplicas(nodeSet, instId)
        primaryReplica = getPrimaryReplica(nodeSet, instId)

        def replicasSeesCorrectNumOfCOMMITs():
            """
            num of commit messages must be = n when zero fault;
            n = num of nodes and greater than or equal to
            2f + 1 with faults.
            """
            passes = 0
            numOfMsgsWithZFN = nodeCount
            numOfMsgsWithFault = (2 * f) + 1

            key = (primaryReplica.viewNo, primaryReplica.prePrepareSeqNo)
            for r in allReplicas:
                if key in r.commits:
                    rcvdCommitRqst = r.commits[key]
                    assert rcvdCommitRqst[0] == prepared1.digest
                    actualMsgsReceived = len(rcvdCommitRqst[1])

                    passes += int(msgCountOK(nodeCount,
                                             faultyNodes,
                                             actualMsgsReceived,
                                             numOfMsgsWithZFN,
                                             numOfMsgsWithFault))

            assert passes >= len(allReplicas) - faultyNodes

        def replicasReceivesCorrectNumberOfCOMMITs():
            """
            num of commit messages seen by replica must be equal to n - 1;
            when zero fault and greater than or equal to
            2f+1 with faults.
            """
            passes = 0
            numOfMsgsWithZFN = nodeCount - 1
            numOfMsgsWithFault = 2 * f

            for r in allReplicas:
                args = getAllArgs(r, r.processCommit)
                actualMsgsReceived = len(args)

                passes += int(msgCountOK(nodeCount,
                                         faultyNodes,
                                         actualMsgsReceived,
                                         numOfMsgsWithZFN,
                                         numOfMsgsWithFault))

                for arg in args:
                    assert arg['commit'].viewNo == primaryReplica.viewNo and \
                           arg['commit'].ppSeqNo == primaryReplica.prePrepareSeqNo and \
                           arg['commit'].digest == prepared1.digest
                    assert r.name != arg['sender']

            assert passes >= len(allReplicas) - faultyNodes

        replicasReceivesCorrectNumberOfCOMMITs()
        replicasSeesCorrectNumOfCOMMITs()

    coros = [partial(g, instId) for instId in instIds]
    looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=60))
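
The two inner checks above compare against different counts: a replica's commits entry is checked against n (suggesting it records the replica's own COMMIT as well), while processCommit calls are checked against n - 1 (only COMMITs received from other replicas). A small arithmetic sketch of those thresholds for a 7-node pool, assuming getMaxFailures(7) == 2:

n, f = 7, 2
# COMMIT voters recorded per replica (apparently including its own COMMIT)
recordedWithZeroFaults = n            # 7
recordedWithFaults = 2 * f + 1        # 5
# COMMITs actually received from other replicas (own COMMIT excluded)
receivedWithZeroFaults = n - 1        # 6
receivedWithFaults = 2 * f            # 4
assert (recordedWithZeroFaults, recordedWithFaults) == (7, 5)
assert (receivedWithZeroFaults, receivedWithFaults) == (6, 4)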
Example #9
def checkPrepared(looper, nodeSet, preprepared1, instIds, faultyNodes=0):
    nodeCount = len(list(nodeSet.nodes))
    f = getMaxFailures(nodeCount)

    def g(instId):
        allReplicas = getAllReplicas(nodeSet, instId)
        primary = getPrimaryReplica(nodeSet, instId)
        nonPrimaryReplicas = getNonPrimaryReplicas(nodeSet, instId)

        def primaryDontSendAnyPREPAREs():
            """
            1. no of PREPARE sent by primary should be 0
            """
            for r in allReplicas:
                for param in getAllArgs(r, Replica.processPrepare):
                    sender = param['sender']
                    assert sender != primary.name

        def allReplicasSeeCorrectNumberOfPREPAREs():
            """
            1. no of PREPARE received by replicas must be n - 1;
            n = num of nodes without fault, and greater than or equal to
             2f with faults.
            """
            passes = 0
            numOfMsgsWithZFN = nodeCount - 1
            numOfMsgsWithFaults = 2 * f

            for replica in allReplicas:
                key = primary.viewNo, primary.prePrepareSeqNo
                if key in replica.prepares:
                    actualMsgs = len(replica.prepares[key].voters)

                    passes += int(msgCountOK(nodeCount,
                                             faultyNodes,
                                             actualMsgs,
                                             numOfMsgsWithZFN,
                                             numOfMsgsWithFaults))
            assert passes >= len(allReplicas) - faultyNodes

        def primaryReceivesCorrectNumberOfPREPAREs():
            """
            num of PREPARE seen by primary replica is n - 1;
                n = num of nodes without fault, and greater than or equal to
             2f with faults.
            """
            actualMsgs = len([param for param in
                              getAllArgs(primary, primary.processPrepare)
                              if (param['prepare'].instId,
                                  param['prepare'].viewNo,
                                  param['prepare'].ppSeqNo) == (primary.instId,
                                                                primary.viewNo,
                                                                primary.prePrepareSeqNo)
                              and param['sender'] != primary.name])

            numOfMsgsWithZFN = nodeCount - 1
            numOfMsgsWithFaults = 2 * f - 1

            assert msgCountOK(nodeCount,
                              faultyNodes,
                              actualMsgs,
                              numOfMsgsWithZFN,
                              numOfMsgsWithFaults)
            # TODO what if the primary is faulty?

        def nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs():
            """
            num of PREPARE seen by Non primary replica is n - 2 without
            faults and 2f - 1 with faults.
            """
            passes = 0
            numOfMsgsWithZFN = nodeCount - 2
            numOfMsgsWithFaults = (2 * f) - 1

            for npr in nonPrimaryReplicas:
                actualMsgs = len([param for param in
                                  getAllArgs(npr, npr.processPrepare)
                                  if (param['prepare'].instId,
                                      param['prepare'].viewNo,
                                      param['prepare'].ppSeqNo) == (primary.instId,
                                                                    primary.viewNo,
                                                                    primary.prePrepareSeqNo)])

                passes += int(msgCountOK(nodeCount,
                                         faultyNodes,
                                         actualMsgs,
                                         numOfMsgsWithZFN,
                                         numOfMsgsWithFaults))

            assert passes >= len(nonPrimaryReplicas) - faultyNodes
            # TODO how do we know if one of the faulty nodes is a primary or
            # not?

        primaryDontSendAnyPREPAREs()
        allReplicasSeeCorrectNumberOfPREPAREs()
        primaryReceivesCorrectNumberOfPREPAREs()
        nonPrimaryReplicasReceiveCorrectNumberOfPREPAREs()

    coros = [partial(g, instId) for instId in instIds]
    looper.run(eventuallyAll(*coros, retryWait=1, totalTimeout=30))
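
The PREPARE thresholds in checkPrepared differ from the COMMIT ones because the primary never sends a PREPARE. A rough reading of the constants above for a 7-node pool, assuming getMaxFailures(7) == 2:

n, f = 7, 2
# PREPARE voters recorded per replica: the n - 1 non-primary replicas
recordedWithZeroFaults = n - 1        # 6, and at least 2f = 4 with faults
# PREPAREs received by the primary: one from each non-primary replica
primarySeesWithZeroFaults = n - 1     # 6, and at least 2f - 1 = 3 with faults
# PREPAREs received by a non-primary: excludes the primary and itself
nonPrimarySeesWithZeroFaults = n - 2  # 5, and at least 2f - 1 = 3 with faults
assert (recordedWithZeroFaults,
        primarySeesWithZeroFaults,
        nonPrimarySeesWithZeroFaults) == (6, 6, 5)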
Example #10
def getRequiredInstances(nodeCount: int) -> int:
    f_value = getMaxFailures(nodeCount)
    return f_value + 1
Example #11
    def __init__(self,
                 name: str,
                 nodeRegistry: Dict[str, HA],
                 clientAuthNr: ClientAuthNr=None,
                 ha: HA=None,
                 cliname: str=None,
                 cliha: HA=None,
                 basedirpath: str=None,
                 primaryDecider: PrimaryDecider = None):
        """
        Create a new node.

        :param nodeRegistry: names and host addresses of all nodes in the pool
        :param clientAuthNr: client authenticator implementation to be used
        :param basedirpath: path to the base directory used by `nstack` and
            `cstack`
        :param primaryDecider: the mechanism to be used to decide the primary
        of a protocol instance
        """
        self.primaryDecider = primaryDecider
        me = nodeRegistry[name]

        self.allNodeNames = list(nodeRegistry.keys())
        if isinstance(me, NodeDetail):
            sha = me.ha
            scliname = me.cliname
            scliha = me.cliha
            nodeReg = {k: v.ha for k, v in nodeRegistry.items()}
        else:
            sha = me if isinstance(me, HA) else HA(*me)
            scliname = None
            scliha = None
            nodeReg = {k: HA(*v) for k, v in nodeRegistry.items()}
        if not ha:  # pull it from the registry
            ha = sha
        if not cliname:  # default to the name plus the suffix
            cliname = scliname if scliname else name + CLIENT_STACK_SUFFIX
        if not cliha:  # default to same ip, port + 1
            cliha = scliha if scliha else HA(ha[0], ha[1]+1)

        nstack = dict(name=name,
                      ha=ha,
                      main=True,
                      auto=AutoMode.never)

        cstack = dict(name=cliname,
                      ha=cliha,
                      main=True,
                      auto=AutoMode.always)

        if basedirpath:
            nstack['basedirpath'] = basedirpath
            cstack['basedirpath'] = basedirpath

        self.clientAuthNr = clientAuthNr or SimpleAuthNr()

        self.nodeInBox = deque()
        self.clientInBox = deque()
        self.created = time.perf_counter()

        HasActionQueue.__init__(self)
        NodeStacked.__init__(self, nstack, nodeReg)
        ClientStacked.__init__(self, cstack)
        Motor.__init__(self)
        Propagator.__init__(self)

        self.totalNodes = len(nodeRegistry)
        self.f = getMaxFailures(self.totalNodes)
        self.requiredNumberOfInstances = self.f + 1  # per RBFT
        self.minimumNodes = (2 * self.f) + 1  # minimum for a functional pool

        self.txnStore = TransactionStore()

        # Stores which protocol instance is master
        self._masterInst = None  # type: Optional[int]

        self.replicas = []  # type: List[replica.Replica]

        self.instanceChanges = InstanceChanges()

        self.viewNo = 0                             # type: int

        self.rank = self.getRank(self.name, nodeRegistry)

        self.elector = None  # type: PrimaryDecider

        self.forwardedRequests = set()  # type: Set[Tuple[(str, int)]]

        self.monitor = Monitor(.9, 60, 5)

        # Requests that are to be given to the replicas by the node. Each
        # element of the list is a deque for the replica whose number equals
        # its index in the list, and each element of the deque is a named
        # tuple
        self.msgsToReplicas = []  # type: List[deque]

        # Requests that are to be given to the elector by the node
        self.msgsToElector = deque()

        nodeRoutes = [(Propagate, self.processPropagate),
                      (InstanceChange, self.processInstanceChange)]

        nodeRoutes.extend((msgTyp, self.sendToElector) for msgTyp in
                          [Nomination, Primary, Reelection])

        nodeRoutes.extend((msgTyp, self.sendToReplica) for msgTyp in
                          [PrePrepare, Prepare, Commit])

        self.nodeMsgRouter = Router(*nodeRoutes)

        self.clientMsgRouter = Router((Request,
                                       self.processRequest))

        self.perfCheckFreq = 10

        self._schedule(self.checkPerformance, self.perfCheckFreq)

        self.clientBlacklister = SimpleBlacklister(
            self.name + CLIENT_BLACKLISTER_SUFFIX)  # type: Blacklister

        self.nodeBlacklister = SimpleBlacklister(
            self.name + NODE_BLACKLISTER_SUFFIX)  # type: Blacklister

        # BE CAREFUL HERE
        # This controls which message types are excluded from signature
        # verification. They are still subject to RAET's signature
        # verification, but client signatures will not be checked on them.
        # ClientRequest and Propagation are expressly prohibited from being
        # in this list, as both require client signature verification.
        self.authnWhitelist = (Nomination, Primary, Reelection,
                               Batch,
                               PrePrepare, Prepare,
                               Commit, InstanceChange)
        self.addReplicas()
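
A minimal sketch of how the client-stack defaults are derived in the constructor above. HA is treated here as a (host, port) named tuple and CLIENT_STACK_SUFFIX as 'C'; both are assumptions about the real zeno types and constants, which may differ:

from collections import namedtuple

HA = namedtuple('HA', ['host', 'port'])   # stand-in for the real HA type
CLIENT_STACK_SUFFIX = 'C'                 # assumed value of the real constant

name, ha = 'Alpha', HA('127.0.0.1', 9701)
cliname = name + CLIENT_STACK_SUFFIX      # client stack name: 'AlphaC'
cliha = HA(ha[0], ha[1] + 1)              # same host, next port: 9702
assert (cliname, cliha) == ('AlphaC', HA('127.0.0.1', 9702))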
Example #12
from raet.raeting import AutoMode
from zeno.test.eventually import eventually

from zeno.common.util import getMaxFailures
from zeno.server.node import Node
from zeno.test.helper import NotConnectedToAny
from zeno.test.helper import TestNodeSet, randomOperation, \
    checkLastClientReqForNode, \
    getRepliesFromClientInbox
from zeno.test.helper import checkResponseCorrectnessFromNodes
from zeno.test.helper import sendRandomRequest, genTestClient, \
    checkSufficientRepliesRecvd, assertLength

nodeCount = 7

F = getMaxFailures(nodeCount)

whitelist = ['signer not configured so not signing',
             'for EmptySignature',
             'discarding message']  # warnings


# noinspection PyIncorrectDocstring
def testGeneratedRequestSequencing(tdir_for_func):
    """
    Request ids must be generated in an increasing order
    """
    with TestNodeSet(count=4, tmpdir=tdir_for_func) as nodeSet:
        cli = genTestClient(nodeSet, tmpdir=tdir_for_func)
        operation = randomOperation()