示例#1
0
def test_node_requests_missing_three_phase_messages(looper, txnPoolNodeSet,
                                                    wallet1, client1Connected):
    """
    2 of 4 nodes go down, so pool can not process any more incoming requests.
    A new request comes in. After a while those 2 nodes come back alive.
    Another request comes in. Check that previously disconnected two nodes
    request missing PREPARES and PREPREPARES and the pool successfully handles
    both transactions after that.
    """
    INIT_REQS_CNT = 10
    MISSING_REQS_CNT = 1
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1,
                                              client1Connected, INIT_REQS_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                node,
                                                stopNode=False)

    sendRandomRequests(wallet1, client1Connected, MISSING_REQS_CNT)

    def check_pp_out_of_sync(alive_nodes, disconnected_nodes):
        def get_last_pp(node):
            return node.replicas._master_replica.lastPrePrepare

        last_3pc_key_alive = get_last_pp(alive_nodes[0])
        for node in alive_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_alive

        last_3pc_key_diconnected = get_last_pp(disconnected_nodes[0])
        assert last_3pc_key_diconnected != last_3pc_key_alive
        for node in disconnected_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_diconnected

    looper.run(
        eventually(check_pp_out_of_sync,
                   alive_nodes,
                   disconnected_nodes,
                   retryWait=1,
                   timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, node)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1,
                                              client1Connected,
                                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
def testClientConnectToRestartedNodes(looper, txnPoolNodeSet,
                                      tdir, tconf,
                                      poolTxnNodeNames, allPluginsPath,
                                      sdk_wallet_new_client,
                                      sdk_pool_handle):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_new_client, 1)
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    txnPoolNodeSet = []
    for nm in poolTxnNodeNames:
        config_helper = PNodeConfigHelper(nm, tconf, chroot=tdir)
        node = TestNode(nm,
                        config_helper=config_helper,
                        config=tconf, pluginPaths=allPluginsPath)
        looper.add(node)
        txnPoolNodeSet.append(node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    def chk():
        for node in txnPoolNodeSet:
            assert node.isParticipating

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    looper.run(eventually(chk, retryWait=1, timeout=timeout))

    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_new_client, sdk_pool_handle)
示例#3
0
def test_node_load_after_add_then_disconnect(sdk_new_node_caught_up, txnPoolNodeSet,
                                             tconf, looper, sdk_pool_handle,
                                             sdk_wallet_client,
                                             tdirWithPoolTxns, allPluginsPath,
                                             capsys):
    """
    A node that restarts after some transactions should eventually get the
    transactions which happened while it was down
    :return:
    """
    new_node = sdk_new_node_caught_up
    with capsys.disabled():
        print("Stopping node {} with pool ledger size {}".
              format(new_node, new_node.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, new_node)
    looper.removeProdable(new_node)

    client_batches = 80
    txns_per_batch = 10
    for i in range(client_batches):
        s = perf_counter()
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, txns_per_batch)
        with capsys.disabled():
            print('{} executed {} client txns in {:.2f} seconds'.
                  format(i + 1, txns_per_batch, perf_counter() - s))

    with capsys.disabled():
        print("Starting the stopped node, {}".format(new_node))
    nodeHa, nodeCHa = HA(*new_node.nodestack.ha), HA(*new_node.clientstack.ha)
    new_node = TestNode(
        new_node.name,
        basedirpath=tdirWithPoolTxns,
        base_data_dir=tdirWithPoolTxns,
        config=tconf,
        ha=nodeHa,
        cliha=nodeCHa,
        pluginPaths=allPluginsPath)
    looper.add(new_node)
    txnPoolNodeSet[-1] = new_node

    # Delay catchup reply processing so LedgerState does not change
    delay_catchup_reply = 5
    new_node.nodeIbStasher.delay(cr_delay(delay_catchup_reply))
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Make sure ledger starts syncing (sufficient consistency proofs received)
    looper.run(eventually(check_ledger_state, new_node, DOMAIN_LEDGER_ID,
                          LedgerState.syncing, retryWait=.5, timeout=5))

    # Not accurate timeout but a conservative one
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
              2 * delay_catchup_reply
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4],
                         customTimeout=timeout)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:4])
def testOrderingCase2(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Scenario -> A client sends requests, some nodes delay COMMITs to few
    specific nodes such some nodes achieve commit quorum later for those
    requests compared to other nodes. But all nodes `ORDER` request in the same
    order of ppSeqNos
    https://www.pivotaltracker.com/n/projects/1889887/stories/133655009
    """
    pr, replicas = getPrimaryReplica(txnPoolNodeSet, instId=0), \
                   getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    assert len(replicas) == 6

    rep0 = pr
    rep1 = replicas[0]
    rep2 = replicas[1]
    rep3 = replicas[2]
    rep4 = replicas[3]
    rep5 = replicas[4]
    rep6 = replicas[5]

    node0 = rep0.node
    node1 = rep1.node
    node2 = rep2.node
    node3 = rep3.node
    node4 = rep4.node
    node5 = rep5.node
    node6 = rep6.node

    ppSeqsToDelay = 5
    commitDelay = 3  # delay each COMMIT by this number of seconds
    delayedPpSeqNos = set()

    requestCount = 10

    def specificCommits(wrappedMsg):
        nonlocal node3, node4, node5
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare):
            if len(delayedPpSeqNos) < ppSeqsToDelay:
                delayedPpSeqNos.add(msg.ppSeqNo)
                logger.debug('ppSeqNo {} be delayed'.format(msg.ppSeqNo))
        if isinstance(msg, Commit) and msg.instId == 0 and \
                sender in (n.name for n in (node3, node4, node5)) and \
                msg.ppSeqNo in delayedPpSeqNos:
            return commitDelay

    for node in (node1, node2):
        logger.debug('{} would be delaying commits'.format(node))
        node.nodeIbStasher.delay(specificCommits)

    sdk_reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, requestCount)

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet, custom_timeout=timeout)

    sdk_get_and_check_replies(looper, sdk_reqs)
def testNonPrimaryRecvs3PhaseMessageOutsideWatermarks(chkFreqPatched, looper,
                                                      txnPoolNodeSet, client1,
                                                      wallet1,
                                                      client1Connected,
                                                      reqs_for_logsize):
    """
    A node is slow in processing PRE-PREPAREs and PREPAREs such that lot of
    requests happen and the slow node has started getting 3 phase messages
    outside of it watermarks. Check that it queues up requests outside watermarks and once it
    has received stable checkpoint it processes more requests. It sends other
    nodes 3 phase messages older than their stable checkpoint so they should
    discard them.
    """
    delay = 15
    instId = 1
    reqsToSend = reqs_for_logsize + 2
    npr = getNonPrimaryReplicas(txnPoolNodeSet, instId)
    slowReplica = npr[0]
    slowNode = slowReplica.node
    slowNode.nodeIbStasher.delay(ppDelay(delay, instId))
    slowNode.nodeIbStasher.delay(pDelay(delay, instId))

    def discardCounts(replicas, pat):
        counts = {}
        for r in replicas:
            counts[r.name] = countDiscarded(r, pat)
        return counts

    oldStashCount = slowReplica.spylog.count(
        TestReplica.stashOutsideWatermarks.__name__)
    oldDiscardCounts = discardCounts(
        [n.replicas[instId] for n in txnPoolNodeSet if n != slowNode],
        'achieved stable checkpoint')

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, reqsToSend,
                                        1)
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    looper.run(
        eventually(checkNodeDataForEquality,
                   slowNode,
                   *[_ for _ in txnPoolNodeSet if _ != slowNode],
                   retryWait=1,
                   timeout=timeout))
    newStashCount = slowReplica.spylog.count(
        TestReplica.stashOutsideWatermarks.__name__)
    assert newStashCount > oldStashCount

    def chk():
        counts = discardCounts(
            [n.replicas[instId] for n in txnPoolNodeSet if n != slowNode],
            'achieved stable checkpoint')
        for nm, count in counts.items():
            assert count > oldDiscardCounts[nm]

    timeout = waits.expectedNodeToNodeMessageDeliveryTime() * \
        len(txnPoolNodeSet) + delay
    looper.run(eventually(chk, retryWait=1, timeout=timeout))
示例#6
0
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet,
                           sdk_node_created_after_some_txns,
                           sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes does not reply and the newly joined node cannot
    complete the process till the timeout and then requests the missing
    transactions.
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = sdk_node_created_after_some_txns
    new_node_ledger = new_node.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID]
    old_size = len(new_node_ledger.ledger)
    old_size_others = txnPoolNodeSet[0].ledgerManager.ledgerRegistry[
        DOMAIN_LEDGER_ID].ledger.size

    # So nodes wont tell the clients about the newly joined node so they
    # dont send any request to the newly joined node
    for node in txnPoolNodeSet:
        node.sendPoolInfoToClients = types.MethodType(lambda x, y: None, node)

    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the node does not process catchup request.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    txnPoolNodeSet.append(new_node)

    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        ignoreCatchupReq, badNode.ledgerManager)
    more_requests = 10
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             more_requests)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
              reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper,
                         new_node,
                         *txnPoolNodeSet[:-1],
                         customTimeout=timeout)
    new_size = len(new_node_ledger.ledger)

    # The new node ledger might catchup some transactions from the batch of
    # `more_request` transactions
    assert old_size_others - \
           old_size <= new_node_ledger.num_txns_caught_up <= new_size - old_size
    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    waitNodeDataEquality(looper,
                         new_node,
                         *txnPoolNodeSet[:-1],
                         customTimeout=timeout)
示例#7
0
def ensureClientConnectedToNodesAndPoolLedgerSame(looper, client: TestClient,
                                                  *nodes: TestNode):
    looper.run(client.ensureConnectedToNodes())
    timeout = waits.expectedPoolGetReadyTimeout(len(nodes))
    looper.run(
        eventually(checkClientPoolLedgerSameAsNodes,
                   client,
                   *nodes,
                   retryWait=.5,
                   timeout=timeout))
示例#8
0
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet,
                           looper, tdir, tconf, allPluginsPath,
                           sdk_pool_handle, sdk_wallet_steward,
                           sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes does not reply and the newly joined node cannot
    complete the process till the timeout and then requests the missing
    transactions.
    """
    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the node does not process catchup request.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        ignoreCatchupReq, badNode.ledgerManager)
    more_requests = 10
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, more_requests)

    _, new_node = sdk_add_new_steward_and_node(looper,
                                               sdk_pool_handle,
                                               sdk_wallet_steward,
                                               'EpsilonSteward',
                                               'Epsilon',
                                               tdir,
                                               tconf,
                                               allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
              reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper,
                         new_node,
                         *txnPoolNodeSet[:-1],
                         customTimeout=timeout,
                         exclude_from_check=['check_last_ordered_3pc_backup'])

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    waitNodeDataEquality(looper,
                         new_node,
                         *txnPoolNodeSet[:-1],
                         customTimeout=timeout,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
示例#9
0
def testClientConnectToRestartedNodes(looper, txnPoolNodeSet, tdirWithPoolTxns,
                                      tdir, tdirWithClientPoolTxns,
                                      poolTxnClientNames, poolTxnData, tconf,
                                      poolTxnNodeNames, allPluginsPath):
    name = poolTxnClientNames[-1]
    newClient, w = genTestClient(tmpdir=tdirWithClientPoolTxns,
                                 nodes=txnPoolNodeSet,
                                 name=name,
                                 usePoolLedger=True)
    looper.add(newClient)
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newClient,
                                                  *txnPoolNodeSet)
    sendReqsToNodesAndVerifySuffReplies(looper, w, newClient, 1, 1)
    for node in txnPoolNodeSet:
        node.stop()
        looper.removeProdable(node)

    # looper.run(newClient.ensureDisconnectedToNodes(timeout=60))
    txnPoolNodeSet = []
    for nm in poolTxnNodeNames:
        config_helper = PNodeConfigHelper(nm, tconf, chroot=tdir)
        node = TestNode(nm,
                        ledger_dir=config_helper.ledger_dir,
                        keys_dir=config_helper.keys_dir,
                        genesis_dir=config_helper.genesis_dir,
                        plugins_dir=config_helper.plugins_dir,
                        config=tconf,
                        pluginPaths=allPluginsPath)
        looper.add(node)
        txnPoolNodeSet.append(node)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)

    def chk():
        for node in txnPoolNodeSet:
            assert node.isParticipating

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))
    looper.run(eventually(chk, retryWait=1, timeout=timeout))

    bootstrapClientKeys(w.defaultId, w.getVerkey(), txnPoolNodeSet)

    req = sendRandomRequest(w, newClient)
    waitForSufficientRepliesForRequests(looper, newClient, requests=[req])
    ensureClientConnectedToNodesAndPoolLedgerSame(looper, newClient,
                                                  *txnPoolNodeSet)

    sendReqsToNodesAndVerifySuffReplies(looper, w, newClient, 3, 1)
示例#10
0
def waitNodeDataInequality(looper,
                           referenceNode: TestNode,
                           *otherNodes: TestNode,
                           customTimeout=None):
    """
    Wait for node ledger to become equal

    :param referenceNode: node whose ledger used as a reference
    """

    numOfNodes = len(otherNodes) + 1
    timeout = customTimeout or waits.expectedPoolGetReadyTimeout(numOfNodes)
    looper.run(eventually(checkNodeDataForInequality,
                          referenceNode,
                          *otherNodes,
                          retryWait=1, timeout=timeout))
示例#11
0
def waitNodeDataEquality(looper,
                         referenceNode: TestNode,
                         *otherNodes: TestNode,
                         customTimeout=None,
                         exclude_from_check=None):
    """
    Wait for node ledger to become equal

    :param referenceNode: node whose ledger used as a reference
    """

    numOfNodes = len(otherNodes) + 1
    timeout = customTimeout or waits.expectedPoolGetReadyTimeout(numOfNodes)
    kwargs = {'exclude_from_check': exclude_from_check}
    looper.run(eventually(partial(checkNodeDataForEquality, **kwargs),
                          referenceNode,
                          *otherNodes,
                          retryWait=1, timeout=timeout))
示例#12
0
def waitNodeDataInequality(looper,
                           referenceNode: TestNode,
                           *otherNodes: TestNode,
                           exclude_from_check=None,
                           customTimeout=None):
    """
    Wait for node ledger to become equal

    :param referenceNode: node whose ledger used as a reference
    """

    numOfNodes = len(otherNodes) + 1
    timeout = customTimeout or waits.expectedPoolGetReadyTimeout(numOfNodes)
    kwargs = {'exclude_from_check': exclude_from_check}
    looper.run(eventually(partial(checkNodeDataForInequality, **kwargs),
                          referenceNode,
                          *otherNodes,
                          retryWait=1, timeout=timeout))
def testNodeRequestingTxns(reduced_catchup_timeout_conf, txnPoolNodeSet,
                           looper, tdir, tconf,
                           allPluginsPath, sdk_pool_handle, sdk_wallet_steward, sdk_wallet_client):
    """
    A newly joined node is catching up and sends catchup requests to other
    nodes but one of the nodes does not reply and the newly joined node cannot
    complete the process till the timeout and then requests the missing
    transactions.
    """

    def ignoreCatchupReq(self, req, frm):
        logger.info("{} being malicious and ignoring catchup request {} "
                    "from {}".format(self, req, frm))

    # One of the node does not process catchup request.
    npr = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    badReplica = npr[0]
    badNode = badReplica.node
    badNode.nodeMsgRouter.routes[CatchupReq] = types.MethodType(
        ignoreCatchupReq, badNode.ledgerManager)
    more_requests = 10
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client, more_requests)

    _, new_node = sdk_add_new_steward_and_node(
        looper, sdk_pool_handle, sdk_wallet_steward,
        'EpsilonSteward', 'Epsilon', tdir, tconf,
        allPluginsPath=allPluginsPath)
    txnPoolNodeSet.append(new_node)
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Since one of the nodes does not reply, this new node will experience a
    # timeout and retry catchup requests, hence a long test timeout.
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
              reduced_catchup_timeout_conf.CatchupTransactionsTimeout
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, 2)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)
def test_node_catchup_after_restart_with_txns(
        sdk_new_node_caught_up, txnPoolNodeSet, tdir, tconf,
        sdk_node_set_with_node_added_after_some_txns, allPluginsPath):
    """
    A node that restarts after some transactions should eventually get the
    transactions which happened while it was down
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    logger.debug("Stopping node {} with pool ledger size {}".format(
        new_node, new_node.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, new_node)
    looper.removeProdable(new_node)
    # for n in txnPoolNodeSet[:4]:
    #     for r in n.nodestack.remotes.values():
    #         if r.name == newNode.name:
    #             r.removeStaleCorrespondents()
    # looper.run(eventually(checkNodeDisconnectedFrom, newNode.name,
    #                       txnPoolNodeSet[:4], retryWait=1, timeout=5))
    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    more_requests = 5
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, more_requests)
    logger.debug("Starting the stopped node, {}".format(new_node))
    nodeHa, nodeCHa = HA(*new_node.nodestack.ha), HA(*new_node.clientstack.ha)
    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    newNode = TestNode(new_node.name,
                       config_helper=config_helper,
                       config=tconf,
                       ha=nodeHa,
                       cliha=nodeCHa,
                       pluginPaths=allPluginsPath)
    looper.add(newNode)
    txnPoolNodeSet[-1] = newNode

    # Make sure ledger is not synced initially
    check_ledger_state(newNode, DOMAIN_LEDGER_ID, LedgerState.not_synced)

    # Delay catchup reply processing so LedgerState does not change
    # TODO fix delay, sometimes it's not enough and lower 'check_ledger_state'
    # fails because newNode's domain ledger state is 'synced'
    delay_catchup_reply = 10
    newNode.nodeIbStasher.delay(cr_delay(delay_catchup_reply))
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Make sure ledger starts syncing (sufficient consistency proofs received)
    looper.run(
        eventually(check_ledger_state,
                   newNode,
                   DOMAIN_LEDGER_ID,
                   LedgerState.syncing,
                   retryWait=.5,
                   timeout=5))

    confused_node = txnPoolNodeSet[0]
    new_node_ledger = newNode.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID]
    cp = new_node_ledger.catchUpTill
    start, end = cp.seqNoStart, cp.seqNoEnd
    cons_proof = confused_node.ledgerManager._buildConsistencyProof(
        DOMAIN_LEDGER_ID, start, end)

    bad_send_time = None

    def chk():
        nonlocal bad_send_time
        entries = newNode.ledgerManager.spylog.getAll(
            newNode.ledgerManager.canProcessConsistencyProof.__name__)
        for entry in entries:
            # `canProcessConsistencyProof` should return False after `syncing_time`
            if entry.result == False and entry.starttime > bad_send_time:
                return
        assert False

    def send_and_chk(ledger_state):
        nonlocal bad_send_time, cons_proof
        bad_send_time = perf_counter()
        confused_node.ledgerManager.sendTo(cons_proof, newNode.name)
        # Check that the ConsistencyProof messages rejected
        looper.run(eventually(chk, retryWait=.5, timeout=5))
        check_ledger_state(newNode, DOMAIN_LEDGER_ID, ledger_state)

    send_and_chk(LedgerState.syncing)

    # Not accurate timeout but a conservative one
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
              2 * delay_catchup_reply
    waitNodeDataEquality(looper,
                         newNode,
                         *txnPoolNodeSet[:-1],
                         customTimeout=timeout)
    assert new_node_ledger.num_txns_caught_up == more_requests
    send_and_chk(LedgerState.synced)
def test_node_catchup_after_restart_with_txns(
        sdk_new_node_caught_up,
        txnPoolNodeSet,
        tdir,
        tconf,
        sdk_node_set_with_node_added_after_some_txns,
        allPluginsPath):
    """
    A node that restarts after some transactions should eventually get the
    transactions which happened while it was down
    :return:
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns
    logger.debug("Stopping node {} with pool ledger size {}".
                 format(new_node, new_node.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, new_node)
    looper.removeProdable(new_node)
    # for n in txnPoolNodeSet[:4]:
    #     for r in n.nodestack.remotes.values():
    #         if r.name == newNode.name:
    #             r.removeStaleCorrespondents()
    # looper.run(eventually(checkNodeDisconnectedFrom, newNode.name,
    #                       txnPoolNodeSet[:4], retryWait=1, timeout=5))
    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    more_requests = 5
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, more_requests)
    logger.debug("Starting the stopped node, {}".format(new_node))
    nodeHa, nodeCHa = HA(*new_node.nodestack.ha), HA(*new_node.clientstack.ha)
    config_helper = PNodeConfigHelper(new_node.name, tconf, chroot=tdir)
    newNode = TestNode(
        new_node.name,
        config_helper=config_helper,
        config=tconf,
        ha=nodeHa,
        cliha=nodeCHa,
        pluginPaths=allPluginsPath)
    looper.add(newNode)
    txnPoolNodeSet[-1] = newNode

    # Make sure ledger is not synced initially
    check_ledger_state(newNode, DOMAIN_LEDGER_ID, LedgerState.not_synced)

    # Delay catchup reply processing so LedgerState does not change
    # TODO fix delay, sometimes it's not enough and lower 'check_ledger_state'
    # fails because newNode's domain ledger state is 'synced'
    delay_catchup_reply = 10
    newNode.nodeIbStasher.delay(cr_delay(delay_catchup_reply))
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Make sure ledger starts syncing (sufficient consistency proofs received)
    looper.run(eventually(check_ledger_state, newNode, DOMAIN_LEDGER_ID,
                          LedgerState.syncing, retryWait=.5, timeout=5))

    confused_node = txnPoolNodeSet[0]
    new_node_ledger = newNode.ledgerManager.ledgerRegistry[DOMAIN_LEDGER_ID]
    cp = new_node_ledger.catchUpTill
    start, end = cp.seqNoStart, cp.seqNoEnd
    cons_proof = confused_node.ledgerManager._buildConsistencyProof(
        DOMAIN_LEDGER_ID, start, end)

    bad_send_time = None

    def chk():
        nonlocal bad_send_time
        entries = newNode.ledgerManager.spylog.getAll(
            newNode.ledgerManager.canProcessConsistencyProof.__name__)
        for entry in entries:
            # `canProcessConsistencyProof` should return False after `syncing_time`
            if entry.result == False and entry.starttime > bad_send_time:
                return
        assert False

    def send_and_chk(ledger_state):
        nonlocal bad_send_time, cons_proof
        bad_send_time = perf_counter()
        confused_node.ledgerManager.sendTo(cons_proof, newNode.name)
        # Check that the ConsistencyProof messages rejected
        looper.run(eventually(chk, retryWait=.5, timeout=5))
        check_ledger_state(newNode, DOMAIN_LEDGER_ID, ledger_state)

    send_and_chk(LedgerState.syncing)

    # Not accurate timeout but a conservative one
    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet)) + \
              2 * delay_catchup_reply
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1],
                         customTimeout=timeout)

    send_and_chk(LedgerState.synced)
def test_node_requests_missing_preprepares_and_prepares(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    2 of 4 nodes go down, so pool can not process any more incoming requests.
    A new request comes in. After a while those 2 nodes come back alive.
    Another request comes in. Check that previously disconnected two nodes
    request missing PREPREPARES and PREPARES and the pool successfully handles
    both transactions after that.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    current_node_set = set(txnPoolNodeSet)
    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                current_node_set,
                                                node,
                                                stopNode=False)
        current_node_set.remove(node)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             MISSING_REQS_CNT)

    looper.run(
        eventually(check_pp_out_of_sync,
                   alive_nodes,
                   disconnected_nodes,
                   retryWait=1,
                   timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_pre_prepare) == 0
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_prepare) == 0
        assert node.master_replica.spylog.count(
            Replica.process_requested_pre_prepare) == 0
        assert node.master_replica.spylog.count(
            Replica.process_requested_prepare) == 0

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_pre_prepare) > 0
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_prepare) > 0
        assert node.master_replica.spylog.count(
            Replica.process_requested_pre_prepare) > 0
        assert node.master_replica.spylog.count(
            Replica.process_requested_prepare) > 0

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
示例#17
0
def testOrderingCase2(looper, nodeSet, up, client1, wallet1):
    """
    Scenario -> A client sends requests, some nodes delay COMMITs to few
    specific nodes such some nodes achieve commit quorum later for those
    requests compared to other nodes. But all nodes `ORDER` request in the same
    order of ppSeqNos
    https://www.pivotaltracker.com/n/projects/1889887/stories/133655009
    """
    pr, replicas = getPrimaryReplica(nodeSet, instId=0), \
                   getNonPrimaryReplicas(nodeSet, instId=0)
    assert len(replicas) == 6

    rep0 = pr
    rep1 = replicas[0]
    rep2 = replicas[1]
    rep3 = replicas[2]
    rep4 = replicas[3]
    rep5 = replicas[4]
    rep6 = replicas[5]

    node0 = rep0.node
    node1 = rep1.node
    node2 = rep2.node
    node3 = rep3.node
    node4 = rep4.node
    node5 = rep5.node
    node6 = rep6.node

    ppSeqsToDelay = 5
    commitDelay = 3  # delay each COMMIT by this number of seconds
    delayedPpSeqNos = set()

    requestCount = 10

    def specificCommits(wrappedMsg):
        nonlocal node3, node4, node5
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare):
            if len(delayedPpSeqNos) < ppSeqsToDelay:
                delayedPpSeqNos.add(msg.ppSeqNo)
                logger.debug('ppSeqNo {} be delayed'.format(msg.ppSeqNo))
        if isinstance(msg, Commit) and msg.instId == 0 and \
            sender in (n.name for n in (node3, node4, node5)) and \
                msg.ppSeqNo in delayedPpSeqNos:
            return commitDelay

    for node in (node1, node2):
        logger.debug('{} would be delaying commits'.format(node))
        node.nodeIbStasher.delay(specificCommits)

    requests = sendRandomRequests(wallet1, client1, requestCount)
    waitForSufficientRepliesForRequests(looper, client1, requests=requests)

    def ensureSlowNodesHaveAllTxns():
        nonlocal node1, node2
        for node in node1, node2:
            assert len(node.domainLedger) == requestCount

    timeout = waits.expectedPoolGetReadyTimeout(len(nodeSet))
    looper.run(
        eventually(ensureSlowNodesHaveAllTxns, retryWait=1, timeout=timeout))

    checkAllLedgersEqual(
        (n.domainLedger for n in (node0, node3, node4, node5, node6)))

    for node in (node1, node2):
        for n in nodeSet:
            if n != node:
                checkLedgerEquality(node.domainLedger, n.domainLedger)

    checkAllLedgersEqual((n.domainLedger for n in nodeSet))
def test_node_requests_missing_preprepares_and_prepares_after_long_disconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle,
        tconf, tdirWithPoolTxns, allPluginsPath):
    """
    2 of 4 nodes go down, so pool can not process any more incoming requests.
    A new request comes in.
    Test than waits for some time to ensure that PrePrepare was created
    long enough seconds to be dropped by time checker.
    Two stopped nodes come back alive.
    Another request comes in.
    Check that previously disconnected two nodes request missing PREPREPARES
    and PREPARES and the pool successfully handles both transactions.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    alive_nodes = []
    disconnected_nodes = []

    for node in txnPoolNodeSet:
        if node.hasPrimary:
            alive_nodes.append(node)
        else:
            disconnected_nodes.append(node)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    current_node_set = set(txnPoolNodeSet)
    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                current_node_set,
                                                node,
                                                stopNode=False)
        current_node_set.remove(node)

    sdk_send_random_requests(looper,
                             sdk_pool_handle,
                             sdk_wallet_client,
                             MISSING_REQS_CNT)

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          disconnected_nodes,
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    preprepare_deviation = 4
    tconf.ACCEPTABLE_DEVIATION_PREPREPARE_SECS = preprepare_deviation
    time.sleep(preprepare_deviation * 2)

    for node in disconnected_nodes:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
        assert node.master_replica.spylog.count(Replica._request_pre_prepare) == 0
        assert node.master_replica.spylog.count(Replica._request_prepare) == 0
        assert node.master_replica.spylog.count(Replica.process_requested_pre_prepare) == 0
        assert node.master_replica.spylog.count(Replica.process_requested_prepare) == 0

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)

    for node in disconnected_nodes:
        assert node.master_replica.spylog.count(Replica._request_pre_prepare) > 0
        assert node.master_replica.spylog.count(Replica._request_prepare) > 0
        assert node.master_replica.spylog.count(Replica.process_requested_pre_prepare) > 0
        assert node.master_replica.spylog.count(Replica.process_requested_prepare) > 0

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
示例#19
0
def test_node_requests_missing_three_phase_messages_after_long_disconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tconf,
        tdirWithPoolTxns, allPluginsPath):
    """
    2 of 4 nodes go down, so pool can not process any more incoming requests.
    A new request comes in.
    Test than waits for some time to ensure that PrePrepare was created
    long enough seconds to be dropped by time checker.
    Two stopped nodes come back alive.
    Another request comes in.
    Check that previously disconnected two nodes request missing PREPARES and
    PREPREPARES and the pool successfully handles both transactions.
    """
    INIT_REQS_CNT = 10
    MISSING_REQS_CNT = 1
    REQS_AFTER_RECONNECT_CNT = 1
    alive_nodes = []
    disconnected_nodes = []

    for node in txnPoolNodeSet:
        if node.hasPrimary is not None:
            alive_nodes.append(node)
        else:
            disconnected_nodes.append(node)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    current_node_set = set(txnPoolNodeSet)
    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                current_node_set,
                                                node,
                                                stopNode=False)
        current_node_set.remove(node)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             MISSING_REQS_CNT)

    def check_pp_out_of_sync(alive_nodes, disconnected_nodes):
        def get_last_pp(node):
            return node.replicas._master_replica.lastPrePrepare

        last_3pc_key_alive = get_last_pp(alive_nodes[0])
        for node in alive_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_alive

        last_3pc_key_diconnected = get_last_pp(disconnected_nodes[0])
        assert last_3pc_key_diconnected != last_3pc_key_alive
        for node in disconnected_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_diconnected

    looper.run(
        eventually(check_pp_out_of_sync,
                   alive_nodes,
                   disconnected_nodes,
                   retryWait=1,
                   timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    preprepare_deviation = 4
    tconf.ACCEPTABLE_DEVIATION_PREPREPARE_SECS = preprepare_deviation
    time.sleep(preprepare_deviation * 2)

    for node in disconnected_nodes:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
示例#20
0
def test_node_requests_missing_preprepares_prepares_and_commits(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    1 of 4 nodes goes down. A new request comes in and is ordered by
    the 3 remaining nodes. After a while the previously disconnected node
    comes back alive. Another request comes in. Check that the previously
    disconnected node requests missing PREPREPARES, PREPARES and COMMITS,
    orders the previous request and all the nodes successfully handles
    the last request.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_node = txnPoolNodeSet[3]
    alive_nodes = txnPoolNodeSet[:3]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            disconnected_node,
                                            stopNode=False)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, MISSING_REQS_CNT)

    looper.run(
        eventually(check_pp_out_of_sync,
                   alive_nodes, [disconnected_node],
                   retryWait=1,
                   timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet,
                                        disconnected_node)
    # Give time for the reconnected node to catch up if it is going to do it
    looper.runFor(
        waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
        waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    for node in alive_nodes:
        assert node.domainLedger.size == init_ledger_size + MISSING_REQS_CNT
    # Ensure that the reconnected node has not caught up though
    assert disconnected_node.domainLedger.size == init_ledger_size

    assert disconnected_node.master_replica.spylog.count(
        Replica._request_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica._request_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica._request_commit) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_commit) == 0
    doOrderTimesBefore = disconnected_node.master_replica.spylog.count(
        Replica.doOrder)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_node, *alive_nodes)

    assert disconnected_node.master_replica.spylog.count(
        Replica._request_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica._request_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica._request_commit) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_commit) > 0
    doOrderTimesAfter = disconnected_node.master_replica.spylog.count(
        Replica.doOrder)
    # Ensure that the reconnected node has ordered both the missed 3PC-batch and the new 3PC-batch
    assert doOrderTimesAfter - doOrderTimesBefore == 2

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
def test_node_requests_missing_preprepares_prepares_and_commits(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    1 of 4 nodes goes down. A new request comes in and is ordered by
    the 3 remaining nodes. After a while the previously disconnected node
    comes back alive. Another request comes in. Check that the previously
    disconnected node requests missing PREPREPARES, PREPARES and COMMITS,
    orders the previous request and all the nodes successfully handles
    the last request.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_node = txnPoolNodeSet[3]
    alive_nodes = txnPoolNodeSet[:3]

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            disconnected_node, stopNode=False)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              MISSING_REQS_CNT)

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          [disconnected_node],
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, disconnected_node)
    # Give time for the reconnected node to catch up if it is going to do it
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    for node in alive_nodes:
        assert node.domainLedger.size == init_ledger_size + MISSING_REQS_CNT
    # Ensure that the reconnected node has not caught up though
    assert disconnected_node.domainLedger.size == init_ledger_size

    assert disconnected_node.master_replica.spylog.count(Replica._request_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_commit) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_commit) == 0
    doOrderTimesBefore = disconnected_node.master_replica.spylog.count(Replica.doOrder)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_node, *alive_nodes)

    assert disconnected_node.master_replica.spylog.count(Replica._request_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_commit) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_commit) > 0
    doOrderTimesAfter = disconnected_node.master_replica.spylog.count(Replica.doOrder)
    # Ensure that the reconnected node has ordered both the missed 3PC-batch and the new 3PC-batch
    assert doOrderTimesAfter - doOrderTimesBefore == 2

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
示例#22
0
def test_node_requests_missing_preprepares_prepares_and_commits(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tdir,
        allPluginsPath):
    """
    1 of 4 nodes goes down ((simulate this by dropping requests)). A new request comes in and is ordered by
    the 3 remaining nodes. After a while the previously disconnected node
    comes back alive. Another request comes in. Check that the previously
    disconnected node requests missing PREPREPARES, PREPARES and COMMITS,
    orders the previous request and all the nodes successfully handles
    the last request.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_node = txnPoolNodeSet[3]
    alive_nodes = txnPoolNodeSet[:3]
    disconnected_node_stashers = disconnected_node.nodeIbStasher

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    with delay_rules_without_processing(disconnected_node_stashers,
                                        delay_3pc()):
        last_ordered_key = txnPoolNodeSet[0].master_replica.last_ordered_3pc
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  sdk_wallet_client, MISSING_REQS_CNT)
        looper.run(
            eventually(check_pp_out_of_sync,
                       alive_nodes, [disconnected_node],
                       last_ordered_key,
                       retryWait=1,
                       timeout=expectedPoolGetReadyTimeout(
                           len(txnPoolNodeSet))))

    for node in alive_nodes:
        assert node.domainLedger.size == init_ledger_size + MISSING_REQS_CNT
    # Ensure that the reconnected node has not caught up though
    assert disconnected_node.domainLedger.size == init_ledger_size

    ordering_service = disconnected_node.master_replica._ordering_service
    assert ordering_service.spylog.count(
        OrderingService._request_pre_prepare) == 0
    assert ordering_service.spylog.count(OrderingService._request_prepare) == 0
    assert ordering_service.spylog.count(OrderingService._request_commit) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_commit) == 0
    doOrderTimesBefore = ordering_service.spylog.count(
        OrderingService._do_order)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_node, *alive_nodes)

    assert ordering_service.spylog.count(
        OrderingService._request_pre_prepare) > 0
    assert ordering_service.spylog.count(OrderingService._request_prepare) > 0
    assert ordering_service.spylog.count(OrderingService._request_commit) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(
        Replica.process_requested_commit) > 0
    doOrderTimesAfter = ordering_service.spylog.count(
        OrderingService._do_order)
    # Ensure that the reconnected node has ordered both the missed 3PC-batch and the new 3PC-batch
    assert doOrderTimesAfter - doOrderTimesBefore == 2

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)

    def check_all_ordered():
        for node in txnPoolNodeSet:
            assert node.domainLedger.size == (init_ledger_size +
                                              MISSING_REQS_CNT +
                                              REQS_AFTER_RECONNECT_CNT)

    looper.run(eventually(check_all_ordered, timeout=20))
示例#23
0
def testOrderingCase2(looper, txnPoolNodeSet, sdk_pool_handle,
                      sdk_wallet_client):
    """
    Scenario -> A client sends requests, some nodes delay COMMITs to few
    specific nodes such some nodes achieve commit quorum later for those
    requests compared to other nodes. But all nodes `ORDER` request in the same
    order of ppSeqNos
    https://www.pivotaltracker.com/n/projects/1889887/stories/133655009
    """
    pr, replicas = getPrimaryReplica(txnPoolNodeSet, instId=0), \
                   getNonPrimaryReplicas(txnPoolNodeSet, instId=0)
    assert len(replicas) == 6

    rep0 = pr
    rep1 = replicas[0]
    rep2 = replicas[1]
    rep3 = replicas[2]
    rep4 = replicas[3]
    rep5 = replicas[4]
    rep6 = replicas[5]

    node0 = rep0.node
    node1 = rep1.node
    node2 = rep2.node
    node3 = rep3.node
    node4 = rep4.node
    node5 = rep5.node
    node6 = rep6.node

    ppSeqsToDelay = 5
    commitDelay = 3  # delay each COMMIT by this number of seconds
    delayedPpSeqNos = set()

    requestCount = 10

    def specificCommits(wrappedMsg):
        nonlocal node3, node4, node5
        msg, sender = wrappedMsg
        if isinstance(msg, PrePrepare):
            if len(delayedPpSeqNos) < ppSeqsToDelay:
                delayedPpSeqNos.add(msg.ppSeqNo)
                logger.debug('ppSeqNo {} be delayed'.format(msg.ppSeqNo))
        if isinstance(msg, Commit) and msg.instId == 0 and \
                sender in (n.name for n in (node3, node4, node5)) and \
                msg.ppSeqNo in delayedPpSeqNos:
            return commitDelay

    for node in (node1, node2):
        logger.debug('{} would be delaying commits'.format(node))
        node.nodeIbStasher.delay(specificCommits)

    sdk_reqs = sdk_send_random_requests(looper, sdk_pool_handle,
                                        sdk_wallet_client, requestCount)

    timeout = waits.expectedPoolGetReadyTimeout(len(txnPoolNodeSet))

    ensure_all_nodes_have_same_data(looper,
                                    txnPoolNodeSet,
                                    custom_timeout=timeout)

    sdk_get_and_check_replies(looper, sdk_reqs)
def test_node_requests_missing_preprepares_and_prepares_after_long_disconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tconf,
        tdirWithPoolTxns, tdir, allPluginsPath):
    """
    2 of 4 nodes go down (simulate this by dropping requests), so pool can not process any more incoming requests.
    A new request comes in.
    Test than waits for some time to ensure that PrePrepare was created
    long enough seconds to be dropped by time checker.
    Two stopped nodes come back alive.
    Another request comes in.
    Check that previously disconnected two nodes request missing PREPREPARES
    and PREPARES and the pool successfully handles both transactions.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1

    alive_nodes = []
    disconnected_nodes = []

    for node in txnPoolNodeSet:
        if node.hasPrimary:
            alive_nodes.append(node)
        else:
            disconnected_nodes.append(node)
    disconnected_nodes_stashers = [n.nodeIbStasher for n in disconnected_nodes]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    with delay_rules_without_processing(disconnected_nodes_stashers,
                                        delay_3pc()):
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                                 MISSING_REQS_CNT)
        last_ordered_key = txnPoolNodeSet[0].master_replica.last_ordered_3pc
        looper.run(
            eventually(check_pp_out_of_sync,
                       alive_nodes,
                       disconnected_nodes,
                       last_ordered_key,
                       retryWait=1,
                       timeout=expectedPoolGetReadyTimeout(
                           len(txnPoolNodeSet))))

    preprepare_deviation = 4
    tconf.ACCEPTABLE_DEVIATION_PREPREPARE_SECS = preprepare_deviation
    time.sleep(preprepare_deviation * 2)

    for node in disconnected_nodes:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_pre_prepare) == 0
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_prepare) == 0
        assert node.master_replica._message_req_service.spylog.count(
            MessageReqService.process_message_rep) == 0

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_pre_prepare) > 0
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_prepare) > 0
        assert node.master_replica._message_req_service.spylog.count(
            MessageReqService.process_message_rep) > 0

    def check_all_ordered():
        for node in txnPoolNodeSet:
            assert node.domainLedger.size == (init_ledger_size +
                                              MISSING_REQS_CNT +
                                              REQS_AFTER_RECONNECT_CNT)

    looper.run(eventually(check_all_ordered, timeout=20))
def test_node_requests_missing_preprepares_and_prepares(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    2 of 4 nodes go down, so pool can not process any more incoming requests.
    A new request comes in. After a while those 2 nodes come back alive.
    Another request comes in. Check that previously disconnected two nodes
    request missing PREPREPARES and PREPARES and the pool successfully handles
    both transactions after that.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    current_node_set = set(txnPoolNodeSet)
    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper, current_node_set, node, stopNode=False)
        current_node_set.remove(node)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, MISSING_REQS_CNT)

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          disconnected_nodes,
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
        assert node.master_replica.spylog.count(Replica._request_pre_prepare) == 0
        assert node.master_replica.spylog.count(Replica._request_prepare) == 0
        assert node.master_replica.spylog.count(Replica.process_requested_pre_prepare) == 0
        assert node.master_replica.spylog.count(Replica.process_requested_prepare) == 0

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in disconnected_nodes:
        assert node.master_replica.spylog.count(Replica._request_pre_prepare) > 0
        assert node.master_replica.spylog.count(Replica._request_prepare) > 0
        assert node.master_replica.spylog.count(Replica.process_requested_pre_prepare) > 0
        assert node.master_replica.spylog.count(Replica.process_requested_prepare) > 0

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
示例#26
0
def test_node_requests_missing_preprepares_and_prepares(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle,
        tconf, tdir, allPluginsPath):
    """
    2 of 4 nodes go down (simulate this by dropping requests), so pool can not process any more incoming requests.
    A new request comes in. After a while those 2 nodes come back alive.
    Another request comes in. Check that previously disconnected two nodes
    request missing PREPREPARES and PREPARES and the pool successfully handles
    both transactions after that.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1

    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]
    disconnected_nodes_stashers = [n.nodeIbStasher for n in disconnected_nodes]

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    with delay_rules_without_processing(disconnected_nodes_stashers, delay_3pc()):
        sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, MISSING_REQS_CNT)
        last_ordered_key = txnPoolNodeSet[0].master_replica.last_ordered_3pc
        looper.run(eventually(check_pp_out_of_sync,
                              alive_nodes,
                              disconnected_nodes,
                              last_ordered_key,
                              retryWait=1,
                              timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(OrderingService._request_pre_prepare) == 0
        assert node.master_replica._ordering_service.spylog.count(OrderingService._request_prepare) == 0
        assert node.master_replica._message_req_service.spylog.count(MessageReqService.process_message_rep) == 0

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(OrderingService._request_pre_prepare) > 0
        assert node.master_replica._ordering_service.spylog.count(OrderingService._request_prepare) > 0
        assert node.master_replica._message_req_service.spylog.count(MessageReqService.process_message_rep) > 0

    def check_all_ordered():
        for node in txnPoolNodeSet:
            assert node.domainLedger.size == (init_ledger_size +
                                              MISSING_REQS_CNT +
                                              REQS_AFTER_RECONNECT_CNT)

    looper.run(eventually(check_all_ordered, timeout=20))