def testNodeCatchupAfterDisconnect(
        sdk_new_node_caught_up, txnPoolNodeSet,
        sdk_node_set_with_node_added_after_some_txns):
    """
    A node that disconnects after some transactions should eventually get the
    transactions which happened while it was disconnected
    """
    looper, new_node, sdk_pool_handle, new_steward_wallet_handle = \
        sdk_node_set_with_node_added_after_some_txns

    logger.debug("Disconnecting node {} with pool ledger size {}".format(
        new_node, new_node.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            new_node,
                                            stopNode=False)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Connecting the stopped node, {}".format(new_node))
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)

    logger.debug("Waiting for the node to catch up, {}".format(new_node))
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet_handle, 10)
    checkNodeDataForEquality(new_node, *txnPoolNodeSet[:-1])
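# Before the next example: waitNodeDataEquality / waitNodeDataInequality do
# the heavy lifting in these tests. A hedged sketch of what such an equality
# wait boils down to, reusing the eventually() helper seen below; the
# attribute names (domainLedger.size / root_hash) and the timeout are
# illustrative assumptions, not plenum's actual implementation.
def check_ledgers_equal(lagging_node, *reference_nodes):
    # Compare ledger size and merkle root pairwise; an AssertionError makes
    # eventually() retry until the nodes converge or the timeout expires.
    for ref in reference_nodes:
        assert lagging_node.domainLedger.size == ref.domainLedger.size
        assert lagging_node.domainLedger.root_hash == ref.domainLedger.root_hash

def wait_node_data_equality_sketch(looper, lagging_node, *reference_nodes):
    looper.run(eventually(check_ledgers_equal, lagging_node, *reference_nodes,
                          retryWait=1, timeout=60))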
def testNodeCatchupAfterLostConnection(newNodeCaughtUp, txnPoolNodeSet,
                                       nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that has a poor network connection and gets out of sync after some
    transactions should eventually receive the transactions which happened
    while it was not accessible
    """
    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    logger.debug("Disconnecting node {}, ledger size {}".format(
        newNode, newNode.domainLedger.size))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            newNode,
                                            stopNode=False)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, newNode, *txnPoolNodeSet[:-1])

    # logger.debug("Ensure node {} gets disconnected".format(newNode))
    ensure_node_disconnected(looper, newNode, txnPoolNodeSet[:-1])

    logger.debug("Connecting the node {} back, ledger size {}".format(
        newNode, newNode.domainLedger.size))
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, newNode)

    logger.debug("Waiting for the node to catch up, {}".format(newNode))
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10)
    checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1])
def test_node_requests_missing_three_phase_messages(looper, txnPoolNodeSet,
                                                    wallet1, client1Connected):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming requests.
    A new request comes in. After a while those 2 nodes come back alive.
    Another request comes in. Check that previously disconnected two nodes
    request missing PREPARES and PREPREPARES and the pool successfully handles
    both transactions after that.
    """
    INIT_REQS_CNT = 10
    MISSING_REQS_CNT = 1
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1,
                                              client1Connected, INIT_REQS_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                node,
                                                stopNode=False)

    sendRandomRequests(wallet1, client1Connected, MISSING_REQS_CNT)

    def check_pp_out_of_sync(alive_nodes, disconnected_nodes):
        def get_last_pp(node):
            return node.replicas._master_replica.lastPrePrepare

        last_3pc_key_alive = get_last_pp(alive_nodes[0])
        for node in alive_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_alive

        last_3pc_key_disconnected = get_last_pp(disconnected_nodes[0])
        assert last_3pc_key_disconnected != last_3pc_key_alive
        for node in disconnected_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_disconnected

    looper.run(
        eventually(check_pp_out_of_sync,
                   alive_nodes,
                   disconnected_nodes,
                   retryWait=1,
                   timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, node)

    send_reqs_to_nodes_and_verify_all_replies(looper, wallet1,
                                              client1Connected,
                                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
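# The arithmetic behind the scenario above, as a quick sanity check: a BFT
# pool of n nodes tolerates f = (n - 1) // 3 faults and needs n - f nodes to
# order a request (standard PBFT-style quorums, which plenum follows).
def bft_limits(n):
    f = (n - 1) // 3       # max tolerated faulty nodes
    return f, n - f        # (faults tolerated, nodes needed for consensus)

f, consensus = bft_limits(4)
assert f == 1 and consensus == 3
# With 2 of 4 nodes disconnected only 2 remain, which is below the 3-node
# quorum, so the request sent meanwhile cannot be ordered until they return.
assert 4 - 2 < consensus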
def test_idr_cache_update_after_catchup(txnPoolNodeSet,
                                        looper,
                                        sdk_pool_handle,
                                        sdk_wallet_steward):
    wallet_handle, identifier = sdk_wallet_steward
    node_to_disconnect = txnPoolNodeSet[-1]
    req_handler = node_to_disconnect.getDomainReqHandler()
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect.name,
                                            stopNode=False)
    looper.runFor(2)
    idr, verkey = createHalfKeyIdentifierAndAbbrevVerkey()

    request = looper.loop.run_until_complete(build_nym_request(identifier, idr, verkey, None, None))
    req_signed = looper.loop.run_until_complete(sign_request(wallet_handle, identifier, request))
    result = json.loads(looper.loop.run_until_complete(submit_request(sdk_pool_handle, req_signed)))

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, node_to_disconnect.name)
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
    key = domain.make_state_path_for_nym(idr)
    root_hash = req_handler.ts_store.get_equal_or_prev(result['result']['txnTime'])
    from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                     key=key)
    assert from_state
    deserialized = req_handler.stateSerializer.deserialize(from_state)
    assert deserialized
    items_after = req_handler.idrCache.get(idr)
    assert items_after
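# The lookup pattern above (txnTime -> root hash -> state value) reads the
# state as it was at a past moment. A minimal in-memory stand-in conveys the
# semantics of state.get_for_root_hash; this is a sketch with hypothetical
# names, not indy-plenum's merkle-trie implementation.
class VersionedStateSketch:
    def __init__(self):
        self._snapshots = {}   # root_hash -> frozen {key: value} mapping
        self._current = {}

    def set(self, key, value):
        self._current[key] = value

    def commit(self, root_hash):
        # Freeze the current contents under the given root hash.
        self._snapshots[root_hash] = dict(self._current)

    def get_for_root_hash(self, root_hash, key):
        return self._snapshots[root_hash].get(key)

state = VersionedStateSketch()
state.set(b'nym:abc', b'serialized-identity-data')
state.commit(b'root-1')
assert state.get_for_root_hash(b'root-1', b'nym:abc')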
def testNodeCatchupAfterDisconnect(newNodeCaughtUp, txnPoolNodeSet,
                                   nodeSetWithNodeAddedAfterSomeTxns):
    """
    A node that disconnects after some transactions should eventually get the
    transactions which happened while it was disconnected
    """
    looper, newNode, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns

    logger.debug("Stopping node {} with pool ledger size {}".format(
        newNode, newNode.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            newNode,
                                            stopNode=False)
    looper.removeProdable(newNode)

    # TODO: Check if the node has really stopped processing requests?
    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    # Make sure new node got out of sync
    waitNodeDataInequality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Starting the stopped node, {}".format(newNode))
    looper.add(newNode)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, newNode)

    logger.debug("Waiting for the node to catch up, {}".format(newNode))
    waitNodeDataEquality(looper, newNode, *txnPoolNodeSet[:-1])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 10)
    checkNodeDataForEquality(newNode, *txnPoolNodeSet[:-1])
def test_6_nodes_pool_cannot_reach_quorum_with_2_disconnected(
        txnPoolNodeSet, looper, sdk_pool_handle,
        sdk_wallet_client):
    '''
    Check that we cannot reach consensus when fewer than n-f nodes
    are connected: disconnect 2 of 6 nodes
    '''
    faulties = nodes_by_rank(txnPoolNodeSet)[-faultyNodes:]

    current_node_set = set(txnPoolNodeSet)
    for node in faulties:
        for r in node.replicas:
            assert not r.isPrimary
        disconnect_node_and_ensure_disconnected(
            looper, current_node_set, node, stopNode=False)
        current_node_set.remove(node)

    reqs = sdk_signed_random_requests(looper, sdk_wallet_client, 1)
    with pytest.raises(TimeoutError):
        sdk_send_and_check(reqs, looper, txnPoolNodeSet, sdk_pool_handle)
    check_request_is_not_returned_to_nodes(
        txnPoolNodeSet, sdk_json_to_request_object(json.loads(reqs[0])))

    # The following reconnection of nodes is needed in this test to avoid
    # pytest process hangup
    for node in faulties:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)
def test_6_nodes_pool_cannot_reach_quorum_with_2_disconnected(
        txnPoolNodeSet, looper, sdk_pool_handle,
        sdk_wallet_client):
    '''
    Check that we cannot reach consensus when fewer than n-f nodes
    are connected: disconnect 2 of 6 nodes
    '''
    faulties = nodes_by_rank(txnPoolNodeSet)[-faultyNodes:]

    current_node_set = set(txnPoolNodeSet)
    for node in faulties:
        for r in node.replicas.values():
            assert not r.isPrimary
        disconnect_node_and_ensure_disconnected(
            looper, current_node_set, node, stopNode=False)
        current_node_set.remove(node)

    reqs = sdk_signed_random_requests(looper, sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        sdk_send_and_check(reqs, looper, txnPoolNodeSet, sdk_pool_handle)
    check_request_is_not_returned_to_nodes(
        txnPoolNodeSet, sdk_json_to_request_object(json.loads(reqs[0])))

    # The following reconnection of nodes is needed in this test to avoid
    # pytest process hangup
    for node in faulties:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)
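# The quorum arithmetic behind both variants of this test: with n = 6 the
# pool tolerates f = (n - 1) // 3 = 1 fault and needs n - f = 5 nodes for
# consensus, so disconnecting 2 nodes leaves 4 < 5 and nothing can be ordered
# (standard PBFT-style quorums, which plenum follows).
n = 6
f = (n - 1) // 3
assert f == 1
assert n - 2 < n - f   # 4 connected nodes cannot reach the 5-node quorum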
def test_number_txns_in_catchup_and_vc_queue_valid(looper, txnPoolNodeSet,
                                                   tconf, sdk_pool_handle,
                                                   sdk_wallet_steward):
    num_txns = 5
    master_node = get_master_primary_node(txnPoolNodeSet)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            master_node,
                                            stopNode=False)
    looper.run(
        eventually(checkViewNoForNodes,
                   txnPoolNodeSet[1:],
                   expected_view_no,
                   retryWait=1,
                   timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, num_txns)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, master_node)
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet[-1:])
    latest_info = master_node._info_tool.info
    assert latest_info['Node_info']['Catchup_status'][
        'Number_txns_in_catchup'][1] == num_txns
    assert latest_info['Node_info']['View_change_status'][
        'View_No'] == expected_view_no
    node_names = [n.name for n in txnPoolNodeSet[1:]]
    for node_name in node_names:
        assert latest_info['Node_info']['View_change_status']['VCDone_queue'][
            node_name][0] == master_node.master_primary_name
        assert latest_info['Node_info']['View_change_status']['VCDone_queue'][
            node_name][1]
        assert latest_info['Node_info']['View_change_status'][
            'Last_complete_view_no'] == expected_view_no
def test_fill_ts_store_after_catchup(txnPoolNodeSet, looper, sdk_pool_handle,
                                     sdk_wallet_steward):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    node_to_disconnect = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect,
                                            stopNode=False)
    looper.runFor(2)
    sdk_replies = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_steward, 2)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet,
                                        node_to_disconnect)
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
    req_handler = node_to_disconnect.getDomainReqHandler()
    for reply in sdk_replies:
        key = req_handler.prepare_buy_key(reply[1]['result']['identifier'],
                                          reply[1]['result']['reqId'])
        root_hash = req_handler.ts_store.get_equal_or_prev(
            reply[1]['result']['txnTime'])
        assert root_hash
        from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                         key=key)
        assert req_handler.stateSerializer.deserialize(from_state)['amount'] == \
               reply[1]['result']['amount']
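# ts_store.get_equal_or_prev maps a transaction time to the state root that
# was current at (or just before) that time. A hedged stand-in built on a
# sorted list and bisect shows the intended semantics; the real store is a
# persistent database, so this sketch is purely illustrative.
from bisect import bisect_right

class TimestampStoreSketch:
    def __init__(self):
        self._timestamps = []   # txn times in increasing order
        self._roots = []        # state root committed at each time

    def set(self, timestamp, root_hash):
        # Assumes timestamps arrive in increasing order, as ordering implies.
        self._timestamps.append(timestamp)
        self._roots.append(root_hash)

    def get_equal_or_prev(self, timestamp):
        idx = bisect_right(self._timestamps, timestamp)
        return self._roots[idx - 1] if idx else None

store = TimestampStoreSketch()
store.set(100, b'root-at-100')
store.set(200, b'root-at-200')
assert store.get_equal_or_prev(150) == b'root-at-100'
assert store.get_equal_or_prev(200) == b'root-at-200'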
def test_current_state_propagation(newNodeCaughtUp, txnPoolNodeSet,
                                   nodeSetWithNodeAddedAfterSomeTxns):
    """
    Checks that nodes send CurrentState to lagged nodes.
    """

    # 1. Start pool
    looper, new_node, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns

    # 2. Stop one node
    lagging_node = new_node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagging_node,
                                            stopNode=True)
    looper.removeProdable(new_node)

    # 3. Start it again
    looper.add(new_node)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)
    looper.runFor(5)

    # 4. Check that all nodes sent CurrentState
    for node in txnPoolNodeSet[:-1]:
        sent_times = node.spylog.count(
            node.send_current_state_to_lagging_node.__name__)
        assert sent_times != 0, "{} hasn't sent CurrentState".format(node)
    looper.runFor(5)

    # 5. Check that it received CurrentState messages
    received_times = lagging_node.spylog.count(
        lagging_node.process_current_state_message.__name__)
    assert received_times != 0
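# spylog is plenum's test instrumentation for counting calls to node and
# replica methods. A minimal decorator-based stand-in conveys the idea; this
# is a sketch, not the actual spy implementation.
from collections import Counter
from functools import wraps

_call_counts = Counter()

def spyable(fn):
    @wraps(fn)
    def wrapper(*args, **kwargs):
        _call_counts[fn.__name__] += 1    # record the call, then delegate
        return fn(*args, **kwargs)
    return wrapper

@spyable
def send_current_state_to_lagging_node(node_name):
    pass  # the real method sends a CurrentState message to the lagging node

send_current_state_to_lagging_node('Node4')
assert _call_counts['send_current_state_to_lagging_node'] == 1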
def test_large_catchup(looper,
                       txnPoolNodeSet,
                       wallet1,
                       client1,
                       client1Connected,
                       tconf,
                       allPluginsPath,
                       tdirWithPoolTxns):
    """
    Checks that node can catchup large ledgers
    """
    # Prepare nodes
    lagging_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]
    all_nodes = txnPoolNodeSet
    looper.run(checkNodesConnected(txnPoolNodeSet))

    # Prepare client
    client, wallet = client1, wallet1
    looper.run(client.ensureConnectedToNodes())

    # Check that requests executed well
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=10)

    # Stop one node
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)
    disconnect_node_and_ensure_disconnected(looper,
                                            rest_nodes,
                                            lagging_node,
                                            stopNode=True)
    looper.removeProdable(lagging_node)

    # Send more requests to active nodes
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, numReqs=100)
    waitNodeDataEquality(looper, *rest_nodes)

    # Make the message size limit smaller to ensure that the catchup response
    # exceeds the limit
    for node in rest_nodes:
        decrease_max_request_size(node)

    # Restart stopped node and wait for successful catch up
    looper.add(lagging_node)
    reconnect_node_and_ensure_connected(looper, all_nodes, lagging_node)
    waitNodeDataEquality(looper, *all_nodes)
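# decrease_max_request_size forces the catchup reply to be split across
# several messages. The splitting itself amounts to packing transactions into
# chunks whose serialized size stays under the limit; a hedged sketch of that
# packing (the real wire format, limits and helper names differ):
import json

def chunk_txns_by_size(txns, max_bytes):
    chunks, current, current_size = [], [], 0
    for txn in txns:
        size = len(json.dumps(txn).encode())
        if current and current_size + size > max_bytes:
            chunks.append(current)          # current chunk is full, start anew
            current, current_size = [], 0
        current.append(txn)
        current_size += size
    if current:
        chunks.append(current)
    return chunks

txns = [{'seqNo': i, 'data': 'x' * 50} for i in range(10)]
assert len(chunk_txns_by_size(txns, max_bytes=150)) > 1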
def test_node_catchup_causes_no_desync(looper, txnPoolNodeSet, client1,
                                       wallet1, client1Connected, monkeypatch):
    """
    Checks that transactions received by catchup do not
    break performance monitoring
    """

    client, wallet = client1, wallet1
    lagging_node = get_any_non_primary_node(txnPoolNodeSet)
    rest_nodes = set(txnPoolNodeSet).difference({lagging_node})

    # Make master replica lagging by hiding all messages sent to it
    make_master_replica_lag(lagging_node)
    monkeypatch.setattr(lagging_node.master_replica,
                        '_request_missing_three_phase_messages',
                        lambda *x, **y: None)

    # Send some requests and check that all replicas except master executed it
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    waitNodeDataInequality(looper, lagging_node, *rest_nodes)
    looper.run(eventually(backup_replicas_run_forward, lagging_node))

    # Disconnect lagging node, send some more requests and start it back.
    # After restart it should be in such a state that it needs to catch up
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagging_node,
                                            stopNode=False)
    looper.removeProdable(lagging_node)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    looper.add(lagging_node)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, lagging_node)

    # Check that catchup done
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)

    # Send some more requests to ensure that backup and master replicas
    # are in the same state
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)
    looper.run(eventually(replicas_synced, lagging_node))

    # Check that master is not considered to be degraded
    assert not lagging_node.monitor.isMasterDegraded()
def testNodeCatchupFPlusOne(txnPoolNodeSet, poolAfterSomeTxns):
    """
    Check that f+1 nodes are enough for catchup
    """
    looper, client, wallet = poolAfterSomeTxns

    assert len(txnPoolNodeSet) == 4

    node1 = txnPoolNodeSet[-1]
    node0 = txnPoolNodeSet[-2]

    logger.debug("Stopping node0 with pool ledger size {}".
                 format(node0.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, node0, stopNode=False)
    looper.removeProdable(node0)

    logger.debug("Sending requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 5)

    logger.debug("Stopping node1 with pool ledger size {}".
                 format(node1.poolManager.txnSeqNo))
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, node1, stopNode=False)
    looper.removeProdable(node1)

    # Make sure new node got out of sync
    waitNodeDataInequality(looper, node0, *txnPoolNodeSet[:-2])

    # TODO: Check if the node has really stopped processing requests?

    logger.debug("Starting the stopped node0")
    looper.add(node0)
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet[:-1], node0)

    logger.debug("Waiting for the node0 to catch up")
    waitNodeDataEquality(looper, node0, *txnPoolNodeSet[:-2])

    logger.debug("Sending more requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet, client, 2)
    checkNodeDataForEquality(node0, *txnPoolNodeSet[:-2])
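# Why f+1 nodes suffice for catchup while n - f are needed for ordering:
# among any f+1 nodes giving matching ledger proofs, at most f can be faulty,
# so at least one honest node vouches for the data. A quick check of that
# arithmetic for this 4-node pool (standard BFT assumptions):
n = 4
f = (n - 1) // 3
catchup_quorum = f + 1      # matching proofs needed to trust catchup data
ordering_quorum = n - f     # nodes needed to order new requests
assert catchup_quorum == 2 and ordering_quorum == 3
# Hence node0 can catch up from the 2 remaining nodes even though the pool,
# with both node0 and node1 down, cannot order anything new.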
def test_get_last_ordered_timestamp_after_catchup(looper, txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward):
    node_to_disconnect = txnPoolNodeSet[-1]
    reply_before = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_steward, 1)[0][1]
    looper.runFor(2)
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect,
                                            stopNode=False)
    reply = sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                      sdk_wallet_steward, 1)[0][1]
    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet,
                                        node_to_disconnect)
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet[:-1])
    ts_from_state = node_to_disconnect.master_replica._get_last_timestamp_from_state(
        DOMAIN_LEDGER_ID)
    assert ts_from_state == reply['result']['txnTime']
    assert ts_from_state != reply_before['result']['txnTime']
def test_disconnected_node_catchup_plugin_ledger_txns(looper,
                                                      txnPoolNodeSet,
                                                      sdk_wallet_client,
                                                      sdk_pool_handle,
                                                      sdk_new_node_caught_up):
    """
    A node gets disconnected, a few config ledger txns happen,
    the disconnected node comes back up and catches up the config ledger
    """
    new_node = sdk_new_node_caught_up
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, new_node, stopNode=False)

    # Do some demo txns;
    some_demo_txns(looper, sdk_wallet_client, sdk_pool_handle)

    # Make sure new node got out of sync
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def test_disconnected_node_catchup_config_ledger_txns(
        looper, some_config_txns_done, txnPoolNodeSet, sdk_wallet_client,
        sdk_pool_handle, newNodeCaughtUp, keys):
    """
    A node gets disconnected, a few config ledger txns happen,
    the disconnected node comes back up and catches up the config ledger
    """
    new_node = newNodeCaughtUp
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            new_node,
                                            stopNode=False)

    # Do some config txns while the node is disconnected (reusing the fixture
    # helper as a plain function)
    send_some_config_txns(looper, sdk_pool_handle, sdk_wallet_client, keys)

    # Make sure new node got out of sync
    waitNodeDataInequality(looper, new_node, *txnPoolNodeSet[:-1])

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, new_node)
    waitNodeDataEquality(looper, new_node, *txnPoolNodeSet[:-1])
def test_node_requests_missing_preprepares_and_prepares(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming requests.
    A new request comes in. After a while those 2 nodes come back alive.
    Another request comes in. Check that previously disconnected two nodes
    request missing PREPREPARES and PREPARES and the pool successfully handles
    both transactions after that.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    current_node_set = set(txnPoolNodeSet)
    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                current_node_set,
                                                node,
                                                stopNode=False)
        current_node_set.remove(node)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             MISSING_REQS_CNT)

    looper.run(
        eventually(check_pp_out_of_sync,
                   alive_nodes,
                   disconnected_nodes,
                   retryWait=1,
                   timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_pre_prepare) == 0
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_prepare) == 0
        assert node.master_replica.spylog.count(
            Replica.process_requested_pre_prepare) == 0
        assert node.master_replica.spylog.count(
            Replica.process_requested_prepare) == 0

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    for node in disconnected_nodes:
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_pre_prepare) > 0
        assert node.master_replica._ordering_service.spylog.count(
            OrderingService._request_prepare) > 0
        assert node.master_replica.spylog.count(
            Replica.process_requested_pre_prepare) > 0
        assert node.master_replica.spylog.count(
            Replica.process_requested_prepare) > 0

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
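# The re-request machinery exercised above keys three-phase messages by
# (viewNo, ppSeqNo). A hedged sketch of the bookkeeping a replica could use
# to notice a missed PrePrepare once enough Prepares for the same key arrive;
# all names here are illustrative, not OrderingService's internals.
from collections import defaultdict

class MissingThreePhaseTracker:
    def __init__(self, quorum):
        self.quorum = quorum
        self.preprepares = {}              # (view_no, pp_seq_no) -> message
        self.prepares = defaultdict(set)   # (view_no, pp_seq_no) -> senders

    def on_prepare(self, view_no, pp_seq_no, sender):
        key = (view_no, pp_seq_no)
        self.prepares[key].add(sender)
        # Quorum of Prepares but no PrePrepare seen: it was missed while
        # disconnected and has to be requested from the other nodes.
        if len(self.prepares[key]) >= self.quorum and key not in self.preprepares:
            return 'request_preprepare', key
        return None, key

tracker = MissingThreePhaseTracker(quorum=2)
tracker.on_prepare(0, 1, 'Alpha')
action, key = tracker.on_prepare(0, 1, 'Beta')
assert action == 'request_preprepare' and key == (0, 1)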
def test_node_requests_missing_three_phase_messages_after_long_disconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tconf,
        tdirWithPoolTxns, allPluginsPath):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming requests.
    A new request comes in.
    The test then waits for some time to ensure that the PrePrepare was
    created long enough ago to be dropped by the time checker.
    Two stopped nodes come back alive.
    Another request comes in.
    Check that previously disconnected two nodes request missing PREPARES and
    PREPREPARES and the pool successfully handles both transactions.
    """
    INIT_REQS_CNT = 10
    MISSING_REQS_CNT = 1
    REQS_AFTER_RECONNECT_CNT = 1
    alive_nodes = []
    disconnected_nodes = []

    for node in txnPoolNodeSet:
        if node.hasPrimary:  # hasPrimary is boolean; 'is not None' was always true
            alive_nodes.append(node)
        else:
            disconnected_nodes.append(node)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, INIT_REQS_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    current_node_set = set(txnPoolNodeSet)
    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                current_node_set,
                                                node,
                                                stopNode=False)
        current_node_set.remove(node)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client,
                             MISSING_REQS_CNT)

    def check_pp_out_of_sync(alive_nodes, disconnected_nodes):
        def get_last_pp(node):
            return node.replicas._master_replica.lastPrePrepare

        last_3pc_key_alive = get_last_pp(alive_nodes[0])
        for node in alive_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_alive

        last_3pc_key_disconnected = get_last_pp(disconnected_nodes[0])
        assert last_3pc_key_disconnected != last_3pc_key_alive
        for node in disconnected_nodes[1:]:
            assert get_last_pp(node) == last_3pc_key_disconnected

    looper.run(
        eventually(check_pp_out_of_sync,
                   alive_nodes,
                   disconnected_nodes,
                   retryWait=1,
                   timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    preprepare_deviation = 4
    tconf.ACCEPTABLE_DEVIATION_PREPREPARE_SECS = preprepare_deviation
    time.sleep(preprepare_deviation * 2)

    for node in disconnected_nodes:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, REQS_AFTER_RECONNECT_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size + MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
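# The sleep above makes the pending PrePrepare older than
# ACCEPTABLE_DEVIATION_PREPREPARE_SECS, so the reconnected nodes must discard
# it and request a fresh one. The acceptability check itself is a clock
# comparison; a hedged sketch of it (the real check lives inside the
# replica's PrePrepare validation):
import time

def pp_time_acceptable(pp_time, acceptable_deviation_secs, now=None):
    now = time.time() if now is None else now
    return abs(now - pp_time) <= acceptable_deviation_secs

created_at = time.time() - 8   # a PrePrepare created 8 seconds ago
assert not pp_time_acceptable(created_at, acceptable_deviation_secs=4)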
def test_disconnected_node_with_lagged_view_pulls_up_its_view_on_reconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    Verifies that a disconnected node with a lagged view accepts
    the current view from the other nodes on re-connection.
    Steps:
    1. Provoke view change to 1.
    2. Ensure that all the nodes complete view change to 1.
    3. Disconnect one node from the rest of the nodes in the pool.
    4. Provoke view change to 2.
    5. Ensure that all the nodes except for the disconnected one complete
    view change to 2 and the disconnected node remains in the view 1.
    6. Provoke view change to 3.
    7. Ensure that all the nodes except for the disconnected one complete
    view change to 3 and the disconnected node remains in the view 1.
    8. Connect the disconnected node to the rest of the nodes in the pool.
    9. Ensure that the re-connected node completes view change to 3.
    10. Ensure that all the nodes participate in consensus.
    """
    checkViewNoForNodes(txnPoolNodeSet, 0)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    lagged_node = getNonPrimaryReplicas(txnPoolNodeSet)[-1].node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagged_node,
                                            stopNode=False)
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper, other_nodes,
                        instances_list=range(getRequiredInstances(len(txnPoolNodeSet))))
    ensure_all_nodes_have_same_data(looper, other_nodes)
    checkViewNoForNodes(other_nodes, 2)
    checkViewNoForNodes([lagged_node], 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper, other_nodes,
                        instances_list=range(getRequiredInstances(len(txnPoolNodeSet))))
    ensure_all_nodes_have_same_data(looper, other_nodes)
    checkViewNoForNodes(other_nodes, 3)
    checkViewNoForNodes([lagged_node], 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, lagged_node)
    waitForViewChange(looper, [lagged_node], 3,
                      customTimeout=waits.expectedPoolElectionTimeout(
                          len(txnPoolNodeSet)))
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, 3)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
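# A lagged node pulls its view up once enough peers vouch for a newer view.
# The threshold is f+1: more votes than the faulty nodes could forge. A
# hedged sketch of that counting; plenum's actual trigger involves
# InstanceChange/ViewChangeDone processing, which is richer than this.
from collections import defaultdict

def view_to_adopt(votes_by_view, f, own_view):
    # votes_by_view: proposed view number -> set of voting node names
    for view_no, voters in sorted(votes_by_view.items()):
        if view_no > own_view and len(voters) >= f + 1:
            return view_no
    return own_view

votes = defaultdict(set)
for name in ('Alpha', 'Beta'):
    votes[3].add(name)
assert view_to_adopt(votes, f=1, own_view=1) == 3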
def test_reconnect_primary_and_not_primary(looper, txnPoolNodeSet,
                                           sdk_wallet_steward, sdk_pool_handle,
                                           tconf):
    """
    Test steps:
    Pool of 7 nodes.
    The count of instances must be 3.
    1. Choose a node that is not primary on any replica (index 3)
    2. Disconnect it
    3. Ensure that the number of replicas has decreased
    4. Choose the current primary node (must be index 0)
    5. Disconnect the primary
    6. Ensure that the view change completes and a new primary is selected
    7. Add back the node from step 1
    8. Add back the node from step 4
    9. Check that the count of instances is f+1 = 3
    10. Send some requests and check that the pool works.
    """
    restNodes = set(txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    assert txnPoolNodeSet[0].master_replica.isPrimary
    node_after_all_primary = txnPoolNodeSet[3]
    # Disconnect node after all primaries (after all backup primaries)
    disconnect_node_and_ensure_disconnected(looper,
                                            restNodes,
                                            node_after_all_primary,
                                            stopNode=False)
    # -------------------------------------------------------
    restNodes.remove(node_after_all_primary)
    looper.run(
        eventually(partial(check_count_connected_node, restNodes, 6),
                   timeout=5,
                   acceptableExceptions=[AssertionError]))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    # Get the current primary node
    primary_node = txnPoolNodeSet[0]
    assert primary_node.master_replica.isPrimary
    old_view_no = checkViewNoForNodes(restNodes, 0)
    # disconnect primary node
    disconnect_node_and_ensure_disconnected(looper,
                                            restNodes,
                                            primary_node,
                                            stopNode=False)
    # -------------------------------------------------------
    restNodes.remove(primary_node)
    looper.run(
        eventually(partial(check_count_connected_node, restNodes, 5),
                   timeout=5,
                   acceptableExceptions=[AssertionError]))
    looper.run(
        eventually(partial(checkViewNoForNodes,
                           restNodes,
                           expectedViewNo=old_view_no + 1),
                   timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    logger.debug("restNodes: {}".format(restNodes))
    restNodes.add(node_after_all_primary)
    # Return back node after all primary
    reconnect_node_and_ensure_connected(looper, restNodes,
                                        node_after_all_primary)
    looper.run(
        checkNodesConnected(restNodes,
                            customTimeout=5 * tconf.RETRY_TIMEOUT_RESTRICTED))
    looper.run(
        eventually(partial(check_count_connected_node, restNodes, 6),
                   timeout=5,
                   acceptableExceptions=[AssertionError]))
    assert len(set([len(n.replicas) for n in restNodes])) == 1
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    # Return back primary node
    restNodes.add(primary_node)
    reconnect_node_and_ensure_connected(looper, restNodes, primary_node)
    looper.run(
        checkNodesConnected(restNodes,
                            customTimeout=5 * tconf.RETRY_TIMEOUT_RESTRICTED))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle,
                              sdk_wallet_steward, 5)
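# The "count of instances must be 3" precondition above is the protocol
# instance formula: a pool runs f+1 replica instances per node (one master
# plus f backups). For the 7-node pool in this test:
n = 7
f = (n - 1) // 3
instances = f + 1
assert f == 2 and instances == 3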
def test_node_requests_missing_preprepares_prepares_and_commits(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    1 of 4 nodes goes down. A new request comes in and is ordered by
    the 3 remaining nodes. After a while the previously disconnected node
    comes back alive. Another request comes in. Check that the previously
    disconnected node requests the missing PREPREPARES, PREPARES and COMMITS,
    orders the previous request, and that all the nodes successfully handle
    the last request.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_node = txnPoolNodeSet[3]
    alive_nodes = txnPoolNodeSet[:3]

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            disconnected_node, stopNode=False)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              MISSING_REQS_CNT)

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          [disconnected_node],
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, disconnected_node)
    # Give time for the reconnected node to catch up if it is going to do it
    looper.runFor(waits.expectedPoolConsistencyProof(len(txnPoolNodeSet)) +
                  waits.expectedPoolCatchupTime(len(txnPoolNodeSet)))

    for node in alive_nodes:
        assert node.domainLedger.size == init_ledger_size + MISSING_REQS_CNT
    # Ensure that the reconnected node has not caught up though
    assert disconnected_node.domainLedger.size == init_ledger_size

    assert disconnected_node.master_replica.spylog.count(Replica._request_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_commit) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_pre_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_prepare) == 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_commit) == 0
    doOrderTimesBefore = disconnected_node.master_replica.spylog.count(Replica.doOrder)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_node, *alive_nodes)

    assert disconnected_node.master_replica.spylog.count(Replica._request_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica._request_commit) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_pre_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_prepare) > 0
    assert disconnected_node.master_replica.spylog.count(Replica.process_requested_commit) > 0
    doOrderTimesAfter = disconnected_node.master_replica.spylog.count(Replica.doOrder)
    # Ensure that the reconnected node has ordered both the missed 3PC-batch and the new 3PC-batch
    assert doOrderTimesAfter - doOrderTimesBefore == 2

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
def test_node_requests_missing_preprepares_and_prepares_after_long_disconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle,
        tconf, tdirWithPoolTxns, allPluginsPath):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming requests.
    A new request comes in.
    The test then waits for some time to ensure that the PrePrepare was
    created long enough ago to be dropped by the time checker.
    Two stopped nodes come back alive.
    Another request comes in.
    Check that previously disconnected two nodes request missing PREPREPARES
    and PREPARES and the pool successfully handles both transactions.
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    alive_nodes = []
    disconnected_nodes = []

    for node in txnPoolNodeSet:
        if node.hasPrimary:
            alive_nodes.append(node)
        else:
            disconnected_nodes.append(node)

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    current_node_set = set(txnPoolNodeSet)
    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                current_node_set,
                                                node,
                                                stopNode=False)
        current_node_set.remove(node)

    sdk_send_random_requests(looper,
                             sdk_pool_handle,
                             sdk_wallet_client,
                             MISSING_REQS_CNT)

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          disconnected_nodes,
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    preprepare_deviation = 4
    tconf.ACCEPTABLE_DEVIATION_PREPREPARE_SECS = preprepare_deviation
    time.sleep(preprepare_deviation * 2)

    for node in disconnected_nodes:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == init_ledger_size

    for node in disconnected_nodes:
        assert node.master_replica.spylog.count(Replica._request_pre_prepare) == 0
        assert node.master_replica.spylog.count(Replica._request_prepare) == 0
        assert node.master_replica.spylog.count(Replica.process_requested_pre_prepare) == 0
        assert node.master_replica.spylog.count(Replica.process_requested_prepare) == 0

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)

    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet)

    for node in disconnected_nodes:
        assert node.master_replica.spylog.count(Replica._request_pre_prepare) > 0
        assert node.master_replica.spylog.count(Replica._request_prepare) > 0
        assert node.master_replica.spylog.count(Replica.process_requested_pre_prepare) > 0
        assert node.master_replica.spylog.count(Replica.process_requested_prepare) > 0

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
def test_reconnect_primary_and_not_primary(looper,
                                        txnPoolNodeSet,
                                        sdk_wallet_steward,
                                        sdk_pool_handle,
                                        tconf):
    """
    Test steps:
    Pool of 7 nodes.
    count of instances must be 3
    1. Choose node, that is not primary on all replicas (3 index)
    2. Disconnect them
    3. Ensure, that number of replicas was decreased
    4. Choose current primary node (must be 0)
    5. Disconnect primary
    6. Ensure, that view change complete and primary was selected
    7. Add node back from 1 step
    8. Add node back from 4 step
    9. Check, that count of instance (f+1 = 3)
    10. Send some requests and check, that pool works.
    """
    restNodes = set(txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_steward, 5)
    assert txnPoolNodeSet[0].master_replica.isPrimary
    node_after_all_primary = txnPoolNodeSet[3]
    # Disconnect node after all primaries (after all backup primaries)
    disconnect_node_and_ensure_disconnected(looper,
                                            restNodes,
                                            node_after_all_primary,
                                            stopNode=False)
    # -------------------------------------------------------
    restNodes.remove(node_after_all_primary)
    looper.run(eventually(partial(check_count_connected_node, restNodes, 6),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle, sdk_wallet_steward, 5)
    # Get primary node for backup replica
    primary_node = txnPoolNodeSet[0]
    assert primary_node.master_replica.isPrimary
    old_view_no = checkViewNoForNodes(restNodes, 0)
    # disconnect primary node
    disconnect_node_and_ensure_disconnected(looper,
                                            restNodes,
                                            primary_node,
                                            stopNode=False)
    # -------------------------------------------------------
    restNodes.remove(primary_node)
    looper.run(eventually(partial(check_count_connected_node, restNodes, 5),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    looper.run(eventually(partial(checkViewNoForNodes, restNodes, expectedViewNo=old_view_no + 1),
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle, sdk_wallet_steward, 5)
    logger.debug("restNodes: {}".format(restNodes))
    restNodes.add(node_after_all_primary)
    # Return back node after all primary
    reconnect_node_and_ensure_connected(looper, restNodes, node_after_all_primary)
    looper.run(checkNodesConnected(restNodes,
                                   customTimeout=5*tconf.RETRY_TIMEOUT_RESTRICTED))
    looper.run(eventually(partial(check_count_connected_node, restNodes, 6),
                          timeout=5,
                          acceptableExceptions=[AssertionError]))
    assert len(set([len(n.replicas) for n in restNodes])) == 1
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle, sdk_wallet_steward, 5)
    # Reconnect the primary node
    restNodes.add(primary_node)
    reconnect_node_and_ensure_connected(looper, restNodes, primary_node)
    looper.run(checkNodesConnected(restNodes,
                                   customTimeout=5*tconf.RETRY_TIMEOUT_RESTRICTED))
    sdk_send_random_and_check(looper, restNodes, sdk_pool_handle, sdk_wallet_steward, 5)
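
# A minimal standalone sketch (not part of the test above) of the
# fault-tolerance arithmetic its docstring relies on: a pool of n nodes
# tolerates f = (n - 1) // 3 faulty nodes and runs f + 1 protocol instances
# (one master replica plus f backups).
def expected_instance_count(n_nodes: int) -> int:
    f = (n_nodes - 1) // 3  # maximum number of tolerated faulty nodes
    return f + 1            # one master replica plus f backup replicas

assert expected_instance_count(7) == 3  # the 7-node pool above runs 3 instances
assert expected_instance_count(4) == 2  # a 4-node pool would run only 2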
Example no. 27
def test_disconnected_node_with_lagged_view_pulls_up_its_view_on_reconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    Verifies that a disconnected node with a lagged view accepts
    the current view from the other nodes on re-connection.
    Steps:
    1. Provoke view change to 1.
    2. Ensure that all the nodes complete view change to 1.
    3. Disconnect one node from the rest of the nodes in the pool.
    4. Provoke view change to 2.
    5. Ensure that all the nodes except for the disconnected one complete
    view change to 2 and the disconnected node remains in the view 1.
    6. Provoke view change to 3.
    7. Ensure that all the nodes except for the disconnected one complete
    view change to 3 and the disconnected node remains in the view 1.
    8. Connect the disconnected node to the rest of the nodes in the pool.
    9. Ensure that the re-connected node completes view change to 3.
    10. Ensure that all the nodes participate in consensus.
    (A minimal sketch of the view-number polling pattern used throughout
    follows this test.)
    """
    checkViewNoForNodes(txnPoolNodeSet, 0)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    lagged_node = getNonPrimaryReplicas(txnPoolNodeSet)[-1].node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagged_node,
                                            stopNode=False)
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper, other_nodes,
                        numInstances=getRequiredInstances(len(txnPoolNodeSet)))
    ensure_all_nodes_have_same_data(looper, other_nodes)
    checkViewNoForNodes(other_nodes, 2)
    checkViewNoForNodes([lagged_node], 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper, other_nodes,
                        numInstances=getRequiredInstances(len(txnPoolNodeSet)))
    ensure_all_nodes_have_same_data(looper, other_nodes)
    checkViewNoForNodes(other_nodes, 3)
    checkViewNoForNodes([lagged_node], 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, lagged_node)
    waitForViewChange(looper, [lagged_node], 3,
                      customTimeout=waits.expectedPoolElectionTimeout(
                          len(txnPoolNodeSet)))
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, 3)

    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
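
# A self-contained sketch of the polling pattern the test above leans on:
# repeatedly evaluate a condition until it holds or a timeout expires, which
# is what looper.run(eventually(...)) does for checkViewNoForNodes. The
# `view_no` attribute name is an assumption for illustration only, not the
# real node API.
import time

def wait_for_view_no(nodes, expected_view_no, timeout=30.0, retry_wait=1.0):
    deadline = time.monotonic() + timeout
    while True:
        view_nos = {getattr(n, 'view_no', None) for n in nodes}
        if view_nos == {expected_view_no}:
            return
        if time.monotonic() >= deadline:
            raise TimeoutError('nodes did not reach view {}: saw {}'
                               .format(expected_view_no, view_nos))
        time.sleep(retry_wait)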
Example no. 28
def test_disconnected_node_with_lagged_view_pulls_up_its_view_on_reconnection(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle, tconf):
    """
    Verifies that a disconnected node with a lagged view accepts
    the current view from the other nodes on re-connection.
    Steps:
    1. Provoke view change to 1.
    2. Ensure that all the nodes complete view change to 1.
    3. Disconnect one node from the rest of the nodes in the pool.
    4. Provoke view change to 2.
    5. Ensure that all the nodes except for the disconnected one complete
    view change to 2 and the disconnected node remains in the view 1.
    6. Provoke view change to 3.
    7. Ensure that all the nodes except for the disconnected one complete
    view change to 3 and the disconnected node remains in the view 1.
    8. Connect the disconnected node to the rest of the nodes in the pool.
    9. Ensure that the re-connected node completes view change to 3.
    10. Ensure that all the nodes participate in consensus.
    """
    checkViewNoForNodes(txnPoolNodeSet, 0)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    lagged_node = getNonPrimaryReplicas(txnPoolNodeSet)[-1].node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            lagged_node,
                                            stopNode=False)
    other_nodes = list(set(txnPoolNodeSet) - {lagged_node})

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper,
                        other_nodes,
                        instances_list=range(
                            getRequiredInstances(len(txnPoolNodeSet))))
    ensure_all_nodes_have_same_data(looper, other_nodes)
    checkViewNoForNodes(other_nodes, 2)
    checkViewNoForNodes([lagged_node], 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    ensure_view_change(looper, other_nodes)
    ensureElectionsDone(looper,
                        other_nodes,
                        instances_list=range(
                            getRequiredInstances(len(txnPoolNodeSet))))
    ensure_all_nodes_have_same_data(looper, other_nodes)
    checkViewNoForNodes(other_nodes, 3)
    checkViewNoForNodes([lagged_node], 1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    reconnect_node_and_ensure_connected(looper, txnPoolNodeSet, lagged_node)

    # The node can start a view change, receive NEW_VIEW and start waiting
    # for the first batch ordered in the new view. But since the node lags
    # by more than a checkpoint, it cannot re-order old batches and has to
    # wait until catchup by checkpoints is started (see the checkpoint
    # arithmetic sketch after this test).
    waitForViewChange(looper, [lagged_node],
                      3,
                      customTimeout=waits.expectedPoolElectionTimeout(
                          len(txnPoolNodeSet)))

    sdk_send_batches_of_random_and_check(looper,
                                         txnPoolNodeSet,
                                         sdk_pool_handle,
                                         sdk_wallet_client,
                                         num_reqs=2 * tconf.CHK_FREQ)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, 3)
    ensureElectionsDone(looper, txnPoolNodeSet)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
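
# A hedged sketch of the arithmetic behind num_reqs=2 * tconf.CHK_FREQ above:
# checkpoints are generated every CHK_FREQ ordered batches, so starting from
# any offset within a checkpoint interval, ordering 2 * CHK_FREQ more batches
# always completes at least one full checkpoint interval, which is what lets
# the lagged node catch up by checkpoints.
def batches_to_cross_full_checkpoint(chk_freq: int, offset_in_interval: int) -> int:
    # Batches needed to reach the next checkpoint boundary, plus one full
    # interval beyond it
    assert 0 <= offset_in_interval < chk_freq
    return (chk_freq - offset_in_interval) + chk_freq

# 2 * CHK_FREQ is an upper bound over every possible starting offset:
assert all(batches_to_cross_full_checkpoint(10, k) <= 2 * 10 for k in range(10))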
def test_node_requests_missing_preprepares_and_prepares(
        looper, txnPoolNodeSet, sdk_wallet_client, sdk_pool_handle):
    """
    2 of 4 nodes go down, so the pool cannot process any more incoming
    requests. A new request comes in. After a while those 2 nodes come back
    alive. Another request comes in. Check that the two previously
    disconnected nodes request the missing PREPREPAREs and PREPAREs and that
    the pool successfully handles both transactions afterwards (the expected
    ledger size is restated in the sketch after this test).
    """
    INIT_REQS_CNT = 5
    MISSING_REQS_CNT = 4
    REQS_AFTER_RECONNECT_CNT = 1
    disconnected_nodes = txnPoolNodeSet[2:]
    alive_nodes = txnPoolNodeSet[:2]

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              INIT_REQS_CNT)
    init_ledger_size = txnPoolNodeSet[0].domainLedger.size

    current_node_set = set(txnPoolNodeSet)
    for node in disconnected_nodes:
        disconnect_node_and_ensure_disconnected(looper, current_node_set, node, stopNode=False)
        current_node_set.remove(node)

    sdk_send_random_requests(looper, sdk_pool_handle, sdk_wallet_client, MISSING_REQS_CNT)

    looper.run(eventually(check_pp_out_of_sync,
                          alive_nodes,
                          disconnected_nodes,
                          retryWait=1,
                          timeout=expectedPoolGetReadyTimeout(len(txnPoolNodeSet))))

    for node in disconnected_nodes:
        current_node_set.add(node)
        reconnect_node_and_ensure_connected(looper, current_node_set, node)

    # With only 2 of 4 nodes connected the pool could not reach consensus,
    # so no ledger has grown yet
    for node in txnPoolNodeSet:
        assert node.domainLedger.size == init_ledger_size

    # No recovery traffic has been triggered yet
    for node in disconnected_nodes:
        assert node.master_replica.spylog.count(Replica._request_pre_prepare) == 0
        assert node.master_replica.spylog.count(Replica._request_prepare) == 0
        assert node.master_replica.spylog.count(Replica.process_requested_pre_prepare) == 0
        assert node.master_replica.spylog.count(Replica.process_requested_prepare) == 0

    sdk_send_random_and_check(looper,
                              txnPoolNodeSet,
                              sdk_pool_handle,
                              sdk_wallet_client,
                              REQS_AFTER_RECONNECT_CNT)
    waitNodeDataEquality(looper, disconnected_nodes[0], *txnPoolNodeSet[:-1])

    # The reconnected nodes requested and processed the missing PREPREPAREs
    # and PREPAREs once ordering resumed
    for node in disconnected_nodes:
        assert node.master_replica.spylog.count(Replica._request_pre_prepare) > 0
        assert node.master_replica.spylog.count(Replica._request_prepare) > 0
        assert node.master_replica.spylog.count(Replica.process_requested_pre_prepare) > 0
        assert node.master_replica.spylog.count(Replica.process_requested_prepare) > 0

    for node in txnPoolNodeSet:
        assert node.domainLedger.size == (init_ledger_size +
                                          MISSING_REQS_CNT +
                                          REQS_AFTER_RECONNECT_CNT)
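
# A small sketch restating the ledger-size bookkeeping asserted above: after
# recovery, each node's domain ledger should hold the initial transactions,
# plus those ordered while the two nodes were disconnected, plus those sent
# after reconnection. The example initial size here is illustrative only.
def expected_ledger_size(init_size: int, missing: int, after_reconnect: int) -> int:
    return init_size + missing + after_reconnect

assert expected_ledger_size(10, 4, 1) == 15  # e.g. an initial ledger of 10 txns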