def testNodeConnection(allPluginsPath, tconf, tdir, tdir_for_func,
                       tconf_for_func, looper, txnPoolNodeSetNotStarted):
    console = getConsole()
    console.reinit(flushy=True, verbosity=console.Wordage.verbose)
    nodes = txnPoolNodeSetNotStarted[:2]
    for node in nodes:
        tellKeysToOthers(node, nodes)
    A, B = nodes
    looper.add(A)
    looper.runFor(4)
    logger.debug("wait done")
    looper.add(B)
    looper.runFor(4)
    looper.run(checkNodesConnected([A, B]))
    looper.stopall()
    looper.removeProdable(A)
    looper.removeProdable(B)
    A = start_stopped_node(A, looper, tconf, tdir, allPluginsPath)
    looper.runFor(4)
    B = start_stopped_node(B, looper, tconf, tdir, allPluginsPath)
    looper.run(checkNodesConnected([A, B]))
    for node in txnPoolNodeSetNotStarted[2:]:
        looper.add(node)
    all_nodes = [A, B] + txnPoolNodeSetNotStarted[2:]
    looper.run(checkNodesConnected(all_nodes))
    stopNodes(all_nodes, looper)
    for node in all_nodes:
        looper.removeProdable(node)
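
# Every test in this module restarts nodes through ``start_stopped_node``.
# For orientation, here is a minimal sketch of what such a helper presumably
# does: recreate a node with the same name and HAs over the same data
# directory, then register it with the looper so it reconnects and catches
# up. The ``TestNode``/``PNodeConfigHelper`` usage is an assumption for
# illustration, not necessarily the framework's actual implementation.
def start_stopped_node_sketch(stopped_node, looper, tconf, tdir,
                              allPluginsPath):
    # Reuse the stopped node's identity so the rest of the pool
    # recognises it again.
    restarted_node = TestNode(
        stopped_node.name,
        config_helper=PNodeConfigHelper(stopped_node.name, tconf,
                                        chroot=tdir),
        config=tconf,
        ha=stopped_node.nodestack.ha,
        cliha=stopped_node.clientstack.ha,
        pluginPaths=allPluginsPath)
    looper.add(restarted_node)
    return restarted_node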
def test_pool_reaches_quorum_after_f_plus_2_nodes_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf, txnPoolNodeSet,
        wallet1, client1, client1Connected):
    nodes = txnPoolNodeSet
    initial_view_no = nodes[0].viewNo

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=initial_view_no + 1)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)
    checkViewNoForNodes(nodes[2:], initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)
    checkViewNoForNodes(nodes[3:], initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))

    request = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request, looper, client1, nodes)

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))
    waitForViewChange(looper, nodes[1:], expectedViewNo=initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        numInstances=getRequiredInstances(nodeCount))
    waitForViewChange(looper, nodes, expectedViewNo=initial_view_no + 1)

    request = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request])
def test_quorum_after_f_plus_2_nodes_including_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf, txnPoolNodeSet,
        wallet1, client1):
    nodes = txnPoolNodeSet

    request1 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request1])

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=1)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))

    request2 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request2])

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[2:], expectedViewNo=1)

    request3 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request3, looper, client1, nodes)

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    request4 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request4, looper, client1, nodes)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir, allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    request5 = sendRandomRequest(wallet1, client1)
    verify_request_not_replied_and_not_ordered(request5, looper, client1, nodes)

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes[1:],
                        numInstances=getRequiredInstances(nodeCount))
    checkViewNoForNodes(nodes[1:], expectedViewNo=1)

    request6 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request6])

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir, allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        numInstances=getRequiredInstances(nodeCount))
    checkViewNoForNodes(nodes, expectedViewNo=1)

    request7 = sendRandomRequest(wallet1, client1)
    waitForSufficientRepliesForRequests(looper, client1, requests=[request7])
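
# The two quorum tests above rest on standard BFT quorum arithmetic: a pool
# of n nodes tolerates f = (n - 1) // 3 faulty nodes, ordering requires
# n - f participants, and f + 1 matching replies convince a client. A toy
# sketch of that arithmetic (the helper name is hypothetical):
def bft_quorums(n):
    f = (n - 1) // 3            # max tolerated faulty nodes
    return {'f': f,
            'ordering': n - f,  # consensus / view change quorum
            'weak': f + 1}      # matching replies needed by a client

# e.g. in a 7-node pool f = 2, so stopping f + 2 = 4 nodes leaves
# 3 < n - f = 5 nodes and requests stop being ordered until enough nodes
# are started again, which is exactly what the tests observe.
assert bft_quorums(7) == {'f': 2, 'ordering': 5, 'weak': 3}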
def test_set_H_as_maxsize_for_backup_if_is_primary(looper,
                                                   txnPoolNodeSet,
                                                   sdk_pool_handle,
                                                   sdk_wallet_steward,
                                                   tconf, tdir,
                                                   allPluginsPath):
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    primary_on_backup = txnPoolNodeSet[2]
    assert primary_on_backup.replicas._replicas[1].isPrimary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_on_backup,
                                            stopNode=True)
    looper.removeProdable(primary_on_backup)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, LOG_SIZE)
    restarted_node = start_stopped_node(primary_on_backup, looper, tconf,
                                        tdir, allPluginsPath)
    txnPoolNodeSet[2] = restarted_node
    ensureElectionsDone(looper, txnPoolNodeSet,
                        customTimeout=tconf.NEW_VIEW_TIMEOUT)
    # Gamma catches up 1 txn
    assert restarted_node.replicas._replicas[1].isPrimary
    assert restarted_node.replicas._replicas[1].h == 1
    assert restarted_node.replicas._replicas[1].H == LOG_SIZE + 1
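
# The h/H assertions above follow the replica watermark invariant: a replica
# may order only ppSeqNo values with h < ppSeqNo <= H, where H = h + LOG_SIZE.
# After the restarted backup primary catches up one txn, h moves to 1 and H
# to LOG_SIZE + 1. A minimal sketch of that invariant (hypothetical class,
# illustration only):
class WatermarkSketch:
    def __init__(self, log_size, h=0):
        self.h = h               # low watermark: last stable ppSeqNo
        self.H = h + log_size    # high watermark

    def can_order(self, pp_seq_no):
        return self.h < pp_seq_no <= self.H

_wm = WatermarkSketch(log_size=300, h=1)
assert _wm.can_order(2) and not _wm.can_order(302)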
def test_fill_ts_store_after_catchup(txnPoolNodeSet, looper, sdk_pool_handle,
                                     sdk_wallet_steward, tconf, tdir,
                                     allPluginsPath):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    node_to_disconnect = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_replies = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_steward, 2)
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
    req_handler = node_to_disconnect.getDomainReqHandler()
    for reply in sdk_replies:
        key = req_handler.prepare_buy_key(reply[1]['result']['identifier'],
                                          reply[1]['result']['reqId'])
        root_hash = req_handler.ts_store.get_equal_or_prev(
            reply[1]['result']['txnTime'])
        assert root_hash
        from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                         key=key)
        assert req_handler.stateSerializer.deserialize(from_state)['amount'] == \
               reply[1]['result']['amount']
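
# ``ts_store.get_equal_or_prev`` above maps a txn time to the state root that
# was current at (or just before) that time. A minimal sketch of that lookup
# over a sorted timestamp -> root-hash mapping, assuming bisect semantics
# (hypothetical helper):
import bisect

def get_equal_or_prev_sketch(timestamps, root_hashes, ts):
    """timestamps is sorted ascending; return the root hash recorded at the
    greatest timestamp <= ts, or None if ts precedes all entries."""
    i = bisect.bisect_right(timestamps, ts)
    return root_hashes[i - 1] if i else None

assert get_equal_or_prev_sketch([10, 20, 30], ['r1', 'r2', 'r3'], 25) == 'r2'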
def test_restarted_node_complete_vc_by_current_state(looper, txnPoolNodeSet,
                                                     tconf, tdir,
                                                     allPluginsPath):
    node_to_restart = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_restart, stopNode=True)
    looper.removeProdable(node_to_restart)
    old_completed_view_no = get_last_completed_view_no(txnPoolNodeSet[:-1])
    ensure_view_change(looper, txnPoolNodeSet[:-1])
    ensureElectionsDone(looper, txnPoolNodeSet[:-1],
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    current_completed_view_no = get_last_completed_view_no(txnPoolNodeSet[:-1])
    assert current_completed_view_no > old_completed_view_no

    # Delay VIEW_CHANGE_DONE messages for all nodes
    for node in txnPoolNodeSet[:-1]:
        node.nodeIbStasher.delay(vcd_delay(1000))
    ensure_view_change(looper, txnPoolNodeSet[:-1])

    # Start the stopped node while the other nodes are in view change
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf,
                                         tdir, allPluginsPath)
    node_to_restart.nodeIbStasher.delay(vcd_delay(1000))

    # Check that the restarted node uses the pool's last completed view no
    # instead of the proposed one
    looper.run(eventually(complete_propagate_primary,
                          node_to_restart,
                          current_completed_view_no,
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
def test_number_txns_in_catchup_and_vc_queue_valid(looper, txnPoolNodeSet,
                                                   tconf, sdk_pool_handle,
                                                   sdk_wallet_steward, tdir,
                                                   allPluginsPath):
    num_txns = 5
    master_node = get_master_primary_node(txnPoolNodeSet)
    master_node_index = txnPoolNodeSet.index(master_node)
    other_nodes = txnPoolNodeSet.copy()
    other_nodes.remove(master_node)
    old_view = master_node.viewNo
    expected_view_no = old_view + 1
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            master_node, stopNode=True)
    looper.removeProdable(master_node)
    looper.run(eventually(checkViewNoForNodes, other_nodes, expected_view_no,
                          retryWait=1, timeout=tconf.NEW_VIEW_TIMEOUT))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_send_random_and_check(looper, other_nodes, sdk_pool_handle,
                              sdk_wallet_steward, num_txns)
    master_node = start_stopped_node(master_node, looper, tconf, tdir,
                                     allPluginsPath)
    txnPoolNodeSet[master_node_index] = master_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, master_node, *txnPoolNodeSet[-1:],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    latest_info = master_node._info_tool.info
    assert latest_info['Node_info']['Catchup_status']['Number_txns_in_catchup'][1] == num_txns
    assert latest_info['Node_info']['View_change_status']['View_No'] == expected_view_no
    for n in other_nodes:
        assert n._info_tool.info['Node_info']['View_change_status']['Last_complete_view_no'] == expected_view_no
def test_fill_ts_store_after_catchup(txnPoolNodeSet, looper, sdk_pool_handle,
                                     sdk_wallet_steward, tconf, tdir,
                                     allPluginsPath):
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    node_to_disconnect = txnPoolNodeSet[-1]
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_replies = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                            sdk_pool_handle,
                                            sdk_wallet_steward, 2)
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    req_handler = node_to_disconnect.read_manager.request_handlers[GET_BUY]
    for reply in sdk_replies:
        key = BuyHandler.prepare_buy_key(get_from(reply[1]['result']),
                                         get_req_id(reply[1]['result']))
        root_hash = req_handler.database_manager.ts_store.get_equal_or_prev(
            get_txn_time(reply[1]['result']))
        assert root_hash
        from_state = req_handler.state.get_for_root_hash(root_hash=root_hash,
                                                         key=key)
        assert domain_state_serializer.deserialize(from_state)['amount'] == \
               get_payload_data(reply[1]['result'])['amount']
def test_cancel_request_cp_and_ls_after_catchup(txnPoolNodeSet, looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                tconf, tdir, allPluginsPath):
    '''Test that the scheduled re-requests for ledger statuses and
    consistency proofs are cancelled after catchup.'''
    node_to_disconnect = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    # restart node
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2)
    # add node_to_disconnect back to the pool
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet,
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    # check that no re-requests for ledger statuses and consistency proofs
    # are left scheduled on the timer
    for event in node_to_disconnect.timer._events:
        name = event.callback.__name__
        assert name != '_reask_for_ledger_status'
        assert name != '_reask_for_last_consistency_proof'
def test_not_set_H_as_maxsize_for_backup_if_is_primary(looper,
                                                       txnPoolNodeSet,
                                                       sdk_pool_handle,
                                                       sdk_wallet_steward,
                                                       tconf, tdir,
                                                       allPluginsPath):
    ensure_view_change(looper, txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    primary_on_backup = txnPoolNodeSet[2]
    assert primary_on_backup.replicas._replicas[1].isPrimary
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            primary_on_backup,
                                            stopNode=True)
    looper.removeProdable(primary_on_backup)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, LOG_SIZE)
    restarted_node = start_stopped_node(primary_on_backup, looper, tconf,
                                        tdir, allPluginsPath)
    txnPoolNodeSet[2] = restarted_node
    ensureElectionsDone(looper, txnPoolNodeSet,
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    assert restarted_node.replicas._replicas[1].isPrimary
    assert restarted_node.replicas._replicas[1].h == 0
    assert restarted_node.replicas._replicas[1].H == LOG_SIZE
def test_get_last_ordered_timestamp_after_catchup(looper, txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward,
                                                  tconf, tdir,
                                                  allPluginsPath):
    node_to_disconnect = txnPoolNodeSet[-1]
    reply_before = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_steward, 1)[0][1]
    looper.runFor(2)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    reply = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_steward, 1)[0][1]
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet[:-1],
                         exclude_from_check=['check_last_ordered_3pc_backup'])
    ts_from_state = node_to_disconnect.master_replica._get_last_timestamp_from_state(
        DOMAIN_LEDGER_ID)
    assert ts_from_state == get_txn_time(reply['result'])
    assert ts_from_state != get_txn_time(reply_before['result'])
def test_vc_by_current_state(txnPoolNodeSet, looper, tdir, tconf,
                             allPluginsPath):
    node_to_stop = txnPoolNodeSet[-1]
    old_view_no = node_to_stop.view_changer.last_completed_view_no
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_stop, stopNode=True)
    looper.removeProdable(node_to_stop)
    ensure_view_change(looper, txnPoolNodeSet[:-1])
    ensureElectionsDone(looper, txnPoolNodeSet[:-1],
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    new_view_no = txnPoolNodeSet[0].view_changer.last_completed_view_no
    assert new_view_no > old_view_no
    node_to_stop = start_stopped_node(node_to_stop, looper, tconf, tdir,
                                      allPluginsPath)
    txnPoolNodeSet[-1] = node_to_stop
    ensureElectionsDone(looper, txnPoolNodeSet,
                        customTimeout=tconf.VIEW_CHANGE_TIMEOUT)
    assert node_to_stop.view_changer.last_completed_view_no == new_view_no
def test_current_state_propagation(newNodeCaughtUp, txnPoolNodeSet,
                                   nodeSetWithNodeAddedAfterSomeTxns,
                                   tconf, tdir, allPluginsPath):
    """
    Checks that nodes send CurrentState to lagged nodes.
    """
    # 1. Start pool
    looper, new_node, client, wallet, _, _ = nodeSetWithNodeAddedAfterSomeTxns
    # 2. Stop one node
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            new_node, stopNode=True)
    looper.removeProdable(new_node)
    # 3. Start it again
    restarted_node = start_stopped_node(new_node, looper, tconf, tdir,
                                        allPluginsPath)
    txnPoolNodeSet[-1] = restarted_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    looper.runFor(5)
    # 4. Check that all nodes sent CurrentState
    for node in txnPoolNodeSet[:-1]:
        sent_times = node.spylog.count(
            node.send_current_state_to_lagging_node.__name__)
        assert sent_times != 0, "{} hasn't sent CurrentState".format(node)
    looper.runFor(5)
    # 5. Check that it received CurrentState messages
    received_times = restarted_node.spylog.count(
        restarted_node.process_current_state_message.__name__)
    assert received_times != 0
def test_get_last_ordered_timestamp_after_catchup(looper, txnPoolNodeSet,
                                                  sdk_pool_handle,
                                                  sdk_wallet_steward,
                                                  tconf, tdir,
                                                  allPluginsPath):
    node_to_disconnect = txnPoolNodeSet[-1]
    reply_before = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                             sdk_pool_handle,
                                             sdk_wallet_steward, 1)[0][1]
    looper.runFor(2)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    reply = sdk_send_random_and_check(looper, txnPoolNodeSet,
                                      sdk_pool_handle,
                                      sdk_wallet_steward, 1)[0][1]
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet[:-1])
    ts_from_state = node_to_disconnect.master_replica._get_last_timestamp_from_state(
        DOMAIN_LEDGER_ID)
    assert ts_from_state == get_txn_time(reply['result'])
    assert ts_from_state != get_txn_time(reply_before['result'])
def test_preprepares_and_prepares_recovery_after_catchup(
        tdir, tconf, looper, testNodeClass, txnPoolNodeSet,
        sdk_pool_handle, sdk_wallet_client, allPluginsPath, chkFreqPatched):
    """
    Test that all preprepares and prepares are recovered from the audit
    ledger after reboot.
    """
    node_to_restart = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, NUM_OF_REQ)
    # Check that all of the nodes except the slow one ordered the request
    looper.run(eventually(check_last_ordered, txnPoolNodeSet, (0, NUM_OF_REQ)))
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_restart,
                                            timeout=len(txnPoolNodeSet),
                                            stopNode=True)
    looper.removeProdable(node_to_restart)
    txnPoolNodeSet.remove(node_to_restart)
    restarted_node = start_stopped_node(node_to_restart, looper, tconf, tdir,
                                        allPluginsPath)
    txnPoolNodeSet.append(restarted_node)
    looper.runFor(waits.expectedNodeStartUpTimeout())
    looper.run(checkNodesConnected(txnPoolNodeSet))
    check_prepared(txnPoolNodeSet)
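
# ``check_last_ordered`` is not defined in this section; it is assumed to
# assert that every node's master replica reached the given
# (viewNo, ppSeqNo) pair. A minimal sketch under that assumption:
def check_last_ordered_sketch(nodes, expected_3pc):
    for node in nodes:
        assert node.master_replica.last_ordered_3pc == expected_3pc, \
            '{} is at {}'.format(node, node.master_replica.last_ordered_3pc)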
def test_cancel_request_cp_and_ls_after_catchup(txnPoolNodeSet, looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                tconf, tdir, allPluginsPath):
    '''Test that the scheduled re-requests for ledger statuses and
    consistency proofs are cancelled after catchup.'''
    node_to_disconnect = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 5)
    # restart node
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2)
    # add node_to_disconnect back to the pool
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
    # check that the scheduled requests for ledger statuses and
    # consistency proofs are cancelled for every ledger
    scheduled_ls = node_to_disconnect.ledgerManager.request_ledger_status_action_ids
    scheduled_cp = node_to_disconnect.ledgerManager.request_consistency_proof_action_ids
    for ledger_id in range(0, 3):
        assert ledger_id not in scheduled_ls
        assert ledger_id not in scheduled_cp
def test_demote_backup_primary(looper, txnPoolNodeSet, sdk_pool_handle,
                               sdk_wallet_stewards, tdir, tconf,
                               allPluginsPath):
    assert len(txnPoolNodeSet) == 6
    node_to_restart = txnPoolNodeSet[-1]
    node_to_demote = steward_for_demote_node = demote_node_index = None
    for i, n in enumerate(txnPoolNodeSet):
        if n.name == txnPoolNodeSet[0].primaries[1]:
            node_to_demote = n
            steward_for_demote_node = sdk_wallet_stewards[i]
            demote_node_index = i
            break
    assert node_to_demote

    demote_node(looper, steward_for_demote_node, sdk_pool_handle,
                node_to_demote)
    del txnPoolNodeSet[demote_node_index]

    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_restart)
    looper.removeProdable(name=node_to_restart.name)
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf,
                                         tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_restart

    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_stewards[0], 1)
    ensure_all_nodes_have_same_data(looper, txnPoolNodeSet)
def testStewardSuspendsNode(looper, txnPoolNodeSet, tdir, tconf,
                            sdk_pool_handle, sdk_wallet_steward,
                            sdk_node_theta_added, poolTxnStewardData,
                            allPluginsPath):
    new_steward_wallet, new_node = sdk_node_theta_added
    demote_node(looper, new_steward_wallet, sdk_pool_handle, new_node)
    # Check suspended node does not exist in any nodeReg or remotes of
    # nodes or clients
    txnPoolNodeSet = txnPoolNodeSet[:-1]
    for node in txnPoolNodeSet:
        looper.run(eventually(checkNodeNotInNodeReg, node, new_node.name))
    # Check that a node does not connect to the suspended node
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, new_steward_wallet,
                               sdk_pool_handle)
    with pytest.raises(RemoteNotFound):
        looper.loop.run_until_complete(
            sendMessageAndCheckDelivery(txnPoolNodeSet[0], new_node))
    new_node.stop()
    looper.removeProdable(new_node)
    # Check that a node whose suspension is revoked can reconnect to other
    # nodes and clients can also connect to that node
    promote_node(looper, new_steward_wallet, sdk_pool_handle, new_node)
    nodeTheta = start_stopped_node(new_node, looper, tconf, tdir,
                                   allPluginsPath,
                                   delay_instance_change_msgs=False)
    txnPoolNodeSet.append(nodeTheta)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
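
# ``demote_node`` / ``promote_node`` above are assumed to flip the node's
# ``services`` field via a NODE txn: an empty list suspends the node, while
# [VALIDATOR] re-enables it. A hedged sketch in terms of the
# ``sdk_send_update_node`` helper used elsewhere in this suite (passing None
# for the HA fields is assumed to leave them unchanged):
def demote_node_sketch(looper, steward_wallet, sdk_pool_handle, node):
    node_nym = hexToFriendly(node.nodestack.verhex)
    sdk_send_update_node(looper, steward_wallet, sdk_pool_handle,
                         node_nym, node.name,
                         None, None,        # node HA unchanged
                         None, None,        # client HA unchanged
                         services=[])       # empty services == demotion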
def test_propagate_primary_after_primary_restart_view_0(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward,
        tdir, allPluginsPath):
    """
    Delay instance change msgs to prevent view change during primary restart
    to test propagate primary for the primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores
    all indices correctly in the case viewNo == 0
    """
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert old_ppseqno > 0
    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)
    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_primary, stopNode=True)
    looper.removeProdable(old_primary)
    logger.info("Restart node {}".format(old_primary))
    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet)
           if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode
    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert new_viewNo == old_viewNo
    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert new_primary.name == old_primary.name
    # check that ppSeqNo is the same across the pool
    _get_ppseqno(txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert new_ppseqno > old_ppseqno
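
# ``_get_ppseqno`` is not defined in this section; it is assumed to read the
# master replica's last PrePrepare sequence number from every node and to
# assert that the pool agrees on it before returning the common value.
# A minimal sketch under that assumption:
def _get_ppseqno_sketch(nodes):
    seq_nos = {node.master_replica.lastPrePrepareSeqNo for node in nodes}
    assert len(seq_nos) == 1, 'nodes diverged on ppSeqNo: {}'.format(seq_nos)
    return seq_nos.pop()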
def test_catchup_with_one_slow_node(tdir, tconf, looper, txnPoolNodeSet,
                                    sdk_pool_handle, sdk_wallet_client,
                                    allPluginsPath, logsearch):
    '''
    1. Stop the node Delta.
    2. Order 9 txns. When sending CatchupReqs in the first round, every node
       [Alpha, Beta, Gamma] will receive a request for 3 txns.
    3. Delay CatchupReq messages on Alpha.
    4. Start Delta.
    5. Check that all nodes have equal data.
    6. Check that Delta re-asks for CatchupReps only once.

    In the second round (first re-ask) Delta shouldn't request CatchupReps
    from Alpha because it didn't answer earlier. If the behavior is wrong
    and Delta re-asks for txns from all nodes, every node will receive a
    request for 1 txn, Alpha will not answer and Delta will need a new
    re-ask round.
    '''
    # Prepare nodes
    lagging_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]

    # Stop one node
    waitNodeDataEquality(looper, lagging_node, *rest_nodes)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            lagging_node, stopNode=True)
    looper.removeProdable(lagging_node)

    # Send more requests to active nodes
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, len(rest_nodes) * 3)
    waitNodeDataEquality(looper, *rest_nodes)

    # Restart stopped node and wait for successful catch up
    lagging_node = start_stopped_node(lagging_node, looper, tconf, tdir,
                                      allPluginsPath, start=False)
    log_re_ask, _ = logsearch(
        msgs=['requesting .* missing transactions after timeout'])
    old_re_ask_count = len(log_re_ask)

    # Delay CatchupReq messages on Alpha
    with delay_rules(rest_nodes[0].nodeIbStasher, cqDelay()):
        with delay_rules(lagging_node.nodeIbStasher, cs_delay()):
            looper.add(lagging_node)
            txnPoolNodeSet[-1] = lagging_node
            looper.run(checkNodesConnected(txnPoolNodeSet))
            waitNodeDataEquality(looper, *txnPoolNodeSet, customTimeout=120)

    assert len(log_re_ask) - old_re_ask_count == 2  # for audit and domain ledgers
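
# The docstring above relies on how catchup spreads the missing txns across
# the reachable providers: 9 missing txns over 3 nodes means one CatchupReq
# for 3 txns per node, and a re-ask only queries the nodes that answered.
# A toy sketch of that division (hypothetical helper):
def split_catchup_load(num_txns, providers):
    per_node, rem = divmod(num_txns, len(providers))
    # the first ``rem`` providers take one extra txn each
    return {p: per_node + (1 if i < rem else 0)
            for i, p in enumerate(providers)}

assert split_catchup_load(9, ['Alpha', 'Beta', 'Gamma']) == \
    {'Alpha': 3, 'Beta': 3, 'Gamma': 3}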
def test_recover_stop_primaries(looper, checkpoint_size, txnPoolNodeSet,
                                allPluginsPath, tdir, tconf, client1,
                                wallet1, client1Connected):
    """
    Test that we can recover after having more than f nodes disconnected:
    - stop current master primary (Alpha)
    - send txns
    - restart current master primary (Beta)
    - send txns
    """
    active_nodes = list(txnPoolNodeSet)
    assert 4 == len(active_nodes)
    initial_view_no = active_nodes[0].viewNo

    logger.info("Stop first node (current Primary)")
    _, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Make sure view changed")
    expected_view_no = initial_view_no + 1
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)
    ensureElectionsDone(looper=looper, nodes=active_nodes, numInstances=2)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("send at least one checkpoint")
    assert nodes_do_not_have_checkpoints(*active_nodes)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        numReqs=2 * checkpoint_size)
    assert nodes_have_checkpoints(*active_nodes)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Stop second node (current Primary) so the primary loses its state")
    stopped_node, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Restart the primary node")
    restarted_node = start_stopped_node(stopped_node, looper, tconf, tdir,
                                        allPluginsPath)
    assert nodes_do_not_have_checkpoints(restarted_node)
    assert nodes_have_checkpoints(*active_nodes)
    active_nodes = active_nodes + [restarted_node]

    logger.info("Check that primary selected")
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        numInstances=2, customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Check if the pool is able to process requests")
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        numReqs=10 * checkpoint_size)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)
    assert nodes_have_checkpoints(*active_nodes)
def test_selection_f_plus_one_quorum(looper, txnPoolNodeSet, allPluginsPath,
                                     tdir, tconf, sdk_pool_handle,
                                     sdk_wallet_client):
    """
    Check that the quorum f + 1 is used for primary selection when it is
    initiated by CurrentState messages.
    Assumes that the view change quorum is n - f.
    Assumes that primaries are selected in a round-robin fashion.
    """
    # Ensure that we have 4 nodes in total
    all_nodes = list(txnPoolNodeSet)
    assert 4 == len(all_nodes)
    alpha, beta, delta, gamma = all_nodes
    initial_view_no = alpha.viewNo

    # Make one node lagging by switching it off for some time
    lagging_node = gamma
    non_lagging_nodes = [alpha, beta, delta]
    disconnect_node_and_ensure_disconnected(looper, all_nodes, lagging_node,
                                            stopNode=True)
    looper.removeProdable(lagging_node)

    # Make nodes perform a view change
    ensure_view_change(looper, non_lagging_nodes)
    ensureElectionsDone(looper=looper, nodes=non_lagging_nodes,
                        instances_list=range(2))
    ensure_all_nodes_have_same_data(looper, nodes=non_lagging_nodes)

    # Stop two more of the active nodes
    # (but not the primary, which is Beta because of round-robin selection)
    stopped_nodes = [alpha]  # TODO: add one more here
    for stopped_node in stopped_nodes:
        disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                                stopped_node, stopNode=True)
        looper.removeProdable(stopped_node)

    # Start the lagging node back
    restarted_node = start_stopped_node(lagging_node, looper, tconf, tdir,
                                        allPluginsPath)
    active_nodes = [beta, delta, restarted_node]

    # Check that a primary is selected
    expected_view_no = initial_view_no + 1
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        instances_list=range(2), customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
def test_view_changes_if_master_primary_disconnected(txnPoolNodeSet, looper,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client, tdir,
                                                     tconf, allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """
    # Setup
    nodes = txnPoolNodeSet
    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_pr_node, stopNode=True)
    looper.removeProdable(old_pr_node)
    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf, tdir,
                                     allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet,
                          old_view_no + 1,
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    # After the node catches up it sets view_no from the audit ledger and
    # does not need to do a view change
    assert len(getAllReturnVals(
        old_pr_node.view_changer,
        old_pr_node.view_changer.start_view_change,
        compare_val_to=True)) == 0
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    assert not old_pr_node.view_changer._next_view_indications
def start_stop_one_node(node_to_restart, pool_of_nodes):
    """
    :param node_to_restart: node which will be restarted
    :param pool_of_nodes: current pool
    :return: new pool with the restarted node

    The node restart procedure consists of:
    1. Calling stop()
    2. Removing the node from the looper and the pool
    3. Creating a new instance of the node with the same ha, cliha and
       node_name (all paths to data, keys, etc. are exactly the same as
       for the stopped node)
    4. Adding the new instance into the looper and the pool
    5. Checking that the other nodes accepted the new instance and the
       whole pool has the same data
    """
    # NB: relies on fixtures (looper, tconf, tdir, txnPoolNodeSet, ...)
    # from the enclosing test scope
    remaining_nodes = list(set(pool_of_nodes) - {node_to_restart})
    disconnect_node_and_ensure_disconnected(looper, pool_of_nodes,
                                            node_to_restart, stopNode=True)
    looper.removeProdable(node_to_restart)
    ensure_all_nodes_have_same_data(looper, remaining_nodes,
                                    custom_timeout=tconf.NEW_VIEW_TIMEOUT)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 1)
    node_to_restart = start_stopped_node(node_to_restart, looper, tconf,
                                         tdir, allPluginsPath,
                                         delay_instance_change_msgs=True)
    pool_of_nodes = remaining_nodes + [node_to_restart]
    looper.run(checkNodesConnected(pool_of_nodes))
    ensure_all_nodes_have_same_data(
        looper, pool_of_nodes, custom_timeout=tconf.NEW_VIEW_TIMEOUT,
        exclude_from_check=['check_last_ordered_3pc_backup'])
    timeout = waits.expectedPoolCatchupTime(nodeCount=len(pool_of_nodes))
    looper.run(eventually(check_ledger_state, node_to_restart,
                          DOMAIN_LEDGER_ID, LedgerState.synced,
                          retryWait=.5, timeout=timeout))
    looper.run(eventually(check_ledger_state, node_to_restart,
                          POOL_LEDGER_ID, LedgerState.synced,
                          retryWait=.5, timeout=timeout))
    looper.run(eventually(catchuped, node_to_restart, timeout=2 * timeout))
    return pool_of_nodes
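
# A usage sketch for the helper above (assuming the enclosing test provides
# the fixtures it closes over): restart every node in turn, verifying after
# each restart that the pool still holds the same data.
def restart_pool_one_by_one_sketch(pool_of_nodes):
    for node in list(pool_of_nodes):
        pool_of_nodes = start_stop_one_node(node, pool_of_nodes)
    return pool_of_nodes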
def restart_node(node, looper, txnPoolNodeSet, tdir, tconf, allPluginsPath,
                 timeout):
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, node)
    txnPoolNodeSet.remove(node)
    looper.removeProdable(name=node.name)
    looper.runFor(timeout)
    node = start_stopped_node(node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(node)
    return node
def test_view_not_changed_when_short_disconnection(txnPoolNodeSet, looper,
                                                   sdk_pool_handle,
                                                   sdk_wallet_client, tdir,
                                                   tconf, allPluginsPath):
    """
    When the primary is disconnected, but not long enough to trigger the
    timeout, a view change should not happen
    """
    pr_node = get_master_primary_node(txnPoolNodeSet)
    view_no = checkViewNoForNodes(txnPoolNodeSet)

    prp_inst_chg_calls = {
        node.name: node.spylog.count(node.propose_view_change.__name__)
        for node in txnPoolNodeSet if node != pr_node}
    recv_inst_chg_calls = {
        node.name: node.spylog.count(
            node.view_changer.process_instance_change_msg.__name__)
        for node in txnPoolNodeSet if node != pr_node}

    # Disconnect master's primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet, pr_node,
                                            timeout=2)
    txnPoolNodeSet.remove(pr_node)
    looper.removeProdable(name=pr_node.name)
    timeout = min(tconf.ToleratePrimaryDisconnection - 1, 1)

    # Reconnect master's primary
    pr_node = start_stopped_node(pr_node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(pr_node)

    def chk2():
        # An instance change is proposed but not sent,
        # since the primary joins again
        for node in txnPoolNodeSet:
            if node != pr_node:
                assert node.spylog.count(node.propose_view_change.__name__) > \
                       prp_inst_chg_calls[node.name]
                assert node.view_changer.spylog.count(
                    node.view_changer.process_instance_change_msg.__name__) == \
                       recv_inst_chg_calls[node.name]

    looper.run(eventually(chk2, retryWait=.2, timeout=timeout + 1))
    assert checkViewNoForNodes(txnPoolNodeSet) == view_no

    # Send some requests and make sure they execute
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
def testViewChangesIfMasterPrimaryDisconnected(txnPoolNodeSet, looper,
                                               wallet1, client1,
                                               client1Connected, tconf,
                                               tdirWithPoolTxns,
                                               allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """
    # Setup
    nodes = txnPoolNodeSet
    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_pr_node, stopNode=True)
    looper.removeProdable(old_pr_node)
    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf,
                                     tdirWithPoolTxns, allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet,
                          old_view_no + 1, timeout=10))
    assert len(getAllReturnVals(
        old_pr_node,
        old_pr_node._start_view_change_if_possible,
        compare_val_to=True)) > 0
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    assert not old_pr_node._next_view_indications
def test_steward_suspends_node_and_promote_with_new_ha(
        looper, txnPoolNodeSet, tdir, tconf, sdk_pool_handle,
        sdk_wallet_steward, sdk_node_theta_added, poolTxnStewardData,
        allPluginsPath):
    new_steward_wallet, new_node = sdk_node_theta_added
    looper.run(checkNodesConnected(txnPoolNodeSet + [new_node]))
    demote_node(looper, new_steward_wallet, sdk_pool_handle, new_node)
    # Check suspended node does not exist in any nodeReg or remotes of
    # nodes or clients
    txnPoolNodeSet = txnPoolNodeSet[:-1]
    for node in txnPoolNodeSet:
        looper.run(eventually(checkNodeNotInNodeReg, node, new_node.name))
    # Check that a node does not connect to the suspended node
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, new_steward_wallet,
                               sdk_pool_handle)
    with pytest.raises(RemoteNotFound):
        looper.loop.run_until_complete(
            sendMessageAndCheckDelivery(txnPoolNodeSet[0], new_node))
    new_node.stop()
    looper.removeProdable(new_node)
    # Check that a node whose suspension is revoked can reconnect to other
    # nodes and clients can also connect to that node
    node_ha, client_ha = genHa(2)
    node_nym = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet, sdk_pool_handle,
                         node_nym, new_node.name,
                         node_ha.host, node_ha.port,
                         client_ha.host, client_ha.port,
                         services=[VALIDATOR])
    new_node.nodestack.ha = node_ha
    new_node.clientstack.ha = client_ha
    nodeTheta = start_stopped_node(new_node, looper, tconf, tdir,
                                   allPluginsPath,
                                   delay_instance_change_msgs=False)
    assert all(node.nodestack.remotes[new_node.name].ha == node_ha
               for node in txnPoolNodeSet)
    txnPoolNodeSet.append(nodeTheta)
    looper.run(checkNodesConnected(txnPoolNodeSet))
    sdk_pool_refresh(looper, sdk_pool_handle)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
def test_recover_stop_primaries_no_view_change(looper, checkpoint_size,
                                               txnPoolNodeSet, allPluginsPath,
                                               tdir, tconf, sdk_pool_handle,
                                               sdk_wallet_steward):
    """
    Test that we can recover after having more than f nodes disconnected:
    - send txns
    - stop current master primary
    - restart current master primary
    - send txns
    """
    active_nodes = list(txnPoolNodeSet)
    assert 4 == len(active_nodes)
    initial_view_no = active_nodes[0].viewNo

    logger.info("send at least one checkpoint")
    assert nodes_do_not_have_checkpoints(*active_nodes)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2 * checkpoint_size)
    assert nodes_have_checkpoints(*active_nodes)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Stop first node (current Primary)")
    stopped_node, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Restart the primary node")
    restarted_node = start_stopped_node(stopped_node, looper, tconf, tdir,
                                        allPluginsPath)
    assert nodes_do_not_have_checkpoints(restarted_node)
    assert nodes_have_checkpoints(*active_nodes)
    active_nodes = active_nodes + [restarted_node]

    logger.info("Check that primary selected")
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        instances_list=range(2), customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=0)
    ensure_all_nodes_have_same_data(
        looper, nodes=active_nodes,
        exclude_from_check=['check_last_ordered_3pc_backup'])

    logger.info("Check if the pool is able to process requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 10 * checkpoint_size)
    ensure_all_nodes_have_same_data(
        looper, nodes=active_nodes,
        exclude_from_check=['check_last_ordered_3pc_backup'])
    assert nodes_have_checkpoints(*active_nodes)
def test_propagate_primary_after_primary_restart_view_1(
        looper, txnPoolNodeSet, tconf, sdk_pool_handle, sdk_wallet_steward,
        tdir, allPluginsPath):
    """
    Delay instance change msgs to prevent view change during primary restart
    to test propagate primary for the primary node.
    ppSeqNo should be > 0 to be able to check that propagate primary restores
    all indices correctly in the case viewNo > 0
    """
    ensure_view_change(looper, txnPoolNodeSet)
    checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=1)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    old_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert old_ppseqno > 0
    old_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    old_primary = get_master_primary_node(txnPoolNodeSet)
    delay_instance_change(txnPoolNodeSet, IC_DELAY_SEC)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_primary, stopNode=True)
    looper.removeProdable(old_primary)
    logger.info("Restart node {}".format(old_primary))
    restartedNode = start_stopped_node(old_primary, looper, tconf, tdir,
                                       allPluginsPath,
                                       delay_instance_change_msgs=False)
    idx = [i for i, n in enumerate(txnPoolNodeSet)
           if n.name == restartedNode.name][0]
    txnPoolNodeSet[idx] = restartedNode
    restartedNode.nodeIbStasher.delay(icDelay(IC_DELAY_SEC))
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper=looper, nodes=txnPoolNodeSet)
    new_viewNo = checkViewNoForNodes(txnPoolNodeSet)
    assert new_viewNo == old_viewNo
    new_primary = get_master_primary_node(txnPoolNodeSet)
    assert new_primary.name == old_primary.name
    # check that ppSeqNo is the same across the pool
    _get_ppseqno(txnPoolNodeSet)
    sdk_ensure_pool_functional(looper, txnPoolNodeSet, sdk_wallet_steward,
                               sdk_pool_handle)
    new_ppseqno = _get_ppseqno(txnPoolNodeSet)
    assert new_ppseqno > old_ppseqno
def _restart_node(looper, txnPoolNodeSet, node_to_disconnect, tconf, tdir,
                  allPluginsPath):
    idx = txnPoolNodeSet.index(node_to_disconnect)
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    # add node_to_disconnect back to the pool
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper,
                                            tconf, tdir, allPluginsPath)
    txnPoolNodeSet[idx] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
def test_recover_stop_primaries(looper, checkpoint_size, txnPoolNodeSet,
                                allPluginsPath, tdir, tconf,
                                sdk_pool_handle, sdk_wallet_steward):
    """
    Test that we can recover after having more than f nodes disconnected:
    - stop current master primary (Alpha)
    - send txns
    - restart current master primary (Beta)
    - send txns
    """
    active_nodes = list(txnPoolNodeSet)
    assert 4 == len(active_nodes)
    initial_view_no = active_nodes[0].viewNo

    logger.info("Stop first node (current Primary)")
    _, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Make sure view changed")
    expected_view_no = initial_view_no + 1
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        instances_list=range(2))
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("send at least one checkpoint")
    assert nodes_do_not_have_checkpoints(*active_nodes)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 2 * checkpoint_size)
    assert nodes_have_checkpoints(*active_nodes)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Stop second node (current Primary) so the primary loses its state")
    stopped_node, active_nodes = stop_primary(looper, active_nodes)

    logger.info("Restart the primary node")
    restarted_node = start_stopped_node(stopped_node, looper, tconf, tdir,
                                        allPluginsPath)
    assert nodes_do_not_have_checkpoints(restarted_node)
    assert nodes_have_checkpoints(*active_nodes)
    active_nodes = active_nodes + [restarted_node]

    logger.info("Check that primary selected")
    ensureElectionsDone(looper=looper, nodes=active_nodes,
                        instances_list=range(2), customTimeout=30)
    waitForViewChange(looper, active_nodes, expectedViewNo=expected_view_no)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)

    logger.info("Check if the pool is able to process requests")
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_steward, 10 * checkpoint_size)
    ensure_all_nodes_have_same_data(looper, nodes=active_nodes)
    assert nodes_have_checkpoints(*active_nodes)
def testNodesComingUpAtDifferentTimes(allPluginsPath, tconf, tdir,
                                      tdir_for_func, tconf_for_func,
                                      looper, txnPoolNodeSetNotStarted):
    console = getConsole()
    console.reinit(flushy=True, verbosity=console.Wordage.verbose)
    nodes = txnPoolNodeSetNotStarted

    names = list(node.name for node in nodes)
    shuffle(names)
    waits = [randint(1, 10) for _ in names]
    rwaits = [randint(1, 10) for _ in names]

    for node in nodes:
        tellKeysToOthers(node, nodes)

    for i, node in enumerate(nodes):
        looper.add(node)
        looper.runFor(waits[i])
    looper.run(checkNodesConnected(nodes))
    logger.debug("connects")
    logger.debug("node order: {}".format(names))
    logger.debug("waits: {}".format(waits))

    current_node_set = set(nodes)
    for node in nodes:
        disconnect_node_and_ensure_disconnected(looper, current_node_set,
                                                node, timeout=len(nodes),
                                                stopNode=True)
        looper.removeProdable(node)
        current_node_set.remove(node)

    for i, node in enumerate(nodes):
        restarted_node = start_stopped_node(node, looper, tconf, tdir,
                                            allPluginsPath)
        current_node_set.add(restarted_node)
        looper.runFor(rwaits[i])
    looper.runFor(3)
    looper.run(checkNodesConnected(current_node_set))
    stopNodes(current_node_set, looper)
    logger.debug("reconnects")
    logger.debug("node order: {}".format(names))
    logger.debug("rwaits: {}".format(rwaits))
    for node in current_node_set:
        looper.removeProdable(node)
def test_old_non_primary_restart_after_view_change(new_node_in_correct_view,
                                                   looper, txnPoolNodeSet,
                                                   tdir, allPluginsPath,
                                                   tconf, sdk_pool_handle,
                                                   sdk_wallet_client):
    """
    An existing non-primary node crashes, then a view change happens, and
    the crashed node comes back up after the view change
    """
    node_to_stop = getNonPrimaryReplicas(txnPoolNodeSet, 0)[-1].node
    old_view_no = node_to_stop.viewNo

    # Stop non-primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            node_to_stop, stopNode=True)
    looper.removeProdable(node_to_stop)
    remaining_nodes = list(set(txnPoolNodeSet) - {node_to_stop})

    # Send some requests before view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)
    ensure_view_change(looper, remaining_nodes,
                       custom_timeout=tconf.VIEW_CHANGE_TIMEOUT)
    ensureElectionsDone(looper, remaining_nodes)
    # Send some requests after view change
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    restarted_node = start_stopped_node(node_to_stop, looper, tconf, tdir,
                                        allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [restarted_node]
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet,
                          old_view_no + 1, timeout=30))
    assert len(getAllReturnVals(
        restarted_node.view_changer,
        restarted_node.view_changer._start_view_change_if_possible,
        compare_val_to=True)) > 0

    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    ensureElectionsDone(looper, txnPoolNodeSet)
    assert not restarted_node.view_changer._next_view_indications
def test_freshness_after_catchup(looper, txnPoolNodeSet, sdk_pool_handle,
                                 sdk_wallet_client, sdk_wallet_steward,
                                 tconf, tdir, allPluginsPath):
    """
    A node is restarted and restores the last ordered state via freshness
    updates.
    """
    view_no = txnPoolNodeSet[0].viewNo
    restarted_node = txnPoolNodeSet[-1]
    rest_nodes = txnPoolNodeSet[:-1]

    # Stop Delta
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            restarted_node, stopNode=True)
    looper.removeProdable(restarted_node)

    # Wait for the first freshness update
    looper.run(eventually(check_freshness_updated_for_all, rest_nodes,
                          timeout=FRESHNESS_TIMEOUT + 5))

    # Wait for the second freshness update
    bls_multi_sigs_after_first_update = get_all_multi_sig_values_for_all_nodes(
        rest_nodes)
    looper.run(eventually(check_updated_bls_multi_sig_for_all_ledgers,
                          rest_nodes, bls_multi_sigs_after_first_update,
                          FRESHNESS_TIMEOUT,
                          timeout=FRESHNESS_TIMEOUT + 5))

    # Restart Delta and wait for successful catch up
    restarted_node = start_stopped_node(restarted_node, looper, tconf, tdir,
                                        allPluginsPath, start=True)
    txnPoolNodeSet[-1] = restarted_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, *txnPoolNodeSet)

    assert all(n.viewNo == view_no for n in txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)
    waitNodeDataEquality(looper, *txnPoolNodeSet)
def test_view_changes_if_master_primary_disconnected(txnPoolNodeSet, looper,
                                                     sdk_pool_handle,
                                                     sdk_wallet_client, tdir,
                                                     tconf, allPluginsPath):
    """
    View change occurs when master's primary is disconnected
    """
    # Setup
    nodes = txnPoolNodeSet
    old_view_no = checkViewNoForNodes(nodes)
    old_pr_node = get_master_primary_node(nodes)

    # Stop primary
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            old_pr_node, stopNode=True)
    looper.removeProdable(old_pr_node)
    remaining_nodes = list(set(nodes) - {old_pr_node})
    # Sometimes it takes time for nodes to detect disconnection
    ensure_node_disconnected(looper, old_pr_node, remaining_nodes, timeout=20)
    looper.runFor(tconf.ToleratePrimaryDisconnection + 2)

    # Give some time to detect disconnection and then verify that view has
    # changed and new primary has been elected
    waitForViewChange(looper, remaining_nodes, old_view_no + 1)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)
    new_pr_node = get_master_primary_node(remaining_nodes)
    assert old_pr_node != new_pr_node

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 5)

    # Check if old primary can join the pool and still functions
    old_pr_node = start_stopped_node(old_pr_node, looper, tconf, tdir,
                                     allPluginsPath)
    txnPoolNodeSet = remaining_nodes + [old_pr_node]
    looper.run(eventually(checkViewNoForNodes, txnPoolNodeSet,
                          old_view_no + 1,
                          timeout=tconf.VIEW_CHANGE_TIMEOUT))
    assert len(getAllReturnVals(
        old_pr_node.view_changer,
        old_pr_node.view_changer._start_view_change_if_possible,
        compare_val_to=True)) > 0
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    assert not old_pr_node.view_changer._next_view_indications
def test_cancel_request_cp_and_ls_after_catchup(txnPoolNodeSet,
                                                looper,
                                                sdk_pool_handle,
                                                sdk_wallet_steward,
                                                tconf,
                                                tdir,
                                                allPluginsPath):
    '''Test that scheduled requests for ledger statuses and consistency
    proofs are cancelled after catchup.'''
    node_to_disconnect = txnPoolNodeSet[-1]
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 5)
    # restart node
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node_to_disconnect)
    looper.removeProdable(name=node_to_disconnect.name)
    sdk_send_random_and_check(looper, txnPoolNodeSet,
                              sdk_pool_handle, sdk_wallet_steward, 2)
    # add node_to_disconnect back to the pool
    node_to_disconnect = start_stopped_node(node_to_disconnect, looper, tconf,
                                            tdir, allPluginsPath)
    txnPoolNodeSet[-1] = node_to_disconnect
    looper.run(checkNodesConnected(txnPoolNodeSet))
    waitNodeDataEquality(looper, node_to_disconnect, *txnPoolNodeSet)
    # check that the scheduled requests for ledger statuses and consistency
    # proofs have been cancelled
    assert len(node_to_disconnect.ledgerManager.request_ledger_status_action_ids) == 0
    for action, aids in node_to_disconnect.ledgerManager.scheduled.items():
        if getCallableName(action) == 'reask_for_ledger_status':
            assert len(aids) == 0
    assert len(node_to_disconnect.ledgerManager.request_consistency_proof_action_ids) == 0
    for action, aids in node_to_disconnect.ledgerManager.scheduled.items():
        if getCallableName(action) == 'reask_for_last_consistency_proof':
            assert len(aids) == 0

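# The assertions above inspect the LedgerManager's bookkeeping: every
# scheduled re-ask is tracked by an action id, and cancelling the schedule
# must leave the id collections empty. The toy model below (hypothetical
# names, not the plenum classes) makes that invariant explicit:
class ToyScheduler:
    def __init__(self):
        self.scheduled = {}  # action name -> set of pending action ids

    def schedule(self, action, aid):
        self.scheduled.setdefault(action, set()).add(aid)

    def cancel_all(self, action):
        # Cancelling must clear the pending ids -- exactly what the test
        # checks for 'reask_for_ledger_status' and
        # 'reask_for_last_consistency_proof' after catchup completes.
        self.scheduled.get(action, set()).clear()


_s = ToyScheduler()
_s.schedule('reask_for_ledger_status', 1)
_s.cancel_all('reask_for_ledger_status')
assert len(_s.scheduled['reask_for_ledger_status']) == 0
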
def test_current_state_propagation(sdk_new_node_caught_up,
                                   txnPoolNodeSet,
                                   sdk_node_set_with_node_added_after_some_txns,
                                   tconf, tdir, allPluginsPath):
    """
    Checks that nodes send CurrentState to lagged nodes.
    """
    # 1. Start pool
    looper, new_node, _, _ = sdk_node_set_with_node_added_after_some_txns
    # 2. Stop one node
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            new_node, stopNode=True)
    looper.removeProdable(new_node)
    # 3. Start it again
    restarted_node = start_stopped_node(new_node, looper, tconf,
                                        tdir, allPluginsPath)
    txnPoolNodeSet[-1] = restarted_node
    looper.run(checkNodesConnected(txnPoolNodeSet))
    looper.runFor(5)
    # 4. Check that all nodes sent CurrentState
    for node in txnPoolNodeSet[:-1]:
        sent_times = node.spylog.count(
            node.send_current_state_to_lagging_node.__name__)
        assert sent_times != 0, "{} hasn't sent CurrentState".format(node)
    looper.runFor(5)
    # 5. Check that the restarted node received CurrentState messages
    received_times = restarted_node.spylog.count(
        restarted_node.process_current_state_message.__name__)
    assert received_times != 0

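# The spylog assertions above count how many times an instrumented method was
# invoked (plenum generates this instrumentation with a spyable decorator in
# its test utilities). The minimal stand-in below illustrates the idea; it is
# a sketch, not the plenum implementation:
from collections import Counter
from functools import wraps


def counting(counter: Counter):
    # Wrap a function so every call increments a per-name counter.
    def decorate(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            counter[fn.__name__] += 1
            return fn(*args, **kwargs)
        return wrapper
    return decorate


_calls = Counter()


@counting(_calls)
def send_current_state_to_lagging_node():
    pass


send_current_state_to_lagging_node()
assert _calls['send_current_state_to_lagging_node'] == 1
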
def test_quorum_after_f_plus_2_nodes_but_not_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client):
    nodes = txnPoolNodeSet

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stop_node(nodes[4], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:4], expectedViewNo=0)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stop_node(nodes[3], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:3], expectedViewNo=0)

    sdk_reqs3 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs3)
        sdk_check_reply(req_res[0])

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:2], expectedViewNo=0)

    sdk_reqs4 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs4)
        sdk_check_reply(req_res[0])

    nodes[4] = start_stopped_node(nodes[4], looper, tconf, tdir,
                                  allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[:2] + nodes[4:], expectedViewNo=0)

    sdk_reqs5 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs5)
        sdk_check_reply(req_res[0])

    nodes[3] = start_stopped_node(nodes[3], looper, tconf, tdir,
                                  allPluginsPath)
    ensureElectionsDone(looper, nodes[:2] + nodes[3:],
                        instances_list=range(getRequiredInstances(nodeCount)))
    checkViewNoForNodes(nodes[:2] + nodes[3:], expectedViewNo=0)

    sdk_reqs6 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    sdk_get_replies(looper, sdk_reqs6)

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir,
                                  allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        instances_list=range(getRequiredInstances(nodeCount)))
    checkViewNoForNodes(nodes, expectedViewNo=0)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

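# The "f plus 2" tests rely on the standard BFT quorum arithmetic: with n pool
# nodes, up to f = (n - 1) // 3 faulty nodes are tolerated, and ordering a
# request requires agreement from n - f nodes. Assuming nodeCount = 5 here
# (consistent with stopping nodes[4], nodes[3], nodes[2] above), f = 1, so
# requests still succeed with one node down but time out once f + 2 = 3 nodes
# are off. The helpers below are an illustrative sketch with hypothetical
# names, not plenum code:
def max_faulty(n: int) -> int:
    # Largest f such that n >= 3f + 1
    return (n - 1) // 3


def ordering_quorum(n: int) -> int:
    # Nodes whose agreement is needed to order (commit) a request
    return n - max_faulty(n)


assert max_faulty(5) == 1 and ordering_quorum(5) == 4   # 4 up: works
assert 3 < ordering_quorum(5)                           # 3 up: times out
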
def test_node_erases_last_sent_pp_key_on_pool_restart(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched):

    # Get a node with a backup primary replica
    replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id)
    node = replica.node

    # Send some 3PC-batches and wait until the replica orders them
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=7, num_batches=7,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 7)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Check the view no of the node and the lastPrePrepareSeqNo of the replica
    assert node.viewNo == 0
    assert replica.lastPrePrepareSeqNo == 7
    assert replica.h == 6
    assert replica.H == 6 + LOG_SIZE

    # Ensure that there is a stored last sent PrePrepare key on the node
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB

    # Restart all the nodes in the pool and wait until primary elections
    # are done
    all_nodes = copy(txnPoolNodeSet)
    for n in all_nodes:
        disconnect_node_and_ensure_disconnected(looper,
                                                txnPoolNodeSet,
                                                n.name,
                                                timeout=nodeCount,
                                                stopNode=True)
        looper.removeProdable(n)
        txnPoolNodeSet.remove(n)
    for n in all_nodes:
        txnPoolNodeSet.append(start_stopped_node(n, looper, tconf, tdir,
                                                 allPluginsPath))
    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet)

    node = nodeByName(txnPoolNodeSet, node.name)
    replica = node.replicas[backup_inst_id]

    # Verify that the node has erased the stored last sent PrePrepare key
    assert LAST_SENT_PRE_PREPARE not in node.nodeStatusDB

    # Verify correspondingly that after the pool restart the replica
    # (which must again be the primary in its instance) has not restored
    # lastPrePrepareSeqNo, has not adjusted last_ordered_3pc and has not
    # shifted the watermarks
    assert node.viewNo == 0
    assert replica.isPrimary
    assert replica.lastPrePrepareSeqNo == 0
    assert replica.last_ordered_3pc == (0, 0)
    assert replica.h == 0
    assert replica.H == 0 + LOG_SIZE

    # Send a 3PC-batch and ensure that the replica orders it
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=1, num_batches=1,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (0, 1)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

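# The LAST_SENT_PRE_PREPARE bookkeeping above is a key in the node's status
# key-value store: it is present while the backup primary has sent a
# PrePrepare, and a full pool restart must remove it instead of restoring it.
# A toy key-value model of that erase-on-restart rule (key name and layout
# are illustrative, not the plenum serialization):
_status_db = {'LastSentPrePrepare': {'inst_id': 1, 'view_no': 0, 'pp_seq_no': 7}}


def on_pool_restart(db: dict):
    # A pool-wide restart invalidates the stored key, so it is deleted
    # rather than used to restore lastPrePrepareSeqNo.
    db.pop('LastSentPrePrepare', None)


on_pool_restart(_status_db)
assert 'LastSentPrePrepare' not in _status_db
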
def test_primary_selection_after_demoted_node_promotion(
        looper, txnPoolNodeSet, sdk_node_theta_added,
        sdk_pool_handle, tconf, tdir, allPluginsPath):
    """
    Demote a non-primary node and promote it again.
    Restart one node to get the following difference with the others:
    - not restarted: the node registry and related pool parameters are kept
      in memory in a state which is expected to match the pool ledger
    - restarted: the node registry and pool parameters are loaded from the
      pool ledger at startup
    Do several view changes and check that all nodes choose the previously
    demoted / promoted node as a primary for some instance.
    """
    new_steward_wallet, new_node = sdk_node_theta_added

    # viewNo0 = checkViewNoForNodes(txnPoolNodeSet)
    check_all_nodes_the_same_pool_list(txnPoolNodeSet)

    logger.info("1. Demote node Theta")
    node_dest = hexToFriendly(new_node.nodestack.verhex)
    sdk_send_update_node(looper, new_steward_wallet, sdk_pool_handle,
                         node_dest, new_node.name,
                         None, None,
                         None, None,
                         [])
    remainingNodes = list(set(txnPoolNodeSet) - {new_node})
    check_all_nodes_the_same_pool_list(remainingNodes)
    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet, 3)
    # TODO: a view change might happen unexpectedly for an unknown reason
    # checkViewNoForNodes(remainingNodes, expectedViewNo=viewNo0)

    logger.info("2. Promote node Theta back")
    sdk_send_update_node(looper, new_steward_wallet, sdk_pool_handle,
                         node_dest, new_node.name,
                         None, None,
                         None, None,
                         [VALIDATOR])
    check_all_nodes_the_same_pool_list(txnPoolNodeSet)
    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet, 3)
    # checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=viewNo0)

    logger.info("3. Restart one node")
    stopped_node = txnPoolNodeSet[0]
    disconnect_node_and_ensure_disconnected(looper, txnPoolNodeSet,
                                            stopped_node, stopNode=True)
    looper.removeProdable(stopped_node)
    remainingNodes = list(set(txnPoolNodeSet) - {stopped_node})
    ensureElectionsDone(looper, remainingNodes)
    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet, 3)
    # checkViewNoForNodes(remainingNodes, expectedViewNo=viewNo0)

    # start the node
    restartedNode = start_stopped_node(stopped_node, looper, tconf,
                                       tdir, allPluginsPath)
    txnPoolNodeSet = remainingNodes + [restartedNode]
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    # ensure pool is working properly
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              new_steward_wallet, 3)
    # checkViewNoForNodes(txnPoolNodeSet, expectedViewNo=viewNo0)

    logger.info("4. Do view changes to check that nodeTheta will be chosen "
                "as a primary for some instance by all nodes after some rounds")
    while txnPoolNodeSet[0].viewNo < 4:
        ensure_view_change_complete(looper, txnPoolNodeSet)
        # ensure pool is working properly
        sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                                  new_steward_wallet, 3)

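# Step 4 above works because plenum rotates primaries round-robin over the
# node-registry order: in view v, instance i is led (roughly) by the node of
# rank (v + i) % n. Driving viewNo past 4 therefore guarantees that every
# node of a 5-node pool, including the re-promoted Theta, leads some
# instance. The sketch below is a simplified model of that selection, not
# the PrimarySelector implementation:
def primary_name(view_no: int, inst_id: int, node_names):
    return node_names[(view_no + inst_id) % len(node_names)]


_names = ['Alpha', 'Beta', 'Gamma', 'Delta', 'Theta']
assert primary_name(4, 0, _names) == 'Theta'  # Theta leads the master in view 4
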
def test_backup_primary_restores_pp_seq_no_if_view_is_same(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client,
        tconf, tdir, allPluginsPath, chkFreqPatched, view_no):
    # Get a node with a backup primary replica
    replica = getPrimaryReplica(txnPoolNodeSet, instId=backup_inst_id)
    node = replica.node

    # Send some 3PC-batches and wait until the replica orders them
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=7, num_batches=7,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (view_no, 7)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

    # Check the view no of the node and the lastPrePrepareSeqNo of the replica
    assert node.viewNo == view_no
    assert replica.lastPrePrepareSeqNo == 7

    # Ensure that the node has stored the last sent PrePrepare key
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB
    last_sent_pre_prepare_key = \
        PrePrepareKey(**node_status_db_serializer.deserialize(
            node.nodeStatusDB.get(LAST_SENT_PRE_PREPARE)))
    assert last_sent_pre_prepare_key == PrePrepareKey(inst_id=backup_inst_id,
                                                      view_no=view_no,
                                                      pp_seq_no=7)

    # Restart the node containing the replica
    disconnect_node_and_ensure_disconnected(looper,
                                            txnPoolNodeSet,
                                            node.name,
                                            stopNode=True)
    looper.removeProdable(node)
    txnPoolNodeSet.remove(node)

    node = start_stopped_node(node, looper, tconf, tdir, allPluginsPath)
    txnPoolNodeSet.append(node)

    looper.run(checkNodesConnected(txnPoolNodeSet))
    ensureElectionsDone(looper, txnPoolNodeSet)

    replica = node.replicas[backup_inst_id]

    # Verify that after the successful propagate primary procedure the replica
    # (which must still be the primary in its instance) has restored
    # lastPrePrepareSeqNo, adjusted last_ordered_3pc and shifted
    # the watermarks correspondingly
    assert node.viewNo == view_no
    assert replica.isPrimary
    assert replica.lastPrePrepareSeqNo == 7
    assert replica.last_ordered_3pc == (view_no, 7)
    assert replica.h == 7
    assert replica.H == 7 + LOG_SIZE

    # Verify also that the stored last sent PrePrepare key has not been erased
    assert LAST_SENT_PRE_PREPARE in node.nodeStatusDB

    # Send a 3PC-batch and ensure that the replica orders it
    sdk_send_batches_of_random(looper, txnPoolNodeSet,
                               sdk_pool_handle, sdk_wallet_client,
                               num_reqs=1, num_batches=1,
                               timeout=tconf.Max3PCBatchWait)
    looper.run(
        eventually(lambda: assertExp(replica.last_ordered_3pc == (view_no, 8)),
                   retryWait=1,
                   timeout=waits.expectedTransactionExecutionTime(nodeCount)))

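# Both PrePrepare-key tests above check the replica watermarks [h, H], where
# H = h + LOG_SIZE bounds how far past the low watermark a replica may order.
# After a restart, restoring lastPrePrepareSeqNo moves h up to the restored
# sequence number, while an erased key leaves h at 0. A small sketch of that
# adjustment (names and the example log size are illustrative; plenum takes
# LOG_SIZE from its config):
LOG_SIZE_EXAMPLE = 300


def adjusted_watermarks(restored_pp_seq_no: int, log_size: int):
    # Low watermark follows the restored sequence number; high watermark
    # trails it by log_size.
    h = restored_pp_seq_no
    return h, h + log_size


assert adjusted_watermarks(7, LOG_SIZE_EXAMPLE) == (7, 307)  # key restored
assert adjusted_watermarks(0, LOG_SIZE_EXAMPLE) == (0, 300)  # key erased
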
def test_view_change_after_back_to_quorum_with_disconnected_primary(
        txnPoolNodeSet, looper, sdk_pool_handle, sdk_wallet_client,
        tdir, tconf, allPluginsPath):
    assert len(txnPoolNodeSet) == 4

    pr_node = get_master_primary_node(txnPoolNodeSet)
    assert pr_node.name == "Alpha"

    # 1. Initiate a view change by a primary (Alpha) restart
    nodes = ensure_view_change_by_primary_restart(looper,
                                                  txnPoolNodeSet,
                                                  tconf,
                                                  tdir,
                                                  allPluginsPath,
                                                  customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)

    # Now the primary should be Beta
    pr_node = get_master_primary_node(nodes)
    assert pr_node.name == "Beta"

    # 2. Stop the non-primary node Delta; no view changes are expected
    non_primary_to_stop = [n for n in nodes if n.name == "Delta"][0]
    disconnect_node_and_ensure_disconnected(
        looper, txnPoolNodeSet, non_primary_to_stop)
    looper.removeProdable(non_primary_to_stop)

    remaining_nodes = list(set(nodes) - {non_primary_to_stop})
    # The primary is going to be stopped; remember the instance change
    # message counts to ensure that no view change happens while the number
    # of connected nodes is less than a quorum.
    ic_cnt = {}
    for n in remaining_nodes:
        ic_cnt[n.name] = n.view_changer.spylog.count(
            ViewChanger.sendInstanceChange.__name__)

    # 3. Disconnect the primary
    disconnect_node_and_ensure_disconnected(
        looper, remaining_nodes, pr_node)
    looper.removeProdable(pr_node)

    # Wait for more than the ToleratePrimaryDisconnection timeout and check
    # that no instance change messages were sent.
    looper.runFor(tconf.ToleratePrimaryDisconnection + 5)
    remaining_nodes = list(set(remaining_nodes) - {pr_node})
    for n in remaining_nodes:
        assert ic_cnt[n.name] == n.view_changer.spylog.count(
            ViewChanger.sendInstanceChange.__name__)

    view_no = checkViewNoForNodes(remaining_nodes)

    # 4. Start Delta (non-primary); now the primary (Beta) is disconnected,
    # but there is a quorum to choose a new one.
    restartedNode = start_stopped_node(non_primary_to_stop, looper, tconf,
                                       tdir, allPluginsPath,
                                       delay_instance_change_msgs=False)
    remaining_nodes = remaining_nodes + [restartedNode]

    # 5. Check that a view change happened.
    waitForViewChange(looper, remaining_nodes, expectedViewNo=(view_no + 1),
                      customTimeout=2 * tconf.VIEW_CHANGE_TIMEOUT)

    # ensure pool is working properly
    sdk_send_random_and_check(looper, remaining_nodes, sdk_pool_handle,
                              sdk_wallet_client, 3)
    ensure_all_nodes_have_same_data(looper, nodes=remaining_nodes)

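# Steps 3-5 above follow from the view-change quorum: with n = 4 nodes and
# f = (n - 1) // 3 = 1, completing a view change needs votes from n - f = 3
# nodes. While only two nodes are connected the change cannot complete, and
# restarting Delta brings the pool back to three connected nodes, so the move
# to view_no + 1 goes through. The arithmetic, as an illustrative check:
n = 4
f = (n - 1) // 3
assert n - f == 3   # votes needed for a view change
assert 2 < n - f    # two connected nodes cannot reach the quorum
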
def test_quorum_after_f_plus_2_nodes_including_primary_turned_off_and_later_on(
        looper, allPluginsPath, tdir, tconf,
        txnPoolNodeSet,
        sdk_pool_handle,
        sdk_wallet_client):
    timeout = sdk_eval_timeout(1, len(txnPoolNodeSet))
    nodes = txnPoolNodeSet

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stop_node(nodes[0], looper, nodes)
    waitForViewChange(looper, nodes[1:], expectedViewNo=1)
    ensureElectionsDone(looper, nodes[1:],
                        instances_list=range(getRequiredInstances(nodeCount)))

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    stop_node(nodes[1], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[2:], expectedViewNo=1)

    sdk_reqs3 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs3, timeout=timeout)
        sdk_check_reply(req_res[0])

    stop_node(nodes[2], looper, nodes)
    looper.runFor(tconf.ToleratePrimaryDisconnection +
                  waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    sdk_reqs4 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs4, timeout=timeout)
        sdk_check_reply(req_res[0])

    nodes[2] = start_stopped_node(nodes[2], looper, tconf, tdir,
                                  allPluginsPath)
    looper.runFor(waits.expectedPoolElectionTimeout(len(nodes)))
    checkViewNoForNodes(nodes[3:], expectedViewNo=1)

    sdk_reqs5 = sdk_send_random_requests(looper, sdk_pool_handle,
                                         sdk_wallet_client, 1)
    with pytest.raises(PoolLedgerTimeoutException):
        req_res = sdk_get_replies(looper, sdk_reqs5, timeout=timeout)
        sdk_check_reply(req_res[0])

    nodes[1] = start_stopped_node(nodes[1], looper, tconf, tdir,
                                  allPluginsPath)
    ensureElectionsDone(looper, nodes[1:],
                        instances_list=range(getRequiredInstances(nodeCount)),
                        customTimeout=60)
    checkViewNoForNodes(nodes[1:], expectedViewNo=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)

    nodes[0] = start_stopped_node(nodes[0], looper, tconf, tdir,
                                  allPluginsPath)
    ensureElectionsDone(looper, nodes,
                        instances_list=range(getRequiredInstances(nodeCount)),
                        customTimeout=60)
    checkViewNoForNodes(nodes, expectedViewNo=1)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 1)