def checkNodeDataForEquality(node: TestNode,
                             *otherNodes: TestNode,
                             exclude_from_check=None):
    """
    Assert that `node` and every node in `otherNodes` hold equal data.

    Compares last ordered 3PC keys, the seqNo DB, and the domain, config
    and (if present) pool ledgers together with their states.

    :param node: reference node the others are compared against
    :param otherNodes: nodes to compare with `node`
    :param exclude_from_check: optional collection of check names to skip
        ('check_last_ordered_3pc', 'check_seqno_db')
    """
    def chk_ledger_and_state(first_node, second_node, ledger_id):
        # Both the ledger contents and the corresponding state must match
        # for the given ledger id.
        checkLedgerEquality(first_node.getLedger(ledger_id),
                            second_node.getLedger(ledger_id))
        checkStateEquality(first_node.getState(ledger_id),
                           second_node.getState(ledger_id))

    # Checks for node's ledgers and state's to be equal
    for n in otherNodes:
        # Fix: the previous `exclude_from_check and ... not in ...` condition
        # skipped these checks (and logged a misleading "Excluding" message)
        # whenever no exclusion list was supplied; run them by default and
        # skip only when explicitly excluded.
        if not exclude_from_check or 'check_last_ordered_3pc' not in exclude_from_check:
            check_last_ordered_3pc(node, n)
        else:
            logger.debug("Excluding check_last_ordered_3pc check")
        if not exclude_from_check or 'check_seqno_db' not in exclude_from_check:
            check_seqno_db_equality(node.seqNoDB, n.seqNoDB)
        else:
            logger.debug("Excluding check_seqno_db_equality check")
        chk_ledger_and_state(node, n, DOMAIN_LEDGER_ID)
        chk_ledger_and_state(node, n, CONFIG_LEDGER_ID)
        if n.poolLedger:
            chk_ledger_and_state(node, n, POOL_LEDGER_ID)
def checkNodeDataForEquality(node: TestNode,
                             *otherNodes: TestNode,
                             exclude_from_check=None):
    """
    Assert that `node` and every node in `otherNodes` hold equal data.

    Compares last ordered 3PC keys, the seqNo DB and every ledger (and its
    state) registered in each node's ledger manager.

    :param node: reference node the others are compared against
    :param otherNodes: nodes to compare with `node`
    :param exclude_from_check: optional collection of check names to skip:
        'check_last_ordered_3pc', 'check_seqno_db', 'check_state',
        'check_audit'
    """
    def chk_ledger_and_state(first_node, second_node, ledger_id):
        checkLedgerEquality(first_node.getLedger(ledger_id),
                            second_node.getLedger(ledger_id))
        if not exclude_from_check or 'check_state' not in exclude_from_check:
            checkStateEquality(first_node.getState(ledger_id),
                               second_node.getState(ledger_id))

    # Checks for node's ledgers and state's to be equal
    check_audit_ledger = not exclude_from_check or ('check_audit' not in exclude_from_check)
    for n in otherNodes:
        # Fix: run these checks by default (exclude_from_check is None).
        # The old `exclude_from_check and ...` condition skipped them when no
        # exclusion list was passed, inconsistently with the
        # `check_state`/`check_audit` conditions in this same function.
        if not exclude_from_check or 'check_last_ordered_3pc' not in exclude_from_check:
            check_last_ordered_3pc(node, n)
        else:
            logger.debug("Excluding check_last_ordered_3pc check")
        if not exclude_from_check or 'check_seqno_db' not in exclude_from_check:
            check_seqno_db_equality(node.seqNoDB, n.seqNoDB)
        else:
            logger.debug("Excluding check_seqno_db_equality check")
        for ledger_id in n.ledgerManager.ledgerRegistry:
            # The audit ledger can be explicitly excluded from comparison
            if not check_audit_ledger and ledger_id == AUDIT_LEDGER_ID:
                continue
            chk_ledger_and_state(node, n, ledger_id)
def checkNodeDataForEquality(node: TestNode, *otherNodes: TestNode):
    """
    Assert that `node` and every node in `otherNodes` hold equal data:
    last ordered 3PC key, seqNo DB, domain ledger and state, and — when a
    node has a pool ledger — pool ledger and state.

    :param node: reference node the others are compared against
    :param otherNodes: nodes to compare with `node`
    """
    # Note: a `*args` annotation describes EACH positional argument, so the
    # correct annotation is `TestNode`, not `Iterable[TestNode]` (PEP 484);
    # this also matches the other variants of this helper.
    # Checks for node's ledgers and state's to be equal
    for n in otherNodes:
        check_last_ordered_3pc(node, n)
        check_seqno_db_equality(node.seqNoDB, n.seqNoDB)
        checkLedgerEquality(node.domainLedger, n.domainLedger)
        checkStateEquality(node.getState(DOMAIN_LEDGER_ID),
                           n.getState(DOMAIN_LEDGER_ID))
        if n.poolLedger:
            checkLedgerEquality(node.poolLedger, n.poolLedger)
            checkStateEquality(node.getState(POOL_LEDGER_ID),
                               n.getState(POOL_LEDGER_ID))
def checkNodeDataForEquality(node: TestNode,
                             *otherNodes: TestNode,
                             exclude_from_check=None):
    """
    Assert that `node` and every node in `otherNodes` hold equal data:
    last ordered 3PC key, seqNo DB, domain ledger and state, and — when a
    node has a pool ledger — pool ledger and state.

    :param node: reference node the others are compared against
    :param otherNodes: nodes to compare with `node`
    :param exclude_from_check: optional collection of check names to skip
        (only 'check_last_ordered_3pc' is recognised here)
    """
    # Checks for node's ledgers and state's to be equal
    for n in otherNodes:
        # Fix: treat exclude_from_check as a collection of check names, as the
        # sibling variants of this helper do, instead of comparing the whole
        # argument to one string. The membership test keeps None and plain-str
        # callers behaving exactly as before while fixing list/set callers.
        if not exclude_from_check or 'check_last_ordered_3pc' not in exclude_from_check:
            check_last_ordered_3pc(node, n)
        else:
            logger.debug("Excluding check_last_ordered_3pc check")
        check_seqno_db_equality(node.seqNoDB, n.seqNoDB)
        checkLedgerEquality(node.domainLedger, n.domainLedger)
        checkStateEquality(node.getState(DOMAIN_LEDGER_ID),
                           n.getState(DOMAIN_LEDGER_ID))
        if n.poolLedger:
            checkLedgerEquality(node.poolLedger, n.poolLedger)
            checkStateEquality(node.getState(POOL_LEDGER_ID),
                               n.getState(POOL_LEDGER_ID))
def checkNodeDataForEquality(node: TestNode,
                             *otherNodes: TestNode,
                             exclude_from_check=None):
    """
    Assert that `node` and every node in `otherNodes` hold equal data.

    Compares last ordered 3PC keys, the seqNo DB and every ledger (and its
    state) registered in each node's ledger manager.

    :param node: reference node the others are compared against
    :param otherNodes: nodes to compare with `node`
    :param exclude_from_check: optional collection of check names to skip:
        'check_last_ordered_3pc', 'check_seqno_db', 'check_state'
    """
    def chk_ledger_and_state(first_node, second_node, ledger_id):
        checkLedgerEquality(first_node.getLedger(ledger_id),
                            second_node.getLedger(ledger_id))
        if not exclude_from_check or 'check_state' not in exclude_from_check:
            checkStateEquality(first_node.getState(ledger_id),
                               second_node.getState(ledger_id))

    # Checks for node's ledgers and state's to be equal
    for n in otherNodes:
        # Fix: run these checks by default (exclude_from_check is None).
        # The old `exclude_from_check and ...` condition skipped them when no
        # exclusion list was passed, inconsistently with the `check_state`
        # condition above.
        if not exclude_from_check or 'check_last_ordered_3pc' not in exclude_from_check:
            check_last_ordered_3pc(node, n)
        else:
            logger.debug("Excluding check_last_ordered_3pc check")
        if not exclude_from_check or 'check_seqno_db' not in exclude_from_check:
            check_seqno_db_equality(node.seqNoDB, n.seqNoDB)
        else:
            logger.debug("Excluding check_seqno_db_equality check")
        for ledger_id in n.ledgerManager.ledgerRegistry:
            chk_ledger_and_state(node, n, ledger_id)
def check_nodes_last_ordered_3pc(nodes, last_ordered_3pc):
    """
    Assert that every pair of nodes agrees on the last ordered 3PC key and
    that this shared key equals `last_ordered_3pc`.
    """
    for first, second in combinations(nodes, 2):
        assert check_last_ordered_3pc(first, second) == last_ordered_3pc
def check_nodes_last_ordered_3pc(nodes, last_ordered_3pc):
    """
    Check pairwise agreement of the nodes' last ordered 3PC key and assert
    the agreed key matches the expected `last_ordered_3pc`.
    """
    for node_a, node_b in combinations(nodes, 2):
        pair_key = check_last_ordered_3pc(node_a, node_b)
        assert pair_key == last_ordered_3pc
def chk():
    # The newly added node must share the last ordered 3PC key with every
    # pre-existing node (all pool nodes except the last entry, which is
    # presumably the new node itself — confirm against the caller).
    existing_nodes = txnPoolNodeSet[:-1]
    for existing in existing_nodes:
        check_last_ordered_3pc(new_node, existing)
def test_slow_node_reverts_unordered_state_during_catchup(
        looper, txnPoolNodeSet, sdk_pool_handle, sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catchup, it needs to
    revert some unordered state. Also till this time the node should have
    receive all COMMITs such that it will apply some of the COMMITs (
    for which it has not received txns from catchup).
    For this delay COMMITs by long, do catchup for a little older than the
    state received in LedgerStatus, once catchup completes, reset delays and
    try to process delayed COMMITs, some COMMITs will be rejected but some will
    be processed since catchup was done for older ledger.
    """
    # Seed the pool with some ordered batches so there is real ledger data.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    # Use a non-primary (of instance 0) as the node to be slowed down.
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150  # delay passed to the stasher — units per stasher API, TODO confirm
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STATUS on slow node, so that only
    # MESSAGE_REQUEST(LEDGER_STATUS) is sent, and the node catch-ups 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from slow
    # node, and return Consistency proof for all missing txns, so no stashed
    # ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    # Fast nodes converge; the slow node must now lag behind them.
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Remember how many times catchup completed so far on the slow node.
    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    old_last_ordered = txnPoolNodeSet[0].master_replica.last_ordered_3pc

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        check_last_ordered_3pc(n1, n2)

    assert slow_master_replica.last_prepared_before_view_change == old_last_ordered

    # Count COMMIT validations so far, to detect later processing.
    old_pc_count = slow_master_replica._ordering_service.spylog.count(
        slow_master_replica._ordering_service._validate)

    # Nothing should be stashed for catch-up yet.
    assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica._ordering_service.spylog.count(
            slow_master_replica._ordering_service._validate) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # (delay_batches * Max3PCBatchSize * commits_count_in_phase) COMMITs are stashed
        assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == \
            delay_batches * Max3PCBatchSize * (len(txnPoolNodeSet) - 1)

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # Some COMMITs were received but stashed and
        # they will processed after catchup
        assert slow_node.master_replica.stasher.stash_size(STASH_CATCH_UP) == 0

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 50))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(chk5,
                   retryWait=1,
                   timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def test_slow_node_reverts_unordered_state_during_catchup(
        looper, txnPoolNodeSet, client1, wallet1, client1Connected):
    """
    Delay COMMITs to a node such that when it needs to catchup, it needs to
    revert some unordered state. Also till this time the node should have
    receive all COMMITs such that it will apply some of the COMMITs (
    for which it has not received txns from catchup).
    For this delay COMMITs by long, do catchup for a little older than the
    state received in LedgerStatus, once catchup completes, reset delays and
    try to process delayed COMMITs, some COMMITs will be rejected but some will
    be processed since catchup was done for older ledger.
    """
    # Seed the pool with some ordered batches so there is real ledger data.
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    # Use a non-primary (of instance 0) as the node to be slowed down.
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150  # delay passed to the stasher — units per stasher API, TODO confirm
    catchup_rep_delay = 15

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))

    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        6 * Max3PCBatchSize)
    # Fast nodes converge; the slow node must now lag behind them.
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_twice(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                              delay_batches * Max3PCBatchSize)

    def is_catchup_needed_count():
        # How many times `is_catchup_needed` returned True so far.
        return len(
            getAllReturnVals(slow_node, slow_node.is_catchup_needed,
                             compare_val_to=True))

    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)
    old_cn_count = is_catchup_needed_count()

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    # Trigger view change (and hence catchup).
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        lst_3pc = check_last_ordered_3pc(n1, n2)

    def chk1():
        # `slow_node` has prepared all 3PC messages which
        # `other_nodes` have ordered
        assertEquality(slow_master_replica.last_prepared_before_view_change, lst_3pc)

    looper.run(eventually(chk1, retryWait=1))

    # Count COMMIT-processing calls so far, to detect later processing.
    old_pc_count = slow_master_replica.spylog.count(
        slow_master_replica.can_process_since_view_change_in_progress)

    # Repair the network so COMMITs are received and processed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica.spylog.count(
            slow_master_replica.can_process_since_view_change_in_progress
        ) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # Some COMMITs were ordered but stashed and they were processed
        rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
        assert delay_batches in rv

    looper.run(eventually(chk3, retryWait=1, timeout=catchup_rep_delay + 5))

    def chk4():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(chk4,
                   retryWait=1,
                   timeout=waits.expectedPoolCatchupTime(len(txnPoolNodeSet))))

    def chk5():
        # Once catchup was done, need of other catchup was not found
        assertEquality(is_catchup_needed_count(), old_cn_count)

    looper.run(eventually(chk5, retryWait=1, timeout=5))

    # Make sure the pool is still functional after the scenario.
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sendReqsToNodesAndVerifySuffReplies(looper, wallet1, client1,
                                        2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
def chk():
    # Each node that was already in the pool (every entry except the final
    # one, presumably the new node — confirm against the caller) must agree
    # with the new node on the last ordered 3PC key.
    for pool_node in txnPoolNodeSet[:-1]:
        check_last_ordered_3pc(new_node, pool_node)
def test_slow_node_reverts_unordered_state_during_catchup(looper,
                                                          txnPoolNodeSet,
                                                          sdk_pool_handle,
                                                          sdk_wallet_client):
    """
    Delay COMMITs to a node such that when it needs to catchup, it needs to
    revert some unordered state. Also till this time the node should have
    receive all COMMITs such that it will apply some of the COMMITs (
    for which it has not received txns from catchup).
    For this delay COMMITs by long, do catchup for a little older than the
    state received in LedgerStatus, once catchup completes, reset delays and
    try to process delayed COMMITs, some COMMITs will be rejected but some will
    be processed since catchup was done for older ledger.
    """
    # Seed the pool with some ordered batches so there is real ledger data.
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 3 * Max3PCBatchSize)
    nprs = getNonPrimaryReplicas(txnPoolNodeSet, 0)
    # Use a non-primary (of instance 0) as the node to be slowed down.
    slow_node = nprs[-1].node
    other_nodes = [n for n in txnPoolNodeSet if n != slow_node]
    slow_master_replica = slow_node.master_replica

    commit_delay = 150  # delay passed to the stasher — units per stasher API, TODO confirm
    catchup_rep_delay = 25

    # Delay COMMITs to one node
    slow_node.nodeIbStasher.delay(cDelay(commit_delay, 0))
    # Delay LEDGER_STATUS on slow node, so that only
    # MESSAGE_REQUEST(LEDGER_STATUS) is sent, and the node catch-ups 2 times.
    # Otherwise other nodes may receive multiple LEDGER_STATUSes from slow
    # node, and return Consistency proof for all missing txns, so no stashed
    # ones are applied
    slow_node.nodeIbStasher.delay(lsDelay(1000))

    # Make the slow node receive txns for a smaller ledger so it still finds
    # the need to catchup
    delay_batches = 2
    make_a_node_catchup_less(slow_node, other_nodes, DOMAIN_LEDGER_ID,
                             delay_batches * Max3PCBatchSize)

    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 6 * Max3PCBatchSize)
    # Fast nodes converge; the slow node must now lag behind them.
    ensure_all_nodes_have_same_data(looper, other_nodes)
    waitNodeDataInequality(looper, slow_node, *other_nodes)

    # Remember how many times catchup completed so far on the slow node.
    old_lcu_count = slow_node.spylog.count(slow_node.allLedgersCaughtUp)

    # `slow_node` is slow to receive CatchupRep, so that it
    # gets a chance to order COMMITs
    slow_node.nodeIbStasher.delay(cr_delay(catchup_rep_delay))

    # start view change (and hence catchup)
    ensure_view_change(looper, txnPoolNodeSet)

    # Check last ordered of `other_nodes` is same
    for n1, n2 in combinations(other_nodes, 2):
        lst_3pc = check_last_ordered_3pc(n1, n2)

    def chk1():
        # `slow_node` has prepared all 3PC messages which
        # `other_nodes` have ordered
        assertEquality(slow_master_replica.last_prepared_before_view_change, lst_3pc)

    looper.run(eventually(chk1, retryWait=1))

    # Count COMMIT-processing calls so far, to detect later processing.
    old_pc_count = slow_master_replica.spylog.count(
        slow_master_replica.can_process_since_view_change_in_progress)

    # Nothing should be stashed yet.
    assert len(slow_node.stashedOrderedReqs) == 0

    # Repair the network so COMMITs are received, processed and stashed
    slow_node.reset_delays_and_process_delayeds(COMMIT)

    def chk2():
        # COMMITs are processed for prepared messages
        assert slow_master_replica.spylog.count(
            slow_master_replica.can_process_since_view_change_in_progress) > old_pc_count

    looper.run(eventually(chk2, retryWait=1, timeout=5))

    def chk3():
        # COMMITs are stashed
        assert len(slow_node.stashedOrderedReqs) == delay_batches * Max3PCBatchSize

    looper.run(eventually(chk3, retryWait=1, timeout=15))

    # fix catchup, so the node gets a chance to be caught-up
    repair_node_catchup_less(other_nodes)

    def chk4():
        # Some COMMITs were ordered but stashed and they were processed
        rv = getAllReturnVals(slow_node, slow_node.processStashedOrderedReqs)
        assert delay_batches in rv

    looper.run(eventually(chk4, retryWait=1, timeout=catchup_rep_delay + 5))

    def chk5():
        # Catchup was done once
        assert slow_node.spylog.count(
            slow_node.allLedgersCaughtUp) > old_lcu_count

    looper.run(
        eventually(
            chk5,
            retryWait=1,
            timeout=waits.expectedPoolCatchupTime(
                len(txnPoolNodeSet))))

    # make sure that the pool is functional
    checkProtocolInstanceSetup(looper, txnPoolNodeSet, retryWait=1)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)
    sdk_send_random_and_check(looper, txnPoolNodeSet, sdk_pool_handle,
                              sdk_wallet_client, 2 * Max3PCBatchSize)
    ensure_all_nodes_have_same_data(looper, nodes=txnPoolNodeSet)