def ensure_jns_have_new_txn(nodes, last_txn_id):
  """
  Poll every Journalnode until each one reports a LastWrittenTxId that is at
  least last_txn_id, or until the 3-minute timeout expires.

  :param nodes: List of Journalnode hostnames
  :param last_txn_id: Integer of last transaction id written by the Namenode
  :return: Return true on success (all Journalnodes caught up), false otherwise
  :raises Fail: if the Journalnode address or port is missing from params
  """
  import params

  num_of_jns = len(nodes)
  actual_txn_ids = {}  # node -> last transaction id observed via JMX
  jns_updated = 0

  if params.journalnode_address is None:
    raise Fail("Could not retrieve Journal node address")
  if params.journalnode_port is None:
    raise Fail("Could not retrieve Journalnode port")

  time_out_secs = 3 * 60
  step_time_secs = 10
  iterations = int(time_out_secs / step_time_secs)

  protocol = "https" if params.https_only else "http"

  Logger.info("Checking if all Journalnodes are updated.")
  for i in range(iterations):
    Logger.info('Try %d out of %d' % (i + 1, iterations))
    for node in nodes:
      # JN already meets condition, skip it
      if node in actual_txn_ids and actual_txn_ids[node] and actual_txn_ids[node] >= last_txn_id:
        continue

      url = '%s://%s:%s' % (protocol, node, params.journalnode_port)
      data = utils.get_jmx_data(url, 'Journal-', 'LastWrittenTxId',
                                params.https_only, params.security_enabled)
      if data:
        actual_txn_ids[node] = int(data)
        if actual_txn_ids[node] >= last_txn_id:
          Logger.info(
              "Journalnode %s has a higher transaction id: %s" % (node, str(data)))
          jns_updated += 1
        else:
          Logger.info(
              "Journalnode %s is still on transaction id: %s" % (node, str(data)))

    # Check completion AFTER polling the whole set: the original checked at
    # the top of the node loop, which forced one extra step_time_secs sleep
    # whenever the final node caught up during the current pass.
    if jns_updated == num_of_jns:
      Logger.info("All journal nodes are updated")
      return True

    Logger.info("Sleeping for %d secs" % step_time_secs)
    time.sleep(step_time_secs)

  return jns_updated == num_of_jns
def ensure_jns_have_new_txn(nodes, last_txn_id):
  """
  Poll every Journalnode until each one reports a LastWrittenTxId that is at
  least last_txn_id, or until the 3-minute timeout expires.

  :param nodes: List of Journalnodes
  :param last_txn_id: Integer of last transaction id
  :return: Return true on success, false otherwise
  :raises Fail: if the Journalnode address or port is missing from params
  """
  import params

  num_of_jns = len(nodes)
  actual_txn_ids = {}  # node -> last transaction id observed via JMX
  jns_updated = 0

  if params.journalnode_address is None:
    raise Fail("Could not retrieve Journal node address")
  if params.journalnode_port is None:
    raise Fail("Could not retrieve Journalnode port")

  time_out_secs = 3 * 60
  step_time_secs = 10
  iterations = int(time_out_secs / step_time_secs)

  protocol = "https" if params.https_only else "http"

  Logger.info("Checking if all Journalnodes are updated.")
  for i in range(iterations):
    Logger.info('Try %d out of %d' % (i + 1, iterations))
    for node in nodes:
      # JN already meets condition, skip it
      if node in actual_txn_ids and actual_txn_ids[node] and actual_txn_ids[node] >= last_txn_id:
        continue

      url = '%s://%s:%s' % (protocol, node, params.journalnode_port)
      data = get_jmx_data(url, 'Journal-', 'LastWrittenTxId')
      if data:
        actual_txn_ids[node] = int(data)
        if actual_txn_ids[node] >= last_txn_id:
          Logger.info("Journalnode %s has a higher transaction id: %s" % (node, str(data)))
          jns_updated += 1
        else:
          Logger.info("Journalnode %s is still on transaction id: %s" % (node, str(data)))

    # Check completion AFTER polling the whole set: checking at the top of
    # the node loop (as before) forced one extra step_time_secs sleep
    # whenever the final node caught up during the current pass.
    if jns_updated == num_of_jns:
      Logger.info("All journal nodes are updated")
      return True

    Logger.info("Sleeping for %d secs" % step_time_secs)
    time.sleep(step_time_secs)

  return jns_updated == num_of_jns
# Verify a Journalnode quorum exists, then confirm every Journalnode has
# caught up to the active Namenode's last applied/written transaction id.
# NOTE(review): this is the interior of a function whose `def` is outside
# this view — indentation context assumed, confirm against the full file.
all_journal_node_hosts = default("/clusterHostInfo/journalnode_hosts", [])
# HDFS requires at least 3 Journalnodes to maintain a write quorum.
if len(all_journal_node_hosts) < 3:
  raise Fail("Need at least 3 Journalnodes to maintain a quorum")

try:
  namenode_ha = namenode_ha_state.NamenodeHAState()
except ValueError, err:  # Python 2 exception syntax used throughout this file
  raise Fail("Could not retrieve Namenode HA addresses. Error: " + str(err))

Logger.info(str(namenode_ha))
nn_address = namenode_ha.get_address(NAMENODE_STATE.ACTIVE)

# Query the active Namenode's FSNamesystem JMX bean for journal txn info.
nn_data = utils.get_jmx_data(
    nn_address, 'org.apache.hadoop.hdfs.server.namenode.FSNamesystem',
    'JournalTransactionInfo', namenode_ha.is_encrypted(),
    params.security_enabled)
if not nn_data:
  raise Fail("Could not retrieve JournalTransactionInfo from JMX")

try:
  last_txn_id = int(nn_data['LastAppliedOrWrittenTxId'])
  # Poll every Journalnode until it reaches last_txn_id (or times out).
  success = ensure_jns_have_new_txn(all_journal_node_hosts, last_txn_id)
  if not success:
    raise Fail("Could not ensure that all Journal nodes have a new log transaction id")
except KeyError:
  raise Fail("JournalTransactionInfo does not have key LastAppliedOrWrittenTxId from JMX info")
all_journal_node_hosts = default("/clusterHostInfo/journalnode_hosts", []) if len(all_journal_node_hosts) < 3: raise Fail("Need at least 3 Journalnodes to maintain a quorum") try: namenode_ha = NamenodeHAState() except ValueError, err: raise Fail("Could not retrieve Namenode HA addresses. Error: " + str(err)) Logger.info(str(namenode_ha)) nn_address = namenode_ha.get_address(NAMENODE_STATE.ACTIVE) nn_data = get_jmx_data( nn_address, 'org.apache.hadoop.hdfs.server.namenode.FSNamesystem', 'JournalTransactionInfo', namenode_ha.is_encrypted()) if not nn_data: raise Fail("Could not retrieve JournalTransactionInfo from JMX") try: last_txn_id = int(nn_data['LastAppliedOrWrittenTxId']) success = ensure_jns_have_new_txn(all_journal_node_hosts, last_txn_id) if not success: raise Fail( "Could not ensure that all Journal nodes have a new log transaction id" ) except KeyError: raise Fail( "JournalTransactionInfo does not have key LastAppliedOrWrittenTxId from JMX info"
# Brief pause (presumably to let a preceding HDFS operation settle —
# TODO confirm against the caller), then verify the Journalnode quorum
# has caught up to the active Namenode's last transaction id.
# NOTE(review): interior of a function whose `def` is outside this view.
time.sleep(5)

all_journal_node_hosts = default("/clusterHostInfo/journalnode_hosts", [])
# HDFS requires at least 3 Journalnodes to maintain a write quorum.
if len(all_journal_node_hosts) < 3:
  raise Fail("Need at least 3 Journalnodes to maintain a quorum")

try:
  namenode_ha = NamenodeHAState()
except ValueError, err:  # Python 2 exception syntax used throughout this file
  raise Fail("Could not retrieve Namenode HA addresses. Error: " + str(err))

Logger.info(str(namenode_ha))
nn_address = namenode_ha.get_address(NAMENODE_STATE.ACTIVE)

# Query the active Namenode's FSNamesystem JMX bean for journal txn info.
nn_data = get_jmx_data(nn_address,
                       'org.apache.hadoop.hdfs.server.namenode.FSNamesystem',
                       'JournalTransactionInfo', namenode_ha.is_encrypted())
if not nn_data:
  raise Fail("Could not retrieve JournalTransactionInfo from JMX")

try:
  last_txn_id = int(nn_data['LastAppliedOrWrittenTxId'])
  # Poll every Journalnode until it reaches last_txn_id (or times out).
  success = ensure_jns_have_new_txn(all_journal_node_hosts, last_txn_id)
  if not success:
    raise Fail("Could not ensure that all Journal nodes have a new log transaction id")
except KeyError:
  raise Fail("JournalTransactionInfo does not have key LastAppliedOrWrittenTxId from JMX info")


# Definition below continues past the end of this view (docstring truncated).
def hdfs_roll_edits():
  """
# Brief pause (presumably to let a preceding HDFS operation settle —
# TODO confirm against the caller), then verify the Journalnode quorum
# has caught up to the active Namenode's last transaction id.
# NOTE(review): interior of a function whose `def` is outside this view.
time.sleep(5)

all_journal_node_hosts = default("/clusterHostInfo/journalnode_hosts", [])
# HDFS requires at least 3 Journalnodes to maintain a write quorum.
if len(all_journal_node_hosts) < 3:
  raise Fail("Need at least 3 Journalnodes to maintain a quorum")

try:
  namenode_ha = namenode_ha_state.NamenodeHAState()
except ValueError, err:  # Python 2 exception syntax used throughout this file
  raise Fail("Could not retrieve Namenode HA addresses. Error: " + str(err))

Logger.info(str(namenode_ha))
nn_address = namenode_ha.get_address(NAMENODE_STATE.ACTIVE)

# Query the active Namenode's FSNamesystem JMX bean for journal txn info.
nn_data = utils.get_jmx_data(nn_address,
                             'org.apache.hadoop.hdfs.server.namenode.FSNamesystem',
                             'JournalTransactionInfo', namenode_ha.is_encrypted(),
                             params.security_enabled)
if not nn_data:
  raise Fail("Could not retrieve JournalTransactionInfo from JMX")

try:
  last_txn_id = int(nn_data['LastAppliedOrWrittenTxId'])
  # Poll every Journalnode until it reaches last_txn_id (or times out).
  success = ensure_jns_have_new_txn(all_journal_node_hosts, last_txn_id)
  if not success:
    raise Fail("Could not ensure that all Journal nodes have a new log transaction id")
except KeyError:
  raise Fail("JournalTransactionInfo does not have key LastAppliedOrWrittenTxId from JMX info")


# Definition below continues past the end of this view (docstring truncated).
def hdfs_roll_edits():
  """