def ru_downgrade_state(cls):
    '''
    Downgrades Namenode.

    A downgrade is done when the rolling upgrade is being abandoned: state
    may need to be converted to the previous version, or is already
    compatible with it.

    NOTE: the downgrade command does not return until the namenode shuts
    down, so each namenode is stopped before the command is run on its host.
    '''
    command = "sudo su - -c 'hadoop namenode -rollingUpgrade downgrade' hdfs"

    def _run_downgrade_on(node):
        # Run the downgrade command on the given host as the admin user and
        # fail the run if it exits non-zero.
        (exitcode, stdout) = Machine.runas(
            Machine.getAdminUser(), command, node, None, None, "True",
            Machine.getAdminPasswd()
        )
        ruAssert(
            "HDFS", exitcode == 0,
            "[NNDowngrade] hadoop namenode -rollingUpgrade downgrade command failed"
        )

    if HDFS.isHAEnabled():
        # Standby first, then active, stopping each before downgrading it.
        nodes = []
        nodes.append(HDFS.getNamenodeByState('standby'))
        nodes.append(HDFS.getNamenodeByState('active'))
        for node in nodes:
            HDFS.resetNamenode('stop', host=node)
            _run_downgrade_on(node)
        return

    # Non-HA: single namenode.
    HDFS.stopNamenode()
    _run_downgrade_on(HDFS.getNamenode())
def ru_rollback_state(cls):
    '''
    Rolls back the saved namenode state - the upgrade is abandoned.

    HA-only flow: stops ZKFC and namenode on both hosts, restarts the first
    namenode with "-rollingUpgrade rollback", waits for it to leave safemode,
    force-bootstraps the second namenode as standby, then restarts it.

    NOTE: the rollback command does not return until the namenode shuts down.
    :return: None
    '''
    logger.info("[INFO][HDFS][Upgrade] HA Journal Node Upgrade Started ")
    # Standby first, then active.
    nodes = []
    nodes.append(HDFS.getNamenodeByState('standby'))
    nodes.append(HDFS.getNamenodeByState('active'))
    logger.info("[INFO][HDFS][Upgrade] HA Namenode Upgrade Started")
    for node in nodes:
        HDFS.resetZkfc('stop', hosts=node.split())
        HDFS.resetNamenode('stop', host=node)
    HDFS.resetNamenode('start', config=None, host=nodes[0], option=" -rollingUpgrade rollback")
    HDFS.resetZkfc('start', hosts=nodes[0].split())
    # lets make sure the NN is out of safemode before we proceed to the next namenode
    HDFS.waitForNNOutOfSafemode(options='-fs hdfs://%s:8020' % nodes[0])
    # Re-sync the second namenode from the rolled-back one.
    command = "sudo su - -c 'hadoop namenode -bootstrapStandby -force' hdfs"
    (exitcode, stdout) = Machine.runas(
        Machine.getAdminUser(), command, nodes[1], None, None, "True",
        Machine.getAdminPasswd()
    )
    ruAssert("HDFS", exitcode == 0, "hadoop namenode -bootstrapStandby -force")
    HDFS.resetNamenode('start', config=None, host=nodes[1], option="")
    HDFS.resetZkfc('start', hosts=nodes[1].split())
    # lets make sure the NN is out of safemode before we proceed to the next namenode
    HDFS.waitForNNOutOfSafemode(options='-fs hdfs://%s:8020' % nodes[1])
def updateJobProperties(cls, propFile, properties=None, haEnabled=False, debug=False):
    '''
    Update an Oozie job properties file in place with cluster-specific values.

    Fills in nameNode/jobTracker (when not already supplied), hcatalog and
    Kerberos principals where applicable, and rewrites a localhost
    oozie.wf.application.path to the real filesystem URI.

    :param propFile: path to the properties file to rewrite
    :param properties: optional dict of pre-set properties; missing keys are filled in
    :param haEnabled: if True, resolve the NN principal via the active namenode
    :param debug: if True, log the resulting file contents
    '''
    fileSystemName = Hadoop.getFSDefaultValue()
    jobTrackerIP = MAPRED.getJobtrackerAddress()
    jobTracker = jobTrackerIP[0] + ":" + jobTrackerIP[1]
    if not properties:
        properties = {}
    # 'in' instead of the removed dict.has_key (works on Python 2 and 3).
    if 'nameNode' not in properties:
        properties['nameNode'] = fileSystemName
    if 'jobTracker' not in properties:
        properties['jobTracker'] = jobTracker
    if "hcatalog" in propFile:
        if Hadoop.isSecure():
            kerberosPrincipal = Hive.getConfigValue("hive.metastore.kerberos.principal")
            properties['hive.metastore.kerberos.principal'] = kerberosPrincipal
        logger.info("Updating for hcatalog workflow")
        hcatNode = Hive.getConfigValue("hive.metastore.uris").replace('thrift', 'hcat')
        logger.info("Hcat node is " + hcatNode)
        properties['hcatNode'] = hcatNode
    if Hadoop.isSecure():
        # determine the namenode and the jobtracker principal
        nnPrincipal = None
        if haEnabled:
            nnPrincipal = HDFS.getNameNodePrincipal().replace(
                '_HOST', HDFS.getNamenodeByState('active'))
        else:
            nnPrincipal = HDFS.getNameNodePrincipal().replace(
                '_HOST', HDFS.getNamenodeHttpAddress()[0])
        jtPrincipal = MAPRED.getMasterPrincipal().replace('_HOST', jobTrackerIP[0])
        properties['dfs.namenode.kerberos.principal'] = nnPrincipal
        properties['mapreduce.jobtracker.kerberos.principal'] = jtPrincipal
    wfPath = util.getPropertyValueFromFile(propFile, "oozie.wf.application.path")
    if wfPath is not None and wfPath.find("hdfs://localhost:9000") != -1:
        wfPath = wfPath.replace("hdfs://localhost:9000", fileSystemName)
        logger.info("Value of replaced oozie.wf.application.path is " + wfPath)
        properties['oozie.wf.application.path'] = wfPath
    util.writePropertiesToFile(propFile, propFile, properties)
    if debug:
        logger.info('Content of properties file %s' % propFile)
        # Context manager guarantees the handle is closed even if read fails.
        with open(propFile, 'r') as f:
            # print the file to the console
            logger.info(f.read())
def ensure_nn_is_active(cls, timeout=11 * 60):
    '''
    Poll until an active Namenode appears, or *timeout* seconds elapse.

    Polls HDFS every 30 seconds; reports progress on success. If no active
    namenode is found within the deadline, reports failure and asserts.

    :param timeout: maximum number of seconds to keep polling (default 11 min)
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    active_nn = None
    now = int(time.time())
    deadline = int(time.time()) + timeout
    while not active_nn and now <= deadline:
        active_nn = HDFS.getNamenodeByState('active')
        if active_nn:
            # Found the active NN - report it and stop polling.
            UpgradePerNode.reportProgress(
                "[INFO][HDFS] Active Namenode is %s" % active_nn)
            return
        # wait for 30s before the next poll
        time.sleep(30)
        now = int(time.time())
    if not active_nn:
        UpgradePerNode.reportProgress("[FAILED][HDFS] No Active Namenode")
    assert active_nn
    return
def downgrade_master(cls, version, config=None):
    '''
    Downgrade HDFS Master services
    :param version: Version to be downgraded to
    :param config: Configuration location
    '''
    from beaver.component.rollingupgrade.ruCommon import hdpSelect
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    if HDFS.isHAEnabled():
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Downgrade] HA Namenode Downgrade Started ")
        # Standby first, then active: stop, flip the package version, restart.
        nodes = []
        nodes.append(HDFS.getNamenodeByState('standby'))
        nodes.append(HDFS.getNamenodeByState('active'))
        for node in nodes:
            HDFS.resetNamenode('stop', host=node)
            HDFS.resetZkfc('stop', hosts=node.split())
            # BUG-25534: There is no package for zkfc. So just updating the NN is enough.
            hdpSelect.changeVersion("hadoop-hdfs-namenode", version, node)
            HDFS.resetNamenode('start', config=config, host=node)
            HDFS.resetZkfc('start', hosts=node.split())
            # lets make sure the NN is out of safemode before we proceed to the next namenode
            HDFS.waitForNNOutOfSafemode(options='-fs hdfs://%s:8020' % node)
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Downgrade] HA Namenode Downgrade Finished ")
        jn_nodes = HDFS.getJournalNodes()
        if len(jn_nodes) < 3:
            # Quorum needs at least 3 journal nodes; skip the JN downgrade.
            UpgradePerNode.reportProgress(
                "[INFO][HDFS][Downgrade] Less than three Journal Nodes. Not going to do Downgrade "
            )
            return
        hdfs_user = Config.get('hadoop', 'HDFS_USER')
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Downgrade] HA Journal Node Upgrade Started")
        cls.ensure_all_jns_are_up(jn_nodes)
        # Loop through all the JNs and stop-flip-start one at a time
        for node in jn_nodes:
            Hadoop.resetService(hdfs_user, node, "journalnode", 'stop', binFolder="sbin")
            hdpSelect.changeVersion("hadoop-hdfs-journalnode", version, node)
            Hadoop.resetService(hdfs_user, node, "journalnode", 'start', binFolder="sbin")
            time.sleep(5)
            # Confirm the quorum recovered before touching the next JN.
            cls.ensure_all_jns_are_up(jn_nodes)
        cls.ensure_nn_is_active()
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Downgrade] HA Journal Node Downgrade Finished ")
    else:
        ## TODO add code to upgrade SNN
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Downgrade] Namenode Downgrade Started ")
        node = HDFS.getNamenode()
        HDFS.stopNamenode()
        hdpSelect.changeVersion("hadoop-hdfs-namenode", version, node)
        HDFS.startNamenode(config=config)
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Downgrade] Namenode Downgrade Finished ")
        # upgrade SNN
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Downgrade]Secondary Namenode Downgrade Started ")
        node = HDFS.getSecondaryNamenode()
        HDFS.stopSecondaryNamenode()
        hdpSelect.changeVersion("hadoop-hdfs-secondarynamenode", version, node)
        HDFS.startSecondaryNamenode(config=config)
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Downgrade] Secondary Namenode Downgrade Finished")