def getJobAndAppIds(text):
    '''
    getJobAndAppIds
    text - Text from which to get the application and the job id

    Returns a list of dicts. On the Tez path each dict carries only an
    'application' key; on the MapReduce path each dict carries both a
    'job' and an 'application' key.
    '''
    ids = []
    # pattern to look for is different when tez is enabled.
    if Hive.isTezEnabled():
        # For this method to be backward compatible, we need to check for 2 patterns
        # The following pattern is applicable for pre-champlain releases.
        # NOTE: patterns are raw strings so '\(' and '\d' are real regex
        # escapes rather than invalid string escapes (SyntaxWarning on 3.12+).
        pattern = r'Status: Running \(application id: (.*)\)'
        for line in re.finditer(pattern, text):
            # with tez we only get the application id
            ids.append({'application': line.group(1)})
        # The following pattern is applicable for champlain and above release.
        if not ids:
            pattern = r'Status: Running \(Executing on YARN cluster with App id (.*)\)'
            for line in re.finditer(pattern, text):
                # with tez we only get the application id
                ids.append({'application': line.group(1)})
    else:
        pattern = r'Starting Job = (.*), Tracking URL = h.*://.*:?\d+?/proxy/(.*)/'
        for line in re.finditer(pattern, text):
            ids.append({'job': line.group(1), 'application': line.group(2)})
    return ids
def background_job_when_master_upgrade(cls):
    '''
    Start a background application which runs while component master service gets upgraded
    :return:
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Background Job test setup when upgrading Hive started"
    )

    logger.info("Creating hive tables for short background jobs")
    # Recreate both verification tables so each run starts from a clean slate.
    # (The previous version dropped/created shortlr_bline_verify twice; the
    # redundant duplicate pair has been removed — net effect is identical.)
    query = "drop table if exists shortlr_hive_verify;\n"
    query += "create table shortlr_hive_verify (userid string, age int);\n"
    query += "drop table if exists shortlr_bline_verify;\n"
    query += "create table shortlr_bline_verify (userid string, age int);\n"
    short_bgjob_setupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'shortlrsetup.sql')
    util.writeToFile(query, short_bgjob_setupfile)

    exit_code, stdout = Hive.run("-f " + short_bgjob_setupfile)
    if exit_code != 0:
        # Report but do not abort: the upgrade test framework tracks failures
        # via reportProgress messages.
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][BGJob] Background Job test setup when Hive upgrades failed due to exitcode = %d"
            % exit_code)

    logger.info("Running the Background Job when upgrading Hive")
    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Long running job for Hive component upgrades started"
    )

    # The session property that routes queries to the test YARN queue differs
    # between the Tez and MapReduce execution engines.
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " insert overwrite table shortlr_hive_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_hive_process = Hive.runQuery(query, background=True)

    # Sleeping for 10 seconds to make sure that query initializes before Metastore is restarted
    time.sleep(10)

    logger.info("**** Running Beeline CLI Test ****")
    query = setqueue + "\ninsert overwrite table shortlr_bline_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_bline_process = Hive.runQueryOnBeeline(query, readFromFile=True, background=True)

    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Background Job test setup when Hive upgrades finished"
    )
def get_set_queue_cmd(cls, useStandaloneCmd):
    '''
    Build the Hive session-property prefix that routes queries to the
    appropriate YARN queue.
    :param useStandaloneCmd: truthy -> use the "storm" queue, otherwise "storm-slider"
    :return: "set <engine queue property>=<queue>; " string to prepend to a query
    '''
    #For https://hortonworks.jira.com/browse/BUG-27221
    from beaver.component.hive import Hive
    # Rely on truthiness instead of the redundant "== True" comparison.
    if useStandaloneCmd:
        YARN_QUEUE = "storm"
    else:
        YARN_QUEUE = "storm-slider"

    if Hive.isTezEnabled():
        # this wont work because when hive CLI starts hive does not know queues that are not set in hive-site.xml.
        # See Deepesh email on 10/14/2014.
        setqueue = "set tez.queue.name=%s; " % YARN_QUEUE
    else:
        setqueue = "set mapred.job.queue.name=%s; " % YARN_QUEUE
    return setqueue
def run_client_smoketest(cls, config=None, env=None):
    '''
    Run Smoke test after upgrading Client
    :param config: Configuration location
    :param env: Set Environment variables
    '''
    # Exercises three client paths in sequence — Hive CLI, Beeline, and the
    # WebHCat REST DDL endpoint. Failures are reported per-path through
    # UpgradePerNode.reportProgress; the method itself does not raise.
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress(
        "[INFO][Hive][Smoke] Smoke test for Hive component started")
    setqueue = ""
    # The queue-routing session property differs between execution engines.
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    # Populate <tbl>_hive_verify from the smoke-test table, then count rows so
    # stdout can be checked against cls._num_of_rows_smoke below.
    query = setqueue + " insert overwrite table %s_hive_verify select userid, avg(age) from %s group by userid order by userid; " % (
        cls._smoketest_tbl, cls._smoketest_tbl)
    query += "select count(*) from %s_hive_verify;" % cls._smoketest_tbl
    exit_code, stdout, stderr = Hive.runQuery(query, stderr_as_stdout=False)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for Hive Metastore failed with exit code '%d'"
            % exit_code)
        logger.error(
            "Smoke test for Hive failed with the following error: " + stderr)
    elif stdout.find("%d" % cls._num_of_rows_smoke) == -1:
        # CLI ran cleanly but the expected row count was not in the output.
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for Hive Metastore failed to verify number of rows in output"
        )
        logger.error(
            "Smoke test for Hive failed to find [%d] in output [%s]" %
            (cls._num_of_rows_smoke, stdout))
    else:
        UpgradePerNode.reportProgress(
            "[PASSED][Hive][Smoke] Smoke test for Hive Metastore succeeded"
        )
        logger.info("Smoke test for Hive Metastore succeeded")

    logger.info("**** Running Beeline CLI Test ****")
    # Same verification flow via HiveServer2/Beeline, using a separate
    # <tbl>_bline_verify table so the two paths don't interfere.
    query = setqueue + "\ndrop table if exists %s_bline_verify;\n" % cls._smoketest_tbl
    query += "create table %s_bline_verify (userid string, age int);\n" % cls._smoketest_tbl
    query += "insert overwrite table %s_bline_verify select userid, avg(age) from %s group by userid order by userid;\n" % (
        cls._smoketest_tbl, cls._smoketest_tbl)
    query += "select count(*) from %s_bline_verify;\n" % cls._smoketest_tbl
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for HiveServer2 failed with exit code '%d'"
            % exit_code)
        logger.error(
            "Smoke test for HiveServer2 failed with the following error: " +
            stderr)
    elif stdout.find("%d" % cls._num_of_rows_smoke) == -1:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for HiveServer2 failed to verify number of rows in output"
        )
        logger.error(
            "Smoke test for HiveServer2 failed to find [%d] in output [%s]" %
            (cls._num_of_rows_smoke, stdout))
    else:
        logger.info("Smoke test for HiveServer2 succeeded")

    logger.info("**** Running WebHCat Smoke Test ****")
    # POST a trivial DDL command to the Templeton REST API and check only
    # the HTTP status — the response body is not inspected here.
    query = "show tables;"
    webhcatHost = Config.get('templeton', 'TEMPLETON_HOST', default=Machine.getfqdn())
    webhcatPort = Config.get('templeton', 'TEMPLETON_PORT', default="50111")
    url = "http://%s:%s/templeton/v1/ddl" % (webhcatHost, webhcatPort)
    params = {'exec': query}
    status_code, stdout = util.curl(url, method='POST', params=params)
    if status_code != 200:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][Smoke] Smoke test for WebHCat failed due to status code = %d"
            % status_code)
    else:
        logger.info("Smoke test for WebHCat succeeded")

    UpgradePerNode.reportProgress(
        "[INFO][Hive][Smoke] Smoke test for Hive component finished")
def run_background_job(cls, runSmokeTestSetup=False, config=None):
    '''
    Runs background long running Hive Job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    :param config: expected configuration location
    :return: Total number of long running jobs started
    '''
    # Launches one long-running sleep query per client path (Hive CLI,
    # Beeline, WebHCat) in the background; hence the constant return of 3.
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Long running job for Hive component started")
    setqueue = ""
    # The queue-routing session property differs between execution engines.
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    # sleep(...) is a long-running UDF; it writes marker paths under the
    # background-job HDFS dir so the job's progress can be observed.
    query = setqueue + " create table if not exists hive_cli_lr (a string); select sleep(%d, 2000, 'hdfs://%s/hive_cli_lr', 'hdfs://%s/END') from (select count(*) from hive_cli_lr) a;" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir)
    Hive.runQuery(query, background=True)

    logger.info("**** Running Beeline CLI Test ****")
    # Create the sleep function within the same Beeline session
    # Function created outside of HS2 instance are not picked
    query = setqueue + "\n"
    query += "drop function sleep2;\n"
    query += "create function sleep2 as 'org.apache.hive.udf.generic.GenericUDFSleep' using jar 'hdfs://%s/hive-udfs-0.1.jar';\n" % cls._hdfs_bgjtest_dir
    query += "create table if not exists bline_cli_lr (a string);\n"
    query += "select sleep2(%d, 2000, 'hdfs://%s/bline_cli_lr', 'hdfs://%s/END') from (select count(*) from bline_cli_lr) a;\n" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir)
    Hive.runQueryOnBeeline(query, readFromFile=True, background=True)

    logger.info("**** Running WebHCat Test ****")
    webhcatHost = Config.get('templeton', 'TEMPLETON_HOST', default=Machine.getfqdn())
    webhcatPort = Config.get('templeton', 'TEMPLETON_PORT', default="50111")
    url = "http://%s:%s/templeton/v1/hive" % (webhcatHost, webhcatPort)
    # mapred.task.timeout=0 keeps the framework from killing the deliberately
    # long-running sleep task.
    query = setqueue + " set mapred.task.timeout=0; create table if not exists whcat_rest_lr (a string); select sleep(%d, 2000, 'hdfs://%s/whcat_rest_lr', 'hdfs://%s/END') from (select count(*) from whcat_rest_lr) a;" % (
        cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir, cls._hdfs_bgjtest_dir)
    params = {'execute': query}
    status_code, stdout = util.curl(url, method='POST', params=params)
    # Retry on 404 up to 3 times — presumably the WebHCat server may not be
    # fully up yet right after an upgrade (TODO confirm).
    retry = 0
    while status_code == 404 and retry < 3:
        time.sleep(15)
        status_code, stdout = util.curl(url, method='POST', params=params)
        retry += 1
    if status_code != 200:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][BGJobSetup] Long running job for WebHCat failed due to status code = %d"
            % status_code)
        logger.error(
            "Webhcat request failed with the following error: %s\n" %
            stdout)

    if runSmokeTestSetup:
        logger.info("**** Running Hive Smoke Test Setup ****")
        cls.smoke_test_setup()
    # One background job per client path: Hive CLI, Beeline, WebHCat.
    return 3