def run_client_smoketest(cls, config=None, env=None):
    '''
    Run wordcount job passing env variables
    :param config: Configuration location
    :param env: Set environment variables
    '''
    logger.info("**** Running HDFS CLI Test ****")
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][HDFS][ClientSmoke] CLI test for HDFS started ")
    if not cls._SmokeInputDir:
        cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
    SmokeOutputDir = cls._base_hdfs_dir + '/smokeHdfsOutput_cli'
    HDFS.deleteDirectory(SmokeOutputDir, Config.get('hadoop', 'HADOOPQA_USER'))
    jobCmd = 'jar %s wordcount \"-Dmapreduce.reduce.input.limit=-1\" \"-D%s=%s\" %s %s' % (
        Config.get('hadoop', 'HADOOP_EXAMPLES_JAR'), "mapred.job.queue.name", cls._queue,
        cls._SmokeInputDir, SmokeOutputDir)
    exit_code, stdout = Hadoop.run(jobCmd, env=env)
    ruAssert("HDFS", exit_code == 0, "[ClientSmoke] Hdfs smoketest failed")
    # check the exit code of the cleanup itself, not the wordcount job asserted above
    exit_code, stdout = HDFS.deleteDirectory(SmokeOutputDir)
    ruAssert("HDFS", exit_code == 0, "[ClientSmoke] could not delete: " + SmokeOutputDir)
    UpgradePerNode.reportProgress("[INFO][HDFS][ClientSmoke] CLI test for HDFS Finished ")

def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Create 4 input datasets for TestOrderedWordCount
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    logger.info("*** Start background job setup for Tez ***")
    Machine.rm(user=HADOOPQA_USER, host=None, filepath=LOCAL_WORK_DIR, isdir=True)
    os.mkdir(LOCAL_WORK_DIR)
    for i in range(0, 4, 1):
        inputDirName = "HDFS_INPUT%d" % i
        inputDirPath = os.path.join(LOCAL_WORK_DIR, inputDirName)
        HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, inputDirPath, 10, 400, 10000)
        hdfsInputDir = "/user/%s/Input%d" % (HADOOPQA_USER, i)
        hdfsOutputDir = "/user/%s/output%d" % (HADOOPQA_USER, i)
        # in case they are already present, delete the input and output directories
        HDFS.deleteDirectory(hdfsInputDir)
        HDFS.createDirectory(hdfsInputDir)
        HDFS.deleteDirectory(hdfsOutputDir)
        HDFS.copyFromLocal(inputDirPath, hdfsInputDir)
        cls._hdfsInputList.append(hdfsInputDir + "/" + inputDirName)
        cls._hdfsOutputList.append(hdfsOutputDir)
        logger.info("Created data for input %d", i)
    logger.info("*** End background job setup for Tez ***")

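# Hedged sketch, not the suite's actual runner: one plausible way the input/output
# lists built above could be consumed is to launch one Tez orderedwordcount example
# job per pair in the background. TEZ_EXAMPLES_JAR (the path to a tez-examples jar)
# and the helper name are assumptions; only the pairing of cls._hdfsInputList with
# cls._hdfsOutputList comes from the setup above.
def run_background_ordered_wordcount_sketch(cls, TEZ_EXAMPLES_JAR):
    for hdfsInputDir, hdfsOutputDir in zip(cls._hdfsInputList, cls._hdfsOutputList):
        # one orderedwordcount job per input/output pair, left running in the background
        jobCmd = 'jar %s orderedwordcount %s %s' % (TEZ_EXAMPLES_JAR, hdfsInputDir, hdfsOutputDir)
        Hadoop.runInBackground(jobCmd)
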
def run_background_job(cls, runSmokeTestSetup=True, config=None, flagFile="/tmp/flagFile"):
    '''
    Runs a long-running NN load generator job in the background against the data
    uploaded before the upgrade starts (an earlier sleep-job approach is kept below,
    commented out)
    :return: number of applications started
    '''
    # start long running application which performs I/O operations (BUG-23838)
    #from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    #UpgradePerNode.reportProgress("### Background application for HDFS started ####")
    #jobArgs = {"mapred.job.queue.name" : cls._queue}
    #HadoopJobHelper.runSleepJob(numOfMaps = 1, numOfReduce = 1, mapSleepTime = "10000000", reduceSleepTime = "100", extraJobArg = jobArgs, runInBackground = True, config = config, directoutput = False )
    #MAPRED.triggerSleepJob("1", "0", "100000", "1000000", 1, background = True)

    # load generator
    HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
    TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
    HDFS.deleteDirectory(flagFile)
    slavelist = HDFS.getDatanodes()
    jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=%s -mr 3 %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime 36000 -flagFile %s' % (
        HADOOP_TEST_JAR, cls._queue, cls._lgTestOutputDir, cls._lgTestDataDir, flagFile)
    Hadoop.runInBackground(jobCmd)
    time.sleep(15)
    return 1

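# A hedged sketch, not part of the original suite: the NNloadGenerator launched above
# polls for the file passed via -flagFile, so a stop helper can end the background job
# before its elapsedTime expires by creating that file in HDFS with the same
# HDFS.touchz() helper used elsewhere in these tests. The helper name
# stop_background_job is an assumption.
def stop_background_job(cls, flagFile="/tmp/flagFile"):
    '''
    Signal the background load generator to finish by creating its flag file.
    '''
    HDFS.touchz(flagFile)  # load generator tasks exit once this path exists
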
def setup():
    out = HDFS.deleteFile(CREATE_FILE_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
    out = HDFS.deleteDirectory(OUT_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
    out = HDFS.deleteDirectory(CREATE_FILE_2_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0

def background_job_teardown(cls):
    '''
    Cleanup for long running Yarn job
    '''
    HDFS.deleteDirectory(cls._hdfs_input, user=Config.get('hadoop', 'HADOOPQA_USER'))
    HDFS.deleteDirectory(cls._hdfs_output, user=Config.get('hadoop', 'HADOOPQA_USER'))

def background_job_teardown(cls):
    '''
    Cleanup of HDFS background job
    '''
    HDFS.deleteDirectory(cls._base_hdfs_dir)
    command = "rm -rf " + cls._lgStructureDir
    exit_code, stdout = Machine.runas(
        Machine.getAdminUser(), command, None, None, None, "True", Machine.getAdminPasswd())

def checkClasspathVersion(cls, Version_Num, config=None):
    Local_Test_dir = os.path.join(Config.getEnv("WORKSPACE"), "tests", "rolling_upgrade", "yarn")
    Multi_Version_App_Dir = os.path.join(Local_Test_dir, "data")
    Mapper = "data/versionVerifyMapper.py"
    Reducer = "data/versionVerifyReducer.py"
    Verify_File_Name = "test.txt"
    Verify_Test_File = os.path.join(Multi_Version_App_Dir, Verify_File_Name)
    # Set up env
    mapred_app_path = MAPRED.getConfigValue("mapreduce.application.framework.path", None)
    mapred_classpath = MAPRED.getConfigValue("mapreduce.application.classpath", None)
    env = {
        "mapreduce.application.framework.path": mapred_app_path,
        "mapreduce.application.classpath": mapred_classpath
    }
    verifyInput = cls._hdfs_input + "/verify"
    HDFS.createDirectory(verifyInput, None, "777", False)
    # Copy template files for the verifier streaming job
    templateFile = open(Verify_Test_File, 'w')
    templateFile.write(Version_Num)
    templateFile.close()
    HDFS.copyFromLocal(Verify_Test_File, verifyInput, user=Config.get('hadoop', 'HADOOPQA_USER'))
    # Submit the special streaming job
    shortStreamingId = HadoopJobHelper.runStreamJob(
        Mapper,
        Reducer,
        verifyInput,
        cls._hdfs_output_verify,
        files=Multi_Version_App_Dir,
        config=config,
        extraJobArg=cls._jobArgs,
        env=env,
        proposedJobName=cls._shortStreamingName)
    MAPRED.waitForJobDoneOrTimeout(shortStreamingId, timeoutInSec=180)
    # Make sure task succeeded
    #assert YARN.getAppFinalStateFromID(appId) == 'SUCCEEDED'
    # Check result content
    retVal, checkContent = HDFS.cat(cls._hdfs_output_verify + '/part-00000')
    logger.info("CHECK CLASSPATH VERSION OUTPUT")
    logger.info(retVal)
    logger.info(checkContent)
    ruAssert("YARN", retVal == 0)
    ruAssert("YARN", 'True' in checkContent,
             "[VersionVerify] Stream job returns false: " + checkContent)
    #assert retVal == 0
    #assert 'True' in checkContent, "Stream job returns false: " + checkContent
    #assert 'False' not in checkContent, "Stream job returns false: " + checkContent
    HDFS.deleteDirectory(cls._hdfs_output_verify, user=Config.get('hadoop', 'HADOOPQA_USER'))

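# Illustrative sketch only; the actual data/versionVerifyMapper.py is not shown in this
# section. A streaming mapper along these lines could implement the check the job above
# relies on: read the expected version string (the contents of test.txt uploaded to the
# verify input dir) from stdin and emit "True"/"False" depending on whether that version
# appears in the task's CLASSPATH environment variable. Reading CLASSPATH from the
# environment is an assumption about how the real mapper verifies the framework path.
import os
import sys

def main():
    classpath = os.environ.get("CLASSPATH", "")
    for line in sys.stdin:
        expected_version = line.strip()
        if not expected_version:
            continue
        # one output record per input record; the driver above only checks for 'True'
        print(str(expected_version in classpath))

if __name__ == "__main__":
    main()
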
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Upload data to HDFS before the upgrade starts
    Creates /user/hrt_qa/test_rollingupgrade dir on HDFS
    Uploads 20 files to /user/hrt_qa/test_rollingupgrade
    '''
    if not cls._base_hdfs_dir:
        cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get('hadoop', 'HADOOPQA_USER')
    exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir, force=True)
    ruAssert("HDFS", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'HDFS_RU_TEST')
    localTestWorkDir1 = os.path.join(LOCAL_WORK_DIR, "Temp_data")
    HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, localTestWorkDir1, 20, 40, 1000)
    HDFS.copyFromLocal(os.path.join(localTestWorkDir1, "*"), cls._base_hdfs_dir)

    # set up for loadGenerator
    cls._lgTestDataDir = cls._base_hdfs_dir + '/testData'
    cls._lgTestOutputDir = cls._base_hdfs_dir + '/lg_job'
    cls._lgStructureDir = Machine.getTempDir() + "/structure"
    # test dir setup
    HDFS.deleteDirectory(cls._lgTestDataDir)
    HDFS.deleteDirectory(cls._lgTestOutputDir)
    command = "rm -rf " + cls._lgStructureDir
    exit_code, stdout = Machine.runas(
        Machine.getAdminUser(), command, None, None, None, "True", Machine.getAdminPasswd())
    command = "mkdir " + cls._lgStructureDir
    stdout = Machine.runas(None, command, None, None, None, "True", None)
    Machine.chmod("777", cls._lgStructureDir, "True", Machine.getAdminUser(), None,
                  Machine.getAdminPasswd())
    HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
    TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
    # structure generator
    jobCmd = 'jar %s NNstructureGenerator -maxDepth 5 -minWidth 2 -maxWidth 5 -numOfFiles 100 -avgFileSize 3 -outDir %s' % (
        HADOOP_TEST_JAR, cls._lgStructureDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] StructureGenerator failed")
    # data generator
    jobCmd = 'jar %s NNdataGenerator -inDir %s -root %s' % (
        HADOOP_TEST_JAR, cls._lgStructureDir, cls._lgTestDataDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[BGJobSetup] DataGenerator failed")

    if runSmokeTestSetup:
        logger.info("**** Running HDFS Smoke Test Setup ****")
        cls.smoke_test_setup()

def background_job_teardown(cls):
    '''
    Cleanup directories for long running Tez job
    '''
    for hdfsInputDir in cls._hdfsInputList:
        HDFS.deleteDirectory(hdfsInputDir)
    for hdfsOutputDir in cls._hdfsOutputList:
        HDFS.deleteDirectory(hdfsOutputDir)
    Machine.rm(user=HADOOPQA_USER, host=None, filepath=LOCAL_WORK_DIR, isdir=True)
    logger.info("**** Completed background job teardown for Tez ****")

def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Setup for background long running job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    cls.run_JHS_test(config=config)
    logger.info("**** Run Yarn long running application setup ****")
    HDFS.createDirectory(cls._hdfs_input, None, "777", False)
    # touch a fake file to trick hadoop streaming
    HDFS.touchz(cls._hdfs_input + "/input.txt")
    HDFS.deleteDirectory(cls._hdfs_output, user=Config.get('hadoop', 'HADOOPQA_USER'))
    if runSmokeTestSetup:
        logger.info("**** Running HDFS Smoke Test Setup ****")
        cls.smoke_test_setup()

def smoke_test_setup(cls):
    '''
    Setup function for HDFS smoke test
    '''
    if not cls._SmokeInputDir:
        cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
    HDFS.deleteDirectory(cls._SmokeInputDir, Config.get('hadoop', 'HADOOPQA_USER'))
    jobCmd = 'jar %s randomtextwriter \"-D%s=%s\" \"-D%s=%s\" %s' % (
        Config.get('hadoop', 'HADOOP_EXAMPLES_JAR'),
        "mapreduce.randomtextwriter.totalbytes", "4096",
        "mapred.job.queue.name", cls._queue, cls._SmokeInputDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert(
        "HDFS", exit_code == 0,
        '[SmokeSetup] Randomtextwriter job failed and could not create data on hdfs')

def run_smoke_test(cls, smoketestnumber, config=None):
    '''
    Run wordcount as the HDFS smoke test
    - Create a file of 4096 bytes using the randomtextwriter job
    - Run the wordcount job
    '''
    logger.info("Running HDFS Smoke test")
    # make sure base hdfs dir is set.
    if not cls._base_hdfs_dir:
        cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get('hadoop', 'HADOOPQA_USER')
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][HDFS][Smoke] Smoke test for HDFS started ")
    HDFS.runas(
        Config.get('hadoop', 'HADOOPQA_USER'),
        "dfs -ls /user/hrt_qa",
        env=None,
        logoutput=True,
        config=None,
        host=None,
        skipAuth=False)
    if not cls._SmokeInputDir:
        cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
    SmokeOutputDir = cls._base_hdfs_dir + '/smokeHdfsOutput' + str(smoketestnumber)
    HDFS.deleteDirectory(SmokeOutputDir, Config.get('hadoop', 'HADOOPQA_USER'))
    jobCmd = 'jar %s wordcount \"-Dmapreduce.reduce.input.limit=-1\" \"-D%s=%s\" %s %s' % (
        Config.get('hadoop', 'HADOOP_EXAMPLES_JAR'), "mapred.job.queue.name", cls._queue,
        cls._SmokeInputDir, SmokeOutputDir)
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert("HDFS", exit_code == 0, "[Smoke] Hdfs smoketest failed")
    # check the exit code of the cleanup itself, not the wordcount job asserted above
    exit_code, stdout = HDFS.deleteDirectory(SmokeOutputDir)
    ruAssert("HDFS", exit_code == 0, "[Smoke] could not delete: " + SmokeOutputDir)
    UpgradePerNode.reportProgress("[INFO][HDFS][Smoke] Smoke test for HDFS Finished ")

def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Setup for background long running job
    Upload data to HDFS before the upgrade starts
    Creates /user/hrt_qa/ru-pig dir on HDFS
    Creates and uploads a large data file to /user/hrt_qa/ru-pig/input/
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("### Running Pig BackGround Job Setup ####")
    HDFS.deleteDirectory(cls._base_hdfs_dir)
    exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir, user=cls._job_user, perm=777, force=True)
    ruAssert("Pig", exit_code == 0, '[BGJobSetup] could not create dir on hdfs.')
    HDFS.createDirectory(cls._hdfs_input_dir, force=True)
    srcFile = os.path.join(cls._artifacts_dir, 'pig-ru-input.txt')
    if os.path.exists(srcFile):
        os.remove(srcFile)
    tmpFile = os.path.join(cls._artifacts_dir, 'pig-ru-tmp-input.txt')
    if os.path.exists(tmpFile):
        os.remove(tmpFile)
    util.copyFileToAnotherFile(cls._golden_src_file, srcFile)
    util.copyFileToAnotherFile(srcFile, tmpFile)
    itr = 12
    if Machine.isFlubber():
        itr = 16
    # repeat the copy cycle to enlarge the input data set (more iterations on Flubber clusters)
    for i in range(itr):
        util.copyFileToAnotherFile(srcFile, tmpFile)
        util.copyFileToAnotherFile(tmpFile, srcFile)
    exit_code, stdout = HDFS.copyFromLocal(srcFile, cls._hdfs_input_path)
    ruAssert("Pig", exit_code == 0, '[BGJobSetup] Data Load failed')
    if runSmokeTestSetup:
        cls.smoke_test_setup()

def run(self, randomwriter_bytes="10", local_dir_name="small_rw_jobs"):  # pylint: disable=unused-argument
    local_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), self.local_dir_name)
    if not Machine.pathExists(None, None, local_dir, passwd=None):
        Machine.makedirs(None, None, local_dir, None)
        Machine.chmod("777", local_dir)
    while self.signal:
        input_dir = "rw_%d" % int(999999 * random.random())
        HDFS.deleteDirectory(input_dir)
        HadoopJobHelper.runRandomTextWriterJob(
            input_dir,
            self.randomwriter_bytes,
            bytesPerMap=1,
            mapsPerHost=1,
            jobArg="",
            user=None,
            config=None,
            runInBackground=False,
            redirect_file=os.path.join(local_dir, input_dir))

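# Usage sketch, under the assumption that run() above is the run() method of a
# threading.Thread subclass whose constructor stores randomwriter_bytes,
# local_dir_name and a boolean signal flag. The class name SmallRWJobRunner and its
# __init__ are hypothetical; only the start / clear-signal / join pattern is the point.
import threading

class SmallRWJobRunner(threading.Thread):
    def __init__(self, randomwriter_bytes="10", local_dir_name="small_rw_jobs"):
        super(SmallRWJobRunner, self).__init__()
        self.randomwriter_bytes = randomwriter_bytes
        self.local_dir_name = local_dir_name
        self.signal = True  # run() keeps submitting jobs while this stays True

    # run() would be the method shown above

rw_runner = SmallRWJobRunner()
rw_runner.start()           # submits small randomtextwriter jobs in a loop
# ... rolling-upgrade steps execute while the small jobs keep the cluster busy ...
rw_runner.signal = False    # ask run() to stop after the job in flight completes
rw_runner.join()
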
def run_background_job(cls, runSmokeTestSetup=True, config=None):
    '''
    Runs background long running Yarn Job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    :param config: expected configuration location
    :return: Total number of long running jobs started
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][YARN][BGJob] Starting background job for Yarn ")
    Local_Test_dir = os.path.join(Config.getEnv("WORKSPACE"), "tests", "rolling_upgrade", "yarn")
    Multi_Version_App_Dir = os.path.join(Local_Test_dir, "data")
    HDFS.deleteDirectory(cls._hdfs_output, user=Config.get('hadoop', 'HADOOPQA_USER'))
    Mapper = "data/mvMapper.py"
    Reducer = "data/mvReducer.py"
    # Launch job
    (jobID, STREAMING_APP_ID) = HadoopJobHelper.checkAndRunStreamJob(
        Mapper,
        Reducer,
        cls._hdfs_input,
        cls._hdfs_output,
        files=Multi_Version_App_Dir,
        config=config,
        forRU=True,
        extraJobArg=cls._jobArgs,
        env=None,
        sleepTimeAfterJobSubmission=60)
    cls._background_job_appId = STREAMING_APP_ID
    cls._background_job_jobId = jobID
    logger.info("Background job started, application ID: " + STREAMING_APP_ID + " job ID: " + jobID)
    logger.info("Start second long running job for Yarn")
    num_sec_bkjob = cls.run_second_background_job(config=config)
    return 2

def tear_down_hdfs_topology(cls, topologyName, useStandaloneCmd):
    """
    Kills HDFS topologies and deletes the HDFS directories.
    """
    Machine.rm(user=None, host="localhost", filepath=LOCAL_HDFS_WORK_DIR, isdir=True, passwd=None)
    Storm.killTopology(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd)
    HDFS.deleteDirectory("/tmp/mySeqTopology", HDFS_USER)
    HDFS.deleteDirectory("/tmp/dest", HDFS_USER)
    HDFS.deleteDirectory("/tmp/dest2", HDFS_USER)
    HDFS.deleteDirectory("/tmp/foo", HDFS_USER)
    HDFS.deleteDirectory("/tmp/trident", HDFS_USER)
    HDFS.deleteDirectory("/tmp/trident-seq", HDFS_USER)

def background_job_teardown(cls):
    '''
    Cleanup for long running Yarn job
    '''
    HDFS.deleteDirectory(cls.HDFS_CLUSTER_INPUT_DIR)

def setupOozieDataDir(cls, directory):
    HDFS.deleteDirectory(directory)
    HDFS.createDirectory(directory)