Example #1
0
 def run_client_smoketest(cls, config=None, env=None):
     '''
     Run a wordcount job, passing environment variables
     :param config: Configuration location
     :param env: Environment variables to set
     '''
     logger.info("**** Running HDFS CLI Test ****")
     from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
     UpgradePerNode.reportProgress(
         "[INFO][HDFS][ClientSmoke] CLI test for HDFS started ")
     if not cls._SmokeInputDir:
         cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
     SmokeOutputDir = cls._base_hdfs_dir + '/smokeHdfsOutput_cli'
     HDFS.deleteDirectory(SmokeOutputDir,
                          Config.get('hadoop', 'HADOOPQA_USER'))
     jobCmd = 'jar %s wordcount \"-Dmapreduce.reduce.input.limit=-1\" \"-D%s=%s\" %s %s' % (
         Config.get('hadoop',
                    'HADOOP_EXAMPLES_JAR'), "mapred.job.queue.name",
         cls._queue, cls._SmokeInputDir, SmokeOutputDir)
     exit_code, stdout = Hadoop.run(jobCmd, env=env)
     ruAssert("HDFS", exit_code == 0, "[ClientSmoke] Hdfs smoketest failed")
     exit_code, stdout = HDFS.deleteDirectory(SmokeOutputDir)
     ruAssert("HDFS", exit_code == 0,
              "[ClientSmoke] could not delete: " + SmokeOutputDir)
     UpgradePerNode.reportProgress(
         "[INFO][HDFS][ClientSmoke] CLI test for HDFS Finished ")
Example #2
0
    def background_job_setup(cls, runSmokeTestSetup=True, config=None):
        '''
        Create 5 input datasets for TestOrderedWordCount
        :param runSmokeTestSetup: Runs smoke test setup if set to true
        '''
        logger.info("*** Start background job setup for Tez ***")
        Machine.rm(user=HADOOPQA_USER,
                   host=None,
                   filepath=LOCAL_WORK_DIR,
                   isdir=True)
        os.mkdir(LOCAL_WORK_DIR)
        for i in range(5):
            inputDirName = "HDFS_INPUT%d" % i
            inputDirPath = os.path.join(LOCAL_WORK_DIR, inputDirName)
            HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, inputDirPath,
                                                10, 400, 10000)

            hdfsInputDir = "/user/%s/Input%d" % (HADOOPQA_USER, i)
            hdfsOutputDir = "/user/%s/output%d" % (HADOOPQA_USER, i)

            #In case already present, delete the input directory
            HDFS.deleteDirectory(hdfsInputDir)

            HDFS.createDirectory(hdfsInputDir)
            HDFS.deleteDirectory(hdfsOutputDir)

            HDFS.copyFromLocal(inputDirPath, hdfsInputDir)
            cls._hdfsInputList.append(hdfsInputDir + "/" + inputDirName)
            cls._hdfsOutputList.append(hdfsOutputDir)
            logger.info("Created data for input %d", i)
        logger.info("*** End background job setup for Tez ***")
Example #3
0
 def run_background_job(cls,
                        runSmokeTestSetup=True,
                        config=None,
                        flagFile="/tmp/flagFile"):
     '''
     Uploads files to HDFS before the upgrade starts and runs a long-running load generator job in the background
     :return: number of applications started
     '''
     # start long running application which performs I/O operations (BUG-23838)
     #from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
     #UpgradePerNode.reportProgress("### Background application for HDFS started ####")
     #jobArgs = {"mapred.job.queue.name" : cls._queue}
     #HadoopJobHelper.runSleepJob(numOfMaps = 1, numOfReduce = 1, mapSleepTime = "10000000", reduceSleepTime = "100", extraJobArg = jobArgs, runInBackground = True, config = config, directoutput = False )
     #MAPRED.triggerSleepJob("1", "0", "100000", "1000000", 1, background = True)
     # load generator
     HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
     TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
     HDFS.deleteDirectory(flagFile)
     slavelist = HDFS.getDatanodes()
     jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=%s -mr 3 %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime 36000 -flagFile %s' % (
         HADOOP_TEST_JAR, cls._queue, cls._lgTestOutputDir,
         cls._lgTestDataDir, flagFile)
     Hadoop.runInBackground(jobCmd)
     time.sleep(15)
     return 1
Example #4
0
def setup():
    out = HDFS.deleteFile(CREATE_FILE_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
    out = HDFS.deleteDirectory(OUT_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
    out = HDFS.deleteDirectory(CREATE_FILE_2_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
Example #5
0
 def background_job_teardown(cls):
     '''
     Cleanup for long running Yarn job
     '''
     HDFS.deleteDirectory(cls._hdfs_input,
                          user=Config.get('hadoop', 'HADOOPQA_USER'))
     HDFS.deleteDirectory(cls._hdfs_output,
                          user=Config.get('hadoop', 'HADOOPQA_USER'))
Example #6
0
 def background_job_teardown(cls):
     '''
     Cleanup of HDFS background job
     '''
     HDFS.deleteDirectory(cls._base_hdfs_dir)
     command = "rm -rf " + cls._lgStructureDir
     exit_code, stdout = Machine.runas(Machine.getAdminUser(), command,
                                       None, None, None, "True",
                                       Machine.getAdminPasswd())
Example #7
0
    def checkClasspathVersion(cls, Version_Num, config=None):
        '''
        Verify that the expected version string is on the MapReduce application classpath
        by running a small verifier streaming job and checking its output
        '''
        Local_Test_dir = os.path.join(Config.getEnv("WORKSPACE"), "tests",
                                      "rolling_upgrade", "yarn")
        Multi_Version_App_Dir = os.path.join(Local_Test_dir, "data")
        Mapper = "data/versionVerifyMapper.py"
        Reducer = "data/versionVerifyReducer.py"
        Verify_File_Name = "test.txt"
        Verify_Test_File = os.path.join(Multi_Version_App_Dir,
                                        Verify_File_Name)
        # Set up env
        mapred_app_path = MAPRED.getConfigValue(
            "mapreduce.application.framework.path", None)
        mapred_classpath = MAPRED.getConfigValue(
            "mapreduce.application.classpath", None)
        env = {
            "mapreduce.application.framework.path": mapred_app_path,
            "mapreduce.application.classpath": mapred_classpath
        }
        verifyInput = cls._hdfs_input + "/verify"
        HDFS.createDirectory(verifyInput, None, "777", False)
        # Copy template files for the verifier streaming job
        templateFile = open(Verify_Test_File, 'w')
        templateFile.write(Version_Num)
        templateFile.close()
        HDFS.copyFromLocal(Verify_Test_File,
                           verifyInput,
                           user=Config.get('hadoop', 'HADOOPQA_USER'))
        # Submit the special streaming job
        shortStreamingId = HadoopJobHelper.runStreamJob(
            Mapper,
            Reducer,
            verifyInput,
            cls._hdfs_output_verify,
            files=Multi_Version_App_Dir,
            config=config,
            extraJobArg=cls._jobArgs,
            env=env,
            proposedJobName=cls._shortStreamingName)
        MAPRED.waitForJobDoneOrTimeout(shortStreamingId, timeoutInSec=180)
        # Make sure task succeeded
        #assert YARN.getAppFinalStateFromID(appId) == 'SUCCEEDED'

        # Check result content
        retVal, checkContent = HDFS.cat(cls._hdfs_output_verify +
                                        '/part-00000')
        logger.info("CHECK CLASSPATH VERSION OUTPUT")
        logger.info(retVal)
        logger.info(checkContent)
        ruAssert("YARN", retVal == 0)
        ruAssert("YARN", 'True' in checkContent,
                 "[VersionVerify] Stream job returns false: " + checkContent)
        #assert retVal == 0
        #assert 'True' in checkContent, "Stream job returns false: " + checkContent
        #assert 'False' not in checkContent, "Stream job returns false: " + checkContent
        HDFS.deleteDirectory(cls._hdfs_output_verify,
                             user=Config.get('hadoop', 'HADOOPQA_USER'))
Example #8
0
    def background_job_setup(cls, runSmokeTestSetup=True, config=None):
        '''
        Upload data to HDFS before the upgrade starts
        Creates /user/hrt_qa/test_rollingupgrade dir on HDFS
        Uploads 20 files to /user/hrt_qa/test_rollingupgrade
        '''
        if not cls._base_hdfs_dir:
            cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get(
                'hadoop', 'HADOOPQA_USER')
        exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir,
                                                 force=True)
        ruAssert("HDFS", exit_code == 0,
                 '[BGJobSetup] could not create dir on hdfs.')
        LOCAL_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                      'HDFS_RU_TEST')
        localTestWorkDir1 = os.path.join(LOCAL_WORK_DIR, "Temp_data")
        HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, localTestWorkDir1,
                                            20, 40, 1000)
        HDFS.copyFromLocal(os.path.join(localTestWorkDir1, "*"),
                           cls._base_hdfs_dir)

        # set up for loadGenerator
        cls._lgTestDataDir = cls._base_hdfs_dir + '/testData'
        cls._lgTestOutputDir = cls._base_hdfs_dir + '/lg_job'
        cls._lgStructureDir = Machine.getTempDir() + "/structure"
        # test dir setup
        HDFS.deleteDirectory(cls._lgTestDataDir)
        HDFS.deleteDirectory(cls._lgTestOutputDir)
        command = "rm -rf " + cls._lgStructureDir
        exit_code, stdout = Machine.runas(Machine.getAdminUser(), command,
                                          None, None, None, "True",
                                          Machine.getAdminPasswd())
        command = "mkdir " + cls._lgStructureDir
        exit_code, stdout = Machine.runas(None, command, None, None, None, "True", None)
        Machine.chmod("777", cls._lgStructureDir, "True",
                      Machine.getAdminUser(), None, Machine.getAdminPasswd())

        HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
        TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
        # structure generator
        jobCmd = 'jar %s NNstructureGenerator -maxDepth 5 -minWidth 2 -maxWidth 5 -numOfFiles 100 -avgFileSize 3 -outDir %s' % (
            HADOOP_TEST_JAR, cls._lgStructureDir)
        exit_code, stdout = Hadoop.run(jobCmd)
        ruAssert("HDFS", exit_code == 0,
                 "[BGJobSetup] StructureGenerator failed")
        # data generator
        jobCmd = 'jar %s NNdataGenerator -inDir %s -root %s' % (
            HADOOP_TEST_JAR, cls._lgStructureDir, cls._lgTestDataDir)
        exit_code, stdout = Hadoop.run(jobCmd)
        ruAssert("HDFS", exit_code == 0, "[BGJobSetup] DataGenerator failed")

        if runSmokeTestSetup:
            logger.info("**** Running HDFS Smoke Test Setup ****")
            cls.smoke_test_setup()
Example #9
0
 def background_job_teardown(cls):
     '''
     Cleanup directories for long running tez job
     '''
     for input_dir in cls._hdfsInputList:
         HDFS.deleteDirectory(input_dir)
     for output_dir in cls._hdfsOutputList:
         HDFS.deleteDirectory(output_dir)
     Machine.rm(user=HADOOPQA_USER,
                host=None,
                filepath=LOCAL_WORK_DIR,
                isdir=True)
     logger.info("**** Completed background job teardown for Tez ****")
Example #10
0
 def background_job_setup(cls, runSmokeTestSetup=True, config=None):
     '''
     Setup for background long running job
     :param runSmokeTestSetup: Runs smoke test setup if set to true
     '''
     cls.run_JHS_test(config=config)
     logger.info("**** Run Yarn long running application setup ****")
     HDFS.createDirectory(cls._hdfs_input, None, "777", False)
     #touch a fake file to trick hadoop streaming
     HDFS.touchz(cls._hdfs_input + "/input.txt")
     HDFS.deleteDirectory(cls._hdfs_output,
                          user=Config.get('hadoop', 'HADOOPQA_USER'))
     if runSmokeTestSetup:
         logger.info("**** Running HDFS Smoke Test Setup ****")
         cls.smoke_test_setup()
Example #11
0
 def smoke_test_setup(cls):
     '''
     Setup function for HDFS smoke test
     '''
     if not cls._SmokeInputDir:
         cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
     HDFS.deleteDirectory(cls._SmokeInputDir,
                          Config.get('hadoop', 'HADOOPQA_USER'))
     jobCmd = 'jar %s randomtextwriter \"-D%s=%s\" \"-D%s=%s\" %s' % (
         Config.get('hadoop', 'HADOOP_EXAMPLES_JAR'),
         "mapreduce.randomtextwriter.totalbytes", "4096",
         "mapred.job.queue.name", cls._queue, cls._SmokeInputDir)
     exit_code, stdout = Hadoop.run(jobCmd)
     ruAssert(
         "HDFS", exit_code == 0,
         '[SmokeSetup] Randomtextwriter job failed and could not create data on hdfs'
     )
Example #12
0
    def run_smoke_test(cls, smoketestnumber, config=None):
        '''
        Run word count as HDFS smoke test
        - Create a file of 4096 bytes using the randomtextwriter job
        - Run wordcount job
        '''
        logger.info("Running HDFS Smoke test")
        # make sure base hdfs dir is set.
        if not cls._base_hdfs_dir:
            cls._base_hdfs_dir = '/user/%s/test_rollingupgrade' % Config.get(
                'hadoop', 'HADOOPQA_USER')

        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Smoke] Smoke test for HDFS started ")
        HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'),
                   "dfs -ls /user/hrt_qa",
                   env=None,
                   logoutput=True,
                   config=None,
                   host=None,
                   skipAuth=False)
        if not cls._SmokeInputDir:
            cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
        SmokeOutputDir = cls._base_hdfs_dir + '/smokeHdfsOutput' + str(
            smoketestnumber)
        HDFS.deleteDirectory(SmokeOutputDir,
                             Config.get('hadoop', 'HADOOPQA_USER'))
        jobCmd = 'jar %s wordcount \"-Dmapreduce.reduce.input.limit=-1\" \"-D%s=%s\" %s %s' % (
            Config.get('hadoop',
                       'HADOOP_EXAMPLES_JAR'), "mapred.job.queue.name",
            cls._queue, cls._SmokeInputDir, SmokeOutputDir)
        exit_code, stdout = Hadoop.run(jobCmd)
        ruAssert("HDFS", exit_code == 0, "[Smoke] Hdfs smoketest failed")
        exit_code, stdout = HDFS.deleteDirectory(SmokeOutputDir)
        ruAssert("HDFS", exit_code == 0,
                 "[Smoke] could not delete: " + SmokeOutputDir)
        UpgradePerNode.reportProgress(
            "[INFO][HDFS][Smoke] Smoke test for HDFS Finished ")
Example #13
0
    def background_job_setup(cls, runSmokeTestSetup=True, config=None):
        '''
        Setup for background long running job
        Upload Data to HDFS before Upgrade starts
        Creates /user/hrt_qa/ru-pig dir on HDFS
        Creates and uploads a large data file to /user/hrt_qa/ru-pig/input/
        :param runSmokeTestSetup: Runs smoke test setup if set to true
        '''
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
        UpgradePerNode.reportProgress(
            "### Running Pig BackGround Job Setup ####")
        HDFS.deleteDirectory(cls._base_hdfs_dir)
        exit_code, stdout = HDFS.createDirectory(cls._base_hdfs_dir,
                                                 user=cls._job_user,
                                                 perm=777,
                                                 force=True)
        ruAssert("Pig", exit_code == 0,
                 '[BGJobSetup] could not create dir on hdfs.')

        HDFS.createDirectory(cls._hdfs_input_dir, force=True)
        srcFile = os.path.join(cls._artifacts_dir, 'pig-ru-input.txt')
        if os.path.exists(srcFile):
            os.remove(srcFile)
        tmpFile = os.path.join(cls._artifacts_dir, 'pig-ru-tmp-input.txt')
        if os.path.exists(tmpFile):
            os.remove(tmpFile)
        util.copyFileToAnotherFile(cls._golden_src_file, srcFile)
        util.copyFileToAnotherFile(srcFile, tmpFile)
        itr = 12
        if Machine.isFlubber():
            itr = 16
        for i in range(itr):
            util.copyFileToAnotherFile(srcFile, tmpFile)
            util.copyFileToAnotherFile(tmpFile, srcFile)
        exit_code, stdout = HDFS.copyFromLocal(srcFile, cls._hdfs_input_path)
        ruAssert("Pig", exit_code == 0, '[BGJobSetup] Data Load failed')

        if runSmokeTestSetup:
            cls.smoke_test_setup()
Example #14
0
    def run(self, randomwriter_bytes="10", local_dir_name="small_rw_jobs"):  # pylint: disable=unused-argument
        '''
        Repeatedly run small randomwriter jobs until self.signal is cleared
        '''
        local_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                 self.local_dir_name)

        if not Machine.pathExists(None, None, local_dir, passwd=None):
            Machine.makedirs(None, None, local_dir, None)
            Machine.chmod("777", local_dir)

        while self.signal:
            input_dir = "rw_%d" % int(999999 * random.random())
            HDFS.deleteDirectory(input_dir)

            HadoopJobHelper.runRandomTextWriterJob(input_dir,
                                                   self.randomwriter_bytes,
                                                   bytesPerMap=1,
                                                   mapsPerHost=1,
                                                   jobArg="",
                                                   user=None,
                                                   config=None,
                                                   runInBackground=False,
                                                   redirect_file=os.path.join(
                                                       local_dir, input_dir))
Example #15
0
 def run_background_job(cls, runSmokeTestSetup=True, config=None):
     '''
     Runs background long running Yarn Job
     :param runSmokeTestSetup: Runs smoke test setup if set to true
     :param config: expected configuration location
     :return: Total number of long running jobs started
     '''
     from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
     UpgradePerNode.reportProgress(
         "[INFO][YARN][BGJob] Starting background job for Yarn ")
     Local_Test_dir = os.path.join(Config.getEnv("WORKSPACE"), "tests",
                                   "rolling_upgrade", "yarn")
     Multi_Version_App_Dir = os.path.join(Local_Test_dir, "data")
     HDFS.deleteDirectory(cls._hdfs_output,
                          user=Config.get('hadoop', 'HADOOPQA_USER'))
     Mapper = "data/mvMapper.py"
     Reducer = "data/mvReducer.py"
     # Launch job
     (jobID, STREAMING_APP_ID) = HadoopJobHelper.checkAndRunStreamJob(
         Mapper,
         Reducer,
         cls._hdfs_input,
         cls._hdfs_output,
         files=Multi_Version_App_Dir,
         config=config,
         forRU=True,
         extraJobArg=cls._jobArgs,
         env=None,
         sleepTimeAfterJobSubmission=60)
     cls._background_job_appId = STREAMING_APP_ID
     cls._background_job_jobId = jobID
     logger.info("Background job started, application ID: " +
                 STREAMING_APP_ID + " job ID: " + jobID)
     logger.info("Start second long running job for Yarn")
     num_sec_bkjob = cls.run_second_background_job(config=config)
     return 2
Example #16
0
    def tear_down_hdfs_topology(cls, topologyName, useStandaloneCmd):
        """
            kills hdfs topologies and deletes the hdfs directories.
        """
        Machine.rm(user=None,
                   host="localhost",
                   filepath=LOCAL_HDFS_WORK_DIR,
                   isdir=True,
                   passwd=None)

        Storm.killTopology(topologyName,
                           logoutput=True,
                           useStandaloneCmd=useStandaloneCmd)
        HDFS.deleteDirectory("/tmp/mySeqTopology", HDFS_USER)
        HDFS.deleteDirectory("/tmp/dest", HDFS_USER)
        HDFS.deleteDirectory("/tmp/dest2", HDFS_USER)
        HDFS.deleteDirectory("/tmp/foo", HDFS_USER)
        HDFS.deleteDirectory("/tmp/trident", HDFS_USER)
        HDFS.deleteDirectory("/tmp/trident-seq", HDFS_USER)
Example #17
0
 def background_job_teardown(cls):
     '''
     Cleanup for long running Yarn job
     '''
     HDFS.deleteDirectory(cls.HDFS_CLUSTER_INPUT_DIR)
Example #18
0
 def setupOozieDataDir(cls, directory):
     '''
     Recreate the given Oozie data directory on HDFS
     '''
     HDFS.deleteDirectory(directory)
     HDFS.createDirectory(directory)