def run_background_job(cls, runSmokeTestSetup=True, config=None):
    '''
    Runs background long running Flume Job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    :param config: expected configuration location
    :return: Total number of long running jobs started
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    HDFS.createDirectory(cls._hdfs_test_dir, perm="777", force=True)
    UpgradePerNode.reportProgress("[INFO][FLUME][BGJob] Long running job for Flume component started")
    logger.info("Starting the Flume Agent Topology")
    addlParams = "-Dflume.log.dir=%s -Dflume.log.file=agent2.log" % cls._local_work_dir
    agent2.start("agent2", cls._flume_test_src, addlParams=addlParams, enableDebugLogOnConsole=False)
    logger.info("Sleeping for 10 seconds before starting the other Flume agent")
    time.sleep(10)
    addlParams = "-Dflume.log.dir=%s -Dflume.log.file=agent.log" % cls._local_work_dir
    agent1.start("agent", cls._flume_test_src, addlParams=addlParams, enableDebugLogOnConsole=False)
    time.sleep(5)
    return 1
def run_background_job(cls, runSmokeTestSetup=True, config=None, flagFile="/tmp/flagFile"):
    '''
    Uploads files to HDFS before upgrade starts and runs a long running sleep job in the background
    :return: number of applications started
    '''
    # start long running application which performs I/O operations (BUG-23838)
    #from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    #UpgradePerNode.reportProgress("### Background application for HDFS started ####")
    #jobArgs = {"mapred.job.queue.name" : cls._queue}
    #HadoopJobHelper.runSleepJob(numOfMaps = 1, numOfReduce = 1, mapSleepTime = "10000000", reduceSleepTime = "100", extraJobArg = jobArgs, runInBackground = True, config = config, directoutput = False )
    #MAPRED.triggerSleepJob("1", "0", "100000", "1000000", 1, background = True)
    # load generator
    HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
    TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
    HDFS.deleteDirectory(flagFile)
    slavelist = HDFS.getDatanodes()
    jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=%s -mr 3 %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime 36000 -flagFile %s' % (
        HADOOP_TEST_JAR, cls._queue, cls._lgTestOutputDir, cls._lgTestDataDir, flagFile
    )
    Hadoop.runInBackground(jobCmd)
    time.sleep(15)
    return 1
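# Hypothetical companion sketch (not part of the original module): NNloadGenerator keeps running
# until its elapsed time expires or the flag file appears, so a matching teardown would normally
# just create the flag file that run_background_job() passed on the command line above.
#
# def stop_background_job(cls, flagFile="/tmp/flagFile"):
#     HDFS.touchz(flagFile)   # signal the load generator mappers to wind down
#     time.sleep(15)          # give the MR job a moment to notice the flag and exit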
def setup():
    out = HDFS.deleteFile(CREATE_FILE_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
    out = HDFS.deleteDirectory(OUT_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
    out = HDFS.deleteDirectory(CREATE_FILE_2_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
def ru_prepare_save_state_for_upgrade(cls):
    '''
    Prepare Namenode to save state for upgrade
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][HDFS][Prepare] Preparing state for HDFS upgrade")
    # BUG-26726: we need to be in safemode only in non HA cluster
    if not HDFS.isHAEnabled():
        exit_code, output = HDFS.runasAdmin("dfsadmin -safemode enter")
        ruAssert("HDFS", exit_code == 0, '[Preparation] enter safemode failed')
    exit_code, output = HDFS.runas(
        Config.get('hadoop', 'HDFS_USER'),
        "dfsadmin -Ddfs.client.test.drop.namenode.response.number=0 -rollingUpgrade prepare"
    )
    ruAssert("HDFS", exit_code == 0, '[Preparation] -rollingUpgrade prepare failed')
    if not HDFS.isHAEnabled():
        exit_code, output = HDFS.runasAdmin("dfsadmin -safemode leave")
        ruAssert("HDFS", exit_code == 0, '[Preparation] leave safemode failed')
    UpgradePerNode.reportProgress("[INFO][HDFS][Prepare] Preparing state for HDFS upgrade finished")
def createState4Rollback2(cls):
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), "dfs -rm -skipTrash rollback_state1")
    ruAssert("HDFS", exit_code == 0, "can't remove file rollback_state1")
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), "dfs -touchz rollback_state2")
    ruAssert("HDFS", exit_code == 0, "can't create file rollback_state2")
    # truncate the file and validate the truncated size
    logger.info("**** Truncate file to 1 byte ****")
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), "dfs -truncate 1 testFileTr")
    ruAssert("HDFS", exit_code == 0, "can't truncate file testFileTr")
    if os.path.isfile(cls.localTestFileTr):
        os.remove(cls.localTestFileTr)
    logger.info("**** Wait 30 seconds for file to be recovered ****")
    time.sleep(30)
    command = "dfs -copyToLocal testFileTr " + cls.localTestFileTr
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), command)
    ruAssert("HDFS", exit_code == 0, "can't copy file testFileTr")
    size = os.path.getsize(cls.localTestFileTr)
    ruAssert("HDFS", size == 1, "size not 1. Actual size: " + str(size))
def getNameNodeURL(nameservice2=False):
    if Hadoop.isEncrypted():
        baseUrl = "https://%s" % HDFS.getNamenodeHttpsAddress(nameservice2)
    else:
        baseUrl = "http://%s" % HDFS.getNamenodeHttpAddress(nameservice2)
    logger.info("URL being returned is - %s" % baseUrl)
    return baseUrl
def setupHS2ConcurrencyDataset():
    logger.info("Setup test data")
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    HDFS.createDirectory("/tmp/hs2data", user=hdfs_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    query = """drop table if exists student_txt;
create external table student_txt (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter_txt;
create external table voter_txt (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists student;
create table student (name string, age int, gpa double) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
drop table if exists voter;
create table voter (name string, age int, registration string, contributions float) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
Insert into table student select * from student_txt;
Insert into table voter select * from voter_txt;"""
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
def updateJobProperties(cls, propFile, properties=None, haEnabled=False, debug=False):
    fileSystemName = Hadoop.getFSDefaultValue()
    jobTrackerIP = MAPRED.getJobtrackerAddress()
    jobTracker = jobTrackerIP[0] + ":" + jobTrackerIP[1]

    if not properties:
        properties = {}
    if not properties.has_key('nameNode'):
        properties['nameNode'] = fileSystemName
    if not properties.has_key('jobTracker'):
        properties['jobTracker'] = jobTracker

    if "hcatalog" in propFile:
        if Hadoop.isSecure():
            kerberosPrincipal = Hive.getConfigValue("hive.metastore.kerberos.principal")
            properties['hive.metastore.kerberos.principal'] = kerberosPrincipal
        logger.info("Updating for hcatalog workflow")
        hcatNode = Hive.getConfigValue("hive.metastore.uris").replace('thrift', 'hcat')
        logger.info("Hcat node is " + hcatNode)
        properties['hcatNode'] = hcatNode

    if Hadoop.isSecure():
        # determine the namenode and the jobtracker principal
        nnPrincipal = None
        if haEnabled:
            nnPrincipal = HDFS.getNameNodePrincipal().replace('_HOST', HDFS.getNamenodeByState('active'))
        else:
            nnPrincipal = HDFS.getNameNodePrincipal().replace('_HOST', HDFS.getNamenodeHttpAddress()[0])
        jtPrincipal = MAPRED.getMasterPrincipal().replace('_HOST', jobTrackerIP[0])
        properties['dfs.namenode.kerberos.principal'] = nnPrincipal
        properties['mapreduce.jobtracker.kerberos.principal'] = jtPrincipal

    wfPath = util.getPropertyValueFromFile(propFile, "oozie.wf.application.path")
    if wfPath != None and wfPath.find("hdfs://localhost:9000") != -1:
        wfPath = wfPath.replace("hdfs://localhost:9000", fileSystemName)
        logger.info("Value of replaced oozie.wf.application.path is " + wfPath)
        properties['oozie.wf.application.path'] = wfPath

    util.writePropertiesToFile(propFile, propFile, properties)

    if debug:
        logger.info('Content of properties file %s' % propFile)
        f = open(propFile, 'r')
        # print the file to the console
        logger.info(f.read())
        f.close()
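# Hypothetical usage sketch (the workflow directory and extra properties below are illustrative,
# not taken from this module): point the helper at an Oozie job.properties file and let it fill
# in the cluster-specific values before submitting the workflow.
#
# propFile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'wordcount-wf', 'job.properties')
# cls.updateJobProperties(
#     propFile,
#     properties={'queueName': 'default'},   # merged with the generated nameNode/jobTracker entries
#     haEnabled=HDFS.isHAEnabled(),
#     debug=True                             # dumps the rewritten file into the test log
# )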
def run_client_smoketest(cls, config=None, env=None):
    '''
    Run wordcount job passing env variables
    :param config: Configuration location
    :param env: Set Environment variables
    '''
    logger.info("**** Running HDFS CLI Test ****")
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress("[INFO][HDFS][ClientSmoke] CLI test for HDFS started")
    if not cls._SmokeInputDir:
        cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
    SmokeOutputDir = cls._base_hdfs_dir + '/smokeHdfsOutput_cli'
    HDFS.deleteDirectory(SmokeOutputDir, Config.get('hadoop', 'HADOOPQA_USER'))
    jobCmd = 'jar %s wordcount \"-Dmapreduce.reduce.input.limit=-1\" \"-D%s=%s\" %s %s' % (
        Config.get('hadoop', 'HADOOP_EXAMPLES_JAR'), "mapred.job.queue.name", cls._queue,
        cls._SmokeInputDir, SmokeOutputDir
    )
    exit_code, stdout = Hadoop.run(jobCmd, env=env)
    ruAssert("HDFS", exit_code == 0, "[ClientSmoke] Hdfs smoketest failed")
    # check the exit code of the cleanup itself, not the stale wordcount exit code
    exit_code, stdout = HDFS.deleteDirectory(SmokeOutputDir)
    ruAssert("HDFS", exit_code == 0, "[ClientSmoke] could not delete: " + SmokeOutputDir)
    UpgradePerNode.reportProgress("[INFO][HDFS][ClientSmoke] CLI test for HDFS finished")
def background_job_teardown(cls):
    '''
    Cleanup for long running Yarn job
    '''
    HDFS.deleteDirectory(cls._hdfs_input, user=Config.get('hadoop', 'HADOOPQA_USER'))
    HDFS.deleteDirectory(cls._hdfs_output, user=Config.get('hadoop', 'HADOOPQA_USER'))
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Setup for background long running job
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    logger.info("runSmokeTestSetup = %s, config = %s", runSmokeTestSetup, config)
    HDFS.createDirectory(cls.HDFS_CLUSTER_INPUT_DIR)
def background_job_teardown(cls):
    '''
    Cleanup of HDFS background job
    '''
    HDFS.deleteDirectory(cls._base_hdfs_dir)
    command = "rm -rf " + cls._lgStructureDir
    exit_code, stdout = Machine.runas(
        Machine.getAdminUser(), command, None, None, None, "True", Machine.getAdminPasswd()
    )
def run(self):
    """
    Move files to the HDFS input dir after each interval period, n times.
    """
    for count in range(0, self.times):
        text = "hello world \n Testing HDFS Word count Spark application"
        random_name = ''.join(random.choice(string.lowercase) for i in range(5))
        filename = os.path.join(Config.getEnv('ARTIFACTS_DIR'), random_name)
        util.writeToFile(text, filename, isAppend=False)
        max_retry = 3
        retry_count = 0
        while retry_count < max_retry:
            try:
                if "hdfs://ns2" in self.hdfs_input_dir:
                    cp_status = HDFS.copyFromLocal(filename, "hdfs://ns2/tmp", enableDebug=True)
                else:
                    cp_status = HDFS.copyFromLocal(filename, "/tmp", enableDebug=True)
                assert cp_status[0] == 0, "Failed to copy file to HDFS 'tmp'"
                logger.info("copyFromLocal command finished for %s" % filename)
                if "hdfs://ns2" in self.hdfs_input_dir:
                    mv_status = HDFS.mv(None, "hdfs://ns2/tmp/" + random_name, self.hdfs_input_dir, config=None)
                else:
                    mv_status = HDFS.mv(None, "/tmp/" + random_name, self.hdfs_input_dir, config=None)
                assert mv_status[0] == 0, "Failed to move file from 'tmp' to test directory"
            except:
                if retry_count < max_retry:
                    retry_count = retry_count + 1
                    logger.info(
                        "File copy into HDFS test directory failed on attempt %s, retrying after 120s sleep interval" % retry_count
                    )
                    time.sleep(120)
                else:
                    logger.error("Failed to copy file into HDFS test directory, expect failures in HDFSWordCount")
            else:
                break
        logger.info("%s moved to %s" % (filename, self.hdfs_input_dir))
        logger.info("sleeping for %s seconds" % self.interval)
        time.sleep(self.interval)
def generate_test_data(cls, hdfs_test_dir, num_of_rows):
    test_data_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "sqooptest.dat")
    f = open(test_data_file, 'w')
    userid = 100000
    for i in xrange(num_of_rows):
        f.write("%d,%d\n" % (userid + i, random.randint(10, 80)))
    f.close()
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.copyFromLocal(test_data_file, hdfs_test_dir)
def background_job_teardown(cls):
    '''
    Cleanup directories for long running Tez job
    '''
    for input in cls._hdfsInputList:
        HDFS.deleteDirectory(input)
    for output in cls._hdfsOutputList:
        HDFS.deleteDirectory(output)
    Machine.rm(user=HADOOPQA_USER, host=None, filepath=LOCAL_WORK_DIR, isdir=True)
    logger.info("**** Completed background job teardown for Tez ****")
def stopYarnLongRunningJob(cls):
    '''
    Stop long running Yarn Dshell job
    '''
    logger.info("**** Touch the file ****")
    HDFS.createDirectory(cls._multi_version_signal_file_dir, user=None, perm="777", force=False)
    multi_version_signal_file_path = cls._multi_version_signal_file_dir + "/signal"
    HDFS.touchz(multi_version_signal_file_path)
    #YARN.waitForApplicationFinish(cls._background_job_appId)
    time.sleep(2)
    logger.info("**** Done checking status ****")
def getKnoxHDFSURL(nameservice2=False):
    KNOX_HOST = Config.get('knox', 'KNOX_HOST').split(',')[0]
    if HDFS.isFederated():
        if nameservice2:
            baseUrl = "https://%s:8443/gateway/ui_%s/hdfs/" % (KNOX_HOST, HDFS.getNameServices()[1])
        else:
            baseUrl = "https://%s:8443/gateway/ui_%s/hdfs/" % (KNOX_HOST, HDFS.getNameServices()[0])
    else:
        baseUrl = "https://%s:8443/gateway/ui/hdfs/" % KNOX_HOST
    baseUrlWithNNHost = "%s?host=%s" % (baseUrl, getNameNodeURL(nameservice2))
    logger.info("URL being returned is - %s" % baseUrlWithNNHost)
    return baseUrl, baseUrlWithNNHost
def background_job_setup(cls, runSmokeTestSetup=True, config=None):
    '''
    Create 4 input datasets (HDFS_INPUT0..HDFS_INPUT3) for TestOrderedWordCount
    :param runSmokeTestSetup: Runs smoke test setup if set to true
    '''
    logger.info("*** Start background job setup for Tez ***")
    Machine.rm(user=HADOOPQA_USER, host=None, filepath=LOCAL_WORK_DIR, isdir=True)
    os.mkdir(LOCAL_WORK_DIR)
    for i in range(0, 4, 1):
        inputDirName = "HDFS_INPUT%d" % i
        inputDirPath = os.path.join(LOCAL_WORK_DIR, inputDirName)
        HadoopJobHelper.runCustomWordWriter(LOCAL_WORK_DIR, inputDirPath, 10, 400, 10000)
        hdfsInputDir = "/user/%s/Input%d" % (HADOOPQA_USER, i)
        hdfsOutputDir = "/user/%s/output%d" % (HADOOPQA_USER, i)
        # In case already present, delete the input directory
        HDFS.deleteDirectory(hdfsInputDir)
        HDFS.createDirectory(hdfsInputDir)
        HDFS.deleteDirectory(hdfsOutputDir)
        HDFS.copyFromLocal(inputDirPath, hdfsInputDir)
        cls._hdfsInputList.append(hdfsInputDir + "/" + inputDirName)
        cls._hdfsOutputList.append(hdfsOutputDir)
        logger.info("Created data for input %d", i)
    logger.info("*** End background job setup for Tez ***")
def ru_downgrade_state(cls):
    '''
    Downgrades the Namenode.
    A downgrade is done when the saved state may need to be converted back to the previous
    version, or the state is compatible and the upgrade is being abandoned.
    NOTE: this command will not return until the namenode shuts down
    '''
    command = "sudo su - -c 'hadoop namenode -rollingUpgrade downgrade' hdfs"
    if HDFS.isHAEnabled():
        nodes = []
        nodes.append(HDFS.getNamenodeByState('standby'))
        nodes.append(HDFS.getNamenodeByState('active'))
        for node in nodes:
            HDFS.resetNamenode('stop', host=node)
            (exitcode, stdout) = Machine.runas(
                Machine.getAdminUser(), command, node, None, None, "True", Machine.getAdminPasswd()
            )
            ruAssert(
                "HDFS", exitcode == 0,
                "[NNDowngrade] hadoop namenode -rollingUpgrade downgrade command failed"
            )
        return

    HDFS.stopNamenode()
    node = HDFS.getNamenode()
    (exitcode, stdout) = Machine.runas(
        Machine.getAdminUser(), command, node, None, None, "True", Machine.getAdminPasswd()
    )
    ruAssert(
        "HDFS", exitcode == 0,
        "[NNDowngrade] hadoop namenode -rollingUpgrade downgrade command failed"
    )
def createState4Rollback1(cls):
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), "dfs -rm -skipTrash rollback_state1")
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), "dfs -rm -skipTrash rollback_state2")
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), "dfs -rm -skipTrash testFileTr")
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), "dfs -touchz rollback_state1")
    ruAssert("HDFS", exit_code == 0, "can't create file rollback_state1")
    command = "dfs -put " + cls.testFileTr + " testFileTr"
    exit_code, stdout = HDFS.runas(Config.get('hadoop', 'HADOOPQA_USER'), command)
    ruAssert("HDFS", exit_code == 0, "can't upload " + cls.testFileTr)
def ensure_all_jns_are_up(cls, nodes):
    # run roll edits
    HDFS.rollEdits()
    time.sleep(5)
    # capture LastAppliedOrWrittenTxId from the NN JMX
    nn_url = HDFS.getNNWebAppAddress() + '/jmx'
    nn_data = util.getJMXData(nn_url, 'Hadoop:service=NameNode,name=NameNodeInfo', 'JournalTransactionInfo')
    json_data = json.loads(nn_data)
    last_tx_id = int(json_data['LastAppliedOrWrittenTxId'])
    logger.info('******************** NN LAST TX ID: %s *************************' % last_tx_id)
    cls.ensure_jns_have_new_txn(nodes, last_tx_id)
def setupSchemaEvolutionDataset():
    logger.info("Setup Schema Evolution dataset")
    HDFS.createDirectory(HCAT_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory(HDFS_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )
    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)

    hiveServer2Url = str(Hive.getHiveServer2Url())
    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-schemaevolution", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0
def perform_post_upgrade_steps(self):
    if Config.getEnv("HDP_STACK_INSTALLED").lower() == "true":
        from beaver.component.hadoop import Hadoop, HDFS
        from beaver.component.hive import Hive
        COMPONENT = str(self.COMPONENT)
        HDFS_USER = Config.get('hadoop', 'HDFS_USER')
        if 'experiment' in COMPONENT and Hive.isInstalled():
            HIVE_WAREHOUSE_DIR = Hive.getConfigValue(
                "hive.metastore.warehouse.dir", defaultValue="/apps/hive/warehouse"
            )
            HDFS.chmod(HDFS_USER, 777, HIVE_WAREHOUSE_DIR, True)
        else:
            UpgradeLogger.reportProgress("No additional post-upgrade steps defined for EU", True)
    else:
        logger.info("No additional post-upgrade steps defined for EU on HDF")
def verifyLongRunningQuery(cls, file_to_verify):
    lfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), file_to_verify)
    exit_code, stdout = HDFS.copyToLocal(cls._hdfs_bgjtest_dir + "/" + file_to_verify, lfile)
    if exit_code != 0:
        logger.info("Error fetching the timestamp file from HDFS")
        return False
    lines = open(lfile, 'r').readlines()
    if len(lines) == 0:
        logger.info("Empty timestamp file")
        return False
    try:
        ts = int(lines[-1])
        # Shutdown gracefully
        if ts == -1:
            return True
        # Timestamp should be less than 5 minutes old, which indicates
        # the UDF wrote something at least once in the last 5 minutes
        timegap = time.time() - (ts / 1000)
        if timegap > 300:
            logger.info("Time gap is %d seconds, last line in the timestamp file was '%d'" % (timegap, ts))
            return False
    except ValueError:
        logger.info("Error parsing last line in the timestamp file => '" + lines[-1] + "'")
        return False
    return True
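# Hypothetical caller sketch: while the upgrade is in flight, poll the heartbeat file that the
# long-running Hive UDF writes into cls._hdfs_bgjtest_dir (the file name and the
# upgrade_in_progress() helper below are illustrative, not defined in this module).
#
# while upgrade_in_progress():
#     ruAssert("Hive", cls.verifyLongRunningQuery("udf_timestamps.txt"),
#              "[BGJobCheck] long running query stopped writing timestamps")
#     time.sleep(60)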
def wait4DNLive(cls, node):
    i = 1
    maxTries = 30  # i.e. 150 sec - note the delay in QE configs for initial BR is 120 sec
    logger.info('*** Waiting for DN %s to become live ****' % node)
    while i < maxTries:
        livenodes = HDFS.getDatanodesFromJmx()
        if node in livenodes:
            return True
        # saw strange behaviour where the dns were ip addresses sometimes; convert
        livenodesIp = []
        for iNode in livenodes:
            # convert to ip addresses
            livenodesIp.append(util.getIpAddress(iNode))
        if node in livenodesIp:
            return True
        logger.info('*** Waiting for DN %s to become live ****' % node)
        logger.info('*** Live nodes list is: %s %s ****' % (livenodes, livenodesIp))
        time.sleep(5)
        i = i + 1
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress(
        "[WARNING][HDFS][XXX] Datanode %s did not become live after 150 secs of restart, continuing " % node
    )
    return False
def submit_storm_hive_topology(cls, tcId, className, args, useStandaloneCmd):
    if Hadoop.isSecure():
        user_realm = None
        if Config.hasOption('machine', 'USER_REALM'):
            user_realm = Config.get('machine', 'USER_REALM', '')
        else:
            nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
            atloc = nnKerbPrincipal.find("@")
            if atloc != -1:
                # skip the '@' itself so the realm is not doubled when the principal is built below
                user_realm = nnKerbPrincipal[atloc + 1:]
        if user_realm is not None:
            args += " " + Machine.getHeadlessUserKeytab(Config.getEnv('USER')) + " " \
                    + Config.getEnv('USER') + '@' + user_realm

    exit_code, stdout = Storm.runStormHdfsTopology(
        TARGET_HIVE_STORM_JAR,
        className,
        args,
        None,
        logoutput=True,
        inBackground=False,
        useStandaloneCmd=useStandaloneCmd
    )
    logger.info(exit_code)

    ruAssert("Storm", exit_code == 0, "[StormHiveSubmit] %s Failed" % tcId)
def validate_wordcount_written_to_HDFS(cls, hdfs_dir, patterns, expected_count, appId=None):
    """
    Validate the wordcount results written into HDFS directories by a streaming job.
    Use wildcards in 'hdfs_dir' to recursively read sub-directories.
    :param hdfs_dir: HDFS directory from where contents will be read
    :param patterns: list of words to check
    :param expected_count: the expected number of occurrences for each word in 'patterns'
    :param appId: application ID (optional parameter)
    :return:
    """
    word_count = {}
    # initialize the word_count dictionary
    for p in patterns:
        word_count[p] = 0
    exit_code, cat_content = HDFS.cat(hdfs_dir, logoutput=True)
    assert exit_code == 0, "Could not read from %s, Error: %s, appId: %s" % (hdfs_dir, cat_content, appId)
    for line in cat_content:
        words = line.split()
        for word in words:
            if word in word_count.keys():
                word_count[word] = word_count[word] + 1
    logger.info(word_count)
    for key, value in word_count.iteritems():
        assert value >= expected_count, "%s wordcount is %s. expected_count is %s, appId: %s" % \
            (key, value, expected_count, appId)
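# Hypothetical usage sketch: after the streaming wordcount application has produced output,
# assert that every expected word appears at least 'expected_count' times (the paths, word list
# and appId below are illustrative).
#
# cls.validate_wordcount_written_to_HDFS(
#     "/user/hrt_qa/streaming-out/*/part*",    # wildcard walks the per-batch sub-directories
#     ["hello", "world", "Testing"],           # words emitted by the input generator
#     expected_count=1,
#     appId="application_0000000000000_0001"   # only used to make the assert messages clearer
# )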
def createClusterEntities(cls, colo, desc, name):
    try:
        from beaver.component.falcon import Falcon
    except ImportError:
        ## Import fails when Falcon is not installed on this machine. Nothing to do
        return

    from beaver.component.hadoop import Hadoop, HDFS, YARN
    write_endpoint = Hadoop.getFSDefaultValue()
    webhdfs_scheme = 'webhdfs'
    if HDFS.isHttpsEnabled():
        webhdfs_scheme = 'swebhdfs'
    read_endpoint = '%s://%s:%s' % (
        webhdfs_scheme, write_endpoint.split('/')[2].split(':')[0], HDFS.getNNWebPort()
    )
    execute_endpoint = YARN.getResourceManager()
    falconNode = Falcon.get_falcon_server()

    from beaver.component.oozie import Oozie
    oozieUrl = Oozie.getOozieUrl()
    entityText = "<?xml version=\"1.0\"?>" \
                 "<cluster colo=\"" + colo + "\" description=\"" + desc + "\" name=\"" + name + "\" " \
                 "xmlns=\"uri:falcon:cluster:0.1\"> " \
                 "<interfaces> " \
                 "<interface type=\"readonly\" endpoint=\"" + read_endpoint + "\" version=\"0.20.2\"/> " \
                 "<interface type=\"write\" endpoint=\"" + write_endpoint + "\" version=\"0.20.2\"/> " \
                 "<interface type=\"execute\" endpoint=\"" + execute_endpoint + "\" version=\"0.20.2\"/> " \
                 "<interface type=\"workflow\" endpoint=\"" + oozieUrl + "\" version=\"3.1\"/>" \
                 "<interface type=\"messaging\" endpoint=\"" \
                 "tcp://" + falconNode + ":61616?daemon=true\" version=\"5.1.6\"/>" \
                 "</interfaces>" \
                 "<locations>" \
                 "<location name=\"staging\" path=\"/apps/falcon/" + name + "/staging\" />" \
                 "<location name=\"temp\" path=\"/tmp\" />" \
                 "<location name=\"working\" path=\"/apps/falcon/" + name + "/working\" />" \
                 "</locations>" \
                 "<ACL owner=\"" + cls._job_user + "\" group=\"users\" permission=\"0755\"/>"
    if Hadoop.isSecure():
        realm = HDFS.getConfigValue('dfs.namenode.kerberos.principal').split('@')[1]
        entityText += "<properties> <property name=\"dfs.namenode.kerberos.principal\" value=\"nn/_HOST@" + realm + "\"/> </properties>"
    entityText += "</cluster>"
    textFile = open(os.path.join(cls._local_workspace, name + ".xml"), "w")
    textFile.write("%s" % entityText)
    textFile.close()
    return
def smoke_test_setup(cls):
    '''
    Setup function for HDFS smoke test
    '''
    if not cls._SmokeInputDir:
        cls._SmokeInputDir = cls._base_hdfs_dir + "/smokeHdfsInput"
    HDFS.deleteDirectory(cls._SmokeInputDir, Config.get('hadoop', 'HADOOPQA_USER'))
    jobCmd = 'jar %s randomtextwriter \"-D%s=%s\" \"-D%s=%s\" %s' % (
        Config.get('hadoop', 'HADOOP_EXAMPLES_JAR'), "mapreduce.randomtextwriter.totalbytes", "4096",
        "mapred.job.queue.name", cls._queue, cls._SmokeInputDir
    )
    exit_code, stdout = Hadoop.run(jobCmd)
    ruAssert(
        "HDFS", exit_code == 0,
        '[SmokeSetup] Randomtextwriter job failed and could not create data on hdfs'
    )
def setupTableauDataset():
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "tableau")
    DATA_DIR = os.path.join(LOCAL_DATA_DIR, 'data')
    SCHEMA_SQL_DIR = os.path.join(LOCAL_DATA_DIR, 'schema_3.0')
    HIVE_TABLES = [
        'Batters', 'Calcs', 'DateBins', 'DateTime', 'Election', 'FischerIris', 'Loan', 'NumericBins', 'REI',
        'SeattleCrime', 'Securities', 'SpecialData', 'Staples', 'Starbucks', 'UTStarcom', 'xy'
    ]
    TABLEAU_TEST_DIR = "/user/hrt_qa/tableau"
    DATABASE_NAME = 'tableau'

    logger.info("Setup Tableau dataset")

    if not os.path.exists(LOCAL_DATA_DIR):
        TABLEAU_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'TABLEAU_DATASET'), TABLEAU_DATA_TGZ)
        Machine.tarExtractAll(TABLEAU_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create test directory on hdfs to store tableau data files")
    HDFS.createDirectory(TABLEAU_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    logger.info("create tableau database before creating tables")
    Hive.runQueryOnBeeline("DROP DATABASE IF EXISTS %s" % DATABASE_NAME)
    Hive.runQueryOnBeeline("CREATE DATABASE IF NOT EXISTS %s" % DATABASE_NAME)

    for tbl in HIVE_TABLES:
        hdfsDir = TABLEAU_TEST_DIR + '/%s' % tbl
        hdfsFile = hdfsDir + '/%s' % tbl
        localFile = os.path.join(DATA_DIR, '%s.tbl' % tbl)
        sqlFile = os.path.join(SCHEMA_SQL_DIR, '%s.sql' % tbl)

        logger.info("create directory for %s table" % tbl)
        exit_code, stdout = HDFS.createDirectory(hdfsDir, perm='777', force=True)
        assert exit_code == 0, 'Could not create dir for table %s on hdfs.' % tbl

        logger.info("copy file for table %s to hdfs" % tbl)
        exit_code, stdout = HDFS.copyFromLocal(localFile, hdfsFile)
        assert exit_code == 0, 'Could not copy file for table %s to hdfs.' % tbl

        logger.info("create %s table" % tbl)
        # thing-to-do: Modify Hive.runQueryOnBeeline to accept a query file name
        exit_code, stdout, stderr = Hive.runQueryOnBeeline(
            ReadFromFile(sqlFile), readFromFile=True, hivevar={'HDFS_LOCATION': hdfsDir}, logoutput=True
        )
        assert exit_code == 0, '%s table creation failed' % tbl
def insertFileIntoHdfs(fileName):
    pathFileName = '/user/' + HADOOPQA_USER + '/' + fileName
    if not HDFS.fileExists(pathFileName):
        sourceFile = DATA_PATH + '/' + fileName
        destFile = '/user/' + HADOOPQA_USER + '/' + fileName
        putCmd = "dfs -put " + sourceFile + ' ' + destFile
        out = Hadoop.run(putCmd)
        return out
def HDFS_getGateway(cls, logoutput=True):
    try:
        from beaver.component.hadoop import HDFS
        return HDFS.getGateway()
    except Exception:
        if logoutput:
            logger.error("Exception occurred during HDFS_getGateway() call")
            logger.error(traceback.format_exc())
        return None
def generateTestReportConf(infile, outfile, results):
    config = ConfigParser()
    config.optionxform = str
    config.read(infile)
    if config.has_section(SECTION):
        for option, value in config.items(SECTION):
            if value != "":
                continue
            elif option == "BUILD_ID" and config.has_option(SECTION, "REPO_URL"):
                config.set(SECTION, option, getBuildId(config.get(SECTION, "REPO_URL")))
                config.remove_option(SECTION, "REPO_URL")
            elif option == "HOSTNAME":
                config.set(SECTION, option, socket.getfqdn())
            elif option == "COMPONENT_VERSION":
                if not config.has_option(SECTION, "COMPONENT") or config.get(SECTION, "COMPONENT") == "":
                    config.set(SECTION, "COMPONENT", "Hadoop")
                config.set(SECTION, option, getComponentVersion(config.get(SECTION, "COMPONENT")))
            elif option == "OS":
                config.set(SECTION, option, platform.platform())
            elif option == "SECURE" and Config.hasOption('hadoop', 'IS_SECURE'):
                config.set(SECTION, option, Config.get('hadoop', 'IS_SECURE').lower())
            elif option == "BLOB":
                pass
            elif option == "RAN":
                config.set(SECTION, option, results[0] + len(results[1]))
            elif option == "PASS":
                config.set(SECTION, option, results[0])
            elif option == "FAIL":
                config.set(SECTION, option, len(results[1]))
            elif option == "SKIPPED":
                config.set(SECTION, option, results[2])
            elif option == "ABORTED":
                config.set(SECTION, option, results[3])
            elif option == "FAILED_TESTS":
                config.set(SECTION, option, ",".join(results[1]))
            elif option == "SINGLE_NODE":
                from beaver.component.hadoop import HDFS
                if HDFS.getDatanodeCount() > 1:
                    config.set(SECTION, option, "false")
                else:
                    config.set(SECTION, option, "true")
    config.write(open(outfile, 'w'))
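# Hypothetical usage sketch. From the branches above, 'results' is treated as a sequence where
# results[0] is the passed count, results[1] the list of failed test names, results[2] the
# skipped count and results[3] the aborted count; the input .conf supplies SECTION with blank
# values for the report fields to be filled in (file names below are illustrative).
#
# results = (42, ["test_safemode_enter", "test_rolling_downgrade"], 3, 0)
# generateTestReportConf("test_report.conf.in", "test_report.conf", results)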