Example No. 1
    def updateJobProperties(cls,
                            propFile,
                            properties=None,
                            haEnabled=False,
                            debug=False):
        fileSystemName = Hadoop.getFSDefaultValue()
        jobTrackerIP = MAPRED.getJobtrackerAddress()
        jobTracker = jobTrackerIP[0] + ":" + jobTrackerIP[1]

        if not properties:
            properties = {}
        if 'nameNode' not in properties:
            properties['nameNode'] = fileSystemName
        if 'jobTracker' not in properties:
            properties['jobTracker'] = jobTracker

        if "hcatalog" in propFile:
            if Hadoop.isSecure():
                kerberosPrincipal = Hive.getConfigValue(
                    "hive.metastore.kerberos.principal")
                properties[
                    'hive.metastore.kerberos.principal'] = kerberosPrincipal

            logger.info("Updating for hcatalog workflow")
            hcatNode = Hive.getConfigValue("hive.metastore.uris").replace(
                'thrift', 'hcat')
            logger.info("Hcat node is " + hcatNode)
            properties['hcatNode'] = hcatNode

        if Hadoop.isSecure():
            # determine the namenode and the jobtracker principal
            nnPrincipal = None
            if haEnabled:
                nnPrincipal = HDFS.getNameNodePrincipal().replace(
                    '_HOST', HDFS.getNamenodeByState('active'))
            else:
                nnPrincipal = HDFS.getNameNodePrincipal().replace(
                    '_HOST',
                    HDFS.getNamenodeHttpAddress()[0])
            jtPrincipal = MAPRED.getMasterPrincipal().replace(
                '_HOST', jobTrackerIP[0])
            properties['dfs.namenode.kerberos.principal'] = nnPrincipal
            properties['mapreduce.jobtracker.kerberos.principal'] = jtPrincipal

        wfPath = util.getPropertyValueFromFile(propFile,
                                               "oozie.wf.application.path")
        if wfPath is not None and "hdfs://localhost:9000" in wfPath:
            wfPath = wfPath.replace("hdfs://localhost:9000", fileSystemName)
            logger.info("Value of replaced oozie.wf.application.path is " +
                        wfPath)
            properties['oozie.wf.application.path'] = wfPath

        util.writePropertiesToFile(propFile, propFile, properties)

        if debug:
            logger.info('Content of properties file %s' % propFile)
            # print the file to the console
            with open(propFile, 'r') as f:
                logger.info(f.read())
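
The _HOST substitution above follows the usual Hadoop convention for Kerberos principals. A minimal standalone sketch of that replacement, using a hypothetical principal template and namenode host:

# Minimal sketch of the _HOST substitution; the principal template and the
# active namenode host below are hypothetical values.
nn_principal_template = "nn/_HOST@EXAMPLE.COM"
active_namenode = "nn1.example.com"
nn_principal = nn_principal_template.replace("_HOST", active_namenode)
print(nn_principal)  # nn/nn1.example.com@EXAMPLE.COM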
Example No. 2
    def background_job_when_master_upgrade(cls):
        '''
        Start a background application that runs while the component's master service is upgraded
        :return:
        '''
        from beaver.component.hive import Hive
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

        UpgradePerNode.reportProgress(
            "[INFO][Hive][BGJob] Background Job test setup when upgrading Hive started"
        )

        logger.info("Creating hive tables for short background jobs")
        query = "drop table if exists shortlr_hive_verify;\n"
        query += "create table shortlr_hive_verify (userid string, age int);\n"
        query += "drop table if exists shortlr_bline_verify;\n"
        query += "create table shortlr_bline_verify (userid string, age int);\n"
        query += "drop table if exists shortlr_bline_verify;\n"
        query += "create table shortlr_bline_verify (userid string, age int);\n"
        short_bgjob_setupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                             'shortlrsetup.sql')
        util.writeToFile(query, short_bgjob_setupfile)

        exit_code, stdout = Hive.run("-f " + short_bgjob_setupfile)
        if exit_code != 0:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][BGJob] Background Job test setup when Hive upgrades failed due to exitcode = %d"
                % exit_code)

        logger.info("Running the Background Job when upgrading Hive")
        UpgradePerNode.reportProgress(
            "[INFO][Hive][BGJob] Long running job for Hive component upgrades started"
        )

        setqueue = ""
        if Hive.isTezEnabled():
            setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
        else:
            setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

        logger.info("**** Running Hive CLI Test ****")
        query = setqueue + " insert overwrite table shortlr_hive_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
        cls._shortbgj_hive_process = Hive.runQuery(query, background=True)

        # Sleeping for 10 seconds to make sure that query initializes before Metastore is restarted
        time.sleep(10)

        logger.info("**** Running Beeline CLI Test ****")
        query = setqueue + "\ninsert overwrite table shortlr_bline_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
        cls._shortbgj_bline_process = Hive.runQueryOnBeeline(query,
                                                             readFromFile=True,
                                                             background=True)

        UpgradePerNode.reportProgress(
            "[INFO][Hive][BGJob] Background Job test setup when Hive upgrades finished"
        )
Example No. 3
def setupHS2ConcurrTestData(stdauth=True):
    # hive.support.concurrency is not in the whitelist, as this is a server setting and not something that a user should/can set in a session.
    # In the case of Ranger and SQL std authorization, set hive.support.concurrency to true and restart HS2
    changes = {
        'hive-site.xml': {
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true',
            'hive.compactor.initiator.on': 'true',
            'hive.compactor.worker.threads': '3',
            'hive.compactor.check.interval': '10',
            'hive.timedout.txn.reaper.interval': '20s'
        },
        'hiveserver2-site.xml': {
            'hive.compactor.initiator.on': 'false',
            'hive.exec.dynamic.partition.mode': 'nonstrict'
        }
    }
    if not Hive.isHive2():
        changes['hiveserver2-site.xml']['hive.enforce.bucketing'] = 'true'
    else:
        changes['hiveserver2-site.xml']['hive.server2.enable.doAs'] = 'false'
        changes['hiveserver2-site.xml']['hive.txn.manager'] = 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager'
        changes['hiveserver2-site.xml']['hive.support.concurrency'] = 'true'
    Hive.modifyConfig(changes)
    time.sleep(60)
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    test_user = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/hs2data", user=test_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", user=test_user, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    HDFS.createDirectory("/tmp/hs2data/customer_address", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'customer_address10k'), "/tmp/hs2data/customer_address")
    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists customer_address;
create external table customer_address (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2), ca_location_type string) row format delimited fields terminated by '|' stored as textfile location '/tmp/hs2data/customer_address';
drop table if exists customer_address_partitioned;
create table customer_address_partitioned (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) partitioned by (ca_location_type string) clustered by (ca_state) into 50 buckets stored as orc tblproperties('transactional'='true');
insert into table customer_address_partitioned partition(ca_location_type) select ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type from customer_address;"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table customer_address_partitioned to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
Example No. 4
def verifyLogMessageInServiceLog(text, service, timestamp=0, dateTimeFormat=None):
    '''
    Returns True when the given log message appears in the service log
    '''
    hiveLog = Hive.getServiceLog(service)
    if not hiveLog or not text:
        return None
    hiveHost = Hive.getHiveHost(service)
    destlog = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tmp-%d.log' % int(999999 * random.random()))
    Machine.copyToLocal(None, hiveHost, hiveLog, destlog)
    return util.findMatchingPatternInFileAfterTimestamp(destlog, text, timestamp, dateTimeFormat=dateTimeFormat)
Example No. 5
def startLLAPWithChaosMonkey(interval='300'):
    hive_changes = {'tez-site.xml': {'tez.am.task.max.failed.attempts': '0'}}
    Hive.modifyConfig(hive_changes, services=['hiveserver2'])

    AMBARI_AGENT_TMP_DIR = '/var/lib/ambari-agent/tmp'
    ARTIFACTS_DIR = Config.getEnv('ARTIFACTS_DIR')
    LLAP_START_USER = Config.get('hive', 'HIVE_USER')
    dirs = [
        name for name in os.listdir(AMBARI_AGENT_TMP_DIR) if os.path.isdir(os.path.join(AMBARI_AGENT_TMP_DIR, name))
    ]

    llap_dirs = [name for name in dirs if name.startswith('llap-slider')]

    if len(llap_dirs) < 1:
        logger.info("Could not find llap dir under %s" % AMBARI_AGENT_TMP_DIR)
        Hive.startService(services=['hiveserver2'])
    else:
        llap_dir = llap_dirs[-1]

        resourceConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'resources.json')
        tmpResourceConfig = os.path.join(ARTIFACTS_DIR, 'resources.json')
        propertyMap = [(["components", "LLAP"], {"yarn.container.failure.threshold": "1000"})]
        util.writePropertiesToConfigJSONFileMulti(resourceConfig, tmpResourceConfig, propertyMap)
        Machine.copy(tmpResourceConfig, resourceConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

        appConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'appConfig.json')
        tmpAppConfig = os.path.join(ARTIFACTS_DIR, 'appConfig.json')
        propertyMap = [
            (
                ["global"], {
                    "internal.chaos.monkey.probability.containerfailure": "10000",
                    "internal.chaos.monkey.interval.seconds": interval,
                    "internal.chaos.monkey.enabled": "True"
                }
            )
        ]
        util.writePropertiesToConfigJSONFileMulti(appConfig, tmpAppConfig, propertyMap)
        Machine.copy(tmpAppConfig, appConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

        llapShellScript = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'run.sh')
        exit_code, stdout = Machine.runas(LLAP_START_USER, llapShellScript)
        if exit_code != 0:
            logger.info("LLAP Shell Script failed to run successfully with %d" % exit_code)

        for i in range(10):
            time.sleep(30)
            logger.info("@%d: Check if LLAP cluster is successfully deployed" % i)
            exit_code, stdout = Machine.runas(LLAP_START_USER, 'slider status llap0')
            if exit_code == 0:
                break
            elif i == 9:
                logger.info("LLAP cluster failed to deploy")
Example No. 6
    def perform_post_upgrade_steps(self):
        if Config.getEnv("HDP_STACK_INSTALLED").lower() == "true":
            from beaver.component.hadoop import Hadoop, HDFS
            from beaver.component.hive import Hive
            COMPONENT = str(self.COMPONENT)
            HDFS_USER = Config.get('hadoop', 'HDFS_USER')
            if 'experiment' in COMPONENT and Hive.isInstalled():
                HIVE_WAREHOUSE_DIR = Hive.getConfigValue(
                    "hive.metastore.warehouse.dir", defaultValue="/apps/hive/warehouse"
                )
                HDFS.chmod(HDFS_USER, 777, HIVE_WAREHOUSE_DIR, True)
            else:
                UpgradeLogger.reportProgress("No additional post-upgrade steps defined for EU", True)
        else:
            logger.info("No additional post-upgrade steps defined for EU on HDF")
Example No. 7
def setupHS2ConcurrencyDataset():
    logger.info("Setup test data")
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    HDFS.createDirectory("/tmp/hs2data", user=hdfs_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    query = """drop table if exists student_txt;
        create external table student_txt (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
        drop table if exists voter_txt;
        create external table voter_txt (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
        drop table if exists student;
        create table student (name string, age int, gpa double) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
        drop table if exists voter;
        create table voter (name string, age int, registration string, contributions float) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
        Insert into table student select * from student_txt;
        Insert into table voter select * from voter_txt;"""

    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
Example No. 8
def setupAcidDataset(testsuite, LOCAL_DIR):
    ddl_location = None
    if testsuite == 'acid':
        ddl_location = os.path.join(LOCAL_DIR, "ddl", "acid-tpch-tablesetup.sql")
    elif testsuite == 'unbucketed':
        ddl_location = os.path.join(LOCAL_DIR, "ddl", "acid-tpch-unbucketed-tablesetup.sql")
    else:
        assert 1 == 0, "The testsuite passed in is not correct. Please use the value 'acid' or 'unbucketed'"
    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download TPCH acids data
    tpch_newdata_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
        Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the acid tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/lineitem_acid", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(tpch_newdata_dir, "lineitem*"), "/tmp/lineitem_acid", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_acid", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        ddl_location, hivevar={'HDFS_LOCATION': '/tmp'}, logoutput=True, queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH acid data in Hive"
Example No. 9
def getJobAndAppIds(text):
    '''
    getJobAndAppIds
      text - Text from which to get the application and the job id
    '''
    ids = []
    # pattern to look for is different when tez is enabled.
    if Hive.isTezEnabled():
        # For this method to be backward compatible, we need to check for 2 patterns
        # The following pattern is applicable for pre-champlain releases.
        pattern = 'Status: Running \(application id: (.*)\)'
        for line in re.finditer(pattern, text):
            # with tez we only get the application id
            ids.append({'application': line.group(1)})
        # The following pattern is applicable for champlain and above release.
        if len(ids) == 0:
            pattern = 'Status: Running \(Executing on YARN cluster with App id (.*)\)'
            for line in re.finditer(pattern, text):
                # with tez we only get the application id
                ids.append({'application': line.group(1)})
    else:
        pattern = 'Starting Job = (.*), Tracking URL = h.*://.*:?\d+?/proxy/(.*)/'
        for line in re.finditer(pattern, text):
            ids.append({'job': line.group(1), 'application': line.group(2)})
    return ids
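
The Tez patterns above can be sanity-checked offline; the status line below is hypothetical but follows the format the second (champlain and above) pattern expects.

import re

sample = "Status: Running (Executing on YARN cluster with App id application_1500000000000_0042)"
pattern = r'Status: Running \(Executing on YARN cluster with App id (.*)\)'
ids = [{'application': m.group(1)} for m in re.finditer(pattern, sample)]
print(ids)  # [{'application': 'application_1500000000000_0042'}]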
Example No. 10
def setupMondrianDataset():
    DATABASE_NAME = 'foodmart'
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), DATABASE_NAME)
    FOODMART_DDL = os.path.join(LOCAL_DATA_DIR, "foodmart.ddl")
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')

    logger.info("Setup Mondrian dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        MONDRIAN_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'MONDRIAN_DATASET'), MONDRIAN_DATA_TGZ)
        Machine.tarExtractAll(MONDRIAN_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create foodmart database and tables")
    HDFS.createDirectory("/tmp/mondrian", HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(LOCAL_DATA_DIR, "/tmp/mondrian", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/mondrian", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        FOODMART_DDL,
        hivevar={
            'DB': 'foodmart',
            'LOCATION': '/tmp/mondrian/foodmart'
        },
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Unable to deploy foodmart dataset"
Example No. 11
    def getHiveQueryOutput(cls,
                           query,
                           willRunMR=True,
                           delim=",",
                           useStandaloneCmd=True):
        from beaver.component.hive import Hive

        hiveconf = {}
        if willRunMR:
            hiveconf = {
                'hive.input.format':
                'org.apache.hadoop.hive.ql.io.HiveInputFormat',
                'hive.vectorized.execution.enabled': 'false',
                'hive.txn.manager':
                'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
                'hive.support.concurrency': 'true'
            }

        exit_code, stdout, stderr = Hive.runQuery(
            cls.get_set_queue_cmd(useStandaloneCmd) + query,
            hiveconf=hiveconf,
            stderr_as_stdout=False)
        ruAssert("Storm", exit_code == 0,
                 "[HiveQueryOutput] Failed to run Hive query [%s]" % query)
        return stdout.replace('\t', delim)
Example No. 12
    def background_job_teardown(cls):
        '''
        Cleanup for long running Hive jobs
        '''
        from beaver.component.hive import Hive

        logger.info(
            "Make sure to switch the HiveServer2 to use the default port")
        adminUser = Machine.getAdminUser()
        hiveHost = Hive.getHiveHost()
        for port in cls._hs2_live_ports:
            pid = Machine.getPIDByPort(port, host=hiveHost, user=adminUser)
            if pid:
                Machine.killProcessRemote(pid, host=hiveHost, user=adminUser)
                time.sleep(2)
        if len(cls._hs2_live_ports) > 0:
            Hive.startService(services=["hiveserver2"])
Example No. 13
def setupMergeScaleDataset(LOCAL_DIR):
    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download the TPCH dataset if not there
    tpch_data_dir = os.path.join(LOCAL_DIR, "data")
    TPCH_DATA_TGZ = os.path.join(LOCAL_DIR, "tpch_data.tgz")
    if not os.path.isfile(TPCH_DATA_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_DNLD_URL'), TPCH_DATA_TGZ)
        Machine.tarExtractAll(TPCH_DATA_TGZ, LOCAL_DIR)

    # Load the tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/tpch", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(tpch_data_dir, "/tmp/tpch", user=HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/tpch", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp/tpch/data'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH data in Hive"

    # Download TPCH staging data
    tpch_stage_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
        Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the staged tables in Hive
    HDFS.createDirectory(
        "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", user=HADOOPQA_USER, perm='777', force=True
    )
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "lineitem*"), "/tmp/lineitem_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "order*"), "/tmp/orders_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "delete*"), "/tmp/delete_stage", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-staged-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH staging data in Hive"
Example No. 14
def setupTableauDataset():
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "tableau")
    DATA_DIR = os.path.join(LOCAL_DATA_DIR, 'data')
    SCHEMA_SQL_DIR = os.path.join(LOCAL_DATA_DIR, 'schema_3.0')
    HIVE_TABLES = [
        'Batters', 'Calcs', 'DateBins', 'DateTime', 'Election', 'FischerIris', 'Loan', 'NumericBins', 'REI',
        'SeattleCrime', 'Securities', 'SpecialData', 'Staples', 'Starbucks', 'UTStarcom', 'xy'
    ]
    TABLEAU_TEST_DIR = "/user/hrt_qa/tableau"
    DATABASE_NAME = 'tableau'

    logger.info("Setup Tableau dataset")

    if not os.path.exists(LOCAL_DATA_DIR):
        TABLEAU_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'TABLEAU_DATASET'), TABLEAU_DATA_TGZ)
        Machine.tarExtractAll(TABLEAU_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create test directory on hdfs to store tableau data files")
    HDFS.createDirectory(TABLEAU_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    logger.info("create tableau database before creating tables")
    Hive.runQueryOnBeeline("DROP DATABASE IF EXISTS %s" % DATABASE_NAME)
    Hive.runQueryOnBeeline("CREATE DATABASE IF NOT EXISTS %s" % DATABASE_NAME)

    for tbl in HIVE_TABLES:
        hdfsDir = TABLEAU_TEST_DIR + '/%s' % tbl
        hdfsFile = hdfsDir + '/%s' % tbl
        localFile = os.path.join(DATA_DIR, '%s.tbl' % tbl)
        sqlFile = os.path.join(SCHEMA_SQL_DIR, '%s.sql' % tbl)

        logger.info("create directory for %s table" % tbl)
        exit_code, stdout = HDFS.createDirectory(hdfsDir, perm='777', force=True)
        assert exit_code == 0, 'Could not create dir for table %s on hdfs.' % tbl

        logger.info("copy file for table %s to hdfs" % tbl)
        exit_code, stdout = HDFS.copyFromLocal(localFile, hdfsFile)
        assert exit_code == 0, 'Could not copy file for table %s to hdfs.' % tbl

        logger.info("create %s table " % tbl)
        # TODO: Modify Hive.runQueryOnBeeline to accept a query file name
        exit_code, stdout, stderr = Hive.runQueryOnBeeline(
            ReadFromFile(sqlFile), readFromFile=True, hivevar={'HDFS_LOCATION': hdfsDir}, logoutput=True
        )
        assert exit_code == 0, '%s table creation failed' % tbl
Example No. 15
def grantPrivilegesToUsersOnTable(users, tableName, privilege="all"):
    query = ""
    for user in users:
        query += "grant %s on table %s to user %s with grant option;\n" % (privilege, tableName, user)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Failed to grant privilege [%s] on table [%s] to users [%s]" % (
        privilege, tableName, ",".join(users)
    )
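
For illustration, the loop above emits one grant statement per user; with two hypothetical users and the default privilege it builds the following Beeline script.

# Hypothetical inputs, reusing the same string template as the function above.
users = ["hrt_1", "hrt_2"]
tableName = "student"
privilege = "all"
query = ""
for user in users:
    query += "grant %s on table %s to user %s with grant option;\n" % (privilege, tableName, user)
print(query)
# grant all on table student to user hrt_1 with grant option;
# grant all on table student to user hrt_2 with grant option;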
Example No. 16
def setupTPCDSOriginalDataset(CURR_DIR):
    tpcds_data_dir = os.path.join(SRC_DIR, "data", "tpcds")
    TPCDS_DATA_TGZ = os.path.join(tpcds_data_dir, "tpcds_original.tgz")
    hdfs_localcopy_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tpcds_original', 'data')
    tpcds_text_data_dir = os.path.join(tpcds_data_dir, 'data')

    downloadDataset(
        tpcds_data_dir, TPCDS_DATA_TGZ, Config.get('hive', 'TPCDS_ORIGINAL_DNLD_URL'), hdfs_localcopy_dir,
        tpcds_text_data_dir
    )

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )

    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)

    if Machine.type() == 'Windows':
        HIVE_TEST_CMD += ' -Dharness.conf=conf\\windows.conf'

    query_file_1 = os.path.join(CURR_DIR, 'ddl_queries', 'alltables_text.sql')
    query_file_2 = os.path.join(CURR_DIR, 'ddl_queries', 'alltables_orc.sql')
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        query_file_1,
        hivevar={
            'LOCATION': HDFS_TEST_DIR + '/data',
            'DB': 'tpcds_src'
        },
        cwd=CURR_DIR,
        logoutput=True,
        queryIsFile=True
    )
    logger.info("Check if populating the data in Hive for text tables is successful")
    assert exit_code == 0, "Failed to populate the data in Hive"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        query_file_2, hivevar={
            'FILE': 'ORC',
            'SOURCE': 'tpcds_src'
        }, cwd=CURR_DIR, logoutput=True, queryIsFile=True
    )
    logger.info("Check if populating the data in Hive for ORC tables is successful")
    assert exit_code == 0, "Failed to populate the data in Hive"
Example No. 17
    def Hive_getHiveLogDir(cls, logoutput=True):
        try:
            from beaver.component.hive import Hive
            return Hive.getHiveLogDir(logoutput)
        except Exception:
            if logoutput:
                logger.error(
                    "Exception occurred during Hive_getHiveLogDir() call")
                logger.error(traceback.format_exc())
            return None
Example No. 18
    def getDatabaseFlavor(cls):
        dbdriver = Hive.getConfigValue("javax.jdo.option.ConnectionDriverName")
        if "oracle" in dbdriver:
            return "oracle"
        elif "postgresql" in dbdriver:
            dbUrl = Hive.getConfigValue("javax.jdo.option.ConnectionURL")
            m = re.search('jdbc:postgresql://(.*):.*', dbUrl)
            dbHost = Machine.getfqdn()
            if m and m.group(1):
                dbHost = m.group(1)
            dbVersion = Machine.getDBVersion('postgres', host=dbHost)
            if dbVersion:
                return "postgres-%s" % dbVersion
            else:
                return "postgres"
        elif "derby" in dbdriver:
            return "derby"
        elif "mysql" in dbdriver:
            return "mysql"
        return ""
Example No. 19
    def doSetup(cls, hdfs_test_dir, tbl_name, num_of_rows, type):

        from beaver.component.hive import Hive
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

        logger.info("Generating test table dataset with %d rows" % num_of_rows)
        test_data_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                      tbl_name + ".dat")
        userid = 100000
        with open(test_data_file, 'w') as f:
            for i in xrange(num_of_rows):
                for j in range(random.randint(3, 8)):
                    f.write("%d|%d\n" % (userid + i, random.randint(10, 80)))

        hdfs_tbl_dir = hdfs_test_dir + "/" + tbl_name
        logger.info("Copying the test dataset to HDFS directory '%s'" %
                    hdfs_tbl_dir)
        HDFS.createDirectory(hdfs_test_dir,
                             user=cls._hdfs_user,
                             perm='777',
                             force=True)
        HDFS.createDirectory(hdfs_tbl_dir, perm='777')
        HDFS.copyFromLocal(test_data_file, hdfs_tbl_dir)
        HDFS.chmod(cls._hdfs_user, '777', hdfs_tbl_dir)

        logger.info("Creating table '%s' and verification tables" % tbl_name)
        query = "drop table if exists %s;\n" % tbl_name
        query += "create external table %s (userid string, age int) row format delimited fields terminated by '|' stored as textfile location '%s';\n" % (
            tbl_name, hdfs_tbl_dir)
        query += "drop table if exists %s_hive_verify;\n" % tbl_name
        query += "create table %s_hive_verify (userid string, age int);\n" % tbl_name
        if type == "Long running":
            for i in range(cls._num_of_webhcat_bgj):
                query += "drop table if exists %s_wh_%d;\n" % (tbl_name, i + 1)
                query += "create table %s_wh_%d (userid string, age int);\n" % (
                    tbl_name, i + 1)
        hivesetupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                     "hivesetup.sql")
        util.writeToFile(query, hivesetupfile)
        exit_code, stdout = Hive.run("-f " + hivesetupfile, logoutput=False)
        if type:
            msg = "%s job setup for Hive component" % type
            if exit_code != 0:
                UpgradePerNode.reportProgress(
                    "[FAILED][Hive][Setup] %s failed due to exitcode = %d" %
                    (msg, exit_code))
            else:
                UpgradePerNode.reportProgress(
                    "[PASSED][Hive][Setup] %s finished successfully" % msg)
Example No. 20
def setupSchemaEvolutionDataset():
    logger.info("Setup Schema Evolution dataset")
    HDFS.createDirectory(HCAT_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory(HDFS_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )
    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)
    hiveServer2Url = str(Hive.getHiveServer2Url())
    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-schemaevolution", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0
Example No. 21
    def get_set_queue_cmd(cls, useStandaloneCmd):
        # For https://hortonworks.jira.com/browse/BUG-27221
        from beaver.component.hive import Hive
        if useStandaloneCmd:
            YARN_QUEUE = "storm"
        else:
            YARN_QUEUE = "storm-slider"

        if Hive.isTezEnabled():
            # This won't work because when the Hive CLI starts, Hive does not know about
            # queues that are not set in hive-site.xml. See Deepesh's email on 10/14/2014.
            setqueue = "set tez.queue.name=%s; " % YARN_QUEUE
        else:
            setqueue = "set mapred.job.queue.name=%s; " % YARN_QUEUE
        return setqueue
Example No. 22
def setupTestData(stdauth=True):
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hive-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hive-simple-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HIVE_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    HDFS.createDirectory("/tmp/hs2data", user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
Example No. 23
    def tear_down_hive_topology(cls, topologyName, useStandaloneCmd):
        """
        tear down hbase topology.
        """
        from beaver.component.hive import Hive

        Machine.rm(user=None,
                   host="localhost",
                   filepath=LOCAL_HIVE_WORK_DIR,
                   isdir=True,
                   passwd=None)

        Storm.killTopology(topologyName,
                           logoutput=True,
                           useStandaloneCmd=useStandaloneCmd)
        #Hive.restoreConfig(services=['metastore'])
        drop_table_q = "use %s; drop table if exists %s; " % (DATABASE_NAME,
                                                              HIVE_TABLE_NAME)
        exit_code, stdout = Hive.runQuery(
            cls.get_set_queue_cmd(useStandaloneCmd) + drop_table_q)
        ruAssert("Storm", exit_code == 0)
Example No. 24
    def doBackgroundJobSetup(cls, hdfs_test_dir):

        from beaver.component.hive import Hive
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

        logger.info("Preparing the test setup for Hive background job")
        udfjar = os.path.join(Config.getEnv('WORKSPACE'), "tests", "hive",
                              "hive-udf", "hive-udfs-0.1.jar")
        HDFS.createDirectory(hdfs_test_dir,
                             user=cls._hdfs_user,
                             perm='777',
                             force=True)
        HDFS.copyFromLocal(udfjar, hdfs_test_dir)
        query = "drop function sleep; create function sleep as 'org.apache.hive.udf.generic.GenericUDFSleep' using jar 'hdfs://%s/hive-udfs-0.1.jar';" % hdfs_test_dir
        exit_code, stdout = Hive.runQuery(query)
        if exit_code != 0:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][Setup] Long running failed due to exitcode = %d"
                % exit_code)
        else:
            UpgradePerNode.reportProgress(
                "[PASSED][Hive][Setup] Long running finished successfully")
Example No. 25
def setupTPCDSDataset():
    tpcds_data_dir = os.path.join(SRC_DIR, "data", "tpcds")
    TPCDS_DATA_TGZ = os.path.join(tpcds_data_dir, "tpcds_data.tgz")
    hdfs_localcopy_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'data')
    tpcds_text_data_dir = os.path.join(tpcds_data_dir, 'data')

    downloadDataset(
        tpcds_data_dir, TPCDS_DATA_TGZ, Config.get('hive', 'TPCDS_DNLD_URL_HDP3'), hdfs_localcopy_dir,
        tpcds_text_data_dir
    )

    HIVE_TEST_CMD = "-Dhive.use.beeline=true -Dhadoop.home=%s -Dhive.home=%s -Dhcat.home=%s -Dpig.home=%s -Dhbase.home=%s" % (
        HADOOP_HOME, HIVE_HOME, HCATALOG_HOME, PIG_HOME, HIVE_HOME
    )

    if Hadoop.isHadoop2():
        HIVE_TEST_CMD += " -Dmapred.home=%s -Dhadoop.conf.dir=%s" % (Config.get('hadoop', 'MAPRED_HOME'), HADOOP_CONF)

    if Machine.type() == 'Windows':
        HIVE_TEST_CMD += ' -Dharness.conf=conf\\windows.conf'

    hiveServer2Url = str(Hive.getHiveServer2Url())

    # generate data
    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-tpcds-orc", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0

    exit_code, stdout = Ant.run(HIVE_TEST_CMD + " deploy-tpcds", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url})
    assert exit_code == 0

    exit_code, stdout = Ant.run(
        HIVE_TEST_CMD + " deploy-tpcds-parquet", cwd=SRC_DIR, env={"HIVE_SERVER2_URL": hiveServer2Url}
    )
    assert exit_code == 0
Example No. 26
    def run_client_smoketest(cls, config=None, env=None):
        '''
        Run Smoke test after upgrading Client
        :param config: Configuration location
        :param env: Set Environment variables
        '''
        from beaver.component.hive import Hive
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

        UpgradePerNode.reportProgress(
            "[INFO][Hive][Smoke] Smoke test for Hive component started")

        setqueue = ""
        if Hive.isTezEnabled():
            setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
        else:
            setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

        logger.info("**** Running Hive CLI Test ****")
        query = setqueue + " insert overwrite table %s_hive_verify select userid, avg(age) from %s group by userid order by userid; " % (
            cls._smoketest_tbl, cls._smoketest_tbl)
        query += "select count(*) from %s_hive_verify;" % cls._smoketest_tbl
        exit_code, stdout, stderr = Hive.runQuery(query,
                                                  stderr_as_stdout=False)
        if exit_code != 0:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][Smoke] Smoke test for Hive Metastore failed with exit code '%d'"
                % exit_code)
            logger.error(
                "Smoke test for Hive failed with the following error: " +
                stderr)
        elif stdout.find("%d" % cls._num_of_rows_smoke) == -1:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][Smoke] Smoke test for Hive Metastore failed to verify number of rows in output"
            )
            logger.error(
                "Smoke test for Hive failed to find [%d] in output [%s]" %
                (cls._num_of_rows_smoke, stdout))
        else:
            UpgradePerNode.reportProgress(
                "[PASSED][Hive][Smoke] Smoke test for Hive Metastore succeeded"
            )
            logger.info("Smoke test for Hive Metastore succeeded")

        logger.info("**** Running Beeline CLI Test ****")
        query = setqueue + "\ndrop table if exists %s_bline_verify;\n" % cls._smoketest_tbl
        query += "create table %s_bline_verify (userid string, age int);\n" % cls._smoketest_tbl
        query += "insert overwrite table %s_bline_verify select userid, avg(age) from %s group by userid order by userid;\n" % (
            cls._smoketest_tbl, cls._smoketest_tbl)
        query += "select count(*) from %s_bline_verify;\n" % cls._smoketest_tbl
        exit_code, stdout, stderr = Hive.runQueryOnBeeline(query,
                                                           readFromFile=True)
        if exit_code != 0:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][Smoke] Smoke test for HiveServer2 failed with exit code '%d'"
                % exit_code)
            logger.error(
                "Smoke test for HiveServer2 failed with the following error: "
                + stderr)
        elif stdout.find("%d" % cls._num_of_rows_smoke) == -1:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][Smoke] Smoke test for HiveServer2 failed to verify number of rows in output"
            )
            logger.error(
                "Smoke test for HiveServer2 failed to find [%d] in output [%s]"
                % (cls._num_of_rows_smoke, stdout))
        else:
            logger.info("Smoke test for HiveServer2 succeeded")

        logger.info("**** Running WebHCat Smoke Test ****")
        query = "show tables;"
        webhcatHost = Config.get('templeton',
                                 'TEMPLETON_HOST',
                                 default=Machine.getfqdn())
        webhcatPort = Config.get('templeton',
                                 'TEMPLETON_PORT',
                                 default="50111")
        url = "http://%s:%s/templeton/v1/ddl" % (webhcatHost, webhcatPort)
        params = {'exec': query}
        status_code, stdout = util.curl(url, method='POST', params=params)
        if status_code != 200:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][Smoke] Smoke test for WebHCat failed due to status code = %d"
                % status_code)
        else:
            logger.info("Smoke test for WebHCat succeeded")

        UpgradePerNode.reportProgress(
            "[INFO][Hive][Smoke] Smoke test for Hive component finished")
Example No. 27
    def switch_master_version(cls, action, version, config=None):
        '''
        Switches Hive master services' version
        :param action: Whether to "upgrade" or "downgrade"
        :param version: Version to be switched to
        :param config: Configuration location
        '''
        from beaver.component.rollingupgrade.ruCommon import hdpSelect
        from beaver.component.hive import Hive

        currentHiveVersion = Hive.getVersion()

        if action == 'upgrade':
            # Backup the database used by the Hive Metastore
            logger.info(
                "Performing backup of the Hive Metastore DB before starting the upgrade"
            )
            Hive.backupMetastoreDB(cls._metastore_backup_file)

        node = Hive.getHiveHost()

        # Stop the old Hive Metastore
        logger.info("Stopping the Hive Metastore")
        Hive.stopService(services=["metastore"])

        # Upgrade Hive Metastore servers to new version
        hdpSelect.changeVersion("hive-metastore", version, node)

        if action == 'upgrade':
            logger.info("Upgrading the Hive metastore schema")
            Hive.upgradeSchema()

        # Restart Hive Metastore servers one at a time
        logger.info("Restarting the Hive Metastore")
        Hive.startService(services=["metastore"])

        # Start new Hive Server 2 instance
        confHS2Port = Hive.getHiveserver2ThriftPort()
        hs2port = util.getNextAvailablePort(node, confHS2Port)

        hdpSelect.changeVersion("hive-server2", version, node)

        Hive.modifyConfig(config,
                          services=['hiveserver2'],
                          restartService=False)
        logger.info(
            "Starting a new HiveServer2 at port '%d' for assisting rolling-upgrade"
            % hs2port)
        if hs2port != confHS2Port:
            changes = {'hive-site.xml': {'hive.server2.thrift.port': hs2port}}
            Hive.modifyConfig(changes,
                              services=["hiveserver2"],
                              restartService=False)
        Hive.startService(services=["hiveserver2"])
        cls._hs2_live_ports = [Hive.getHiveserver2ThriftPort(), hs2port]

        # Deregister the old Hive Server 2 instances
        logger.info("Deregistering the HiveServer2 on version '%s'" %
                    currentHiveVersion)
        Hive.deregisterHiveServer2(version=currentHiveVersion)

        from beaver.component.hcatalog import Hcatalog

        # Stop the old WebHCat server
        logger.info("Stopping the WebHCat server")
        node = Config.get('templeton',
                          'TEMPLETON_HOST',
                          default=Machine.getfqdn())
        webhcatPort = Config.get('templeton',
                                 'TEMPLETON_PORT',
                                 default="50111")
        Hcatalog.stop(node)

        # Upgrade WebHCat to the new version
        hdpSelect.changeVersion("hive-webhcat", version, node)

        # Start the WebHCat server
        logger.info("Restarting the WebHCat server")
        newConfDir = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                  'localWebhcatConf')
        if os.path.exists(newConfDir):
            Hcatalog.start(node, hcat_confdir=newConfDir)
        else:
            Hcatalog.start(node)
Example No. 28
def runJdbcMultiSessionDriver(
        testDir,
        addlClasspath=[],
        connectionUrl=None,
        skippedTests=[],
        addlArgs=[],
        reuseConnections=False,
        testFilter=None,
        logsDir=None,
        queryTimeout=3600
):
    '''
    Run the Hive JDBC MultiSession Test Driver
    '''
    harnessDir = os.path.join(Config.getEnv('WORKSPACE'), 'datateamtest', 'hive_jdbc_multisession')
    logger.info("Build the TestDriver to run tests")
    exit_code, stdout = Maven.run("clean package", cwd=harnessDir)
    assert exit_code == 0, "Failed to build the test driver"
    classpath = [
        os.path.join(harnessDir, "target", "hive-multisession-test-0.1.jar"),
        Config.get('hadoop', 'HADOOP_CONF')
    ]
    if len(addlClasspath) == 0:
        hiveJdbcDriver = getStandaloneHiveJdbcJar()
        classpath.insert(0, hiveJdbcDriver)
    else:
        classpath = addlClasspath + classpath

    cobert_tool_version = "cobertura-2.1.1"
    COBERTURA_CLASSPTH = os.path.join(
        tempfile.gettempdir(), "coverage-tmp", cobert_tool_version, cobert_tool_version + ".jar"
    )
    if Machine.pathExists(Machine.getAdminUser(), None, COBERTURA_CLASSPTH, Machine.getAdminPasswd()):
        classpath.append(COBERTURA_CLASSPTH)

    args = ["-t " + testDir]
    if connectionUrl is None:
        connectionUrl = Hive.getHiveServer2Url()
    args.append("-c \"%s\"" % connectionUrl)
    if Hadoop.isSecure():
        args.append("-k " + Config.get('machine', 'KEYTAB_FILES_DIR'))
        if Config.hasOption('machine', 'USER_REALM'):
            USER_REALM = Config.get('machine', 'USER_REALM', '')
            args.append("-e USER_REALM=%s" % (USER_REALM))
    args.extend(["--skip %s" % t for t in skippedTests])
    if reuseConnections:
        args.append("--reuseConnections")
    if testFilter:
        args.append("-f " + testFilter)
    from beaver.marker import getMarkerCondition
    markerCondition = getMarkerCondition()
    if markerCondition:
        args.append("-e 'marker=%s'" % markerCondition)
    if not logsDir:
        logsDir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "logs_%d" % int(999999 * random.random()))
    args.append("-l " + logsDir)
    if queryTimeout > 0:
        args.append("--queryTimeout %d" % queryTimeout)
    args.extend(addlArgs)
    return Java.runJava(
        Config.getEnv('ARTIFACTS_DIR'),
        "org.apache.hive.jdbc.TestDriver",
        classPath=(os.pathsep).join(classpath),
        cmdArgs=args
    )
Example No. 29
    def run_background_job(cls, runSmokeTestSetup=False, config=None):
        '''
        Runs background long running Hive Job
        :param runSmokeTestSetup: Runs smoke test setup if set to true
        :param config: expected configuration location
        :return: Total number of long running jobs started
        '''
        from beaver.component.hive import Hive
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

        UpgradePerNode.reportProgress(
            "[INFO][Hive][BGJob] Long running job for Hive component started")

        setqueue = ""
        if Hive.isTezEnabled():
            setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
        else:
            setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

        logger.info("**** Running Hive CLI Test ****")
        query = setqueue + " create table if not exists hive_cli_lr (a string); select sleep(%d, 2000, 'hdfs://%s/hive_cli_lr', 'hdfs://%s/END') from (select count(*) from hive_cli_lr) a;" % (
            cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir,
            cls._hdfs_bgjtest_dir)
        Hive.runQuery(query, background=True)

        logger.info("**** Running Beeline CLI Test ****")
        # Create the sleep function within the same Beeline session;
        # functions created outside of the HS2 instance are not picked up
        query = setqueue + "\n"
        query += "drop function sleep2;\n"
        query += "create function sleep2 as 'org.apache.hive.udf.generic.GenericUDFSleep' using jar 'hdfs://%s/hive-udfs-0.1.jar';\n" % cls._hdfs_bgjtest_dir
        query += "create table if not exists bline_cli_lr (a string);\n"
        query += "select sleep2(%d, 2000, 'hdfs://%s/bline_cli_lr', 'hdfs://%s/END') from (select count(*) from bline_cli_lr) a;\n" % (
            cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir,
            cls._hdfs_bgjtest_dir)
        Hive.runQueryOnBeeline(query, readFromFile=True, background=True)

        logger.info("**** Running WebHCat Test ****")
        webhcatHost = Config.get('templeton',
                                 'TEMPLETON_HOST',
                                 default=Machine.getfqdn())
        webhcatPort = Config.get('templeton',
                                 'TEMPLETON_PORT',
                                 default="50111")
        url = "http://%s:%s/templeton/v1/hive" % (webhcatHost, webhcatPort)
        query = setqueue + " set mapred.task.timeout=0; create table if not exists whcat_rest_lr (a string); select sleep(%d, 2000, 'hdfs://%s/whcat_rest_lr', 'hdfs://%s/END') from (select count(*) from whcat_rest_lr) a;" % (
            cls._max_bgjtest_duration, cls._hdfs_bgjtest_dir,
            cls._hdfs_bgjtest_dir)
        params = {'execute': query}
        status_code, stdout = util.curl(url, method='POST', params=params)
        retry = 0
        while status_code == 404 and retry < 3:
            time.sleep(15)
            status_code, stdout = util.curl(url, method='POST', params=params)
            retry += 1
        if status_code != 200:
            UpgradePerNode.reportProgress(
                "[FAILED][Hive][BGJobSetup] Long running job for WebHCat failed due to status code = %d"
                % status_code)
            logger.error(
                "Webhcat request failed with the following error: %s\n" %
                stdout)

        if runSmokeTestSetup:
            logger.info("**** Running Hive Smoke Test Setup ****")
            cls.smoke_test_setup()
        return 3
Example No. 30
    def setup_storm_hive_topology(cls, useStandaloneCmd):
        from beaver.component.hive import Hive

        storm_version = Storm.getVersion(useStandaloneCmd=True)
        hive_version = Hive.getVersion()
        # Declare the globals before the first assignment; assigning a name before
        # its global declaration is a syntax warning/error.
        global HIVE_METASTORE_URI
        global HIVE_HOST
        global HIVE_PORT
        global HIVE_WAREHOUSE_DIR

        HIVE_METASTORE_URI = Hive.getConfigValue(
            "hive.metastore.uris", defaultValue="thrift://localhost:9083")
        HIVE_WAREHOUSE_DIR = Hive.getConfigValue(
            "hive.metastore.warehouse.dir",
            defaultValue="/apps/hive/warehouse")
        HIVE_HOST = Hive.getHiveHost()
        HIVE_PORT = Hive.getMetastoreThriftPort()
        if Storm.isDalorBeyond():
            JAVA_HIVE_SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'),
                                             'tests', 'rolling_upgrade',
                                             'Storm', '2_3', 'storm-hive',
                                             'java')
        else:
            JAVA_HIVE_SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'),
                                             'tests', 'rolling_upgrade',
                                             'Storm', '2_2', 'storm-hive',
                                             'java')
        # hive.txn.manager and hive.support.concurrency are set through ambari as per bug-40500
        #logger.info("Restart Hive")
        #changes = {'hive-site.xml': {'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
        #                             'hive.support.concurrency': 'true'}}
        #Hive.modifyConfig(changes, services=['metastore'], restartService=True)
        logger.info("Create test database in Hive")

        exit_code, stdout = Hive.runQuery(
            cls.get_set_queue_cmd(useStandaloneCmd) +
            " drop database if exists stormdb cascade; \
                                               create database stormdb;")
        ruAssert("Storm", exit_code == 0,
                 "[StormHiveSetup] Failed to create test database" + stdout)
        HDFS.chmod(runasUser=HDFS.getHDFSUser(),
                   perm=777,
                   directory=HIVE_WAREHOUSE_DIR + "/" + DATABASE_NAME + ".db")
        #copy tests/storm/storm-hive/java to artifacts/storm-hive-tests
        logger.info("JAVA_SRC_DIR " + JAVA_HIVE_SRC_DIR)
        logger.info("LOCAL_WORK_DIR " + LOCAL_HIVE_WORK_DIR)
        Machine.copy(JAVA_HIVE_SRC_DIR,
                     LOCAL_HIVE_WORK_DIR,
                     user=None,
                     passwd=None)
        #mvn package
        if Machine.isWindows():
            (_, _) = Maven.run(
                'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s' %
                (HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION,
                 STORM_VERSION_MAVEN_PARAMETER, storm_version,
                 HIVE_VERSION_MAVEN_PARAMETER,
                 hive_version, PUBLIC_REPO_MAVEN_PARAMETER,
                 Maven.getPublicRepoUrl(), CORE_FILE_MAVEN_PARAMETER,
                 CORE_FILE, HADOOP_CORE_MAVEN_PARAMETER, HADOOP_CONF,
                 HIVE_CORE_MAVEN_PARAMETER, HIVE_CORE_DIR,
                 HIVE_FILE_MAVEN_PARAMETER, HIVE_FILE),
                cwd=LOCAL_HIVE_WORK_DIR)
        else:
            (_, _) = Maven.run('package',
                               cwd=LOCAL_HIVE_WORK_DIR,
                               env={
                                   HADOOP_VERSION_MAVEN_PARAMETER:
                                   HADOOP_VERSION,
                                   STORM_VERSION_MAVEN_PARAMETER:
                                   storm_version,
                                   HIVE_VERSION_MAVEN_PARAMETER:
                                   hive_version,
                                   PUBLIC_REPO_MAVEN_PARAMETER:
                                   Maven.getPublicRepoUrl(),
                                   CORE_FILE_MAVEN_PARAMETER:
                                   CORE_FILE,
                                   HADOOP_CONF_MAVEN_PARAMETER:
                                   HADOOP_CONF,
                                   HDFS_FILE_MAVEN_PARAMETER:
                                   HDFS_FILE,
                                   HADOOP_CORE_MAVEN_PARAMETER:
                                   HADOOP_CONF,
                                   HIVE_CORE_MAVEN_PARAMETER:
                                   HIVE_CORE_DIR,
                                   HIVE_FILE_MAVEN_PARAMETER:
                                   HIVE_FILE
                               })
        create_table_q = "use %s; \
          drop table if exists %s; \
          create table %s (id int, name string, phone string, street string) \
          partitioned by (city string, state string) \
          clustered by (id) into %s buckets \
          stored as orc \
          tblproperties ('transactional'='true');" % (
            DATABASE_NAME, HIVE_TABLE_NAME, HIVE_TABLE_NAME, "5")

        exit_code, stdout = Hive.runQuery(
            cls.get_set_queue_cmd(useStandaloneCmd) + create_table_q)
        ruAssert(
            "Storm", exit_code == 0,
            "[StormHiveSetup] Failed to create test table userdata_partitioned"
        )
        HDFS.chmod(runasUser=HDFS.getHDFSUser(),
                   perm=777,
                   directory=HIVE_WAREHOUSE_DIR + "/" + DATABASE_NAME +
                   ".db/" + HIVE_TABLE_NAME)