Example #1
    def getZipFile(cls, version=HBase.getVersionFromBuild(), isRU=False):
        # download for linux, no download for windows
        HBASE_VER_BUILD = version

        if Machine.isWindows():
            zipFile = os.path.join(
                Config.get('slider', 'SLIDER_HOME'), "app-packages",
                "slider-hbase-app-win-package-%s.zip" % HBASE_VER_BUILD)
            return zipFile

        pkg_list = "pkg-list_qe.txt"
        path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), pkg_list)
        #pkgUrl = Config.get('slider','APP_PKG_LIST')
        pkgUrl = Slider.getAppPackageBaseUrl(
            isRU) + "/slider-app-packages/" + pkg_list
        util.downloadUrl(pkgUrl, path)
        with open(path, 'r') as f:
            for line in f:
                if line.startswith("hbase_pkg_url="):
                    url = line.strip()[14:]
                    break
        zipFile = os.path.join(
            os.getcwd(), "slider-hbase-app-package-%s.zip" % HBASE_VER_BUILD)
        logger.info("downloading " + url)
        util.downloadUrl(url, zipFile)
        return zipFile
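
A minimal sketch (not from the source) of the pkg-list_qe.txt line the loop above searches for; the slice [14:] simply strips the 14-character "hbase_pkg_url=" prefix and keeps the download URL:

# hypothetical pkg-list line, for illustration only
line = "hbase_pkg_url=http://example.com/slider-hbase-app-package-1.1.2.zip\n"
url = line.strip()[len("hbase_pkg_url="):]   # same result as line.strip()[14:]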
Example #2
def setupAcidDataset(testsuite, LOCAL_DIR):
    ddl_location = None
    if testsuite == 'acid':
        ddl_location = os.path.join(LOCAL_DIR, "ddl", "acid-tpch-tablesetup.sql")
    elif testsuite == 'unbucketed':
        ddl_location = os.path.join(LOCAL_DIR, "ddl", "acid-tpch-unbucketed-tablesetup.sql")
    else:
        assert False, "The testsuite passed in is not correct. Please use value 'acid' or 'unbucketed'"
    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download TPCH acids data
    tpch_newdata_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
        Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the acid tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/lineitem_acid", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(tpch_newdata_dir, "lineitem*"), "/tmp/lineitem_acid", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_acid", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        ddl_location, hivevar={'HDFS_LOCATION': '/tmp'}, logoutput=True, queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH acid data in Hive"
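
A hedged usage sketch (the path is hypothetical): LOCAL_DIR presumably already contains the ddl/ sub-directory with the *-tablesetup.sql scripts, since only the TPCH data itself is downloaded by the helper:

# hypothetical invocation, for illustration only
LOCAL_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'hive_acid')
setupAcidDataset('acid', LOCAL_DIR)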
Example #3
def setupHS2ConcurrencyDataset():
    logger.info("Setup test data")
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    HDFS.createDirectory("/tmp/hs2data", user=hdfs_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    query = """drop table if exists student_txt;
        create external table student_txt (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
        drop table if exists voter_txt;
        create external table voter_txt (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
        drop table if exists student;
        create table student (name string, age int, gpa double) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
        drop table if exists voter;
        create table voter (name string, age int, registration string, contributions float) CLUSTERED BY (name) INTO 20 BUCKETS STORED AS ORC TBLPROPERTIES('transactional'='true');
        Insert into table student select * from student_txt;
        Insert into table voter select * from voter_txt;"""

    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
Example #4
def setupMondrianDataset():
    DATABASE_NAME = 'foodmart'
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), DATABASE_NAME)
    FOODMART_DDL = os.path.join(LOCAL_DATA_DIR, "foodmart.ddl")
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')

    logger.info("Setup Mondrian dataset")
    if not os.path.exists(LOCAL_DATA_DIR):
        MONDRIAN_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'MONDRIAN_DATASET'), MONDRIAN_DATA_TGZ)
        Machine.tarExtractAll(MONDRIAN_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create foodmart database and tables")
    HDFS.createDirectory("/tmp/mondrian", HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(LOCAL_DATA_DIR, "/tmp/mondrian", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/mondrian", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        FOODMART_DDL,
        hivevar={
            'DB': 'foodmart',
            'LOCATION': '/tmp/mondrian/foodmart'
        },
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Unable to deploy foodmart dataset"
Example #5
def setupMergeScaleDataset(LOCAL_DIR):
    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download the TPCH dataset if not there
    tpch_data_dir = os.path.join(LOCAL_DIR, "data")
    TPCH_DATA_TGZ = os.path.join(LOCAL_DIR, "tpch_data.tgz")
    if not os.path.isfile(TPCH_DATA_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_DNLD_URL'), TPCH_DATA_TGZ)
        Machine.tarExtractAll(TPCH_DATA_TGZ, LOCAL_DIR)

    # Load the tables in Hive
    HADOOPQA_USER = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/tpch", user=HADOOPQA_USER, perm='777', force=True)
    HDFS.copyFromLocal(tpch_data_dir, "/tmp/tpch", user=HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/tpch", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp/tpch/data'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH data in Hive"

    # Download TPCH staging data
    tpch_stage_dir = os.path.join(LOCAL_DIR, "tpch_newdata_5G")
    TPCH_STAGE_TGZ = os.path.join(LOCAL_DIR, "tpch_newdata_5G.tgz")
    if not os.path.isfile(TPCH_STAGE_TGZ):
        assert util.downloadUrl(Config.get('hive', 'TPCH_NEWDATA_5G_DNLD_URL'), TPCH_STAGE_TGZ)
        Machine.tarExtractAll(TPCH_STAGE_TGZ, LOCAL_DIR)

    # Load the staged tables in Hive
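    # Note (assumption, not from the source): the space-separated string below is
    # presumably passed through to a single `hdfs dfs -mkdir` invocation, which
    # accepts multiple paths, so all three staging directories are created at once.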
    HDFS.createDirectory(
        "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", user=HADOOPQA_USER, perm='777', force=True
    )
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "lineitem*"), "/tmp/lineitem_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "order*"), "/tmp/orders_stage", HADOOPQA_USER)
    HDFS.copyFromLocal(os.path.join(tpch_stage_dir, "delete*"), "/tmp/delete_stage", HADOOPQA_USER)
    HDFS.chmod(None, 777, "/tmp/lineitem_stage /tmp/orders_stage /tmp/delete_stage", recursive=True)
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(
        os.path.join(LOCAL_DIR, "ddl", "merge-staged-tpch-tablesetup.sql"),
        hivevar={'HDFS_LOCATION': '/tmp'},
        logoutput=True,
        queryIsFile=True
    )
    assert exit_code == 0, "Failed to populate the TPCH staging data in Hive"
Example #6
    def setupElasticSearch(cls):
        # if already downloaded, skip
        if os.path.isdir(cls._es_home):
            return

        # download and install
        if Machine.type() == "Linux":
            tarballpath = os.path.join(
                cls._toolsdir, "elasticsearch-%s.tar.gz" % cls._es_version)
            if not os.path.isfile(tarballpath):
                assert util.downloadUrl(cls._downloadurlunix, tarballpath)
            Machine.tarExtractAll(tarballpath, cls._toolsdir, mode='r:gz')
        else:
            zippath = os.path.join(cls._toolsdir,
                                   "elasticsearch-%s.zip" % cls._es_version)
            if not os.path.isfile(zippath):
                assert util.downloadUrl(cls._downloadurlwin, zippath)
            Machine.unzipExtractAll(zippath, cls._toolsdir)
        assert os.path.exists(cls._es_home)
Example #7
    def setupActiveMQ(cls):
        # if already downloaded, skip
        if os.path.isdir(cls._activemq_home):
            return

        # download and install
        if Machine.type() == "Linux":
            tarballpath = os.path.join(cls._toolsdir,
                                       "apache-activemq-5.8.0-bin.tar.gz")
            if not os.path.isfile(tarballpath):
                assert util.downloadUrl(cls._downloadurlunix, tarballpath)
            Machine.tarExtractAll(tarballpath, cls._toolsdir, mode='r:gz')
        else:
            zippath = os.path.join(cls._toolsdir,
                                   "apache-activemq-5.8.0-bin.zip")
            if not os.path.isfile(zippath):
                assert util.downloadUrl(cls._downloadurlwin, zippath)
            Machine.unzipExtractAll(zippath, cls._toolsdir)
        assert os.path.exists(cls._activemq_home)
Example #8
def setupHS2ConcurrTestData(stdauth=True):
    # hive.support.concurrency is not in the whitelist, as this is a server setting and not something that a user should/can set in a session.
    # In the case of Ranger and SQL std authorization, set hive.support.concurrency to true and restart HS2
    changes = {
        'hive-site.xml': {
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true',
            'hive.compactor.initiator.on': 'true',
            'hive.compactor.worker.threads': '3',
            'hive.compactor.check.interval': '10',
            'hive.timedout.txn.reaper.interval': '20s'
        },
        'hiveserver2-site.xml': {
            'hive.compactor.initiator.on': 'false',
            'hive.exec.dynamic.partition.mode': 'nonstrict'
        }
    }
    if not Hive.isHive2():
        changes['hiveserver2-site.xml']['hive.enforce.bucketing'] = 'true'
    else:
        changes['hiveserver2-site.xml']['hive.server2.enable.doAs'] = 'false'
        changes['hiveserver2-site.xml']['hive.txn.manager'] = 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager'
        changes['hiveserver2-site.xml']['hive.support.concurrency'] = 'true'
    Hive.modifyConfig(changes)
    time.sleep(60)
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    test_user = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/hs2data", user=test_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", user=test_user, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    HDFS.createDirectory("/tmp/hs2data/customer_address", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'customer_address10k'), "/tmp/hs2data/customer_address")
    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists customer_address;
create external table customer_address (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2), ca_location_type string) row format delimited fields terminated by '|' stored as textfile location '/tmp/hs2data/customer_address';
drop table if exists customer_address_partitioned;
create table customer_address_partitioned (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) partitioned by (ca_location_type string) clustered by (ca_state) into 50 buckets stored as orc tblproperties('transactional'='true');
insert into table customer_address_partitioned partition(ca_location_type) select ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type from customer_address;"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table customer_address_partitioned to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
Example #9
    def setupSolr(cls):
        # if already downloaded, skip
        if os.path.isdir(cls._solr_home):
            return

        # download and install
        tarballpath = os.path.join(cls._toolsdir,
                                   "solr-%s.tgz" % cls._solr_version)
        if not os.path.isfile(tarballpath):
            assert util.downloadUrl(cls._downloadurl, tarballpath)
        Machine.tarExtractAll(tarballpath, cls._toolsdir, mode='r:gz')
Example #10
    def getZipFileAndBuild(cls, buildNo=""):
        """
        This method is deprecated and used in ruSlider.py only
        DO NOT USE this method.
        """
        zipFile = ""

        path = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "pkg-list_qe.txt")
        #pkgUrl = Config.get('slider','APP_PKG_LIST')
        pkgUrl = Slider.getAppPackageBaseUrl(
        ) + "/slider-app-packages/pkg-list_qe.txt"
        if buildNo != "":
            pkgUrl = "http://s3.amazonaws.com/dev.hortonworks.com/HDP/" + Slider.getAppPackageOS(
            ) + "/2.x/BUILDS/" + buildNo + "/slider-app-packages/pkg-list_bn.txt"
        util.downloadUrl(pkgUrl, path)
        f = open(path, "r")
        buildNo = 0
        for line in f:
            if line.startswith("hbase_pkg_url="):
                line = line.strip()
                url = line[14:]
                # hbase_ver has a format like 0.98.4.2.2.1.0
                # we are not using cls.HBASE_VER here because in a rolling upgrade
                # the build numbers differ between the current and the base build
                get = re.search(r"\d\.\d{2}\.\d\.\d\.\d\.\d\.\d", url)
                hbase_ver = get.group(0)
                index = line.find(hbase_ver)
                # skip past the version string and the '-' that follows it
                index = index + 15
                buildNo = line[index:-12]
                logger.info("build# is " + buildNo)
                zipFile = os.path.join(
                    os.getcwd(), "slider-hbase-app-package-" + hbase_ver +
                    "-" + buildNo + "-hadoop2.zip")
                logger.info("downloading " + url)
                util.downloadUrl(url, zipFile)
        f.close()
        return buildNo, zipFile
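
A hedged walk-through of the index arithmetic above, using an invented URL (the file name and build number are hypothetical): the matched version string is 14 characters long, +1 skips the '-' after it, and the final 12 characters are "-hadoop2.zip":

# hypothetical values, for illustration only
import re

url = "http://example.com/slider-hbase-app-package-0.98.4.2.2.1.0-903-hadoop2.zip"
line = "hbase_pkg_url=" + url
hbase_ver = re.search(r"\d\.\d{2}\.\d\.\d\.\d\.\d\.\d", url).group(0)  # "0.98.4.2.2.1.0"
index = line.find(hbase_ver) + 15  # skip the version and the '-' that follows it
buildNo = line[index:-12]          # drop the trailing "-hadoop2.zip"
print(buildNo)                     # -> "903"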
Example #11
    def setupAnt(cls):
        # if dir exists return as its already setup
        if os.path.isdir(cls._ant_home):
            return

        # check if tarball exists or not before downloading it
        tarName = "apache-ant-" + cls._version + ".tar.gz"
        tarballPath = os.path.join(cls._worksapce, tarName)
        if not os.path.isfile(tarballPath):
            # download ant
            assert util.downloadUrl(cls._ant_download_url, tarballPath)

        # now untar ant; on Windows you have to run the tar cmd from the dir where
        # the file exists, else it fails
        Machine.tarExtractAll(filepath=tarballPath, outpath=cls._tools_path, mode='r:gz')
        assert os.path.isfile(cls._ant_cmd)
Example #12
def setupTableauDataset():
    LOCAL_DATA_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "tableau")
    DATA_DIR = os.path.join(LOCAL_DATA_DIR, 'data')
    SCHEMA_SQL_DIR = os.path.join(LOCAL_DATA_DIR, 'schema_3.0')
    HIVE_TABLES = [
        'Batters', 'Calcs', 'DateBins', 'DateTime', 'Election', 'FischerIris', 'Loan', 'NumericBins', 'REI',
        'SeattleCrime', 'Securities', 'SpecialData', 'Staples', 'Starbucks', 'UTStarcom', 'xy'
    ]
    TABLEAU_TEST_DIR = "/user/hrt_qa/tableau"
    DATABASE_NAME = 'tableau'

    logger.info("Setup Tableau dataset")

    if not os.path.exists(LOCAL_DATA_DIR):
        TABLEAU_DATA_TGZ = LOCAL_DATA_DIR + ".tgz"
        assert util.downloadUrl(Config.get('hive', 'TABLEAU_DATASET'), TABLEAU_DATA_TGZ)
        Machine.tarExtractAll(TABLEAU_DATA_TGZ, Config.getEnv('ARTIFACTS_DIR'))
        assert os.path.isdir(LOCAL_DATA_DIR)

    logger.info("create test directory on hdfs to store tableau data files")
    HDFS.createDirectory(TABLEAU_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    logger.info("create tableau database before creating tables")
    Hive.runQueryOnBeeline("DROP DATABASE IF EXISTS %s" % DATABASE_NAME)
    Hive.runQueryOnBeeline("CREATE DATABASE IF NOT EXISTS %s" % DATABASE_NAME)

    for tbl in HIVE_TABLES:
        hdfsDir = TABLEAU_TEST_DIR + '/%s' % tbl
        hdfsFile = hdfsDir + '/%s' % tbl
        localFile = os.path.join(DATA_DIR, '%s.tbl' % tbl)
        sqlFile = os.path.join(SCHEMA_SQL_DIR, '%s.sql' % tbl)

        logger.info("create directory for %s table" % tbl)
        exit_code, stdout = HDFS.createDirectory(hdfsDir, perm='777', force=True)
        assert exit_code == 0, 'Could not create dir for table %s on hdfs.' % tbl

        logger.info("copy file for table %s to hdfs" % tbl)
        exit_code, stdout = HDFS.copyFromLocal(localFile, hdfsFile)
        assert exit_code == 0, 'Could not copy file for table %s to hdfs.' % tbl

        logger.info("create %s table " % tbl)
        # TODO: modify Hive.runQueryOnBeeline to accept a query file name
        exit_code, stdout, stderr = Hive.runQueryOnBeeline(
            ReadFromFile(sqlFile), readFromFile=True, hivevar={'HDFS_LOCATION': hdfsDir}, logoutput=True
        )
        assert exit_code == 0, '%s table creation failed' % tbl
Example #13
    def setupJava7(cls):
        # if dir exists return as its already setup
        # check if tarball exists or not before downloading it
        tarName = "jdk-7u80-linux-x64.tar.gz"
        jdk7_folder_name = "jdk1.7.0_80"
        jdk7_java_home = os.path.join(cls._tools_path, jdk7_folder_name)
        if os.path.isdir(jdk7_java_home):
            logger.info("JDK 7 already installed, skipping setup of JDK 7")
            return jdk7_java_home
        tarballPath = os.path.join(cls._tools_path, tarName)
        if not os.path.isfile(tarballPath):
            # download java
            logger.info("JDK 7 downloading")
            assert util.downloadUrl(cls._jdk_download_url_7, tarballPath)
        # now install java
        Machine.run('chmod 755 ' + tarballPath)
        Machine.run('cd ' + cls._tools_path + '; tar zxvf ' + tarballPath)
        return jdk7_java_home
Example #14
    def setupJava(cls):
        # if dir exists return as its already setup
        if os.path.isdir(cls._java_home):
            logger.info("JDK already installed, skipping setup of JDK")
            return cls._java_home
        # check if tarball exists or not before downloading it
        tarName = "jdk-8u112-linux-x64.tar.gz"
        tarballPath = os.path.join(cls._tools_path, tarName)
        if not os.path.isfile(tarballPath):
            # download java
            assert util.downloadUrl(cls._jdk_download_url, tarballPath)
        # now install java
        Machine.run('chmod 755 ' + tarballPath)
        Machine.run('cd ' + cls._tools_path + '; tar zxvf ' + tarballPath)
        #Machine.run('echo A | .' + tarballPath + ' -noregister 2>&1')
        #Machine.tarExtractAll(filepath=tarballPath, outpath = cls._tools_path, mode='r:gz')
        print(cls._java_home)
        assert os.path.isfile(cls._java_cmd)
        return cls._java_home
Example #15
def downloadDataset(dataDir, dataTgz, downloadUrl, hdfsLocalCopy, textDataDir):
    HDFS.createDirectory(HCAT_TEST_DIR, user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory(HDFS_TEST_DIR, user=HDFS_USER, perm='777', force=True)

    # change timezone on test machines
    Machine.resetTimeZoneOnCluster()

    # Download the TPCDS dataset if not there
    if not os.path.isfile(dataTgz):
        assert util.downloadUrl(downloadUrl, dataTgz)
        Machine.tarExtractAll(dataTgz, dataDir)

    os.makedirs(hdfsLocalCopy)
    for filename in os.listdir(textDataDir):
        hdfs_localcopy_table_dir = os.path.join(hdfsLocalCopy, filename[:-4])
        os.mkdir(hdfs_localcopy_table_dir)
        shutil.copy(os.path.join(textDataDir, filename), hdfs_localcopy_table_dir)
    HDFS.copyFromLocal(hdfsLocalCopy, HDFS_TEST_DIR)
    HDFS.chmod(None, '777', HDFS_TEST_DIR, recursive=True)
Example #16
def setupTestData(stdauth=True):
    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hive-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hive-simple-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HIVE_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)
    # load data into HDFS
    HDFS.createDirectory("/tmp/hs2data", user=HDFS_USER, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
Example #17
    def setupMaven(cls, version=_version, mvn_url=_maven_download_url):
        # if dir exists return as its already setup

        mvn_home = os.path.join(cls._tools_path, 'apache-maven-' + version)
        if os.path.isdir(mvn_home):
            return
        #if os.path.isdir(cls._maven_home):
        #  return

        # check if tarball exists or not before downloading it
        tarName = "apache-maven-" + version + ".tar.gz"
        #tarName = "apache-maven-" +cls._version + ".tar.gz"

        tarballPath = os.path.join(cls._worksapce, tarName)
        if not os.path.isfile(tarballPath):
            # download maven
            assert util.downloadUrl(mvn_url, tarballPath)
            #assert util.downloadUrl(cls._maven_download_url, tarballPath)

        # now extract maven
        Machine.tarExtractAll(filepath=tarballPath, outpath=cls._tools_path, mode='r:gz')

        mvn_cmd = os.path.join(mvn_home, 'bin', 'mvn')
        assert os.path.isfile(mvn_cmd)