def setup():
    # Remove leftovers from previous runs; each HDFS helper returns an
    # (exit_code, stdout)-style tuple, and exit code 0 means success.
    out = HDFS.deleteFile(CREATE_FILE_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
    out = HDFS.deleteDirectory(OUT_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
    out = HDFS.deleteDirectory(CREATE_FILE_2_PATH_IN_HADOOP, user=HDFS_USER)
    assert out[0] == 0
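A possible refactor of the repeated delete-and-assert pattern above, assuming only what the snippet shows, namely that HDFS.deleteFile and HDFS.deleteDirectory return an (exit_code, stdout)-style tuple; the helper name is illustrative:

def _cleanHDFSPath(delete_fn, path):
    # delete_fn is HDFS.deleteFile or HDFS.deleteDirectory; both are assumed
    # to return a tuple whose first element is the exit code (0 on success).
    out = delete_fn(path, user=HDFS_USER)
    assert out[0] == 0, "cleanup of %s failed: %r" % (path, out)

def setup():
    _cleanHDFSPath(HDFS.deleteFile, CREATE_FILE_PATH_IN_HADOOP)
    _cleanHDFSPath(HDFS.deleteDirectory, OUT_PATH_IN_HADOOP)
    _cleanHDFSPath(HDFS.deleteDirectory, CREATE_FILE_2_PATH_IN_HADOOP)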
Example #2
    @classmethod
    def configureOozieSpark(cls):
        cls.fix_qe_14910()
        # Workaround BUG-63500: oozie spark test cases are failing with
        # org.apache.thrift.transport.TTransportException null
        sparkShareLibPath = cls.getLatestShareLibPath() + "/spark"
        SPARK_HOME = Config.get('spark', 'SPARK2_HOME')
        source = os.path.join(SPARK_HOME, "conf", "hive-site.xml")
        target_hive_site = os.path.join(sparkShareLibPath, "hive-site.xml")
        HDFS.deleteFile(target_hive_site, cls.getOozieUser())
        HDFS.copyFromLocal(source, sparkShareLibPath, cls.getOozieUser())
        isTez = Hadoop.isTez(True, False)
        if isTez:
            # Tez deployments also need tez-site.xml in the sharelib.
            target_tez_site = os.path.join(sparkShareLibPath, "tez-site.xml")
            HDFS.deleteFile(target_tez_site, cls.getOozieUser())
            HDFS.copyFromLocal(
                os.path.join(Config.get('tez', 'TEZ_CONF_DIR'),
                             "tez-site.xml"), sparkShareLibPath,
                cls.getOozieUser())
        exit_code, stdout = Oozie.share_lib_update()
        assert exit_code == 0
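A hedged sketch of how this classmethod might be invoked; the module-level hook and the OozieSparkTest name are illustrative assumptions, not from the source:

# Hypothetical invocation from a pytest-style module setup hook;
# OozieSparkTest stands in for whatever class defines configureOozieSpark.
def setup_module():
    # Refresh the Oozie Spark sharelib (hive-site.xml, plus tez-site.xml on
    # Tez clusters) before any Spark-on-Oozie workflow runs.
    OozieSparkTest.configureOozieSpark()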
Example #3
    @classmethod
    def insertCSVDataViaCSVBuildLoad(cls,
                                     csvFile,
                                     tableName,
                                     putIntoHDFS=True,
                                     deleteAfterExec=True,
                                     runInBackground=False,
                                     user=None,
                                     config=None,
                                     optionAndParameter="",
                                     env=None,
                                     delimiter=",",
                                     arrayDelimiter=None,
                                     schema=None):
        """
          By default, the files will be allocated under /tmp/ folder in HDFS.
        """
        global ZK_ZPARENT
        if Slider.isSlider():
            ZK_ZPARENT = util.getPropertyValueFromConfigXMLFile(
                os.path.join(Config.get('hbase', 'HBASE_CONF_DIR'),
                             'hbase-site.xml'), "zookeeper.znode.parent")

        if Machine.isLinux():
            clientjar = Machine.find(user=Machine.getAdminUser(),
                                     host="localhost",
                                     filepath=PHOENIX_HOME,
                                     searchstr="phoenix-*[0-9]-client.jar",
                                     passwd=Machine.getAdminPasswd())
        else:
            clientjar = Machine.find(user=Machine.getAdminUser(),
                                     host="localhost",
                                     filepath=PHOENIX_HOME,
                                     searchstr="phoenix-*-client.jar",
                                     passwd=Machine.getAdminPasswd())

        if Machine.isWindows():
            # Turn the UNC result \\localhost\C$\... into a local C:\... path.
            clientjar = clientjar[0].replace("\\\\localhost\\", "", 1).replace("$", ":")
            fileName = csvFile.split('\\')[-1]
        else:
            clientjar = clientjar[0]
            fileName = csvFile.split('/')[-1]

        # If requested, stage the CSV in HDFS, since CsvBulkLoadTool reads its
        # input from there.
        executingUser = HADOOPQA_USER if user is None else user

        if putIntoHDFS:
            if not HDFS.fileExists('/tmp/'):
                HDFS.mkdir('/tmp/')
            HDFS.copyFromLocal(csvFile, '/tmp/', executingUser, config,
                               optionAndParameter)

        hbaseConfDir = HBASE_CONF_DIR
        if Slider.isSlider():
            hbaseConfDir = Config.get('hbase', 'HBASE_CONF_DIR')

        classpath = hbaseConfDir

        finalCommand = "%s jar %s org.apache.phoenix.mapreduce.CsvBulkLoadTool --table %s --input %s" \
                       % (HADOOP_CMD, clientjar, tableName, '/tmp/%s' % fileName)

        if schema is not None:
            finalCommand = '%s -schema %s' % (finalCommand, schema)

        # Build the environment for the spawned command once: caller-supplied
        # entries plus the test ENVIRONMENT, with ENVIRONMENT taking precedence
        # on key conflicts (works on both Python 2 and 3).
        runEnv = dict(env) if env is not None else {}
        runEnv.update(ENVIRONMENT)

        if Machine.isWindows():
            os.environ['HADOOP_USER_CLASSPATH_FIRST'] = 'true'
            os.environ['HADOOP_CLASSPATH'] = classpath
            if delimiter != "," or arrayDelimiter is not None:
                finalCommand = "%s -d `\\\"`%s`\\\" -a `\\\"`%s`\\\"" \
                               % (finalCommand, delimiter, arrayDelimiter.strip("'"))
            finalCommand = "%s --zookeeper %s" % (finalCommand, ZK_HOST)
            if runInBackground:
                exit_code = 0
                stdout = ''
                Machine.runinbackground(finalCommand, env=runEnv)
            else:
                exit_code, stdout = Machine.run(finalCommand, env=runEnv)
        else:
            # delimiter options
            if delimiter != "," or arrayDelimiter is not None:
                finalCommand = "%s --delimiter %s --array-delimiter %s" % (
                    finalCommand, delimiter, arrayDelimiter)
            # ZKHosts options
            finalCommand = "%s --zookeeper %s" % (finalCommand,
                                                  cls.getZKConnectString())
            if runInBackground:
                exit_code = 0
                stdout = ''
                Machine.runinbackground(
                    "HADOOP_CLASSPATH=%s %s" % (classpath, finalCommand),
                    env=runEnv)
            else:
                exit_code, stdout = Machine.run(
                    "HADOOP_CLASSPATH=%s %s" % (classpath, finalCommand),
                    env=runEnv)

        # If requested, delete the staged CSV from HDFS after the load
        # (not supported for the run-in-background option).
        if deleteAfterExec and not runInBackground:
            HDFS.deleteFile('/tmp/%s' % fileName, executingUser)
        return exit_code, stdout
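An illustrative call, assuming the method lives on a Phoenix helper class; the class name, table name, and CSV path below are made-up values:

# Hypothetical usage: bulk-load a local CSV into a Phoenix table.
exit_code, stdout = Phoenix.insertCSVDataViaCSVBuildLoad(
    csvFile='/tmp/users.csv',   # local file; staged to HDFS /tmp/ first
    tableName='USERS',
    putIntoHDFS=True,           # copy the CSV into HDFS before loading
    deleteAfterExec=True,       # remove the staged copy afterwards
    delimiter=',')
assert exit_code == 0, stdout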