def validate_wordcount_written_to_HDFS(cls, hdfs_dir, patterns, expected_count, appId=None):
    """
    Validate the wordcount results written into HDFS directories by a streaming job.
    Use wildcards in 'hdfs_dir' to recursively read sub-directories.
    :param hdfs_dir: HDFS directory from which contents will be read
    :param patterns: list of words to check
    :param expected_count: the expected number of occurrences of each word in 'patterns'
    :param appId: application ID (optional)
    :return:
    """
    # initialize the word_count dictionary
    word_count = {}
    for p in patterns:
        word_count[p] = 0
    exit_code, cat_content = HDFS.cat(hdfs_dir, logoutput=True)
    assert exit_code == 0, "Could not read from %s, Error: %s, appId: %s" % (hdfs_dir, cat_content, appId)
    # iterate over the concatenated file contents line by line
    for line in cat_content.splitlines():
        for word in line.split():
            if word in word_count:
                word_count[word] += 1
    logger.info(word_count)
    for key, value in word_count.iteritems():
        assert value >= expected_count, "%s wordcount is %s. expected_count is %s, appId: %s" % \
            (key, value, expected_count, appId)
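# Illustrative sketch (not part of the test suite): the counting step above,
# shown standalone without the HDFS helpers, assuming the concatenated job
# output is already available as a plain string. The function name is hypothetical.
def _count_patterns_sketch(text, patterns):
    counts = dict((p, 0) for p in patterns)
    for line in text.splitlines():
        for word in line.split():
            if word in counts:
                counts[word] += 1
    return counts

# e.g. _count_patterns_sketch("apple banana\napple", ["apple", "banana"])
# returns {'apple': 2, 'banana': 1}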
def checkClasspathVersion(cls, Version_Num, config=None):
    Local_Test_dir = os.path.join(Config.getEnv("WORKSPACE"), "tests", "rolling_upgrade", "yarn")
    Multi_Version_App_Dir = os.path.join(Local_Test_dir, "data")
    Mapper = "data/versionVerifyMapper.py"
    Reducer = "data/versionVerifyReducer.py"
    Verify_File_Name = "test.txt"
    Verify_Test_File = os.path.join(Multi_Version_App_Dir, Verify_File_Name)

    # Set up the job environment with the current MR framework path and classpath
    mapred_app_path = MAPRED.getConfigValue("mapreduce.application.framework.path", None)
    mapred_classpath = MAPRED.getConfigValue("mapreduce.application.classpath", None)
    env = {
        "mapreduce.application.framework.path": mapred_app_path,
        "mapreduce.application.classpath": mapred_classpath
    }

    verifyInput = cls._hdfs_input + "/verify"
    HDFS.createDirectory(verifyInput, None, "777", False)

    # Copy template files for the verifier streaming job
    templateFile = open(Verify_Test_File, 'w')
    templateFile.write(Version_Num)
    templateFile.close()
    HDFS.copyFromLocal(Verify_Test_File, verifyInput, user=Config.get('hadoop', 'HADOOPQA_USER'))

    # Submit the special streaming job
    shortStreamingId = HadoopJobHelper.runStreamJob(
        Mapper,
        Reducer,
        verifyInput,
        cls._hdfs_output_verify,
        files=Multi_Version_App_Dir,
        config=config,
        extraJobArg=cls._jobArgs,
        env=env,
        proposedJobName=cls._shortStreamingName)
    MAPRED.waitForJobDoneOrTimeout(shortStreamingId, timeoutInSec=180)

    # Make sure the task succeeded
    #assert YARN.getAppFinalStateFromID(appId) == 'SUCCEEDED'

    # Check the result content
    retVal, checkContent = HDFS.cat(cls._hdfs_output_verify + '/part-00000')
    logger.info("CHECK CLASSPATH VERSION OUTPUT")
    logger.info(retVal)
    logger.info(checkContent)
    ruAssert("YARN", retVal == 0)
    ruAssert("YARN", 'True' in checkContent, "[VersionVerify] Stream job returns false: " + checkContent)
    #assert retVal == 0
    #assert 'True' in checkContent, "Stream job returns false: " + checkContent
    #assert 'False' not in checkContent, "Stream job returns false: " + checkContent
    HDFS.deleteDirectory(cls._hdfs_output_verify, user=Config.get('hadoop', 'HADOOPQA_USER'))
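# Illustrative sketch (assumption): the real data/versionVerifyMapper.py used
# above is not shown in this module. A minimal version-verifying streaming
# mapper could look roughly like this function: read the expected version
# (written to test.txt) from stdin and emit True/False depending on whether
# that version appears in the classpath visible to the task. The name and the
# use of the CLASSPATH environment variable are assumptions for illustration only.
def _version_verify_mapper_sketch(stdin=None):
    import os
    import sys
    stdin = stdin if stdin is not None else sys.stdin
    for line in stdin:
        expected = line.strip()
        classpath = os.environ.get("CLASSPATH", "")
        print(str(expected in classpath))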
def verify_hdfs_topology(cls, topologyName, targetDir, lines, type, useStandaloneCmd):
    """
    Verifies that the HDFS topologies produced the expected output.
    """
    # The Slider app is killed before the long-running job verification, so topology
    # activation checks are disabled in that mode.
    if useStandaloneCmd == True:
        ruAssert(
            "Storm",
            Storm.getTopologyStatus(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd) == 'ACTIVE')

    exit_code, stdout = HDFS.lsr(targetDir, False, True)
    hdfsListOutput = stdout.splitlines()

    # Pick the second-to-last file: the first file might not have enough content yet,
    # and the last file can run into transient HDFS issues.
    if len(hdfsListOutput) >= 2:
        fileLine = hdfsListOutput[-2]
        sampleoutfile = fileLine.split(" ")[-1].strip()
        # Hacky solution, as the test code for trident and core topologies writes under the same directory.
        # if fileLine.endswith(".txt") and type == "cat":
        #     sampleoutfile = fileLine.split(" ")[-1].strip()
        # if fileLine.endswith(".seq") and type == "text":
        #     sampleoutfile = fileLine.split(" ")[-1].strip()
        logger.info("Taking sample output file: %s" % (sampleoutfile))
        if type == "text":
            exit_code, stdout = HDFS.text(sampleoutfile, None)
        else:
            exit_code, stdout = HDFS.cat(sampleoutfile, None)
        for line in lines:
            ruAssert(
                "Storm", stdout.find(line) >= 0,
                "[StormHDFSVerify] expected line : %s in %s" % (line, sampleoutfile))
    else:
        ruAssert("Storm", False, "hdfsListOutput must have at least 2 lines")
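# Illustrative sketch (assumption): the second-to-last-file selection used in
# verify_hdfs_topology, shown standalone. Assumes an lsr-style listing where the
# file path is the last whitespace-separated field of each line. The name is
# hypothetical.
def _pick_sample_file_sketch(lsr_output):
    listing = lsr_output.splitlines()
    if len(listing) < 2:
        return None
    # skip the newest file (it may still be mid-write) and take the one before it
    return listing[-2].split(" ")[-1].strip()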
def verifyLongRunningJob(cls, config=None):
    '''
    Verify the long-running background job after it finishes.
    :return:
    '''
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

    retVal, checkContent = HDFS.cat(cls._hdfs_output + '/part-00000')
    #assert retVal == 0
    if retVal == 0:
        UpgradePerNode.reportProgress(
            "[PASSED][YARN][BGJobCheck] verifyLongRunning Job for Yarn, retVal = 0. Successful check ")
    else:
        UpgradePerNode.reportProgress(
            "[FAILED][YARN][BGJobCheck] verifyLongRunning Job for Yarn, retVal != 0. Failed check ")
    #assert 'true' in checkContent, "Stream job returns false: " + checkContent
    if 'true' in checkContent:
        UpgradePerNode.reportProgress(
            "[PASSED][YARN][BGJobCheck] verifyLongRunning Job for Yarn, true in checkContent. Successful check ")
    else:
        UpgradePerNode.reportProgress(
            "[FAILED][YARN][BGJobCheck] verifyLongRunning Job for Yarn, true not in checkContent. Failed check ")

    # Verify that the application's attempt count does not increase and that there are no failed tasks.
    appID = cls._background_job_appId
    jobID = cls._background_job_jobId
    # temporarily skipping check
    #assert YARN.getNumAttemptsForApp(appID) == 1
    #YARN.verifyMRTasksCount(jobID, appID, 0, skipAssert=True)

    from beaver.component.rollingupgrade.ruCommon import hdpRelease
    Version_Num = hdpRelease.getCurrentRelease("hadoop-client")
    cls.checkClasspathVersion(Version_Num, config)
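# Illustrative sketch (assumption): the report-instead-of-assert pattern used in
# verifyLongRunningJob. During a rolling upgrade a failed check is recorded via
# reportProgress rather than raising an assertion, so the remaining checks still
# run. The name and signature are hypothetical.
def _report_check_sketch(passed, message, report=logger.info):
    status = "PASSED" if passed else "FAILED"
    report("[%s][YARN][BGJobCheck] %s" % (status, message))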