def _transform_output_format(self, pcollection, output_format):
    from bigflow.util import path_util
    from bigflow.util import utils
    format_type = output_format.get_entity_name()
    # todo: extract ugi from output_format, support multiple clusters and ugis
    if format_type == "TextOutputFormat" or \
            format_type == "SequenceFileAsBinaryOutputFormat":
        uri = path_util.to_abs_local_path(output_format.path)
        if utils.is_infinite(pcollection):
            if not path_util.is_hdfs_path(uri):
                raise ValueError("Writing an infinite PType to a local file "
                                 "is not supported in MRPipeline")
            else:
                output_format.path = self._toft_path(uri)
        else:
            if not path_util.is_hdfs_path(uri):
                # The user is writing to a local file through MRPipeline; replace the
                # original uri with a temp path on HDFS and dump the output to the
                # local FS after the job is done.
                hdfs_uri = self._tmp_hdfs_path(uri)
                output_format.path = self._toft_path(hdfs_uri)
                self._local_uri_infos.append({
                    'local_uri': uri,
                    'hdfs_uri': hdfs_uri,
                    'overwrite': output_format.overwrite,
                })
                logger.debug("Write file to HDFS path: %s and dump it after the job is done"
                             % hdfs_uri)
                self._remote_temp_files.append(hdfs_uri)
            else:
                output_format.path = self._toft_path(self._tmp_output_path(uri))
                output_format.commit_path = self._toft_path(uri)
    return output_format
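# Illustration of the redirection above (paths hypothetical): writing a finite
# PCollection to a local target such as "./output/result" through MRPipeline
# never writes locally during the job; the job writes to the temporary HDFS
# path produced by self._tmp_hdfs_path("./output/result"), and the entry
# recorded in self._local_uri_infos is later used to copy that HDFS output
# back to "./output/result" once the job finishes.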
def __append_cache_archive(self, file_path, resource_path):
    resource_path = resource_path.rstrip("/")
    if not path_util.is_hdfs_path(file_path):
        file_path = os.path.abspath(file_path)
    cache_archives = self._resource.cache_archive_list
    if cache_archives is None:
        cache_archives = "%s#%s" % (file_path, resource_path)
    else:
        cache_archives = "%s,%s#%s" % (cache_archives, file_path, resource_path)
    self._resource.cache_archive_list = cache_archives
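# For reference, the string assembled by __append_cache_archive follows
# Hadoop's DistributedCache "source#symlink" convention, with entries joined
# by commas (paths below are hypothetical):
#
#   /home/work/app/data.tar#data,hdfs://ns1/share/dict.tar#dict
#
# Each archive is unpacked on the task nodes and exposed under its symlink name.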
def _transform_uri(self, uri, format_type, ugi=None):
    from bigflow.util import path_util
    if format_type == "TextInputFormat" or \
            format_type == "SequenceFileAsBinaryInputFormat":
        uri = path_util.to_abs_local_path(uri)
        if not path_util.is_hdfs_path(uri):
            return self._upload_file(uri)
        else:
            return uri
    # todo: support multiple clusters and ugis for spark_pipeline
    return uri
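# Behavior sketch (values hypothetical): for a text input, a local uri such as
# "./input/part-00000" is first uploaded through self._upload_file and the
# returned remote uri is used in its place, while an HDFS uri like
# "hdfs://ns1/input/part-00000" passes through unchanged.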
def add_file(self, file_path, resource_path=None, executable=False):
    """
    Add a single file to the Pipeline so that it can be accessed at runtime.

    Args:
        file_path (str): Path of the file to add; may be a local or an HDFS path.
        resource_path (str): Path used to access the file when the local engine
                             runs; should be a relative path. That is, at execution
                             time the local engine maps file_path to resource_path,
                             and user code can access the file through that path.
        executable (bool): If True, the file is made executable at runtime.
    """
    if path_util.is_hdfs_path(file_path.lower()):
        self._add_remote_file(file_path, resource_path, executable)
    else:
        self._resource.add_file(file_path, resource_path, executable)
def add_file(self, file_path, resource_path=None, executable=False):
    """
    Add a single file to the Pipeline so that it can be accessed at runtime.

    Args:
        file_path (str): Path of the file to add; local and HDFS paths are supported.
        resource_path (str): Local path used to access the file on the remote side;
                             should be a relative path. That is, on the remote side
                             file_path is mapped to resource_path, and user code can
                             access the file behind file_path through that path.
        executable (bool): If True, the file is made executable at runtime.
    """
    if path_util.is_hdfs_path(file_path.lower()):
        if executable:
            logger.warn("Setting executable on a cache file is not supported yet; "
                        "ignoring the executable property")
        self.__append_cache_file(file_path, resource_path, executable)
    else:
        self._resource.add_file(file_path, resource_path, executable)
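# Minimal usage sketch (pipeline instance and paths are hypothetical):
#
#   pipeline.add_file("conf/app.conf", "app.conf")
#   pipeline.add_file("hdfs:///share/dict.dat", "dict.dat")       # shipped as a cache file
#   pipeline.add_file("tools/run.sh", "run.sh", executable=True)  # local file, made executable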
def _path_exists(self, path):
    if path_util.is_hdfs_path(path):
        return self._hadoop_client().fs_test(path, self._hadoop_config)
    return os.path.exists(path)
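# E.g. _path_exists("hdfs://ns1/tmp/out") tests the path through the Hadoop
# client's fs_test, while _path_exists("/tmp/out") falls back to os.path.exists.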