Python FeatherReader.get_column_name示例

编程语言: Python

命名空间/包名称: pyarrow.feather

类/类型: FeatherReader

方法/功能: get_column_name

hotexamples.com的示例: 5

Python FeatherReader.get_column_name - 共找到5个示例。这些是从开源项目中提取的、评分最高的 pyarrow.feather.FeatherReader.get_column_name 真实 Python 用法示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

get_column_name(5)

FeatherReader(1)

get_column(1)

iterrows(1)

示例#1

显示文件

文件： rnkdb.py 项目： dpaysan/pySCENIC

 def genes(self) -> Tuple[str, ...]:
     """Return the names of all columns in the Feather file except the index column.

     Opens ``self._fname`` with ``FeatherReader``, walks every column
     position and skips the column whose name equals ``INDEX_NAME``.

     :return: The remaining column names, in file order, as a tuple of
         arbitrary length.
     """
     # noinspection PyTypeChecker
     reader = FeatherReader(self._fname)
     # Get all gene names (exclude "features" column). Each name is looked
     # up once and then filtered, instead of being fetched twice per column.
     column_names = (
         reader.get_column_name(idx) for idx in range(reader.num_columns)
     )
     return tuple(name for name in column_names if name != INDEX_NAME)

示例#2

显示文件

文件： io.py 项目： wingszb/modin

    def read_feather(cls, path, columns=None, use_threads=True):
        """Read a Feather file into a query compiler, splitting the work by column.

        Only the pyarrow engine is supported for now.

        Args:
            path: The filepath of the feather file. Only local files are
                supported for now.
            columns: Not supported by the pandas API, but can be passed here
                to read only specific columns. When None, all column names
                are taken from the file itself.
            use_threads: Whether or not to use threads when reading (True by
                default). In this method it is only forwarded to the parent
                implementation on the fallback path.

        Returns:
            The object produced by ``cls.query_compiler_cls`` from the remote
            partitions, a ``pandas.RangeIndex`` and the column names.

        Notes:
            pyarrow feather is used. Please refer to the documentation here
            https://arrow.apache.org/docs/python/api.html#feather-format
        """
        # Without a remote reader task, defer to the parent implementation.
        if cls.read_feather_remote_task is None:
            return super(RayIO, cls).read_feather(
                path, columns=columns, use_threads=use_threads
            )

        if columns is None:
            from pyarrow.feather import FeatherReader

            feather_file = FeatherReader(path)
            columns = [
                feather_file.get_column_name(pos)
                for pos in range(feather_file.num_columns)
            ]

        num_partitions = cls.frame_mgr_cls._compute_num_partitions()
        n_columns = len(columns)
        num_splits = min(n_columns, num_partitions)
        # Columns handled per remote task: ceil(n_columns / num_partitions).
        per_task, leftover = divmod(n_columns, num_partitions)
        if leftover:
            per_task += 1
        # Each item in this list is a list of column names of the original df.
        column_groups = []
        for start in range(0, n_columns, per_task):
            column_groups.append(columns[start : start + per_task])
        task_results = [
            cls.read_feather_remote_task._remote(
                args=(path, group, num_splits), num_return_vals=num_splits + 1
            )
            for group in column_groups
        ]
        # After transposing, the last row carries each task's extra return
        # value (used below as the index length); the rows before it are data.
        blk_partitions = np.array(task_results).T
        data_rows = blk_partitions[:-1]
        remote_partitions = np.array(
            [[cls.frame_partition_cls(obj) for obj in row] for row in data_rows]
        )
        index = pandas.RangeIndex(ray.get(blk_partitions[-1][0]))
        return cls.query_compiler_cls(
            cls.frame_mgr_cls(remote_partitions), index, columns
        )

示例#3

显示文件

    def _get_null_counts(self, path, columns=None):
        """Collect the null count of each selected column in a Feather file.

        Args:
            path: Location of the Feather file, passed to ``FeatherReader``.
            columns: Optional container of column names. When given, only
                columns whose name is in it are counted; ``None`` counts
                every column.

        Returns:
            list: The ``null_count`` of every selected column, in file order.
        """
        reader = FeatherReader(path)
        counts = []
        for i in range(reader.num_columns):
            name = reader.get_column_name(i)
            if columns is not None and name not in columns:
                # Filter on the name first so that columns which are not
                # requested are never fetched from the file.
                continue
            counts.append(reader.get_column(i).null_count)

        return counts

示例#4

显示文件

文件： feather_reader.py 项目： vanglian/modin

    def read(cls, path, columns=None, **kwargs):
        """Build a query compiler for a Feather file.

        Only the pyarrow engine is supported for now.

        Args:
            path: The filepath of the feather file. Only local files are
                supported for now.
            columns: Not supported by the pandas API, but can be passed here
                to read only specific columns. When None, all column names
                are taken from the file itself.
            **kwargs: Accepted for signature compatibility; not used by this
                method.

        Returns:
            Whatever ``cls.build_query_compiler`` returns for the path and
            column names; it is always called with ``use_threads=False``.

        Notes:
            pyarrow feather is used. Please refer to the documentation here
            https://arrow.apache.org/docs/python/api.html#feather-format
        """
        # Explicit column selection: nothing to discover from the file.
        if columns is not None:
            return cls.build_query_compiler(path, columns, use_threads=False)

        from pyarrow.feather import FeatherReader

        feather_file = FeatherReader(path)
        all_names = []
        for position in range(feather_file.num_columns):
            all_names.append(feather_file.get_column_name(position))
        return cls.build_query_compiler(path, all_names, use_threads=False)

示例#5

显示文件

 def genes(self) -> Tuple[str, ...]:
     """Return the column names of the Feather file, excluding the index column.

     Opens ``self._fname`` with ``FeatherReader`` and inspects the first
     ``self.total_genes`` column positions, skipping the column whose name
     equals ``INDEX_NAME``.

     :return: The remaining column names, in file order, as a tuple of
         arbitrary length.
     """
     # noinspection PyTypeChecker
     reader = FeatherReader(self._fname)
     # NOTE(review): only positions below ``self.total_genes`` are scanned.
     # If the index column sits inside that range and the file holds
     # total_genes + 1 columns, the last column is never visited -- confirm
     # against how ``total_genes`` is defined.
     # Each name is looked up once and then filtered.
     column_names = (
         reader.get_column_name(idx) for idx in range(self.total_genes)
     )
     return tuple(name for name in column_names if name != INDEX_NAME)