Exemplo n.º 1
0
    def for_block(block: Block) -> "BlockAccessor[T]":
        """Return the accessor implementation that matches ``block``'s type.

        Dispatch is by ``isinstance`` checks, tried in this order:
        ``pyarrow.Table``, ``pandas.DataFrame``, ``bytes``, ``list``.

        Args:
            block: The block to wrap.

        Returns:
            An ``ArrowBlockAccessor`` for a ``pyarrow.Table`` or for
            ``bytes`` (built through ``ArrowBlockAccessor.from_bytes``), a
            ``PandasBlockAccessor`` for a ``pandas.DataFrame``, or a
            ``SimpleBlockAccessor`` for a ``list``.

        Raises:
            TypeError: If ``block`` is none of the types listed above.
        """
        _check_pyarrow_version()
        import pyarrow
        import pandas

        # Each accessor module is imported only inside the branch that
        # needs it, exactly one branch runs per call.
        if isinstance(block, pyarrow.Table):
            from ray.data.impl.arrow_block import ArrowBlockAccessor

            return ArrowBlockAccessor(block)

        if isinstance(block, pandas.DataFrame):
            from ray.data.impl.pandas_block import PandasBlockAccessor

            return PandasBlockAccessor(block)

        if isinstance(block, bytes):
            from ray.data.impl.arrow_block import ArrowBlockAccessor

            return ArrowBlockAccessor.from_bytes(block)

        if isinstance(block, list):
            from ray.data.impl.simple_block import SimpleBlockAccessor

            return SimpleBlockAccessor(block)

        # Nothing matched: report both the value and its type.
        message = "Not a block type: {} ({})".format(block, type(block))
        raise TypeError(message)
Exemplo n.º 2
0
Arquivo: block.py Projeto: smorad/ray
    def batch_to_block(batch: DataBatch) -> Block:
        """Convert a user-facing batch into a block.

        Args:
            batch: The batch to convert.

        Returns:
            The result of ``ArrowBlockAccessor.numpy_to_block(batch)`` when
            ``batch`` is a ``numpy.ndarray``; otherwise ``batch`` itself,
            unchanged.
        """
        # Anything that is not an ndarray is passed through as-is.
        if not isinstance(batch, np.ndarray):
            return batch

        from ray.data.impl.arrow_block import ArrowBlockAccessor

        converted = ArrowBlockAccessor.numpy_to_block(batch)
        return converted
Exemplo n.º 3
0
 def aggregate_combined_blocks(
     blocks: List["pandas.DataFrame"], key: KeyFn, aggs: Tuple[AggregateFn]
 ) -> Tuple["pandas.DataFrame", BlockMetadata]:
     """Aggregate the given blocks by delegating to the Arrow implementation.

     Every input block is converted with ``to_arrow()``, the converted
     blocks are handed to ``ArrowBlockAccessor.aggregate_combined_blocks``
     together with ``key`` and ``aggs``, and the resulting block is
     converted back with ``to_pandas()``.

     Args:
         blocks: The blocks to aggregate.
         key: Forwarded unchanged to the Arrow implementation.
         aggs: Forwarded unchanged to the Arrow implementation.

     Returns:
         A tuple of the converted result block and the metadata returned
         by the Arrow implementation.
     """
     # TODO (kfstorm): A workaround to pass tests. Not efficient.
     arrow_inputs = []
     for frame in blocks:
         arrow_inputs.append(BlockAccessor.for_block(frame).to_arrow())

     combined, metadata = ArrowBlockAccessor.aggregate_combined_blocks(
         arrow_inputs, key, aggs
     )
     result_frame = BlockAccessor.for_block(combined).to_pandas()
     return result_frame, metadata
Exemplo n.º 4
0
 def merge_sorted_blocks(
         blocks: List["pandas.DataFrame"], key: "SortKeyT",
         _descending: bool) -> Tuple["pandas.DataFrame", BlockMetadata]:
     """Merge the given blocks by delegating to the Arrow implementation.

     Every input block is converted with ``to_arrow()``, the converted
     blocks are handed to ``ArrowBlockAccessor.merge_sorted_blocks``
     together with ``key`` and ``_descending``, and the resulting block is
     converted back with ``to_pandas()``.

     Args:
         blocks: The blocks to merge.
         key: Forwarded unchanged to the Arrow implementation.
         _descending: Forwarded unchanged to the Arrow implementation.

     Returns:
         A tuple of the converted result block and the metadata returned
         by the Arrow implementation.
     """
     # TODO (kfstorm): A workaround to pass tests. Not efficient.
     arrow_inputs = []
     for frame in blocks:
         arrow_inputs.append(BlockAccessor.for_block(frame).to_arrow())

     merged, metadata = ArrowBlockAccessor.merge_sorted_blocks(
         arrow_inputs, key, _descending)
     result_frame = BlockAccessor.for_block(merged).to_pandas()
     return result_frame, metadata
Exemplo n.º 5
0
    def for_block(block: Block) -> "BlockAccessor[T]":
        """Return the accessor implementation that matches ``block``'s type.

        Dispatch is by ``isinstance`` checks, tried in this order:
        ``pyarrow.Table``, ``bytes``, ``list``, ``np.ndarray``.

        Args:
            block: The block to wrap.

        Returns:
            An ``ArrowBlockAccessor`` for a ``pyarrow.Table`` or for
            ``bytes`` (built through ``ArrowBlockAccessor.from_bytes``), a
            ``SimpleBlockAccessor`` for a ``list``, or a
            ``TensorBlockAccessor`` for an ``np.ndarray``.

        Raises:
            TypeError: If ``block`` is none of the types listed above.
        """
        _check_pyarrow_version()
        import pyarrow

        # Each accessor module is imported only inside the branch that
        # needs it, exactly one branch runs per call.
        if isinstance(block, pyarrow.Table):
            from ray.data.impl.arrow_block import ArrowBlockAccessor

            return ArrowBlockAccessor(block)

        if isinstance(block, bytes):
            from ray.data.impl.arrow_block import ArrowBlockAccessor

            return ArrowBlockAccessor.from_bytes(block)

        if isinstance(block, list):
            from ray.data.impl.simple_block import SimpleBlockAccessor

            return SimpleBlockAccessor(block)

        if isinstance(block, np.ndarray):
            from ray.data.impl.tensor_block import TensorBlockAccessor

            return TensorBlockAccessor(block)

        # Nothing matched: report the offending value.
        message = "Not a block type: {}".format(block)
        raise TypeError(message)