示例#1
0
文件: read_api.py 项目: parasj/ray
def range_table(n: int, *, parallelism: int = -1) -> Dataset[ArrowRow]:
    """Create a tabular dataset from a range of integers [0..n).

    Examples:
        >>> import ray
        >>> ds = ray.data.range_table(1000) # doctest: +SKIP
        >>> ds # doctest: +SKIP
        Dataset(num_blocks=200, num_rows=1000, schema={value: int64})
        >>> ds.map(lambda r: {"v2": r["value"] * 2}).take(2) # doctest: +SKIP
        [ArrowRow({'v2': 0}), ArrowRow({'v2': 2})]

    This is similar to range(), but uses Arrow tables to hold the integers
    in Arrow records. The dataset elements take the form {"value": N}.

    Args:
        n: The upper bound of the range of integer records.
        parallelism: The amount of parallelism to use for the dataset.
            Parallelism may be limited by the number of items.

    Returns:
        Dataset holding the integers as Arrow records.
    """
    return read_datasource(RangeDatasource(),
                           parallelism=parallelism,
                           n=n,
                           block_format="arrow")
示例#2
0
def range_tensor(n: int,
                 *,
                 shape: Tuple = (1, ),
                 parallelism: int = 200) -> Dataset[ArrowRow]:
    """Create a Tensor dataset from a range of integers [0..n).

    Examples:
        >>> ds = ray.data.range_tensor(1000, shape=(3, 10))
        >>> ds.map_batches(lambda arr: arr * 2, batch_format="pandas").show()

    This is similar to range_arrow(), but uses the ArrowTensorArray extension
    type. The dataset elements take the form {"value": array(N, shape=shape)}.

    Args:
        n: The upper bound of the range of integer records.
        shape: The shape of each record.
        parallelism: The amount of parallelism to use for the dataset.
            Parallelism may be limited by the number of items.

    Returns:
        Dataset holding the integers as Arrow tensor records.
    """
    return read_datasource(
        RangeDatasource(),
        parallelism=parallelism,
        n=n,
        block_format="tensor",
        tensor_shape=tuple(shape),
    )
示例#3
0
文件: read_api.py 项目: hngenc/ray
def range_tensor(n: int, *, shape: Tuple = (1, ),
                 parallelism: int = 200) -> Dataset[np.ndarray]:
    """Create a Tensor dataset from a range of integers [0..n).

    Examples:
        >>> ds = ray.data.range_tensor(1000, shape=(3, 10))
        >>> ds.map_batches(lambda arr: arr ** 2).show()

    This is similar to range(), but uses np.ndarrays to hold the integers
    in tensor form. The dataset has overall the shape ``(n,) + shape``.

    Args:
        n: The upper bound of the range of integer records.
        shape: The shape of each record.
        parallelism: The amount of parallelism to use for the dataset.

    Returns:
        Dataset holding the integers as tensors.
    """
    return read_datasource(
        RangeDatasource(),
        parallelism=parallelism,
        n=n,
        block_format="tensor",
        tensor_shape=tuple(shape))
示例#4
0
文件: read_api.py 项目: hngenc/ray
def range(n: int, *, parallelism: int = 200) -> Dataset[int]:
    """Create a dataset from a range of integers [0..n).

    Examples:
        >>> ray.data.range(10000).map(lambda x: x * 2).show()

    Args:
        n: The upper bound of the range of integers.
        parallelism: The amount of parallelism to use for the dataset.

    Returns:
        Dataset holding the integers.
    """
    return read_datasource(
        RangeDatasource(), parallelism=parallelism, n=n, block_format="list")
示例#5
0
文件: read_api.py 项目: hngenc/ray
def range_arrow(n: int, *, parallelism: int = 200) -> Dataset[ArrowRow]:
    """Create an Arrow dataset from a range of integers [0..n).

    Examples:
        >>> ds = ray.data.range_arrow(1000)
        >>> ds.map(lambda r: {"v2": r["value"] * 2}).show()

    This is similar to range(), but uses Arrow tables to hold the integers
    in Arrow records. The dataset elements take the form {"value": N}.

    Args:
        n: The upper bound of the range of integer records.
        parallelism: The amount of parallelism to use for the dataset.

    Returns:
        Dataset holding the integers as Arrow records.
    """
    return read_datasource(
        RangeDatasource(), parallelism=parallelism, n=n, block_format="arrow")
示例#6
0
文件: read_api.py 项目: parasj/ray
def range_tensor(n: int,
                 *,
                 shape: Tuple = (1, ),
                 parallelism: int = -1) -> Dataset[ArrowRow]:
    """Create a Tensor dataset from a range of integers [0..n).

    Examples:
        >>> import ray
        >>> ds = ray.data.range_tensor(1000, shape=(2, 2)) # doctest: +SKIP
        >>> ds # doctest: +SKIP
        Dataset(
            num_blocks=200,
            num_rows=1000,
            schema={__value__: <ArrowTensorType: shape=(2, 2), dtype=int64>},
        )
        >>> ds.map_batches(lambda arr: arr * 2).take(2) # doctest: +SKIP
        [array([[0, 0],
                [0, 0]]),
        array([[2, 2],
                [2, 2]])]

    This is similar to range_table(), but uses the ArrowTensorArray extension
    type. The dataset elements take the form {VALUE_COL_NAME: array(N, shape=shape)}.

    Args:
        n: The upper bound of the range of integer records.
        shape: The shape of each record.
        parallelism: The amount of parallelism to use for the dataset.
            Parallelism may be limited by the number of items.

    Returns:
        Dataset holding the integers as Arrow tensor records.
    """
    return read_datasource(
        RangeDatasource(),
        parallelism=parallelism,
        n=n,
        block_format="tensor",
        tensor_shape=tuple(shape),
    )
示例#7
0
def range(n: int, *, parallelism: int = 200) -> Dataset[int]:
    """Create a dataset from a range of integers [0..n).

    Examples:
        >>> import ray
        >>> ds = ray.data.range(10000) # doctest: +SKIP
        >>> ds # doctest: +SKIP
        Dataset(num_blocks=200, num_rows=10000, schema=<class 'int'>)
        >>> ds.map(lambda x: x * 2).take(4) # doctest: +SKIP
        [0, 2, 4, 6]

    Args:
        n: The upper bound of the range of integers.
        parallelism: The amount of parallelism to use for the dataset.
            Parallelism may be limited by the number of items.

    Returns:
        Dataset holding the integers.
    """
    return read_datasource(
        RangeDatasource(), parallelism=parallelism, n=n, block_format="list"
    )