def test_tensor_array_scalar_cast():
    """Verify that one-element tensors cast to ``float`` like their ndarrays."""
    row_count = 3
    element_shape = (1,)
    full_shape = (row_count,) + element_shape
    source = np.arange(np.prod(np.array(full_shape))).reshape(full_shape)

    wrapped = TensorArray(source)

    # Every element has shape (1,), so it must cast to the same float as
    # the matching ndarray row.
    for tensor_elem, ndarray_elem in zip(wrapped, source):
        assert float(tensor_elem) == float(ndarray_elem)

    # A whole TensorArray holding a single value must be castable as well.
    source = np.arange(1).reshape((1, 1, 1))
    wrapped = TensorArray(source)
    assert float(wrapped) == float(source)
示例#2
0
def test_dict_pandas():
    """Round-trip a single-column dict of ndarrays through pandas."""
    values = np.array([1, 2, 3])
    batch = {"x": values}
    wanted_frame = pd.DataFrame({"x": TensorArray(batch["x"])})

    # dict -> DataFrame: the column must be wrapped as a TensorArray.
    got_frame = convert_batch_type_to_pandas(batch)
    assert wanted_frame.equals(got_frame)

    # DataFrame -> numpy: the result is compared against the original array.
    restored = convert_pandas_to_batch_type(got_frame, type=DataType.NUMPY)
    assert np.array_equal(restored, batch["x"])
示例#3
0
def test_dict_multi_dim_to_pandas():
    """Round-trip a dict holding one multi-dimensional ndarray through pandas."""
    cube = np.arange(12).reshape((3, 2, 2))
    batch = {"x": cube}
    wanted_frame = pd.DataFrame({"x": TensorArray(cube)})

    # dict -> DataFrame
    got_frame = convert_batch_type_to_pandas(batch)
    assert wanted_frame.equals(got_frame)

    # DataFrame -> numpy
    restored = convert_pandas_to_batch_type(got_frame, type=DataType.NUMPY)
    assert np.array_equal(restored, batch["x"])
示例#4
0
def test_numpy_object_pandas():
    """Round-trip an object-dtype ndarray of unequal-length lists via pandas."""
    ragged = np.array([[1, 2, 3], [1]], dtype=object)
    wanted_frame = pd.DataFrame({TENSOR_COLUMN_NAME: TensorArray(ragged)})

    # ndarray -> DataFrame: data lands in the TENSOR_COLUMN_NAME column.
    got_frame = convert_batch_type_to_pandas(ragged)
    assert wanted_frame.equals(got_frame)

    # DataFrame -> numpy
    restored = convert_pandas_to_batch_type(got_frame, type=DataType.NUMPY)
    assert np.array_equal(restored, ragged)
示例#5
0
def test_numpy_multi_dim_pandas():
    """Round-trip a bare multi-dimensional ndarray through pandas."""
    cube = np.arange(12).reshape((3, 2, 2))
    wanted_frame = pd.DataFrame({TENSOR_COLUMN_NAME: TensorArray(cube)})

    # ndarray -> DataFrame: data lands in the TENSOR_COLUMN_NAME column.
    got_frame = convert_batch_type_to_pandas(cube)
    assert wanted_frame.equals(got_frame)

    # DataFrame -> numpy
    restored = convert_pandas_to_batch_type(got_frame, type=DataType.NUMPY)
    assert np.array_equal(restored, cube)
示例#6
0
def test_arrow_tensor_pandas():
    """Round-trip an Arrow table with a tensor column through pandas."""
    values = np.array([1, 2, 3])
    wanted_frame = pd.DataFrame({"x": TensorArray(values)})
    arrow_column = ArrowTensorArray.from_numpy(values)
    table = pa.Table.from_arrays([arrow_column], names=["x"])

    # Arrow -> DataFrame
    got_frame = convert_batch_type_to_pandas(table)
    assert wanted_frame.equals(got_frame)

    # DataFrame -> Arrow
    restored = convert_pandas_to_batch_type(got_frame, type=DataType.ARROW)
    assert restored.equals(table)
示例#7
0
def test_dict_pandas_multi_column():
    """Round-trip a two-column dict of ndarrays through pandas."""
    array_dict = {"x": np.array([1, 2, 3]), "y": np.array([4, 5, 6])}
    wanted_columns = {}
    for name, values in array_dict.items():
        wanted_columns[name] = TensorArray(values)
    wanted_frame = pd.DataFrame(wanted_columns)

    # dict -> DataFrame
    got_frame = convert_batch_type_to_pandas(array_dict)
    assert wanted_frame.equals(got_frame)

    # DataFrame -> dict of numpy arrays; every returned column is compared
    # against the input column of the same name.
    restored = convert_pandas_to_batch_type(got_frame, type=DataType.NUMPY)
    for name, values in restored.items():
        assert np.array_equal(values, array_dict[name])
示例#8
0
    def _predict_pandas(
            self, data: pd.DataFrame,
            dtype: Union[TensorDtype, Dict[str, TensorDtype]]) -> pd.DataFrame:
        """Run the model on a DataFrame batch and return a DataFrame.

        Args:
            data: Input batch. It is converted to numpy form with
                ``convert_pandas_to_batch_type`` before being turned into
                model tensors.
            dtype: Either one dtype applied to every input, or a dict that
                is indexed by column name to pick the dtype per column.

        Returns:
            A DataFrame whose columns are ``TensorArray`` objects:

            * dict model output: one column per output key;
            * list/tuple model output: columns named ``output_00001``,
              ``output_00002``, ... in output order;
            * any other output: a single ``predictions`` column.
        """
        tensors = convert_pandas_to_batch_type(data, DataType.NUMPY)

        if isinstance(tensors, np.ndarray):
            # Single numpy array: when dtypes are given per column, use the
            # entry for the first column of the input frame.
            column_name = data.columns[0]
            if isinstance(dtype, dict):
                dtype = dtype[column_name]
            model_input = self._array_to_tensor(tensors, dtype)
        else:
            # Mapping of column name -> array: convert each one separately.
            model_input = {
                k: self._array_to_tensor(
                    v, dtype=dtype[k] if isinstance(dtype, dict) else dtype)
                for k, v in tensors.items()
            }

        output = self._model_predict(model_input)

        # Handle model multi-output. For example if model outputs 2 images.
        if isinstance(output, dict):
            # Iterate over items(): iterating the dict itself yields only
            # the keys, which cannot be unpacked into (name, tensor) pairs.
            return pd.DataFrame({
                k: TensorArray(self._tensor_to_array(v))
                for k, v in output.items()
            })

        if isinstance(output, (list, tuple)):
            tensor_name = "output_"
            output_dict = {}
            # Column suffixes are 1-based and zero-padded to five digits.
            for position, tensor in enumerate(output, start=1):
                output_dict[tensor_name + str(position).zfill(5)] = (
                    TensorArray(self._tensor_to_array(tensor)))
            return pd.DataFrame(output_dict)

        return pd.DataFrame(
            {"predictions": TensorArray(self._tensor_to_array(output))},
            columns=["predictions"],
        )
def test_tensor_array_dataframe_repr():
    """Check the printed form of a DataFrame holding a TensorArray column."""
    row_count = 3
    element_shape = (2, 2)
    full_shape = (row_count,) + element_shape
    values = np.arange(np.prod(np.array(full_shape))).reshape(full_shape)

    frame = pd.DataFrame({"a": TensorArray(values)})

    # Each row is expected to render its 2x2 element on a single line.
    expected_repr = """                      a
0  [[ 0,  1], [ 2,  3]]
1  [[ 4,  5], [ 6,  7]]
2  [[ 8,  9], [10, 11]]"""
    assert repr(frame) == expected_repr
示例#10
0
def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Apply the image transform pipeline to the ``image`` column.

    Every entry of ``df["image"]`` is converted with ``to_numpy()``, run
    through the composed transforms, and the results are written back to
    the same column as a ``TensorArray``. The frame is modified in place
    and also returned.
    """
    steps = [
        transforms.ToTensor(),
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    pipeline = transforms.Compose(steps)

    transformed = [pipeline(image.to_numpy()) for image in df["image"]]
    df["image"] = TensorArray(transformed)
    return df
示例#11
0
def convert_batch_type_to_pandas(data: DataBatchType) -> pd.DataFrame:
    """Turn a supported batch object into a pandas DataFrame.

    Args:
        data: A DataFrame (returned unchanged), an ndarray (stored in the
            ``TENSOR_COLUMN_NAME`` column), a dict of ndarrays (one column
            per key), or a pyarrow Table when pyarrow is available.

    Returns:
        A pandas DataFrame representation of ``data``. ndarray inputs are
        wrapped in ``TensorArray``.

    Raises:
        ValueError: If a dict value is not an ndarray, or if ``data`` is of
            an unsupported type.
    """
    from ray.air.util.tensor_extensions.pandas import TensorArray

    # Already a DataFrame: nothing to convert.
    if isinstance(data, pd.DataFrame):
        return data

    # A bare array becomes a single tensor column.
    if isinstance(data, np.ndarray):
        return pd.DataFrame({TENSOR_COLUMN_NAME: TensorArray(data)})

    if isinstance(data, dict):
        columns = {}
        for key, value in data.items():
            if not isinstance(value, np.ndarray):
                raise ValueError(
                    "All values in the provided dict must be of type "
                    f"np.ndarray. Found type {type(value)} for key {key} "
                    "instead.")
            # Wrap each numpy array as a TensorArray column.
            columns[key] = TensorArray(value)
        return pd.DataFrame(columns)

    # The Arrow branch is only considered when pyarrow is not None.
    if pyarrow is not None and isinstance(data, pyarrow.Table):
        return data.to_pandas()

    raise ValueError(
        f"Received data of type: {type(data)}, but expected it to be one "
        f"of {DataBatchType}")
示例#12
0
def preprocess_image_with_label(df: pd.DataFrame) -> pd.DataFrame:
    """Transform the ``image`` column and overwrite ``label`` with a constant.

    Every entry of ``df["image"]`` is converted with ``to_numpy()``, run
    through the composed transforms, and stored back as a ``TensorArray``.
    The frame is modified in place and also returned.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(256),
        transforms.CenterCrop(224),
        normalize,
    ])

    transformed = [pipeline(entry.to_numpy()) for entry in df["image"]]
    df["image"] = TensorArray(transformed)

    # Use a fixed synthetic label value for perf benchmark purposes.
    df["label"] = df["label"].map(lambda _: 1)
    return df
示例#13
0
def test_tensor_array_reductions():
    """Compare each supported TensorArray reducer against NumPy along axis 0."""
    row_count = 3
    element_shape = (2, 2, 2)
    full_shape = (row_count,) + element_shape
    values = np.arange(np.prod(np.array(full_shape))).reshape(full_shape)

    frame = pd.DataFrame({
        "one": list(range(row_count)),
        "two": TensorArray(values),
    })

    # NumPy is the ground truth for every reducer.
    for reducer_name, numpy_reducer in TensorArray.SUPPORTED_REDUCERS.items():
        # Pandas uses a ddof default of 1 while NumPy uses 0, so NumPy is
        # given ddof=1 to make the std/var calculations equivalent.
        extra_kwargs = {"ddof": 1} if reducer_name in ("std", "var") else {}
        reduced = frame["two"].agg(reducer_name)
        np.testing.assert_equal(
            reduced, numpy_reducer(values, axis=0, **extra_kwargs))
示例#14
0
def test_tensor_array_array_protocol():
    """Check ``np.asarray`` with a dtype on a TensorArray and on one element."""
    row_count = 3
    element_shape = (2, 2, 2)
    full_shape = (row_count,) + element_shape
    values = np.arange(np.prod(np.array(full_shape))).reshape(full_shape)

    wrapped = TensorArray(values)

    # Whole array: conversion with a dtype must match ndarray.astype.
    whole_as_float = np.asarray(wrapped, dtype=np.float32)
    np.testing.assert_array_equal(whole_as_float, values.astype(np.float32))

    # Single element: same check on the first entry.
    first_elem = wrapped[0]
    elem_as_float = np.asarray(first_elem, dtype=np.float32)
    np.testing.assert_array_equal(elem_as_float, values[0].astype(np.float32))
示例#15
0
def test_tensor_array_ops():
    """Compare arithmetic, comparison and bitwise ops on a tensor column
    against the same ops on the plain ndarray."""
    row_count = 3
    element_shape = (2, 2, 2)
    full_shape = (row_count,) + element_shape
    values = np.arange(np.prod(np.array(full_shape))).reshape(full_shape)

    frame = pd.DataFrame({"one": [1, 2, 3], "two": TensorArray(values)})

    def arithmetic(operand):
        return 2 * (operand + 1) / 3

    def comparison(operand):
        return operand % 2 == 0

    def logical(operand):
        return operand & (3 * operand) | (5 * operand)

    # Op tests, using NumPy as the ground truth: each op is applied to the
    # raw ndarray and to the DataFrame column, and the results must agree.
    for operation in (arithmetic, comparison, logical):
        np.testing.assert_equal(operation(values), operation(frame["two"]))
示例#16
0
 def untensorize(torch_tensor):
     """Wrap a torch tensor's data in a ``TensorArray``.

     The tensor is passed through ``cpu()``, ``detach()`` and ``numpy()``
     in that order before being wrapped.
     """
     return TensorArray(torch_tensor.cpu().detach().numpy())