Exemplo n.º 1
0
def test_scalar_add_invalid(dtype_l, dtype_r):
    """Adding scalars of the parametrized dtypes is expected to raise.

    Both operands are built from the same integer and only differ in the
    dtype they are constructed with.
    """
    operand = 1
    lhs = cudf.Scalar(operand, dtype=dtype_l)
    rhs = cudf.Scalar(operand, dtype=dtype_r)

    with pytest.raises(TypeError):
        lhs + rhs
Exemplo n.º 2
0
def test_boolean_scalar_binop(op):
    """Check ``op`` between a boolean Series and a boolean scalar.

    The pandas result is the reference; the cuDF Series is combined first
    with plain Python booleans and then with ``cudf.Scalar`` booleans.
    """
    host_series = pd.Series(np.random.choice([True, False], 10))
    device_series = cudf.from_pandas(host_series)

    # Python bool as the right-hand operand
    for flag in (True, False):
        utils.assert_eq(op(host_series, flag), op(device_series, flag))

    # cuDF scalar as the right-hand operand
    for flag in (True, False):
        utils.assert_eq(
            op(host_series, flag), op(device_series, cudf.Scalar(flag))
        )
Exemplo n.º 3
0
def test_scalar_dtype_and_validity_decimal(slr, dtype, expect):
    """Construct a decimal scalar and check its dtype and validity.

    Parameters
    ----------
    slr
        Host value passed to ``cudf.Scalar``.
    dtype
        Dtype the scalar is constructed with.
    expect
        When this is ``pa.lib.ArrowInvalid`` the construction itself must
        raise that exception; otherwise construction must succeed.
    """
    if expect is pa.lib.ArrowInvalid:
        with pytest.raises(expect):
            cudf.Scalar(slr, dtype=dtype)
        return

    result = cudf.Scalar(slr, dtype=dtype)
    assert result.dtype == dtype
    # ``is_valid`` is a method: asserting the bound method object would
    # always pass, so it has to be called for the check to mean anything.
    assert result.is_valid()
Exemplo n.º 4
0
def test_null_scalar(dtype):
    """A scalar built from ``None`` is NA, keeps its dtype and is invalid.

    For ``cudf.Decimal64Dtype`` the construction is instead expected to
    raise ``NotImplementedError``.
    """
    decimal_requested = isinstance(dtype, cudf.Decimal64Dtype)
    if decimal_requested:
        with pytest.raises(NotImplementedError):
            cudf.Scalar(None, dtype=dtype)
        return

    null_slr = cudf.Scalar(None, dtype=dtype)

    assert null_slr.value is cudf.NA
    assert null_slr.dtype == np.dtype(dtype)
    assert null_slr.is_valid() is False
Exemplo n.º 5
0
def test_scalar_dtype_and_validity(dtype):
    """A scalar built from ``1`` reports the requested dtype and is valid.

    For ``cudf.Decimal64Dtype`` the test only checks that constructing a
    null scalar raises ``NotImplementedError``.
    """
    decimal_requested = isinstance(dtype, cudf.Decimal64Dtype)
    if decimal_requested:
        with pytest.raises(NotImplementedError):
            cudf.Scalar(None, dtype=dtype)
        return

    one = cudf.Scalar(1, dtype=dtype)

    assert one.dtype == np.dtype(dtype)
    assert one.is_valid() is True
Exemplo n.º 6
0
 def __setitem__(self, key, value):
     """Assign ``value`` at ``key`` after normalizing it to a scalar.

     A Python list is wrapped in a ``cudf.Scalar``; ``cudf.NA`` becomes a
     scalar of this column's dtype.

     Raises
     ------
     TypeError
         If the (possibly wrapped) scalar's dtype differs from the
         column's dtype.
     ValueError
         If ``value`` is neither a list, a ``cudf.Scalar`` nor ``cudf.NA``.
     """
     new_value = cudf.Scalar(value) if isinstance(value, list) else value

     if not isinstance(new_value, cudf.Scalar):
         if new_value is not cudf.NA:
             raise ValueError(f"Can not set {value} into ListColumn")
         new_value = cudf.Scalar(new_value, dtype=self.dtype)
     elif new_value.dtype != self.dtype:
         raise TypeError("list nesting level mismatch")

     super().__setitem__(key, new_value)
Exemplo n.º 7
0
def test_construct_from_scalar(value):
    """Building a ``cudf.Scalar`` from another scalar preserves the value.

    NaN is treated as equal to NaN for the purpose of the comparison.
    """
    value = cudf.utils.utils.to_cudf_compatible_scalar(value)
    source = cudf.Scalar(value, value.dtype)
    clone = cudf.Scalar(source)

    assert source.value == clone.value or (
        np.isnan(source.value) and np.isnan(clone.value)
    )

    # Accessing the device value of the copy must not raise.
    clone.device_value

    # NOTE(review): the two comparisons below are evaluated but never
    # asserted, so they cannot fail the test — confirm whether asserts were
    # intended (and whether they would hold after ``device_value`` was read).
    source._is_host_value_current == clone._is_host_value_current
    source._is_device_value_current == clone._is_device_value_current
Exemplo n.º 8
0
def test_scalar_difference(dtype_l, dtype_r):
    """Subtracting two cuDF scalars matches the host-side subtraction.

    Both the resulting value and the resulting dtype are compared.
    """
    operand = 1

    host_lhs = dtype_scalar(operand, dtype=dtype_l)
    host_rhs = dtype_scalar(operand, dtype=dtype_r)

    device_lhs = cudf.Scalar(operand, dtype=dtype_l)
    device_rhs = cudf.Scalar(operand, dtype=dtype_r)

    host_result = host_lhs - host_rhs
    device_result = device_lhs - device_rhs

    assert host_result == device_result.value
    assert host_result.dtype == device_result.dtype
Exemplo n.º 9
0
def test_scalar_floordiv(dtype_l, dtype_r):
    """Floor-dividing two cuDF scalars matches the host-side result.

    Both the resulting value and the resulting dtype are compared.
    """
    operand = 1

    host_lhs = dtype_scalar(operand, dtype=dtype_l)
    host_rhs = dtype_scalar(operand, dtype=dtype_r)

    device_lhs = cudf.Scalar(operand, dtype=dtype_l)
    device_rhs = cudf.Scalar(operand, dtype=dtype_r)

    host_result = host_lhs // host_rhs
    device_result = device_lhs // device_rhs

    assert host_result == device_result.value
    assert host_result.dtype == device_result.dtype
Exemplo n.º 10
0
def test_scalar_power(dtype_l, dtype_r):
    """Raising one cuDF scalar to another matches the host-side result.

    Both the resulting value and the resulting dtype are compared.
    """
    operand = 1

    host_lhs = dtype_scalar(operand, dtype=dtype_l)
    host_rhs = dtype_scalar(operand, dtype=dtype_r)

    device_lhs = cudf.Scalar(operand, dtype=dtype_l)
    device_rhs = cudf.Scalar(operand, dtype=dtype_r)

    host_result = host_lhs ** host_rhs
    device_result = device_lhs ** device_rhs

    assert host_result == device_result.value
    assert host_result.dtype == device_result.dtype
Exemplo n.º 11
0
def _check_and_cast_columns_with_other(
    source_col: ColumnBase,
    other: Union[ScalarLike, ColumnBase],
    inplace: bool,
) -> Tuple[ColumnBase, Union[ScalarLike, ColumnBase]]:
    """
    Return ``source_col`` and ``other`` cast to dtypes that can be combined.

    Parameters
    ----------
    source_col : ColumnBase
        Column whose dtype drives the casting.
    other : scalar or ColumnBase
        Value(s) to be combined with ``source_col``.
    inplace : bool
        If True, ``source_col`` keeps its dtype and ``other`` is cast to
        it. If False, both may be cast to a common dtype.

    Returns
    -------
    tuple
        The (possibly cast) column and the (possibly cast) ``other``.

    Raises
    ------
    TypeError
        If ``is_mixed_with_object_dtype`` reports a mix of object and
        non-object types between ``other`` and ``source_col``.

    Warns
    -----
    A warning is emitted in the ``inplace`` case when ``_can_cast`` says
    the dtype of ``other`` cannot be cast to the column's dtype.
    """
    # Categorical columns are passed through untouched.
    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
        return source_col, other

    # Scalars are normalized by a sibling helper; columns are used as-is.
    if cudf.utils.dtypes.is_scalar(other):
        device_obj = _normalize_scalars(source_col, other)
    else:
        device_obj = other

    # ``None`` needs no casting: hand back whatever normalization produced.
    if other is None:
        return source_col, device_obj
    elif cudf.utils.dtypes.is_mixed_with_object_dtype(device_obj, source_col):
        raise TypeError("cudf does not support mixed types, please type-cast "
                        "the column of dataframe/series and other "
                        "to same dtypes.")
    if inplace:
        # In-place: the column's dtype wins; warn when the cast is not
        # considered safe by ``_can_cast``.
        if not cudf.utils.dtypes._can_cast(device_obj.dtype, source_col.dtype):
            warnings.warn(
                f"Type-casting from {device_obj.dtype} "
                f"to {source_col.dtype}, there could be potential data loss")
        return source_col, device_obj.astype(source_col.dtype)
    else:
        # Out-of-place: keep the column's dtype when a scalar fits into a
        # numerical column, otherwise promote both sides to a common dtype.
        if (cudf.utils.dtypes.is_scalar(other)
                and cudf.utils.dtypes.is_numerical_dtype(source_col.dtype)
                and cudf.utils.dtypes._can_cast(other, source_col.dtype)):
            common_dtype = source_col.dtype
            return (
                source_col.astype(common_dtype),
                cudf.Scalar(other, dtype=common_dtype),
            )
        else:
            # For a scalar, the smallest NumPy type able to hold it takes
            # part in the promotion; for a column, its own dtype does.
            common_dtype = cudf.utils.dtypes.find_common_type([
                source_col.dtype,
                np.min_scalar_type(other)
                if cudf.utils.dtypes.is_scalar(other) else other.dtype,
            ])
            if cudf.utils.dtypes.is_scalar(device_obj):
                device_obj = cudf.Scalar(other, dtype=common_dtype)
            else:
                device_obj = device_obj.astype(common_dtype)
            return source_col.astype(common_dtype), device_obj
Exemplo n.º 12
0
def test_scalar_add(dtype_l, dtype_r):
    """Adding two cuDF scalars matches the host-side addition.

    The value is always compared; the dtype comparison is skipped when
    both dtypes are ``"str"``.
    """
    operand = 1

    host_lhs = dtype_scalar(operand, dtype=dtype_l)
    host_rhs = dtype_scalar(operand, dtype=dtype_r)

    device_lhs = cudf.Scalar(operand, dtype=dtype_l)
    device_rhs = cudf.Scalar(operand, dtype=dtype_r)

    host_result = host_lhs + host_rhs
    device_result = device_lhs + device_rhs

    assert host_result == device_result.value

    both_strings = dtype_l == dtype_r == "str"
    if not both_strings:
        assert host_result.dtype == device_result.dtype
Exemplo n.º 13
0
    def fillna(
        self, value: Any = None, method: str = None, dtype: Dtype = None
    ):
        """Return a copy of this column with nulls replaced.

        Parameters
        ----------
        value : int, Decimal, Decimal64Column or integer NumericalColumn
            Replacement. Scalars are wrapped in a ``cudf.Scalar`` of this
            column's dtype; columns are cast to this column's dtype.
        method, dtype
            Forwarded unchanged to ``libcudf.replace.replace_nulls``.

        Raises
        ------
        TypeError
            If ``value`` is of any other type.
        """
        if isinstance(value, (int, Decimal)):
            replacement = cudf.Scalar(value, dtype=self.dtype)
        else:
            # Accept decimal columns, or numerical columns holding integers.
            is_supported_column = isinstance(value, Decimal64Column) or (
                isinstance(value, cudf.core.column.NumericalColumn)
                and is_integer_dtype(value.dtype)
            )
            if not is_supported_column:
                raise TypeError(
                    "Decimal columns only support using fillna with decimal and "
                    "integer values"
                )
            replacement = value.astype(self.dtype)

        filled = libcudf.replace.replace_nulls(
            input_col=self, replacement=replacement, method=method, dtype=dtype
        )
        return filled._with_type_metadata(self.dtype)
Exemplo n.º 14
0
    def _binary_op_truediv(
        self, rhs: BinaryOperand
    ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]:
        """Prepare operands and output dtype for true division by ``rhs``.

        Returns
        -------
        tuple
            ``(lhs, rhs, out_dtype)``. When ``rhs`` is timedelta-typed both
            operands are cast to a common dtype and then to ``float64`` and
            the output dtype is ``float64``. When ``rhs`` is a float or
            (un)signed integer, operands are returned unchanged and the
            output dtype is this column's dtype.

        Raises
        ------
        TypeError
            If ``rhs`` has any other dtype.
        """
        lhs = self  # type: column.ColumnBase

        if not pd.api.types.is_timedelta64_dtype(rhs.dtype):
            if rhs.dtype.kind not in ("f", "i", "u"):
                raise TypeError(
                    f"Division of {self.dtype} with {rhs.dtype} "
                    f"cannot be performed."
                )
            return lhs, rhs, self.dtype

        common_dtype = determine_out_dtype(self.dtype, rhs.dtype)
        lhs = lhs.astype(common_dtype).astype("float64")

        if not isinstance(rhs, cudf.Scalar):
            rhs = rhs.astype(common_dtype).astype("float64")
        elif rhs.is_valid():
            # Valid scalar: cast its host value
            rhs = rhs.value.astype(common_dtype).astype("float64")
        else:
            # Null scalar: replace with a null float64 scalar
            rhs = cudf.Scalar(None, "float64")

        return lhs, rhs, np.dtype("float64")
Exemplo n.º 15
0
def test_list_scalar_host_construction_null(elem_type, nesting_level):
    """A null scalar of a (nested) list dtype exposes ``cudf.NA`` as value.

    The dtype is a ``cudf.ListDtype`` of ``elem_type`` wrapped in
    ``nesting_level - 1`` further ``ListDtype`` layers.
    """
    list_dtype = cudf.ListDtype(elem_type)
    for _ in range(1, nesting_level):
        list_dtype = cudf.ListDtype(list_dtype)

    null_slr = cudf.Scalar(None, dtype=list_dtype)
    assert null_slr.value is cudf.NA
Exemplo n.º 16
0
def test_scalar_host_initialization(value):
    """A scalar built from a host value holds it on the host only.

    Checks the round-tripped value, validity, and that only the host-side
    "current" flag is set right after construction.
    """
    host_slr = cudf.Scalar(value)

    np.testing.assert_equal(host_slr.value, value)
    assert host_slr.is_valid() is True

    # Flags right after construction
    assert host_slr._is_host_value_current
    assert not host_slr._is_device_value_current
Exemplo n.º 17
0
def test_scalar_logical():
    """Boolean cuDF scalars behave like Python bools under and/or/not."""
    true_slr = cudf.Scalar(True)
    false_slr = cudf.Scalar(False)

    # Plain truthiness
    assert true_slr
    assert not false_slr

    # Conjunction truth table
    assert true_slr and true_slr
    assert not (true_slr and false_slr)
    assert not (false_slr and true_slr)
    assert not (false_slr and false_slr)

    # Disjunction truth table
    assert true_slr or true_slr
    assert true_slr or false_slr
    assert false_slr or true_slr
    assert not (false_slr or false_slr)
Exemplo n.º 18
0
    def fillna(self, fill_value=None, method=None):
        """
        Fill null values with *fill_value*, cast to this column's dtype.

        When ``method`` is given, or ``fill_value`` is already a
        ``cudf.Scalar`` of this column's dtype, the arguments are forwarded
        to the parent implementation unchanged.

        Raises
        ------
        TypeError
            If a non-NaN scalar ``fill_value`` changes when converted to
            this column's dtype.
        """
        passthrough = method is not None or (
            isinstance(fill_value, cudf.Scalar)
            and fill_value.dtype == self.dtype
        )
        if passthrough:
            return super().fillna(fill_value, method)

        if not np.isscalar(fill_value):
            # Column-like replacement: cast to this column's dtype
            replacement = column.as_column(fill_value, nan_as_null=False)
            if is_integer_dtype(self.dtype):
                replacement = _safe_cast_to_int(replacement, self.dtype)
            else:
                replacement = replacement.astype(self.dtype)
            return super().fillna(replacement, method)

        # Scalar replacement: convert and make sure the value survived
        casted = self.dtype.type(fill_value)
        if not np.isnan(fill_value) and (casted != fill_value):
            raise TypeError(
                f"Cannot safely cast non-equivalent "
                f"{type(fill_value).__name__} to {self.dtype.name}")
        return super().fillna(cudf.Scalar(casted), method)
Exemplo n.º 19
0
    def contains(self, search_key: ScalarLike) -> ParentType:
        """
        Returns boolean values indicating whether the specified scalar
        is an element of each row.

        Parameters
        ----------
        search_key : scalar
            element being searched for in each row of the list column

        Returns
        -------
        Series or Index

        Raises
        ------
        TypeError
            If the underlying search reports that the type/scale of
            ``search_key`` does not match the list element type.

        Examples
        --------
        >>> s = cudf.Series([[1, 2, 3], [3, 4, 5], [4, 5, 6]])
        >>> s.list.contains(4)
        0    False
        1     True
        2     True
        dtype: bool
        """
        # The two halves need a separating space: implicit string
        # concatenation otherwise yields "does notmatch", which neither
        # reads correctly nor matches a properly spaced error message.
        mismatch_msg = (
            "Type/Scale of search key does not "
            "match list column element type"
        )

        search_key = cudf.Scalar(search_key)
        try:
            return self._return_or_inplace(
                contains_scalar(self._column, search_key))
        except RuntimeError as e:
            # Surface a dtype mismatch as a TypeError; anything else is
            # re-raised untouched.
            if mismatch_msg in str(e):
                raise TypeError(mismatch_msg) from e
            raise
Exemplo n.º 20
0
def test_timedelta_index_ops_with_cudf_scalars(data, cpu_scalar, dtype, op):
    """Binary ops between a timedelta index and a cuDF scalar match pandas.

    Parameters
    ----------
    data, dtype
        Used to build the cuDF index; the pandas index is derived from it.
    cpu_scalar
        Host scalar used with pandas and wrapped in ``cudf.Scalar`` for cuDF.
    op : {"add", "sub", "truediv", "floordiv"}
        Operation to exercise, in both operand orders.

    Raises
    ------
    ValueError
        If ``op`` is not one of the supported names.
    """
    binops = {
        "add": lambda lhs, rhs: lhs + rhs,
        "sub": lambda lhs, rhs: lhs - rhs,
        "truediv": lambda lhs, rhs: lhs / rhs,
        "floordiv": lambda lhs, rhs: lhs // rhs,
    }
    # Fail with a clear message instead of an UnboundLocalError later on.
    if op not in binops:
        raise ValueError(f"Unsupported op: {op!r}")
    binop = binops[op]

    gtdi = cudf.Index(data=data, dtype=dtype)
    ptdi = gtdi.to_pandas()
    gpu_scalar = cudf.Scalar(cpu_scalar)

    # index <op> scalar
    assert_eq(binop(ptdi, cpu_scalar), binop(gtdi, gpu_scalar))

    # scalar <op> index (reflected operand order)
    assert_eq(binop(cpu_scalar, ptdi), binop(gpu_scalar, gtdi))
Exemplo n.º 21
0
def test_scalar_no_negative_bools():
    """Unary minus on a boolean cuDF scalar raises a descriptive TypeError."""
    bool_slr = cudf.Scalar(True)
    expected_message = re.escape(
        "Boolean scalars in cuDF do not support negation, use logical not"
    )

    with pytest.raises(TypeError, match=expected_message):
        -bool_slr
Exemplo n.º 22
0
    def seconds(self) -> "cudf.core.column.NumericalColumn":
        """
        Number of seconds (>= 0 and less than 1 day).

        Returns
        -------
        NumericalColumn
        """
        # Step 1: drop whole days by taking the remainder modulo one day.
        one_day = cudf.Scalar(
            np.timedelta64(_numpy_to_pandas_conversion["D"], "ns")
        )
        within_day = self % one_day

        # Step 2: count how many whole seconds fit into that remainder.
        one_second = cudf.Scalar(
            np.timedelta64(_numpy_to_pandas_conversion["s"], "ns")
        )
        return within_day // one_second
Exemplo n.º 23
0
    def __setitem__(self, key, value):
        """Assign ``value`` at ``key``; dicts are converted to a scalar.

        When ``value`` is a dict, every field of this column's dtype that is
        missing from it is filled with ``cudf.NA`` before the dict is turned
        into a ``cudf.Scalar`` of this column's dtype. Any other value is
        forwarded to the parent implementation unchanged.
        """
        if isinstance(value, dict):
            # Fill missing fields on a copy so the caller's dict is not
            # modified as a side effect of the assignment.
            filled = dict(value)
            for field in self.dtype.fields:
                filled.setdefault(field, cudf.NA)

            value = cudf.Scalar(filled, self.dtype)
        super().__setitem__(key, value)
Exemplo n.º 24
0
 def normalize_binop_value(self, other):
     """Normalize the right-hand operand of a binary operation.

     Parameters
     ----------
     other
         A scalar ``int`` or ``Decimal``, or a ``cudf.Scalar`` with a
         decimal dtype.

     Returns
     -------
     cudf.Scalar
         A scalar built from ``Decimal(other)`` for host scalars, or
         ``other`` itself when it is already a decimal ``cudf.Scalar``.

     Raises
     ------
     TypeError
         For any other operand type.
     """
     # ``np.int`` was only an alias of the builtin ``int`` and no longer
     # exists in recent NumPy releases, so checking ``int`` is equivalent
     # and avoids an AttributeError.
     if is_scalar(other) and isinstance(other, (int, Decimal)):
         return cudf.Scalar(Decimal(other))
     elif isinstance(other, cudf.Scalar) and isinstance(
             other.dtype, cudf.core.dtypes.DecimalDtype):
         return other
     else:
         raise TypeError(f"cannot normalize {type(other)}")
Exemplo n.º 25
0
def test_datetime_series_ops_with_cudf_scalars(data, scalar, dtype, op):
    """``op`` between a Series and a cuDF scalar matches the pandas result.

    pandas is given the raw ``scalar``; cuDF is given it wrapped in a
    ``cudf.Scalar``.
    """
    device_series = cudf.Series(data=data, dtype=dtype)
    host_series = device_series.to_pandas()

    host_result = op(host_series, scalar)
    device_result = op(device_series, cudf.Scalar(scalar))

    assert_eq(host_result, device_result)
Exemplo n.º 26
0
    def nanoseconds(self) -> "cudf.core.column.NumericalColumn":
        """
        Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.

        Returns
        -------
        NumericalColumn
        """
        # Step 1: drop whole microseconds via the remainder modulo 1 us.
        one_microsecond = cudf.Scalar(
            np.timedelta64(_numpy_to_pandas_conversion["us"], "ns")
        )
        within_microsecond = self % one_microsecond

        # Step 2: express that remainder as a count of nanoseconds.
        one_nanosecond = cudf.Scalar(
            np.timedelta64(_numpy_to_pandas_conversion["ns"], "ns")
        )
        return within_microsecond // one_nanosecond
Exemplo n.º 27
0
def cast_to_appropriate_cudf_type(val):
    """Convert an array-like ``val`` to a cuDF object based on its shape.

    * 0-d input: the host value of a ``cudf.Scalar`` built from it.
    * 1-d input, or 2-d input with exactly one column: a ``cudf.Series``.
    * anything else: ``NotImplemented`` is returned (not raised).
    """
    if val.ndim == 0:
        return cudf.Scalar(val).value

    is_single_column = (val.ndim == 1) or (
        val.ndim == 2 and val.shape[1] == 1
    )
    if is_single_column:
        return cudf.Series(val)

    return NotImplemented
Exemplo n.º 28
0
def test_series_iloc_defer_cudf_scalar():
    """Indexing a Series with a cuDF scalar ``1`` returns the second item.

    The check is repeated for every dtype in ``index_dtypes``.
    """
    host_series = pd.Series([1, 2, 3], index=pd.Index(["a", "b", "c"]))
    device_series = cudf.from_pandas(host_series)

    for index_dtype in index_dtypes:
        position = cudf.Scalar(1, dtype=index_dtype)
        selected = device_series[position]
        assert_eq(2, selected)
Exemplo n.º 29
0
def test_null_scalar(dtype):
    """A scalar built from ``None`` is NA, keeps its dtype and is invalid.

    Decimal dtypes are compared as given; every other dtype is normalized
    through ``cudf.dtype`` before the comparison.
    """
    null_slr = cudf.Scalar(None, dtype=dtype)
    assert null_slr.value is cudf.NA

    if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
        expected_dtype = dtype
    else:
        expected_dtype = cudf.dtype(dtype)
    assert null_slr.dtype == expected_dtype

    assert null_slr.is_valid() is False
Exemplo n.º 30
0
    def days(self) -> "cudf.core.column.NumericalColumn":
        """
        Number of days for each element.

        Returns
        -------
        NumericalColumn
        """
        # Floor-divide by the length of one day, expressed in nanoseconds
        # via the ``_numpy_to_pandas_conversion`` lookup.
        one_day = cudf.Scalar(
            np.timedelta64(_numpy_to_pandas_conversion["D"], "ns")
        )
        return self // one_day