Пример #1
0
        def method(self, other):
            """Apply the enclosing ``op`` between this array and ``other``.

            Index, Series and DataFrame operands are deferred by returning
            ``NotImplemented``.  Positions where either operand is missing are
            excluded from the computation: for arithmetic ops they are filled
            with ``StringDtype.na_value`` in the returned ``StringArray``; for
            the remaining (comparison) ops they are masked in the returned
            ``BooleanArray``.

            Raises
            ------
            ValueError
                If ``other`` is not a scalar and its length differs from
                ``len(self)``.
            """
            from pandas.arrays import BooleanArray

            op_name = op.__name__
            assert op_name in ops.ARITHMETIC_BINOPS | ops.COMPARISON_BINOPS

            if isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame)):
                return NotImplemented
            if isinstance(other, cls):
                # Work on the raw backing array of the other operand.
                other = other._ndarray

            # The mask must be computed from the full ``other`` before it is
            # trimmed down to the non-missing positions below.
            na_mask = isna(self) | isna(other)
            present = ~na_mask

            if not lib.is_scalar(other):
                if len(other) != len(self):
                    # prevent improper broadcasting when other is 2D
                    raise ValueError(
                        f"Lengths of operands do not match: {len(self)} != {len(other)}"
                    )
                other = np.asarray(other)[present]

            if op_name in ops.ARITHMETIC_BINOPS:
                out = np.empty_like(self._ndarray, dtype="object")
                out[na_mask] = StringDtype.na_value
                out[present] = op(self._ndarray[present], other)
                return StringArray(out)

            # Comparison: masked slots keep the placeholder value False.
            flags = np.zeros(len(self._ndarray), dtype="bool")
            flags[present] = op(self._ndarray[present], other)
            return BooleanArray(flags, na_mask)
Пример #2
0
def test_to_boolean_array_all_none():
    """``pd.array`` with dtype "boolean" turns all-``None`` input into a fully masked array."""
    expected = BooleanArray(
        np.array([True, True, True]),
        np.array([True, True, True]),
    )

    # The same result is expected from a plain list and from an object ndarray.
    inputs = (
        [None, None, None],
        np.array([None, None, None], dtype=object),
    )
    for data in inputs:
        result = pd.array(data, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)
Пример #3
0
def test_boolean_array_constructor():
    """Check ``BooleanArray(values, mask)`` construction and its input validation."""
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    # Valid input: a masked slot compares equal to a None entry from pd.array.
    tm.assert_extension_array_equal(
        BooleanArray(values, mask),
        pd.array([True, False, True, None], dtype="boolean"),
    )

    values_type_msg = "values should be boolean numpy array"
    mask_type_msg = "mask should be boolean numpy array"

    # (expected exception, message pattern, factory producing the bad arguments).
    # Factories keep argument construction inside the ``pytest.raises`` block.
    rejected = (
        (TypeError, values_type_msg, lambda: (values.tolist(), mask)),
        (TypeError, mask_type_msg, lambda: (values, mask.tolist())),
        (TypeError, values_type_msg, lambda: (values.astype(int), mask)),
        (TypeError, mask_type_msg, lambda: (values, None)),
        (ValueError, "values must be a 1D array", lambda: (values.reshape(1, -1), mask)),
        (ValueError, "mask must be a 1D array", lambda: (values, mask.reshape(1, -1))),
    )
    for exc_type, pattern, make_args in rejected:
        with pytest.raises(exc_type, match=pattern):
            BooleanArray(*make_args())
Пример #4
0
def test_coerce_to_array_from_boolean_array():
    """``coerce_to_array`` accepts a ``BooleanArray`` but rejects an extra mask for it."""
    data = np.array([True, False, True, False], dtype="bool")
    na = np.array([False, False, False, True], dtype="bool")
    arr = BooleanArray(data, na)

    # Default construction: the round trip hands back the very same buffers.
    shared = BooleanArray(*coerce_to_array(arr))
    tm.assert_extension_array_equal(shared, arr)
    assert shared._data is arr._data
    assert shared._mask is arr._mask

    # With copy=True the result is equal but backed by new buffers.
    copied = BooleanArray(*coerce_to_array(arr), copy=True)
    tm.assert_extension_array_equal(copied, arr)
    assert copied._data is not arr._data
    assert copied._mask is not arr._mask

    # A BooleanArray already carries its mask, so a second one is an error.
    with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
        coerce_to_array(arr, mask=na)
Пример #5
0
def test_to_boolean_array():
    """``pd.array(..., dtype="boolean")`` accepts lists, bool ndarrays and object ndarrays."""
    # Without missing values.
    expected = BooleanArray(
        np.array([True, False, True]),
        np.array([False, False, False]),
    )
    complete_inputs = (
        [True, False, True],
        np.array([True, False, True]),
        np.array([True, False, True], dtype=object),
    )
    for data in complete_inputs:
        result = pd.array(data, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

    # with missing values
    expected = BooleanArray(
        np.array([True, False, True]),
        np.array([False, False, True]),
    )
    missing_inputs = (
        [True, False, None],
        np.array([True, False, None], dtype=object),
    )
    for data in missing_inputs:
        result = pd.array(data, dtype="boolean")
        tm.assert_extension_array_equal(result, expected)
Пример #6
0
    def _cmp_method(self, other, op):
        """Compare this array with ``other`` using ``op`` and return a ``BooleanArray``.

        The comparison function is looked up in ``ARROW_CMP_FUNCS`` by
        ``op.__name__``.  ``ArrowStringArray`` and ``np.ndarray`` operands are
        passed to it directly; scalars are wrapped with ``pa.scalar`` first.
        When that scalar comparison raises ``ArrowNotImplementedError`` or
        ``ArrowInvalid``, ``op`` is applied to the non-missing elements of a
        NumPy copy instead.  Any other operand yields ``NotImplemented``.
        """
        from pandas.arrays import BooleanArray

        compare = ARROW_CMP_FUNCS[op.__name__]

        if isinstance(other, ArrowStringArray):
            arrow_result = compare(self._data, other._data)
        elif isinstance(other, np.ndarray):
            arrow_result = compare(self._data, other)
        elif not is_scalar(other):
            return NotImplemented
        else:
            try:
                arrow_result = compare(self._data, pa.scalar(other))
            except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
                # Fallback: compare only the non-missing positions with ``op``
                # and mask the rest.
                na_mask = isna(self) | isna(other)
                present = ~na_mask
                flags = np.zeros(len(self), dtype="bool")
                flags[present] = op(np.array(self)[present], other)
                return BooleanArray(flags, na_mask)

        # TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray
        return BooleanArray._from_sequence(arrow_result.to_pandas().values)
Пример #7
0
    def _cmp_method(self, other, op):
        """Compare the underlying data with ``other`` and return a ``BooleanArray``.

        Masked operands (``BooleanArray``, ``IntegerArray``, ``FloatingArray``)
        are split into data and mask; other list-likes are converted with
        ``np.asarray``.  The result's mask is this array's mask combined with
        the other operand's mask when there is one.  Comparing against ``NA``
        produces a result in which every position is masked.

        Raises
        ------
        NotImplementedError
            If a list-like ``other`` has more than one dimension.
        """
        from pandas.arrays import (
            BooleanArray,
            IntegerArray,
        )

        other_mask = None

        if isinstance(other, (BooleanArray, IntegerArray, FloatingArray)):
            other_mask = other._mask
            other = other._data
        elif is_list_like(other):
            other = np.asarray(other)
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")

        if other is not libmissing.NA:
            with warnings.catch_warnings():
                # numpy may show a FutureWarning:
                #     elementwise comparison failed; returning scalar instead,
                #     but in the future will perform elementwise comparison
                # before returning NotImplemented. We fall back to the correct
                # behavior today, so that should be fine to ignore.
                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
                with np.errstate(all="ignore"):
                    dunder = getattr(self._data, f"__{op.__name__}__")
                    result = dunder(other)

                if result is NotImplemented:
                    result = invalid_comparison(self._data, other, op)
        else:
            # numpy does not handle pd.NA well as "other" scalar (it returns
            # a scalar False instead of an array)
            # This may be fixed by NA.__array_ufunc__. Revisit this check
            # once that's implemented.
            shape = self._data.shape
            result = np.zeros(shape, dtype="bool")
            other_mask = np.ones(shape, dtype="bool")

        # Missing values propagate: a slot is masked if either side is masked.
        if other_mask is None:
            combined = self._mask.copy()
        else:
            combined = self._mask | other_mask

        return BooleanArray(result, combined)
Пример #8
0
    def test_scalar(self, other, all_compare_operators):
        """Comparing a boolean array with a scalar keeps the mask and does not mutate input."""
        op = self.get_op_from_name(all_compare_operators)
        initial = [True, False, None]
        a = pd.array(initial, dtype="boolean")

        result = op(a, other)

        if other is not pd.NA:
            # Expected values come from comparing the raw data; the mask is unchanged.
            expected = BooleanArray(op(a._data, other), a._mask, copy=True)
        else:
            expected = pd.array([None, None, None], dtype="boolean")
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = None
        tm.assert_extension_array_equal(a, pd.array(initial, dtype="boolean"))
Пример #9
0
def test_coerce_to_array():
    """Exercise ``coerce_to_array`` with explicit masks, copies and mismatched shapes."""
    # TODO this is currently not public API
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    # Default: the arrays passed in are the ones stored on the result.
    result = BooleanArray(*coerce_to_array(values, mask=mask))
    tm.assert_extension_array_equal(result, BooleanArray(values, mask))
    assert result._data is values
    assert result._mask is mask

    # copy=True: equal content, but neither buffer is the input object.
    result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
    tm.assert_extension_array_equal(result, BooleanArray(values, mask))
    assert result._data is not values
    assert result._mask is not mask

    # mixed missing from values and mask
    values = [True, False, None, False]
    mask = np.array([False, False, False, True], dtype="bool")
    expected = BooleanArray(
        np.array([True, False, True, True]),
        np.array([False, False, True, True]),
    )
    mixed_inputs = (
        (values, mask),
        (np.array(values, dtype=object), mask),
        (values, mask.tolist()),
    )
    for vals, msk in mixed_inputs:
        result = BooleanArray(*coerce_to_array(vals, mask=msk))
        tm.assert_extension_array_equal(result, expected)

    # raise errors for wrong dimension
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    # passing 2D values is OK as long as no mask
    coerce_to_array(values.reshape(1, -1))

    shape_msg = "values.shape and mask.shape must match"
    mismatched = (
        (values.reshape(1, -1), mask),
        (values, mask.reshape(1, -1)),
    )
    for vals, msk in mismatched:
        with pytest.raises(ValueError, match=shape_msg):
            coerce_to_array(vals, mask=msk)
Пример #10
0
    def test_array(self, all_compare_operators):
        """Comparing two boolean arrays combines both masks and leaves the inputs intact."""
        op = self.get_op_from_name(all_compare_operators)
        left_values = [True] * 3 + [False] * 3 + [None] * 3
        right_values = [True, False, None] * 3
        a = pd.array(left_values, dtype="boolean")
        b = pd.array(right_values, dtype="boolean")

        result = op(a, b)

        # Expected: op on the raw data, masked wherever either operand is masked.
        expected = BooleanArray(op(a._data, b._data), a._mask | b._mask)
        tm.assert_extension_array_equal(result, expected)

        # ensure we haven't mutated anything inplace
        result[0] = None
        tm.assert_extension_array_equal(a, pd.array(left_values, dtype="boolean"))
        tm.assert_extension_array_equal(b, pd.array(right_values, dtype="boolean"))
Пример #11
0
def test_to_boolean_array_from_strings_invalid_string():
    """``_from_sequence_of_strings`` must raise ``ValueError`` for the string "donkey"."""
    unparseable = ["donkey"]
    with pytest.raises(ValueError, match="cannot be cast"):
        BooleanArray._from_sequence_of_strings(unparseable)
Пример #12
0
 # Interval
 (
     [pd.Interval(1, 2), pd.Interval(3, 4)],
     "interval",
     IntervalArray.from_tuples([(1, 2), (3, 4)]),
 ),
 # Sparse
 ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")),
 # IntegerNA
 ([1, None], "Int16", integer_array([1, None], dtype="Int16")),
 (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
 # String
 (["a", None], "string", StringArray._from_sequence(["a", None])),
 (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None])),
 # Boolean
 ([True, None], "boolean", BooleanArray._from_sequence([True, None])),
 ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])),
 # Index
 (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))),
 # Series[EA] returns the EA
 (
     pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])),
     None,
     pd.Categorical(["a", "b"], categories=["a", "b", "c"]),
 ),
 # "3rd party" EAs work
 ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])),
 # pass an ExtensionArray, but a different dtype
 (
     period_array(["2000", "2001"], freq="D"),
     "category",