def method(self, other):
    """
    Apply the binary operator ``op`` between this string array and ``other``.

    ``op`` and ``cls`` are not defined here; they are free variables taken
    from the enclosing scope (presumably an operator-method factory --
    confirm against the surrounding file).

    Parameters
    ----------
    other : scalar, list-like, or instance of ``cls``
        Right-hand operand. Index/Series/DataFrame operands are not handled
        here and yield ``NotImplemented``.

    Returns
    -------
    StringArray, BooleanArray or NotImplemented
        ``StringArray`` when ``op`` is an arithmetic operator, otherwise a
        ``BooleanArray`` whose mask marks every position where either
        operand is missing.

    Raises
    ------
    ValueError
        If a non-scalar ``other`` has a different length than ``self``.
    """
    from pandas.arrays import BooleanArray

    op_name = op.__name__
    assert op_name in ops.ARITHMETIC_BINOPS | ops.COMPARISON_BINOPS

    if isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame)):
        return NotImplemented
    if isinstance(other, cls):
        # Work on the raw ndarray of a same-typed operand.
        other = other._ndarray

    # A position is missing in the result if it is missing on either side.
    missing = isna(self) | isna(other)
    present = ~missing

    if not lib.is_scalar(other):
        n_other, n_self = len(other), len(self)
        if n_other != n_self:
            # prevent improper broadcasting when other is 2D
            raise ValueError(
                f"Lengths of operands do not match: {n_self} != {n_other}"
            )
        # Keep only the entries that will actually take part in the op.
        other = np.asarray(other)[present]

    if op_name in ops.ARITHMETIC_BINOPS:
        out = np.empty_like(self._ndarray, dtype="object")
        out[missing] = StringDtype.na_value
        out[present] = op(self._ndarray[present], other)
        return StringArray(out)

    # Comparison: missing slots keep the placeholder False and are masked.
    out = np.zeros(len(self._ndarray), dtype="bool")
    out[present] = op(self._ndarray[present], other)
    return BooleanArray(out, missing)
def test_to_boolean_array_all_none():
    """All-``None`` input converts to a fully masked boolean array.

    Checked for both a plain list and an object-dtype ndarray passed to
    ``pd.array(..., dtype="boolean")``.
    """
    expected = BooleanArray(
        np.array([True, True, True]),
        np.array([True, True, True]),
    )

    all_none_inputs = (
        [None, None, None],
        np.array([None, None, None], dtype=object),
    )
    for raw in all_none_inputs:
        converted = pd.array(raw, dtype="boolean")
        tm.assert_extension_array_equal(converted, expected)
def test_boolean_array_constructor():
    """BooleanArray accepts boolean ndarrays and rejects other inputs.

    The valid case is compared against ``pd.array``; each invalid
    values/mask combination must raise the listed exception and message.
    """
    values = np.array([True, False, True, False], dtype="bool")
    mask = np.array([False, False, False, True], dtype="bool")

    built = BooleanArray(values, mask)
    reference = pd.array([True, False, True, None], dtype="boolean")
    tm.assert_extension_array_equal(built, reference)

    # (exception type, message pattern, constructor arguments)
    rejected = [
        (TypeError, "values should be boolean numpy array", (values.tolist(), mask)),
        (TypeError, "mask should be boolean numpy array", (values, mask.tolist())),
        (TypeError, "values should be boolean numpy array", (values.astype(int), mask)),
        (TypeError, "mask should be boolean numpy array", (values, None)),
        (ValueError, "values must be a 1D array", (values.reshape(1, -1), mask)),
        (ValueError, "mask must be a 1D array", (values, mask.reshape(1, -1))),
    ]
    for exc_type, pattern, ctor_args in rejected:
        with pytest.raises(exc_type, match=pattern):
            BooleanArray(*ctor_args)
def test_coerce_to_array_from_boolean_array():
    """``coerce_to_array`` round-trips an existing BooleanArray.

    Without ``copy`` the rebuilt array shares ``_data`` and ``_mask`` with
    the input; with ``copy=True`` on the constructor it does not. Passing an
    explicit mask alongside a BooleanArray must raise ``ValueError``.
    """
    data = np.array([True, False, True, False], dtype="bool")
    na_mask = np.array([False, False, False, True], dtype="bool")
    source = BooleanArray(data, na_mask)

    # passing BooleanArray to coerce_to_array -> no copy
    shared = BooleanArray(*coerce_to_array(source))
    tm.assert_extension_array_equal(shared, source)
    assert shared._data is source._data
    assert shared._mask is source._mask

    # copy requested on the constructor -> fresh buffers
    copied = BooleanArray(*coerce_to_array(source), copy=True)
    tm.assert_extension_array_equal(copied, source)
    assert copied._data is not source._data
    assert copied._mask is not source._mask

    with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
        coerce_to_array(source, mask=na_mask)
def test_to_boolean_array():
    """``pd.array(..., dtype="boolean")`` builds the expected BooleanArray.

    Covers list, bool-ndarray and object-ndarray inputs without missing
    values, then list and object-ndarray inputs containing ``None``.
    """
    # without missing values
    complete = [True, False, True]
    expected = BooleanArray(
        np.array([True, False, True]),
        np.array([False, False, False]),
    )
    for raw in (complete, np.array(complete), np.array(complete, dtype=object)):
        converted = pd.array(raw, dtype="boolean")
        tm.assert_extension_array_equal(converted, expected)

    # with missing values
    with_missing = [True, False, None]
    expected = BooleanArray(
        np.array([True, False, True]),
        np.array([False, False, True]),
    )
    for raw in (with_missing, np.array(with_missing, dtype=object)):
        converted = pd.array(raw, dtype="boolean")
        tm.assert_extension_array_equal(converted, expected)
def _cmp_method(self, other, op):
    """
    Compare ``self`` with ``other`` using the Arrow kernel matching ``op``.

    The kernel is looked up in ``ARROW_CMP_FUNCS`` by ``op.__name__``.
    ``other`` may be an ``ArrowStringArray``, a numpy array, or a scalar;
    anything else yields ``NotImplemented``. When the Arrow kernel rejects a
    scalar (``ArrowNotImplementedError`` / ``ArrowInvalid``), the comparison
    is done with ``op`` on a numpy copy of the non-missing entries instead.

    Returns
    -------
    BooleanArray or NotImplemented
    """
    from pandas.arrays import BooleanArray

    compare = ARROW_CMP_FUNCS[op.__name__]

    if isinstance(other, ArrowStringArray):
        compared = compare(self._data, other._data)
    elif isinstance(other, np.ndarray):
        compared = compare(self._data, other)
    elif not is_scalar(other):
        return NotImplemented
    else:
        try:
            compared = compare(self._data, pa.scalar(other))
        except (pa.lib.ArrowNotImplementedError, pa.lib.ArrowInvalid):
            # Arrow cannot handle this scalar: compare in numpy, leaving
            # missing positions masked.
            missing = isna(self) | isna(other)
            present = ~missing
            fallback = np.zeros(len(self), dtype="bool")
            fallback[present] = op(np.array(self)[present], other)
            return BooleanArray(fallback, missing)

    # TODO(ARROW-9429): Add a .to_numpy() to ChunkedArray
    return BooleanArray._from_sequence(compared.to_pandas().values)
def _cmp_method(self, other, op):
    """
    Compare this masked array with ``other`` using the comparison ``op``.

    Parameters
    ----------
    other : BooleanArray, IntegerArray, FloatingArray, list-like or scalar
        Right-hand operand. Masked arrays are unpacked into their ``_data``
        and ``_mask``; any other list-like is converted with ``np.asarray``.
    op : callable
        Comparison operator. Only ``op.__name__`` is used, to pick the
        ``__<name>__`` method of ``self._data``.

    Returns
    -------
    BooleanArray
        Element-wise result; its mask is ``self._mask`` combined with the
        mask of ``other`` when ``other`` is a masked array, and is all
        ``True`` when ``other`` is ``pd.NA``.

    Raises
    ------
    NotImplementedError
        If a list-like ``other`` has more than one dimension.
    ValueError
        If a 1-d list-like ``other`` does not have the same length as
        ``self``.
    """
    from pandas.arrays import (
        BooleanArray,
        IntegerArray,
    )

    mask = None

    if isinstance(other, (BooleanArray, IntegerArray, FloatingArray)):
        other, mask = other._data, other._mask

    elif is_list_like(other):
        other = np.asarray(other)
        if other.ndim > 1:
            raise NotImplementedError("can only perform ops with 1-d structures")
        # Reject mismatched lengths up front. Otherwise numpy would broadcast
        # a length-1 operand, and other mismatches would be left to numpy /
        # invalid_comparison, producing a result that is not an
        # element-by-element comparison.
        if other.ndim == 1 and len(other) != len(self._data):
            raise ValueError("Lengths must match to compare")

    if other is libmissing.NA:
        # numpy does not handle pd.NA well as "other" scalar (it returns
        # a scalar False instead of an array)
        # This may be fixed by NA.__array_ufunc__. Revisit this check
        # once that's implemented.
        result = np.zeros(self._data.shape, dtype="bool")
        mask = np.ones(self._data.shape, dtype="bool")
    else:
        with warnings.catch_warnings():
            # numpy may show a FutureWarning:
            #     elementwise comparison failed; returning scalar instead,
            #     but in the future will perform elementwise comparison
            # before returning NotImplemented. We fall back to the correct
            # behavior today, so that should be fine to ignore.
            warnings.filterwarnings("ignore", "elementwise", FutureWarning)
            with np.errstate(all="ignore"):
                method = getattr(self._data, f"__{op.__name__}__")
                result = method(other)

            if result is NotImplemented:
                result = invalid_comparison(self._data, other, op)

    # nans propagate
    if mask is None:
        mask = self._mask.copy()
    else:
        mask = self._mask | mask

    return BooleanArray(result, mask)
def test_scalar(self, other, all_compare_operators):
    """Comparing a boolean array with a scalar gives the expected result.

    For ``pd.NA`` the result must be entirely missing; for any other scalar
    it must equal the operator applied to the raw data, with the original
    mask. Writing into the result must not alter the input array.
    """
    compare = self.get_op_from_name(all_compare_operators)
    raw = [True, False, None]
    arr = pd.array(raw, dtype="boolean")

    outcome = compare(arr, other)

    expected = (
        pd.array([None, None, None], dtype="boolean")
        if other is pd.NA
        else BooleanArray(compare(arr._data, other), arr._mask, copy=True)
    )
    tm.assert_extension_array_equal(outcome, expected)

    # ensure we haven't mutated anything inplace
    outcome[0] = None
    untouched = pd.array(raw, dtype="boolean")
    tm.assert_extension_array_equal(arr, untouched)
def test_coerce_to_array():
    """Exercise ``coerce_to_array`` with explicit masks.

    Checks no-copy versus ``copy=True`` behaviour, merging of missing values
    found in the values with the supplied mask, and the errors raised when
    values and mask shapes disagree.
    """
    # TODO this is currently not public API
    bool_values = np.array([True, False, True, False], dtype="bool")
    bool_mask = np.array([False, False, False, True], dtype="bool")
    expected = BooleanArray(bool_values, bool_mask)

    # default: the inputs are reused as-is
    reused = BooleanArray(*coerce_to_array(bool_values, mask=bool_mask))
    tm.assert_extension_array_equal(reused, expected)
    assert reused._data is bool_values
    assert reused._mask is bool_mask

    # copy=True: new buffers
    duplicated = BooleanArray(
        *coerce_to_array(bool_values, mask=bool_mask, copy=True)
    )
    tm.assert_extension_array_equal(duplicated, expected)
    assert duplicated._data is not bool_values
    assert duplicated._mask is not bool_mask

    # mixed missing from values and mask
    mixed_values = [True, False, None, False]
    partial_mask = np.array([False, False, False, True], dtype="bool")
    expected = BooleanArray(
        np.array([True, False, True, True]),
        np.array([False, False, True, True]),
    )

    from_list = BooleanArray(*coerce_to_array(mixed_values, mask=partial_mask))
    tm.assert_extension_array_equal(from_list, expected)

    from_object_array = BooleanArray(
        *coerce_to_array(np.array(mixed_values, dtype=object), mask=partial_mask)
    )
    tm.assert_extension_array_equal(from_object_array, expected)

    from_list_mask = BooleanArray(
        *coerce_to_array(mixed_values, mask=partial_mask.tolist())
    )
    tm.assert_extension_array_equal(from_list_mask, expected)

    # raise errors for wrong dimension
    flat_values = np.array([True, False, True, False], dtype="bool")
    flat_mask = np.array([False, False, False, True], dtype="bool")

    # passing 2D values is OK as long as no mask
    coerce_to_array(flat_values.reshape(1, -1))

    shape_msg = "values.shape and mask.shape must match"
    mismatched = (
        (flat_values.reshape(1, -1), flat_mask),
        (flat_values, flat_mask.reshape(1, -1)),
    )
    for bad_values, bad_mask in mismatched:
        with pytest.raises(ValueError, match=shape_msg):
            coerce_to_array(bad_values, mask=bad_mask)
def test_array(self, all_compare_operators):
    """Comparing two boolean arrays combines data and masks element-wise.

    The expected result is the operator applied to the raw ``_data`` arrays,
    masked wherever either operand is masked. Writing into the result must
    leave both operands unchanged.
    """
    compare = self.get_op_from_name(all_compare_operators)
    left_raw = [True] * 3 + [False] * 3 + [None] * 3
    right_raw = [True, False, None] * 3
    left = pd.array(left_raw, dtype="boolean")
    right = pd.array(right_raw, dtype="boolean")

    outcome = compare(left, right)

    expected = BooleanArray(
        compare(left._data, right._data),
        left._mask | right._mask,
    )
    tm.assert_extension_array_equal(outcome, expected)

    # ensure we haven't mutated anything inplace
    outcome[0] = None
    tm.assert_extension_array_equal(left, pd.array(left_raw, dtype="boolean"))
    tm.assert_extension_array_equal(right, pd.array(right_raw, dtype="boolean"))
def test_to_boolean_array_from_strings_invalid_string():
    """A string that is not a recognised boolean must raise ``ValueError``."""
    unparseable = ["donkey"]
    expected_message = "cannot be cast"
    with pytest.raises(ValueError, match=expected_message):
        BooleanArray._from_sequence_of_strings(unparseable)
# Interval ( [pd.Interval(1, 2), pd.Interval(3, 4)], "interval", IntervalArray.from_tuples([(1, 2), (3, 4)]), ), # Sparse ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), # IntegerNA ([1, None], "Int16", integer_array([1, None], dtype="Int16")), (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # String (["a", None], "string", StringArray._from_sequence(["a", None])), (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None])), # Boolean ([True, None], "boolean", BooleanArray._from_sequence([True, None])), ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None])), # Index (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), # Series[EA] returns the EA ( pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), None, pd.Categorical(["a", "b"], categories=["a", "b", "c"]), ), # "3rd party" EAs work ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])), # pass an ExtensionArray, but a different dtype ( period_array(["2000", "2001"], freq="D"), "category",