def test_basic_binop():
    """Smoke-test adding a PandasArray to itself."""
    # Deliberately minimal: the shared ExtensionArray interface tests
    # exercise arithmetic more thoroughly.
    arr = PandasArray(np.array([1, 2, 3]))
    doubled = arr + arr
    wanted = PandasArray(np.array([2, 4, 6]))
    tm.assert_extension_array_equal(doubled, wanted)
def test_extension_array(self):
    """safe_sort returns an Int64 array in ascending order."""
    # The NA-containing variant ([1, 3, np.nan, 2] -> [1, 2, 3, np.nan])
    # is not exercised here; it was left commented out.
    unsorted = array([1, 3, 2], dtype='Int64')
    ordered = safe_sort(unsorted)
    wanted = array([1, 2, 3], dtype='Int64')
    tm.assert_extension_array_equal(ordered, wanted)
def test_pow(self):
    """Element-wise ``**`` between two integer arrays holding NaN entries."""
    # https://github.com/pandas-dev/pandas/issues/22022
    base = integer_array([1, np.nan, np.nan, 1])
    exponent = integer_array([1, np.nan, 1, np.nan])

    powered = base ** exponent

    wanted = pd.core.arrays.integer_array([1, np.nan, np.nan, 1])
    tm.assert_extension_array_equal(powered, wanted)
def test_extension_array_labels(self, verify, na_sentinel):
    """safe_sort on an Int64 array also returns remapped labels."""
    values = array([1, 3, 2], dtype='Int64')
    codes = [0, 1, na_sentinel, 2]

    sorted_values, sorted_codes = safe_sort(
        values, codes, na_sentinel=na_sentinel, verify=verify)

    wanted_values = array([1, 2, 3], dtype='Int64')
    # The sentinel entry is expected to stay in place.
    wanted_codes = np.array([0, 2, na_sentinel, 1], dtype=np.intp)
    tm.assert_extension_array_equal(sorted_values, wanted_values)
    tm.assert_numpy_array_equal(sorted_codes, wanted_codes)
def test_grouping_grouper(self, data_for_grouping):
    """The first Grouping's ``grouper`` equals the column that was grouped on."""
    keys = ["B", "B", None, None, "A", "A", "B", "C"]
    frame = pd.DataFrame({"A": keys, "B": data_for_grouping})

    by_object = frame.groupby("A").grouper.groupings[0]
    by_extension = frame.groupby("B").grouper.groupings[0]

    # Object column compares as ndarray, extension column as ExtensionArray.
    tm.assert_numpy_array_equal(by_object.grouper, frame.A.values)
    tm.assert_extension_array_equal(by_extension.grouper, data_for_grouping)
def test_assert_extension_array_equal_non_extension_array(side):
    """An ndarray passed on ``side`` raises with a message naming that side."""
    ndarr = np.arange(5)
    sparse = SparseArray(ndarr)
    msg = "{side} is not an ExtensionArray".format(side=side)

    # Put the plain ndarray in the position under test.
    if side == "left":
        args = (ndarr, sparse)
    else:
        args = (sparse, ndarr)

    with pytest.raises(AssertionError, match=msg):
        assert_extension_array_equal(*args)
def test_set_na(self, left_right_dtypes):
    """Setting element 0 to NaN should put each side's NA value there."""
    left, right = left_right_dtypes
    intervals = IntervalArray.from_arrays(left, right)
    intervals[0] = np.nan

    # Expected endpoints: the side's own NA value, then the untouched rest.
    na_left = Index([left._na_value] + list(left[1:]))
    na_right = Index([right._na_value] + list(right[1:]))
    wanted = IntervalArray.from_arrays(na_left, na_right)

    tm.assert_extension_array_equal(intervals, wanted)
def test_non_extension_array(self):
    """An ndarray in either position raises, naming the offending side."""
    ndarr = np.arange(5)
    sparse = SparseArray(np.arange(5))

    # (expected message, positional arguments), left side checked first.
    cases = [
        ('left is not an ExtensionArray', (ndarr, sparse)),
        ('right is not an ExtensionArray', (sparse, ndarr)),
    ]
    for msg, args in cases:
        with pytest.raises(AssertionError, match=msg):
            assert_extension_array_equal(*args)
def test_to_integer_array_float():
    """Whole-number floats convert; fractional floats raise TypeError."""
    from_floats = integer_array([1., 2.])
    from_ints = integer_array([1, 2])
    tm.assert_extension_array_equal(from_floats, from_ints)

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        integer_array([1.5, 2.])

    # for float dtypes, the itemsize is not preserved
    from_float32 = integer_array(np.array([1., 2.], dtype='float32'))
    assert from_float32.dtype == Int64Dtype()
def test_to_integer_array_float():
    """Whole-number floats convert; fractional floats raise TypeError."""
    from_floats = integer_array([1., 2.])
    from_ints = integer_array([1, 2])
    tm.assert_extension_array_equal(from_floats, from_ints)

    with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
        integer_array([1.5, 2.])

    # for float dtypes, the itemsize is not preserved
    from_float32 = integer_array(np.array([1., 2.], dtype='float32'))
    assert from_float32.dtype == Int64Dtype()
def test_add_sequence():
    """Add a list to a string array element-wise, in both operand orders."""
    strings = pd.array(["a", "b", None, None], dtype="string")
    seq = ["x", None, "y", None]

    # A missing value on either side is expected to give a missing result.
    forward = strings + seq
    wanted = pd.array(["ax", None, None, None], dtype="string")
    tm.assert_extension_array_equal(forward, wanted)

    backward = seq + strings
    wanted = pd.array(["xa", None, None, None], dtype="string")
    tm.assert_extension_array_equal(backward, wanted)
def test_divmod_array(reverse, expected_div, expected_mod):
    """divmod between a decimal array and the scalar 2, in either order."""
    # https://github.com/pandas-dev/pandas/issues/22930
    decimals = to_decimal([1, 2, 3, 4])

    # ``reverse`` selects which operand is the scalar.
    operands = (2, decimals) if reverse else (decimals, 2)
    quotient, remainder = divmod(*operands)

    wanted_div = to_decimal(expected_div)
    wanted_mod = to_decimal(expected_mod)
    tm.assert_extension_array_equal(quotient, wanted_div)
    tm.assert_extension_array_equal(remainder, wanted_mod)
def test_divmod_array(reverse, expected_div, expected_mod):
    """divmod between a decimal array and the scalar 2, in either order."""
    # https://github.com/pandas-dev/pandas/issues/22930
    decimals = to_decimal([1, 2, 3, 4])

    # ``reverse`` selects which operand is the scalar.
    operands = (2, decimals) if reverse else (decimals, 2)
    quotient, remainder = divmod(*operands)

    wanted_div = to_decimal(expected_div)
    wanted_mod = to_decimal(expected_mod)
    tm.assert_extension_array_equal(quotient, wanted_div)
    tm.assert_extension_array_equal(remainder, wanted_mod)
def test_ufuncs_unary(ufunc):
    """Apply a unary ufunc to a boolean array and to a Series wrapping it."""
    masked = pd.array([True, False, None], dtype="boolean")

    # Array case: ufunc on the raw data, then blank the masked positions.
    observed = ufunc(masked)
    wanted = pd.array(ufunc(masked._data), dtype="boolean")
    wanted[masked._mask] = np.nan
    tm.assert_extension_array_equal(observed, wanted)

    # Series case: same construction, compared as Series.
    ser = pd.Series(masked)
    observed = ufunc(ser)
    wanted = pd.Series(ufunc(masked._data), dtype="boolean")
    wanted[masked._mask] = np.nan
    tm.assert_series_equal(observed, wanted)
def test_missing_values(self):
    """Arrays whose NA positions differ fail with an NA-mask diff message."""
    trailing_na = SparseArray([np.nan, 1, 2, np.nan])
    trailing_value = SparseArray([np.nan, 1, 2, 3])

    # Regex for the expected failure text (one of four positions differs).
    pattern = textwrap.dedent("""\
        ExtensionArray NA mask are different

        ExtensionArray NA mask values are different \\(25\\.0 %\\)
        \\[left\\]:  \\[True, False, False, True\\]
        \\[right\\]: \\[True, False, False, False\\]""")

    with pytest.raises(AssertionError, match=pattern):
        assert_extension_array_equal(trailing_na, trailing_value)
def test_comparison_methods_scalar(all_compare_operators):
    """Compare a string array against a scalar string and against pd.NA."""
    method = all_compare_operators
    strings = pd.array(["a", None, "c"], dtype="string")
    scalar = "a"

    observed = getattr(strings, method)(scalar)
    # Reference answer: call the same dunder on every element in turn.
    per_item = np.array(
        [getattr(elem, method)(scalar) for elem in strings], dtype=object)
    wanted = pd.array(per_item, dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)

    # Against NA every position is expected to be missing.
    observed = getattr(strings, method)(pd.NA)
    wanted = pd.array([None, None, None], dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)
def test_assert_extension_array_equal_missing_values():
    """Arrays whose NA positions differ fail with an NA-mask diff message."""
    trailing_na = SparseArray([np.nan, 1, 2, np.nan])
    trailing_value = SparseArray([np.nan, 1, 2, 3])

    # Regex for the expected failure text (one of four positions differs).
    pattern = """\
ExtensionArray NA mask are different

ExtensionArray NA mask values are different \\(25\\.0 %\\)
\\[left\\]:  \\[True, False, False, True\\]
\\[right\\]: \\[True, False, False, False\\]"""

    with pytest.raises(AssertionError, match=pattern):
        assert_extension_array_equal(trailing_na, trailing_value)
def test_check_less_precise_fails(self, check_less_precise):
    """With the given precision the sixth-decimal difference must be reported."""
    first = SparseArray([0.5, 0.123456])
    second = SparseArray([0.5, 0.123457])
    options = {'check_less_precise': check_less_precise}

    # Regex for the expected failure text (one of two values differs).
    pattern = textwrap.dedent("""\
        ExtensionArray are different

        ExtensionArray values are different \\(50\\.0 %\\)
        \\[left\\]:  \\[0\\.5, 0\\.123456\\]
        \\[right\\]: \\[0\\.5, 0\\.123457\\]""")

    with pytest.raises(AssertionError, match=pattern):
        assert_extension_array_equal(first, second, **options)
def test_assert_extension_array_equal_missing_values():
    """Arrays whose NA positions differ fail with an NA-mask diff message."""
    trailing_na = SparseArray([np.nan, 1, 2, np.nan])
    trailing_value = SparseArray([np.nan, 1, 2, 3])

    # Regex for the expected failure text (one of four positions differs).
    pattern = """\
ExtensionArray NA mask are different

ExtensionArray NA mask values are different \\(25\\.0 %\\)
\\[left\\]:  \\[True, False, False, True\\]
\\[right\\]: \\[True, False, False, False\\]"""

    with pytest.raises(AssertionError, match=pattern):
        tm.assert_extension_array_equal(trailing_na, trailing_value)
def test_comparison_methods_array(all_compare_operators):
    """Compare a string array against a list with NAs, then against pd.NA."""
    method = all_compare_operators
    strings = pd.array(["a", None, "c"], dtype="string")
    seq = [None, None, "c"]

    observed = getattr(strings, method)(seq)
    # Only the last slot is filled in from an actual element comparison;
    # the other entries of the expectation are left as created.
    reference = np.empty_like(strings, dtype="object")
    reference[-1] = getattr(seq[-1], method)(strings[-1])
    wanted = pd.array(reference, dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)

    # Against NA every position is expected to be missing.
    observed = getattr(strings, method)(pd.NA)
    wanted = pd.array([None, None, None], dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)
def test_pow_scalar(self):
    """Powers between an Int64 array and the scalars 0, 1 and NaN, both ways."""
    ints = pd.array([0, 1, None, 2], dtype="Int64")

    # --- array ** scalar ---
    observed = ints**0
    wanted = pd.array([1, 1, 1, 1], dtype="Int64")
    tm.assert_extension_array_equal(observed, wanted)

    observed = ints**1
    wanted = pd.array([0, 1, None, 2], dtype="Int64")
    tm.assert_extension_array_equal(observed, wanted)

    # Not exercised (left commented out):
    # result = a ** pd.NA
    # expected = pd.array([None, 1, None, None], dtype="Int64")
    # tm.assert_extension_array_equal(result, expected)

    # A NaN exponent is compared as a float64 ndarray, not an Int64 array.
    observed = ints**np.nan
    wanted = np.array([np.nan, 1, np.nan, np.nan], dtype="float64")
    tm.assert_numpy_array_equal(observed, wanted)

    # --- scalar ** array (reversed) ---
    observed = 0**ints
    wanted = pd.array([1, 0, None, 0], dtype="Int64")
    tm.assert_extension_array_equal(observed, wanted)

    observed = 1**ints
    wanted = pd.array([1, 1, 1, 1], dtype="Int64")
    tm.assert_extension_array_equal(observed, wanted)

    # Not exercised (left commented out):
    # result = pd.NA ** a
    # expected = pd.array([1, None, None, None], dtype="Int64")
    # tm.assert_extension_array_equal(result, expected)

    observed = np.nan**ints
    wanted = np.array([1, np.nan, np.nan, np.nan], dtype="float64")
    tm.assert_numpy_array_equal(observed, wanted)
def test_fillna_preserves_tz(self, method):
    """fillna(method=...) on tz-aware data keeps the tz and leaves inputs alone."""
    dti = pd.date_range('2000-01-01', periods=5, freq='D', tz='US/Central')
    gappy = DatetimeArray(dti, copy=True)
    gappy[2] = pd.NaT

    # 'pad' takes the value before the gap; any other method the one after.
    if method == 'pad':
        filler = dti[1]
    else:
        filler = dti[3]
    wanted = DatetimeArray(
        [dti[0], dti[1], filler, dti[3], dti[4]],
        freq=None, tz='US/Central')

    filled = gappy.fillna(method=method)
    tm.assert_extension_array_equal(filled, wanted)

    # assert that arr and dti were not modified in-place
    assert gappy[2] is pd.NaT
    assert dti[2] == pd.Timestamp('2000-01-03', tz='US/Central')
def test_check_dtype(self):
    """int64 vs int32 sparse arrays compare equal only when dtype is ignored."""
    as_int64 = SparseArray(np.arange(5, dtype='int64'))
    as_int32 = SparseArray(np.arange(5, dtype='int32'))

    # passes with check_dtype=False
    assert_extension_array_equal(as_int64, as_int32, check_dtype=False)

    # raises with check_dtype=True
    pattern = textwrap.dedent("""\
        ExtensionArray are different

        Attribute "dtype" are different
        \\[left\\]:  Sparse\\[int64, 0\\]
        \\[right\\]: Sparse\\[int32, 0\\]""")
    with pytest.raises(AssertionError, match=pattern):
        assert_extension_array_equal(as_int64, as_int32, check_dtype=True)
def test_assert_extension_array_equal_not_exact(kwargs):
    """Values differing in the last digit only fail under ``check_exact``."""
    # see gh-23709
    first = SparseArray([-0.17387645482451206, 0.3414148016424936])
    second = SparseArray([-0.17387645482451206, 0.3414148016424937])

    # Without exact checking the comparison is expected to pass.
    if not kwargs.get("check_exact", False):
        assert_extension_array_equal(first, second, **kwargs)
        return

    pattern = """\
ExtensionArray are different

ExtensionArray values are different \\(50\\.0 %\\)
\\[left\\]:  \\[-0\\.17387645482.*, 0\\.341414801642.*\\]
\\[right\\]: \\[-0\\.17387645482.*, 0\\.341414801642.*\\]"""

    with pytest.raises(AssertionError, match=pattern):
        assert_extension_array_equal(first, second, **kwargs)
def test_assert_extension_array_equal_not_exact(kwargs):
    """Values differing in the last digit only fail under ``check_exact``."""
    # see gh-23709
    first = SparseArray([-0.17387645482451206, 0.3414148016424936])
    second = SparseArray([-0.17387645482451206, 0.3414148016424937])

    # Without exact checking the comparison is expected to pass.
    if not kwargs.get("check_exact", False):
        tm.assert_extension_array_equal(first, second, **kwargs)
        return

    pattern = """\
ExtensionArray are different

ExtensionArray values are different \\(50\\.0 %\\)
\\[left\\]:  \\[-0\\.17387645482.*, 0\\.341414801642.*\\]
\\[right\\]: \\[-0\\.17387645482.*, 0\\.341414801642.*\\]"""

    with pytest.raises(AssertionError, match=pattern):
        tm.assert_extension_array_equal(first, second, **kwargs)
def test_scalar(self, other, all_compare_operators):
    """Compare a boolean array with a scalar and check the input is untouched."""
    compare = self.get_op_from_name(all_compare_operators)
    bools = pd.array([True, False, None], dtype="boolean")

    observed = compare(bools, other)

    if other is not pd.NA:
        # Reference: operate on the raw data and reuse the input's mask.
        raw = compare(bools._data, other)
        wanted = BooleanArray(raw, bools._mask, copy=True)
    else:
        wanted = pd.array([None, None, None], dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)

    # ensure we haven't mutated anything inplace
    observed[0] = None
    tm.assert_extension_array_equal(
        bools, pd.array([True, False, None], dtype="boolean"))
def test_assert_extension_array_equal_less_precise(check_less_precise):
    """A sixth-decimal difference is reported only at sufficient precision."""
    first = SparseArray([0.5, 0.123456])
    second = SparseArray([0.5, 0.123457])
    options = dict(check_less_precise=check_less_precise)

    # Failure is expected when the flag is False or asks for >= 5 digits.
    must_fail = check_less_precise is False or check_less_precise >= 5
    if not must_fail:
        assert_extension_array_equal(first, second, **options)
        return

    pattern = """\
ExtensionArray are different

ExtensionArray values are different \\(50\\.0 %\\)
\\[left\\]:  \\[0\\.5, 0\\.123456\\]
\\[right\\]: \\[0\\.5, 0\\.123457\\]"""

    with pytest.raises(AssertionError, match=pattern):
        assert_extension_array_equal(first, second, **options)
def test_assert_extension_array_equal_less_precise(check_less_precise):
    """A sixth-decimal difference is reported only at sufficient precision."""
    first = SparseArray([0.5, 0.123456])
    second = SparseArray([0.5, 0.123457])
    options = dict(check_less_precise=check_less_precise)

    # Failure is expected when the flag is False or asks for >= 5 digits.
    must_fail = check_less_precise is False or check_less_precise >= 5
    if not must_fail:
        tm.assert_extension_array_equal(first, second, **options)
        return

    pattern = """\
ExtensionArray are different

ExtensionArray values are different \\(50\\.0 %\\)
\\[left\\]:  \\[0\\.5, 0\\.123456\\]
\\[right\\]: \\[0\\.5, 0\\.123457\\]"""

    with pytest.raises(AssertionError, match=pattern):
        tm.assert_extension_array_equal(first, second, **options)
def test_coerce_to_array_from_boolean_array():
    """coerce_to_array accepts a BooleanArray; copying only happens on request."""
    # passing BooleanArray to coerce_to_array
    data = np.array([True, False, True, False], dtype="bool")
    na_mask = np.array([False, False, False, True], dtype="bool")
    source = BooleanArray(data, na_mask)

    # no copy: the round-tripped array shares both buffers
    shared = BooleanArray(*coerce_to_array(source))
    tm.assert_extension_array_equal(shared, source)
    assert shared._data is source._data
    assert shared._mask is source._mask

    # copy=True: equal contents, distinct buffers
    copied = BooleanArray(*coerce_to_array(source), copy=True)
    tm.assert_extension_array_equal(copied, source)
    assert copied._data is not source._data
    assert copied._mask is not source._mask

    # a separate mask may not accompany BooleanArray input
    with pytest.raises(ValueError,
                       match="cannot pass mask for BooleanArray input"):
        coerce_to_array(source, mask=na_mask)
def test_to_boolean_array_integer_like():
    """0/1 integers and floats are accepted as boolean input, with or without NA."""
    # integers of 0's and 1's
    observed = pd.array([1, 0, 1, 0], dtype="boolean")
    wanted = pd.array([True, False, True, False], dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)

    # the same values supplied as integer and float ndarrays
    for raw in (np.array([1, 0, 1, 0]), np.array([1.0, 0.0, 1.0, 0.0])):
        tm.assert_extension_array_equal(
            pd.array(raw, dtype="boolean"), wanted)

    # with missing values
    observed = pd.array([1, 0, 1, None], dtype="boolean")
    wanted = pd.array([True, False, True, None], dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)

    observed = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)
def test_integer_array_constructor():
    """IntegerArray(values, mask) builds the array; malformed input raises."""
    data = np.array([1, 2, 3, 4], dtype='int64')
    na_mask = np.array([False, False, False, True], dtype='bool')

    built = IntegerArray(data, na_mask)
    wanted = integer_array([1, 2, 3, np.nan], dtype='int64')
    tm.assert_extension_array_equal(built, wanted)

    # Each argument tuple must be rejected with TypeError: list values,
    # list mask, float values, and a missing mask.
    bad_argument_sets = [
        (data.tolist(), na_mask),
        (data, na_mask.tolist()),
        (data.astype(float), na_mask),
        (data,),
    ]
    for bad_args in bad_argument_sets:
        with pytest.raises(TypeError):
            IntegerArray(*bad_args)
def test_integer_array_constructor():
    """IntegerArray(values, mask) builds the array; malformed input raises."""
    data = np.array([1, 2, 3, 4], dtype='int64')
    na_mask = np.array([False, False, False, True], dtype='bool')

    built = IntegerArray(data, na_mask)
    wanted = integer_array([1, 2, 3, np.nan], dtype='int64')
    tm.assert_extension_array_equal(built, wanted)

    # Each argument tuple must be rejected with TypeError: list values,
    # list mask, float values, and a missing mask.
    bad_argument_sets = [
        (data.tolist(), na_mask),
        (data, na_mask.tolist()),
        (data.astype(float), na_mask),
        (data,),
    ]
    for bad_args in bad_argument_sets:
        with pytest.raises(TypeError):
            IntegerArray(*bad_args)
def test_assert_extension_array_equal_dtype_mismatch(check_dtype):
    """int64 vs int32 sparse arrays differ only when dtype checking is on."""
    length = 5
    options = dict(check_dtype=check_dtype)
    as_int64 = SparseArray(np.arange(length, dtype="int64"))
    as_int32 = SparseArray(np.arange(length, dtype="int32"))

    # With dtype checking off the comparison is expected to pass.
    if not check_dtype:
        tm.assert_extension_array_equal(as_int64, as_int32, **options)
        return

    pattern = """\
ExtensionArray are different

Attribute "dtype" are different
\\[left\\]:  Sparse\\[int64, 0\\]
\\[right\\]: Sparse\\[int32, 0\\]"""

    with pytest.raises(AssertionError, match=pattern):
        tm.assert_extension_array_equal(as_int64, as_int32, **options)
def test_assert_extension_array_equal_dtype_mismatch(check_dtype):
    """int64 vs int32 sparse arrays differ only when dtype checking is on."""
    length = 5
    options = dict(check_dtype=check_dtype)
    as_int64 = SparseArray(np.arange(length, dtype="int64"))
    as_int32 = SparseArray(np.arange(length, dtype="int32"))

    # With dtype checking off the comparison is expected to pass.
    if not check_dtype:
        assert_extension_array_equal(as_int64, as_int32, **options)
        return

    pattern = """\
ExtensionArray are different

Attribute "dtype" are different
\\[left\\]:  Sparse\\[int64, 0\\]
\\[right\\]: Sparse\\[int32, 0\\]"""

    with pytest.raises(AssertionError, match=pattern):
        assert_extension_array_equal(as_int64, as_int32, **options)
def test_to_boolean_array():
    """pd.array(..., dtype="boolean") accepts lists, bool and object ndarrays."""
    wanted = BooleanArray(np.array([True, False, True]),
                          np.array([False, False, False]))

    observed = pd.array([True, False, True], dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)
    observed = pd.array(np.array([True, False, True]), dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)
    observed = pd.array(np.array([True, False, True], dtype=object),
                        dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)

    # with missing values: the last position is masked in the expectation
    wanted = BooleanArray(np.array([True, False, True]),
                          np.array([False, False, True]))

    observed = pd.array([True, False, None], dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)
    observed = pd.array(np.array([True, False, None], dtype=object),
                        dtype="boolean")
    tm.assert_extension_array_equal(observed, wanted)
def test_no_masked_assumptions(self, other, all_logical_operators):
    """Logical ops must not depend on the data stored under masked slots."""
    # The logical operations should not assume that masked values are False!
    op_name = all_logical_operators
    raw = np.array(
        [True, True, True, False, False, False, True, False, True])
    na_mask = np.array([False] * 6 + [True, True, True])
    lhs = pd.arrays.BooleanArray(raw, na_mask)
    # Same logical content as ``lhs``, built through the public constructor.
    reference = pd.array(
        [True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")

    if isinstance(other, list):
        other = pd.array(other, dtype="boolean")

    observed = getattr(lhs, op_name)(other)
    wanted = getattr(reference, op_name)(other)
    tm.assert_extension_array_equal(observed, wanted)

    if not isinstance(other, BooleanArray):
        return

    # Overwrite the data hidden behind both masks and repeat the check.
    other._data[other._mask] = True
    lhs._data[lhs._mask] = False

    observed = getattr(lhs, op_name)(other)
    wanted = getattr(reference, op_name)(other)
    tm.assert_extension_array_equal(observed, wanted)
def test_ufuncs_binary(ufunc):
    """Apply a binary ufunc to a boolean array paired with various operands."""
    masked = pd.array([True, False, None], dtype="boolean")

    def boxed(raw):
        # Wrap raw ufunc output as boolean and blank the masked positions.
        out = pd.array(raw, dtype="boolean")
        out[masked._mask] = np.nan
        return out

    # two BooleanArrays
    observed = ufunc(masked, masked)
    tm.assert_extension_array_equal(
        observed, boxed(ufunc(masked._data, masked._data)))

    # Series with BooleanArray
    ser = pd.Series(masked)
    observed = ufunc(ser, masked)
    wanted = pd.Series(ufunc(masked._data, masked._data), dtype="boolean")
    wanted[masked._mask] = np.nan
    tm.assert_series_equal(observed, wanted)

    # Boolean with numpy array
    plain = np.array([True, True, False])
    observed = ufunc(masked, plain)
    tm.assert_extension_array_equal(
        observed, boxed(ufunc(masked._data, plain)))

    observed = ufunc(plain, masked)
    tm.assert_extension_array_equal(
        observed, boxed(ufunc(plain, masked._data)))

    # BooleanArray with scalar
    observed = ufunc(masked, True)
    tm.assert_extension_array_equal(
        observed, boxed(ufunc(masked._data, True)))

    observed = ufunc(True, masked)
    tm.assert_extension_array_equal(
        observed, boxed(ufunc(True, masked._data)))

    # not handled types
    with pytest.raises(TypeError):
        ufunc(masked, "test")
def test_coerce_to_array():
    """Exercise coerce_to_array: copy semantics, mask merging, 1D validation."""
    # TODO this is currently not public API
    data = np.array([True, False, True, False], dtype="bool")
    na_mask = np.array([False, False, False, True], dtype="bool")

    # default: the inputs themselves are reused
    built = BooleanArray(*coerce_to_array(data, mask=na_mask))
    wanted = BooleanArray(data, na_mask)
    tm.assert_extension_array_equal(built, wanted)
    assert built._data is data
    assert built._mask is na_mask

    # copy=True: equal contents in fresh buffers
    built = BooleanArray(*coerce_to_array(data, mask=na_mask, copy=True))
    wanted = BooleanArray(data, na_mask)
    tm.assert_extension_array_equal(built, wanted)
    assert built._data is not data
    assert built._mask is not na_mask

    # mixed missing from values and mask
    mixed = [True, False, None, False]
    na_mask = np.array([False, False, False, True], dtype="bool")
    wanted = BooleanArray(np.array([True, False, True, True]),
                          np.array([False, False, True, True]))

    built = BooleanArray(*coerce_to_array(mixed, mask=na_mask))
    tm.assert_extension_array_equal(built, wanted)

    built = BooleanArray(
        *coerce_to_array(np.array(mixed, dtype=object), mask=na_mask))
    tm.assert_extension_array_equal(built, wanted)

    built = BooleanArray(*coerce_to_array(mixed, mask=na_mask.tolist()))
    tm.assert_extension_array_equal(built, wanted)

    # raise errors for wrong dimension
    data = np.array([True, False, True, False], dtype="bool")
    na_mask = np.array([False, False, False, True], dtype="bool")

    with pytest.raises(ValueError, match="values must be a 1D list-like"):
        coerce_to_array(data.reshape(1, -1))

    with pytest.raises(ValueError, match="mask must be a 1D list-like"):
        coerce_to_array(data, mask=na_mask.reshape(1, -1))
def test_kleene_xor(self):
    """``^`` over every True/False/NA pairing, in both operand orders."""
    lhs = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
    rhs = pd.array([True, False, None] * 3, dtype="boolean")

    truth_table = pd.array(
        [False, True, None, True, False, None, None, None, None],
        dtype="boolean")

    tm.assert_extension_array_equal(lhs ^ rhs, truth_table)
    # swapped operands must give the same table
    tm.assert_extension_array_equal(rhs ^ lhs, truth_table)

    # ensure we haven't mutated anything inplace
    pristine_lhs = pd.array(
        [True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
    tm.assert_extension_array_equal(lhs, pristine_lhs)
    pristine_rhs = pd.array([True, False, None] * 3, dtype="boolean")
    tm.assert_extension_array_equal(rhs, pristine_rhs)
def test_kleene_and(self):
    """``&`` over every True/False/NA pairing, in both operand orders."""
    # A clear test of behavior.
    lhs = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
    rhs = pd.array([True, False, None] * 3, dtype="boolean")

    truth_table = pd.array(
        [True, False, None, False, False, False, None, False, None],
        dtype="boolean")

    tm.assert_extension_array_equal(lhs & rhs, truth_table)
    # swapped operands must give the same table
    tm.assert_extension_array_equal(rhs & lhs, truth_table)

    # ensure we haven't mutated anything inplace
    pristine_lhs = pd.array(
        [True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
    tm.assert_extension_array_equal(lhs, pristine_lhs)
    pristine_rhs = pd.array([True, False, None] * 3, dtype="boolean")
    tm.assert_extension_array_equal(rhs, pristine_rhs)
def test_kleene_xor_scalar(self, other, expected):
    """``^`` between a boolean array and a scalar, in both operand orders."""
    bools = pd.array([True, False, None], dtype="boolean")
    wanted = pd.array(expected, dtype="boolean")

    tm.assert_extension_array_equal(bools ^ other, wanted)
    # reflected form must agree
    tm.assert_extension_array_equal(other ^ bools, wanted)

    # ensure we haven't mutated anything inplace
    pristine = pd.array([True, False, None], dtype="boolean")
    tm.assert_extension_array_equal(bools, pristine)
def test_ufunc():
    """NumPy ufuncs on a PandasArray match the same ufunc on its ndarray."""
    wrapped = PandasArray(np.array([-1.0, 0.0, 1.0]))
    raw = wrapped._ndarray

    # single-output ufunc
    tm.assert_extension_array_equal(
        np.abs(wrapped), PandasArray(np.abs(raw)))

    # two-output ufunc: each output is compared separately
    got_div, got_mod = np.divmod(wrapped, np.add(wrapped, 2))
    ref_div, ref_mod = np.divmod(raw, np.add(raw, 2))
    tm.assert_extension_array_equal(got_div, PandasArray(ref_div))
    tm.assert_extension_array_equal(got_mod, PandasArray(ref_mod))
def test_ufunc():
    """NumPy ufuncs on a PandasArray match the same ufunc on its ndarray."""
    wrapped = PandasArray(np.array([-1.0, 0.0, 1.0]))
    raw = wrapped._ndarray

    # single-output ufunc
    tm.assert_extension_array_equal(
        np.abs(wrapped), PandasArray(np.abs(raw)))

    # two-output ufunc: each output is compared separately
    got_div, got_mod = np.divmod(wrapped, np.add(wrapped, 2))
    ref_div, ref_mod = np.divmod(raw, np.add(raw, 2))
    tm.assert_extension_array_equal(got_div, PandasArray(ref_div))
    tm.assert_extension_array_equal(got_mod, PandasArray(ref_mod))
def test_check_exact(self):
    """Values differing in the last digit only fail under check_exact=True."""
    # GH 23709
    first = SparseArray([-0.17387645482451206, 0.3414148016424936])
    second = SparseArray([-0.17387645482451206, 0.3414148016424937])

    # passes with check_exact=False (should be default)
    assert_extension_array_equal(first, second)
    assert_extension_array_equal(first, second, check_exact=False)

    # raises with check_exact=True
    pattern = textwrap.dedent("""\
        ExtensionArray are different

        ExtensionArray values are different \\(50\\.0 %\\)
        \\[left\\]:  \\[-0\\.17387645482.*, 0\\.341414801642.*\\]
        \\[right\\]: \\[-0\\.17387645482.*, 0\\.341414801642.*\\]""")
    with pytest.raises(AssertionError, match=pattern):
        assert_extension_array_equal(first, second, check_exact=True)
def test_array(self, all_compare_operators):
    """Compare two boolean arrays and check neither input is modified."""
    compare = self.get_op_from_name(all_compare_operators)
    lhs = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
    rhs = pd.array([True, False, None] * 3, dtype="boolean")

    observed = compare(lhs, rhs)

    # Reference: operate on the raw data; a slot is missing if either is.
    raw = compare(lhs._data, rhs._data)
    combined_mask = lhs._mask | rhs._mask
    wanted = BooleanArray(raw, combined_mask)
    tm.assert_extension_array_equal(observed, wanted)

    # ensure we haven't mutated anything inplace
    observed[0] = None
    pristine_lhs = pd.array(
        [True] * 3 + [False] * 3 + [None] * 3, dtype="boolean")
    tm.assert_extension_array_equal(lhs, pristine_lhs)
    pristine_rhs = pd.array([True, False, None] * 3, dtype="boolean")
    tm.assert_extension_array_equal(rhs, pristine_rhs)
def test_to_integer_array(values, to_dtype, result_dtype):
    """Converting ``values`` with ``to_dtype`` yields the ``result_dtype`` array."""
    # convert existing arrays to IntegerArrays
    target = result_dtype()
    converted = integer_array(values, dtype=to_dtype)
    assert converted.dtype == target

    # Building directly with the resulting dtype must give an equal array.
    tm.assert_extension_array_equal(
        converted, integer_array(values, dtype=result_dtype()))
def test_repeat(self, left_right_dtypes, repeats):
    """Repeating an IntervalArray matches repeating each side separately."""
    left, right = left_right_dtypes

    repeated = IntervalArray.from_arrays(left, right).repeat(repeats)

    left_repeated = left.repeat(repeats)
    right_repeated = right.repeat(repeats)
    wanted = IntervalArray.from_arrays(left_repeated, right_repeated)
    tm.assert_extension_array_equal(repeated, wanted)
def test_integer_array_constructor_none_is_nan(a, b):
    """integer_array must build equal arrays from inputs ``a`` and ``b``."""
    from_a = integer_array(a)
    from_b = integer_array(b)
    tm.assert_extension_array_equal(from_a, from_b)
def test_check_less_precise_passes(self, check_less_precise):
    """With the given precision the sixth-decimal difference is tolerated."""
    first = SparseArray([0.5, 0.123456])
    second = SparseArray([0.5, 0.123457])
    options = {'check_less_precise': check_less_precise}

    # Must not raise.
    assert_extension_array_equal(first, second, **options)
def test_to_integer_array(values, to_dtype, result_dtype):
    """Converting ``values`` with ``to_dtype`` yields the ``result_dtype`` array."""
    # convert existing arrays to IntegerArrays
    target = result_dtype()
    converted = integer_array(values, dtype=to_dtype)
    assert converted.dtype == target

    # Building directly with the resulting dtype must give an equal array.
    tm.assert_extension_array_equal(
        converted, integer_array(values, dtype=result_dtype()))
def test_numpy_array(arr):
    """``Series.array`` for ``arr`` equals a PandasArray wrapping ``arr``."""
    series = pd.Series(arr)
    tm.assert_extension_array_equal(series.array, PandasArray(arr))
def test_value_counts_unique_nunique_null(self, null_obj):
    """Check value_counts / unique / nunique on objects containing NAs.

    For every object in ``self.objs`` that allows NA operations, the first
    two entries are overwritten with a missing marker (``iNaT`` on the
    datetime64tz / i8-backed paths, ``null_obj`` otherwise).  The i-th entry
    is then repeated i + 1 times so every count is known in advance.
    """
    for orig in self.objs:
        o = orig.copy()
        klass = type(o)
        values = o._ndarray_values

        if not self._allow_na_ops(o):
            continue

        # special assign to the numpy array
        if is_datetime64tz_dtype(o):
            if isinstance(o, DatetimeIndex):
                v = o.asi8
                v[0:2] = iNaT
                values = o._shallow_copy(v)
            else:
                o = o.copy()
                o[0:2] = iNaT
                values = o._values
        elif needs_i8_conversion(o):
            values[0:2] = iNaT
            values = o._shallow_copy(values)
        else:
            values[0:2] = null_obj

        # check values has the same dtype as the original
        assert values.dtype == o.dtype

        # create repeated values, 'n'th element is repeated by n+1 times
        if isinstance(o, (DatetimeIndex, PeriodIndex)):
            expected_index = o.copy()
            expected_index.name = None

            # attach name to klass
            o = klass(values.repeat(range(1, len(o) + 1)))
            o.name = 'a'
        else:
            # DatetimeIndex is fully handled by the branch above, so no
            # special case is needed here.
            expected_index = Index(values)
            expected_index.name = None
            o = o.repeat(range(1, len(o) + 1))
            o.name = 'a'

        # check values has the same dtype as the original
        assert o.dtype == orig.dtype

        # check values correctly have NaN: the two NA entries were repeated
        # once and twice, giving three leading missing positions.
        # (builtin ``bool``: the ``np.bool`` alias is gone in newer NumPy)
        nanloc = np.zeros(len(o), dtype=bool)
        nanloc[:3] = True
        if isinstance(o, Index):
            tm.assert_numpy_array_equal(pd.isna(o), nanloc)
        else:
            exp = Series(nanloc, o.index, name='a')
            tm.assert_series_equal(pd.isna(o), exp)

        expected_s_na = Series(list(range(10, 2, -1)) + [3],
                               index=expected_index[9:0:-1],
                               dtype='int64', name='a')
        expected_s = Series(list(range(10, 2, -1)),
                            index=expected_index[9:1:-1],
                            dtype='int64', name='a')

        result_s_na = o.value_counts(dropna=False)
        tm.assert_series_equal(result_s_na, expected_s_na)
        assert result_s_na.index.name is None
        assert result_s_na.name == 'a'

        # Assert on the stored result rather than recomputing it.
        result_s = o.value_counts()
        tm.assert_series_equal(result_s, expected_s)
        assert result_s.index.name is None
        assert result_s.name == 'a'

        result = o.unique()
        if isinstance(o, Index):
            tm.assert_index_equal(result, Index(values[1:], name='a'))
        elif is_datetime64tz_dtype(o):
            # unable to compare NaT / nan
            tm.assert_extension_array_equal(result[1:], values[2:])
            assert result[0] is pd.NaT
        else:
            tm.assert_numpy_array_equal(result[1:], values[2:])

            assert pd.isna(result[0])
            assert result.dtype == orig.dtype

        assert o.nunique() == 8
        assert o.nunique(dropna=False) == 9
def test_from_sequence_dtype():
    """``_from_sequence`` with dtype='uint64' converts int64 input."""
    source = np.array([1, 2, 3], dtype='int64')
    converted = PandasArray._from_sequence(source, dtype='uint64')

    wanted = PandasArray(np.array([1, 2, 3], dtype='uint64'))
    tm.assert_extension_array_equal(converted, wanted)
def test_array_inference_fails(data):
    """``pd.array(data)`` should equal an object-dtype PandasArray of ``data``."""
    inferred = pd.array(data)
    as_object = PandasArray(np.array(data, dtype=object))
    tm.assert_extension_array_equal(inferred, as_object)
def test_set_closed(self, closed, new_closed):
    """set_closed gives the same array as building with the new closed side."""
    # GH 21670
    original = IntervalArray.from_breaks(range(10), closed=closed)
    reclosed = original.set_closed(new_closed)

    wanted = IntervalArray.from_breaks(range(10), closed=new_closed)
    tm.assert_extension_array_equal(reclosed, wanted)