def test_set_inclusive(self, closed, new_inclusive):
    """``set_inclusive`` must match an array built with ``new_inclusive``."""
    # GH 21670
    breaks = range(10)
    starting = IntervalArray.from_breaks(breaks, inclusive=closed)
    expected = IntervalArray.from_breaks(breaks, inclusive=new_inclusive)

    result = starting.set_inclusive(new_inclusive)

    tm.assert_extension_array_equal(result, expected)
def test_arrow_table_roundtrip(breaks):
    """Round-trip an interval column through a pyarrow table and back."""
    import pyarrow as pa

    from pandas.core.arrays.arrow._arrow_utils import ArrowIntervalType

    intervals = IntervalArray.from_breaks(breaks)
    intervals[1] = None  # include a missing interval in the column
    frame = pd.DataFrame({"a": intervals})

    # single table: the extension type survives in both directions
    arrow_table = pa.table(frame)
    assert isinstance(arrow_table.field("a").type, ArrowIntervalType)
    roundtripped = arrow_table.to_pandas()
    assert isinstance(roundtripped["a"].dtype, pd.IntervalDtype)
    tm.assert_frame_equal(roundtripped, frame)

    # concatenated tables
    doubled_table = pa.concat_tables([arrow_table, arrow_table])
    roundtripped = doubled_table.to_pandas()
    expected = pd.concat([frame, frame], ignore_index=True)
    tm.assert_frame_equal(roundtripped, expected)

    # GH-41040
    column_type = arrow_table.column(0).type
    empty_chunks = pa.chunked_array([], type=column_type)
    empty_table = pa.table([empty_chunks], schema=arrow_table.schema)
    roundtripped = empty_table.to_pandas()
    tm.assert_frame_equal(roundtripped, expected[0:0])
def test_shift(self):
    """A default ``shift`` puts a missing interval first and keeps the length."""
    # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502
    intervals = IntervalArray.from_breaks([1, 2, 3])

    shifted = intervals.shift()

    # int -> float
    expected_tuples = [(np.nan, np.nan), (1.0, 2.0)]
    expected = IntervalArray.from_tuples(expected_tuples)
    tm.assert_interval_array_equal(shifted, expected)
def test_arrow_array_missing():
    """A missing interval converts to arrow nulls in the fields and struct."""
    import pyarrow as pa

    from pandas.core.arrays._arrow_utils import ArrowIntervalType

    intervals = IntervalArray.from_breaks([0.0, 1.0, 2.0, 3.0])
    intervals[1] = None

    converted = pa.array(intervals)
    arrow_type = converted.type
    assert isinstance(arrow_type, ArrowIntervalType)
    assert arrow_type.closed == intervals.closed
    assert arrow_type.subtype == pa.float64()

    # fields have missing values (not NaN)
    storage = converted.storage
    expected_left = pa.array([0.0, None, 2.0], type="float64")
    expected_right = pa.array([1.0, None, 3.0], type="float64")
    assert storage.field("left").equals(expected_left)
    assert storage.field("right").equals(expected_right)

    # structarray itself also has missing values on the array level
    records = [
        {"left": 0.0, "right": 1.0},
        {"left": None, "right": None},
        {"left": 2.0, "right": 3.0},
    ]
    null_mask = np.array([False, True, False])
    expected = pa.StructArray.from_pandas(records, mask=null_mask)
    assert storage.equals(expected)
def test_index_series_compat(self, op, constructor, expected_type, assert_func):
    """Compare a boxed IntervalIndex against scalar and list-like operands."""
    # IntervalIndex/Series that rely on IntervalArray for comparisons
    breaks = range(4)
    index = constructor(IntervalIndex.from_breaks(breaks))

    def check(other):
        # the elementwise helper on the test class supplies the reference
        result = op(index, other)
        reference = self.elementwise_comparison(op, index, other)
        assert_func(result, expected_type(reference))

    # scalar comparisons
    check(index[0])
    check(breaks[0])

    # list-like comparisons
    check(IntervalArray.from_breaks(breaks))
    check([index[0], breaks[0], "foo"])
def test_shift_datetime(self):
    """``shift`` on datetime intervals must agree with a filling ``take``."""
    intervals = IntervalArray.from_breaks(pd.date_range("2000", periods=4))

    # forward shift by two: two fill slots, then the first interval
    shifted = intervals.shift(2)
    expected = intervals.take([-1, -1, 0], allow_fill=True)
    tm.assert_interval_array_equal(shifted, expected)

    # backward shift by one: the fill slot lands at the end
    shifted = intervals.shift(-1)
    expected = intervals.take([1, 2, -1], allow_fill=True)
    tm.assert_interval_array_equal(shifted, expected)
def test_get_numeric_data_extension_dtype(self):
    """``_get_numeric_data`` keeps only the integer-array columns here."""
    # GH 22290
    columns = {
        'A': integer_array([-10, np.nan, 0, 10, 20, 30], dtype='Int64'),
        'B': Categorical(list('abcabc')),
        'C': integer_array([0, 1, 2, 3, np.nan, 5], dtype='UInt8'),
        'D': IntervalArray.from_breaks(range(7)),
    }
    df = DataFrame(columns)

    result = df._get_numeric_data()

    numeric_labels = ['A', 'C']
    expected = df.loc[:, numeric_labels]
    assert_frame_equal(result, expected)
def test_get_numeric_data_extension_dtype(self):
    """``_get_numeric_data`` keeps only the integer-array columns here."""
    # GH 22290
    columns = {
        'A': integer_array([-10, np.nan, 0, 10, 20, 30], dtype='Int64'),
        'B': Categorical(list('abcabc')),
        'C': integer_array([0, 1, 2, 3, np.nan, 5], dtype='UInt8'),
        'D': IntervalArray.from_breaks(range(7)),
    }
    df = DataFrame(columns)

    result = df._get_numeric_data()

    numeric_labels = ['A', 'C']
    expected = df.loc[:, numeric_labels]
    assert_frame_equal(result, expected)
def test_where_raises(self, other):
    """The array-level ``_where`` raises; ``Series.where`` falls back to object."""
    # GH#45768 The IntervalArray method raises; the Series method coerces
    left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed="left")
    ser = pd.Series(left_closed)
    mask = np.array([True, False, True])

    match = "'value.closed' is 'right', expected 'left'."
    with pytest.raises(ValueError, match=match):
        ser.array._where(mask, other)

    res = ser.where(mask, other=other)
    as_object = ser.astype(object)
    expected = as_object.where(mask, other)
    tm.assert_series_equal(res, expected)
def test_get_numeric_data_extension_dtype(self):
    """``_get_numeric_data`` keeps only the integer-array columns here."""
    # GH 22290
    columns = {
        "A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"),
        "B": Categorical(list("abcabc")),
        "C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"),
        "D": IntervalArray.from_breaks(range(7)),
    }
    df = DataFrame(columns)

    result = df._get_numeric_data()

    numeric_labels = ["A", "C"]
    expected = df.loc[:, numeric_labels]
    assert_frame_equal(result, expected)
class TestMethods:
    """Method-level tests for IntervalArray (``inclusive`` keyword API)."""

    @pytest.mark.parametrize("new_inclusive", ["left", "right", "both", "neither"])
    def test_set_inclusive(self, closed, new_inclusive):
        """``set_inclusive`` must match an array built with ``new_inclusive``."""
        # GH 21670
        breaks = range(10)
        starting = IntervalArray.from_breaks(breaks, inclusive=closed)
        expected = IntervalArray.from_breaks(breaks, inclusive=new_inclusive)

        result = starting.set_inclusive(new_inclusive)

        tm.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            Interval(0, 1, inclusive="right"),
            IntervalArray.from_breaks([1, 2, 3, 4], inclusive="right"),
        ],
    )
    def test_where_raises(self, other):
        """The array-level ``_where`` raises; ``Series.where`` falls back to object."""
        # GH#45768 The IntervalArray method raises; the Series method coerces
        left_inclusive = IntervalArray.from_breaks([1, 2, 3, 4], inclusive="left")
        ser = pd.Series(left_inclusive)
        mask = np.array([True, False, True])

        match = "'value.inclusive' is 'right', expected 'left'."
        with pytest.raises(ValueError, match=match):
            ser.array._where(mask, other)

        res = ser.where(mask, other=other)
        as_object = ser.astype(object)
        expected = as_object.where(mask, other)
        tm.assert_series_equal(res, expected)

    def test_shift(self):
        """A default ``shift`` puts a missing interval first and keeps the length."""
        # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502
        intervals = IntervalArray.from_breaks([1, 2, 3], "right")

        shifted = intervals.shift()

        # int -> float
        expected_tuples = [(np.nan, np.nan), (1.0, 2.0)]
        expected = IntervalArray.from_tuples(expected_tuples, "right")
        tm.assert_interval_array_equal(shifted, expected)

    def test_shift_datetime(self):
        """``shift`` on datetime intervals must agree with a filling ``take``."""
        # GH#31502, GH#31504
        stamps = date_range("2000", periods=4)
        intervals = IntervalArray.from_breaks(stamps, "right")

        # forward shift by two: two fill slots, then the first interval
        shifted = intervals.shift(2)
        expected = intervals.take([-1, -1, 0], allow_fill=True)
        tm.assert_interval_array_equal(shifted, expected)

        # backward shift by one: the fill slot lands at the end
        shifted = intervals.shift(-1)
        expected = intervals.take([1, 2, -1], allow_fill=True)
        tm.assert_interval_array_equal(shifted, expected)
def test_arrow_table_roundtrip_without_metadata(breaks):
    """The interval dtype must be restored even with schema metadata dropped."""
    import pyarrow as pa

    intervals = IntervalArray.from_breaks(breaks)
    intervals[1] = None
    frame = pd.DataFrame({"a": intervals})

    # remove the metadata
    stripped = pa.table(frame).replace_schema_metadata()
    assert stripped.schema.metadata is None

    roundtripped = stripped.to_pandas()
    assert isinstance(roundtripped["a"].dtype, pd.IntervalDtype)
    tm.assert_frame_equal(roundtripped, frame)
def test_setitem_mismatched_inclusive(self):
    """Assigning values with a different ``inclusive`` must raise every time."""
    arr = IntervalArray.from_breaks(range(4), "right")
    snapshot = arr.copy()
    mismatched = arr.set_inclusive("both")
    flipped = mismatched[::-1]

    msg = "'value.inclusive' is 'both', expected 'right'"

    # scalar, length-one slice and empty slice
    with pytest.raises(ValueError, match=msg):
        arr[0] = mismatched[0]

    with pytest.raises(ValueError, match=msg):
        arr[:1] = mismatched[:1]

    with pytest.raises(ValueError, match=msg):
        arr[:0] = mismatched[:0]

    # full-slice assignment from several container flavours
    with pytest.raises(ValueError, match=msg):
        arr[:] = flipped

    with pytest.raises(ValueError, match=msg):
        arr[:] = list(flipped)

    with pytest.raises(ValueError, match=msg):
        arr[:] = flipped.astype(object)

    with pytest.raises(ValueError, match=msg):
        arr[:] = flipped.astype("category")

    # empty list should be no-op
    arr[:0] = IntervalArray.from_breaks([], "right")
    tm.assert_interval_array_equal(arr, snapshot)
def test_from_arrow_from_raw_struct_array():
    """``IntervalDtype.__from_arrow__`` accepts a plain struct array."""
    # in case pyarrow lost the Interval extension type (eg on parquet roundtrip
    # with datetime64[ns] subtype, see GH-45881), still allow conversion
    # from arrow to IntervalArray
    import pyarrow as pa

    struct_array = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}])
    dtype = pd.IntervalDtype(np.dtype("int64"), closed="neither")

    result = dtype.__from_arrow__(struct_array)
    breaks = np.array([0, 1, 2], dtype="int64")
    expected = IntervalArray.from_breaks(breaks, closed="neither")
    tm.assert_extension_array_equal(result, expected)

    # the chunked form must give the same answer
    chunked = pa.chunked_array([struct_array])
    result = dtype.__from_arrow__(chunked)
    tm.assert_extension_array_equal(result, expected)
class TestMethods:
    """Method-level tests for IntervalArray."""

    @pytest.mark.parametrize('new_closed',
                             ['left', 'right', 'both', 'neither'])
    def test_set_closed(self, closed, new_closed):
        """``set_closed`` must match an array built with ``new_closed``."""
        # GH 21670
        breaks = range(10)
        starting = IntervalArray.from_breaks(breaks, closed=closed)
        expected = IntervalArray.from_breaks(breaks, closed=new_closed)

        result = starting.set_closed(new_closed)

        tm.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize('other', [
        Interval(0, 1, closed='right'),
        IntervalArray.from_breaks([1, 2, 3, 4], closed='right'),
    ])
    def test_where_raises(self, other):
        """``Series.where`` with a differently-closed ``other`` must raise."""
        left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed='left')
        ser = pd.Series(left_closed)
        mask = [True, False, True]

        match = "'value.closed' is 'right', expected 'left'."
        with pytest.raises(ValueError, match=match):
            ser.where(mask, other=other)
class TestMethods:
    """Method-level tests for IntervalArray."""

    @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
    def test_set_closed(self, closed, new_closed):
        """``set_closed`` must match an array built with ``new_closed``."""
        # GH 21670
        breaks = range(10)
        starting = IntervalArray.from_breaks(breaks, closed=closed)
        expected = IntervalArray.from_breaks(breaks, closed=new_closed)

        result = starting.set_closed(new_closed)

        tm.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize(
        "other",
        [
            Interval(0, 1, closed="right"),
            IntervalArray.from_breaks([1, 2, 3, 4], closed="right"),
        ],
    )
    def test_where_raises(self, other):
        """``Series.where`` with a differently-closed ``other`` must raise."""
        left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed="left")
        ser = pd.Series(left_closed)
        mask = [True, False, True]

        match = "'value.closed' is 'right', expected 'left'."
        with pytest.raises(ValueError, match=match):
            ser.where(mask, other=other)

    def test_shift(self):
        """A default ``shift`` puts a missing interval first and keeps the length."""
        # https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502
        intervals = IntervalArray.from_breaks([1, 2, 3])

        shifted = intervals.shift()

        # int -> float
        expected_tuples = [(np.nan, np.nan), (1.0, 2.0)]
        expected = IntervalArray.from_tuples(expected_tuples)
        tm.assert_interval_array_equal(shifted, expected)

    def test_shift_datetime(self):
        """``shift`` on datetime intervals must agree with a filling ``take``."""
        # GH#31502, GH#31504
        stamps = date_range("2000", periods=4)
        intervals = IntervalArray.from_breaks(stamps)

        # forward shift by two: two fill slots, then the first interval
        shifted = intervals.shift(2)
        expected = intervals.take([-1, -1, 0], allow_fill=True)
        tm.assert_interval_array_equal(shifted, expected)

        # backward shift by one: the fill slot lands at the end
        shifted = intervals.shift(-1)
        expected = intervals.take([1, 2, -1], allow_fill=True)
        tm.assert_interval_array_equal(shifted, expected)
class TestMethods(object):
    """Method-level tests for IntervalArray."""

    @pytest.mark.parametrize('repeats', [0, 1, 5])
    def test_repeat(self, left_right_dtypes, repeats):
        """Repeating the array equals repeating each side and recombining."""
        left, right = left_right_dtypes
        combined = IntervalArray.from_arrays(left, right)

        result = combined.repeat(repeats)

        repeated_left = left.repeat(repeats)
        repeated_right = right.repeat(repeats)
        expected = IntervalArray.from_arrays(repeated_left, repeated_right)
        tm.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize(
        'bad_repeats, msg',
        [(-1, 'negative dimensions are not allowed'),
         ('foo', r'invalid literal for (int|long)\(\) with base 10')])
    def test_repeat_errors(self, bad_repeats, msg):
        """Invalid ``repeats`` values must raise ``ValueError``."""
        intervals = IntervalArray.from_breaks(range(4))
        with pytest.raises(ValueError, match=msg):
            intervals.repeat(bad_repeats)

    @pytest.mark.parametrize('new_closed',
                             ['left', 'right', 'both', 'neither'])
    def test_set_closed(self, closed, new_closed):
        """``set_closed`` must match an array built with ``new_closed``."""
        # GH 21670
        breaks = range(10)
        starting = IntervalArray.from_breaks(breaks, closed=closed)
        expected = IntervalArray.from_breaks(breaks, closed=new_closed)

        result = starting.set_closed(new_closed)

        tm.assert_extension_array_equal(result, expected)

    @pytest.mark.parametrize('other', [
        Interval(0, 1, closed='right'),
        IntervalArray.from_breaks([1, 2, 3, 4], closed='right'),
    ])
    def test_where_raises(self, other):
        """``Series.where`` with a differently-closed ``other`` must raise."""
        left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed='left')
        ser = pd.Series(left_closed)
        mask = [True, False, True]

        match = "'value.closed' is 'right', expected 'left'."
        with pytest.raises(ValueError, match=match):
            ser.where(mask, other=other)
result = ser.array if is_datetime64_dtype(any_numpy_dtype): assert isinstance(result, DatetimeArray) elif is_timedelta64_dtype(any_numpy_dtype): assert isinstance(result, TimedeltaArray) else: assert isinstance(result, PandasArray) @pytest.mark.parametrize( "array, attr", [ (pd.Categorical(["a", "b"]), "_codes"), (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), (pd.core.arrays.integer_array([0, np.nan]), "_data"), (IntervalArray.from_breaks([0, 1]), "_combined"), (SparseArray([0, 1]), "_sparse_values"), (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), # tz-aware Datetime ( DatetimeArray( np.array(["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]"), dtype=DatetimeTZDtype(tz="US/Central"), ), "_data", ), ], ) def test_array(array, attr, index_or_series): box = index_or_series
def test_set_closed_deprecated():
    """Calling ``set_closed`` is expected to emit a ``FutureWarning``."""
    # GH#40245
    intervals = IntervalArray.from_breaks(range(10))

    with tm.assert_produces_warning(FutureWarning):
        intervals.set_closed(closed="both")
def test_set_closed(self, closed, new_closed):
    """``set_closed`` must match an array built with ``new_closed``."""
    # GH 21670
    breaks = range(10)
    starting = IntervalArray.from_breaks(breaks, closed=closed)
    expected = IntervalArray.from_breaks(breaks, closed=new_closed)

    result = starting.set_closed(new_closed)

    tm.assert_extension_array_equal(result, expected)
def test_repeat_errors(self, bad_repeats, msg):
    """Invalid ``repeats`` values must raise ``ValueError`` matching ``msg``."""
    intervals = IntervalArray.from_breaks(range(4))

    with tm.assert_raises_regex(ValueError, msg):
        intervals.repeat(bad_repeats)
def test_repeat_errors(self, bad_repeats, msg):
    """Invalid ``repeats`` values must raise ``ValueError`` matching ``msg``."""
    intervals = IntervalArray.from_breaks(range(4))

    with pytest.raises(ValueError, match=msg):
        intervals.repeat(bad_repeats)
def test_from_breaks_deprecation():
    """Passing ``closed`` to ``from_breaks`` is expected to warn."""
    # GH#40245
    breaks = [0, 1, 2, 3]

    with tm.assert_produces_warning(FutureWarning):
        IntervalArray.from_breaks(breaks, closed="right")
def test_repeat_errors(self, bad_repeats, msg):
    """Invalid ``repeats`` values must raise ``ValueError`` matching ``msg``."""
    intervals = IntervalArray.from_breaks(range(4))

    with tm.assert_raises_regex(ValueError, msg):
        intervals.repeat(bad_repeats)
def test_set_closed(self, closed, new_closed):
    """``set_closed`` must match an array built with ``new_closed``."""
    # GH 21670
    breaks = range(10)
    starting = IntervalArray.from_breaks(breaks, closed=closed)
    expected = IntervalArray.from_breaks(breaks, closed=new_closed)

    result = starting.set_closed(new_closed)

    tm.assert_extension_array_equal(result, expected)
result = ser.array if is_datetime64_dtype(any_numpy_dtype): assert isinstance(result, DatetimeArray) elif is_timedelta64_dtype(any_numpy_dtype): assert isinstance(result, TimedeltaArray) else: assert isinstance(result, PandasArray) @pytest.mark.parametrize( "array, attr", [ (pd.Categorical(["a", "b"]), "_codes"), (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), (pd.core.arrays.integer_array([0, np.nan]), "_data"), (IntervalArray.from_breaks([0, 1]), "_left"), (SparseArray([0, 1]), "_sparse_values"), (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), # tz-aware Datetime ( DatetimeArray( np.array(["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]"), dtype=DatetimeTZDtype(tz="US/Central"), ), "_data", ), ], ) def test_array(array, attr, index_or_series): box = index_or_series
def test_where_raises(self, other):
    """``Series.where`` with a differently-closed ``other`` must raise."""
    left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed='left')
    ser = pd.Series(left_closed)
    mask = [True, False, True]

    match = "'value.closed' is 'right', expected 'left'."
    with pytest.raises(ValueError, match=match):
        ser.where(mask, other=other)
def test_where_raises(self, other):
    """``Series.where`` with a differently-closed ``other`` must raise."""
    left_closed = IntervalArray.from_breaks([1, 2, 3, 4], closed="left")
    ser = pd.Series(left_closed)
    mask = [True, False, True]

    match = "'value.closed' is 'right', expected 'left'."
    with pytest.raises(ValueError, match=match):
        ser.where(mask, other=other)
def test_repeat_errors(self, bad_repeats, msg):
    """Invalid ``repeats`` values must raise ``ValueError`` matching ``msg``."""
    intervals = IntervalArray.from_breaks(range(4))

    with pytest.raises(ValueError, match=msg):
        intervals.repeat(bad_repeats)