def test_overlaps_nested(self, start_shift, closed, other_closed): start, shift = start_shift interval1 = Interval(start, start + 3 * shift, other_closed) interval2 = Interval(start + shift, start + 2 * shift, closed) # nested intervals should always overlap assert interval1.overlaps(interval2)
def test_overlaps_disjoint(self, start_shift, closed, other_closed): start, shift = start_shift interval1 = Interval(start, start + shift, other_closed) interval2 = Interval(start + 2 * shift, start + 3 * shift, closed) # disjoint intervals should never overlap assert not interval1.overlaps(interval2)
def test_overlaps_endpoint(self, start_shift, closed, other_closed): start, shift = start_shift interval1 = Interval(start, start + shift, other_closed) interval2 = Interval(start + shift, start + 2 * shift, closed) # overlap if shared endpoint is closed for both (overlap at a point) result = interval1.overlaps(interval2) expected = interval1.closed_right and interval2.closed_left assert result == expected
def test_construct_errors(self, left, right): # GH 23013 msg = "Only numeric, Timestamp and Timedelta endpoints are allowed" with pytest.raises(ValueError, match=msg): Interval(left, right)
def test_comparison(self): actual = Interval(0, 1) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = Interval(0.5, 1.5) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > Interval(0.5, 1.5) tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index expected = np.array([True, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index <= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index >= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index < self.index expected = np.array([False, False]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index.values == self.index tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index <= self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index != self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index > self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index.values > self.index tm.assert_numpy_array_equal(actual, np.array([False, False])) # invalid comparisons actual = self.index == 0 tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) msg = "|".join([ "not supported between instances of 'int' and '.*.Interval'", r"Invalid comparison between dtype=interval\[int64, right\] and ", ]) with pytest.raises(TypeError, match=msg): self.index > 0 with pytest.raises(TypeError, match=msg): self.index <= 0 with pytest.raises(TypeError, match=msg): self.index > np.arange(2) msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3)
def test_deprecated(self): ivs = [Interval(0, 1), Interval(1, 2)] with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): IntervalIndex.from_intervals(ivs)
def test_overlaps_invalid_type(self, other): interval = Interval(0, 1) msg = '`other` must be an Interval, got {other}'.format( other=type(other).__name__) with pytest.raises(TypeError, match=msg): interval.overlaps(other)
class TestIntervalIndex(Base): _holder = IntervalIndex def setup_method(self, method): self.index = IntervalIndex.from_arrays([0, 1], [1, 2]) self.index_with_nan = IntervalIndex.from_tuples([(0, 1), np.nan, (1, 2)]) self.indices = dict(intervalIndex=tm.makeIntervalIndex(10)) def create_index(self, closed="right"): return IntervalIndex.from_breaks(range(11), closed=closed) def create_index_with_nan(self, closed="right"): mask = [True, False] + [True] * 8 return IntervalIndex.from_arrays( np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed, ) def test_properties(self, closed): index = self.create_index(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) tm.assert_index_equal(index.left, Index(np.arange(10))) tm.assert_index_equal(index.right, Index(np.arange(1, 11))) tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) assert index.closed == closed ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) # with nans index = self.create_index_with_nan(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) expected_right = expected_left + 1 expected_mid = expected_left + 0.5 tm.assert_index_equal(index.left, expected_left) tm.assert_index_equal(index.right, expected_right) tm.assert_index_equal(index.mid, expected_mid) assert index.closed == closed ivs = [ Interval(l, r, closed) if notna(l) else np.nan for l, r in zip(expected_left, expected_right) ] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) @pytest.mark.parametrize( "breaks", [ [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]), pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]), ], ) def test_length(self, closed, breaks): # GH 18789 index = IntervalIndex.from_breaks(breaks, closed=closed) result = index.length expected = Index(iv.length for iv in index) tm.assert_index_equal(result, expected) # with NA index = index.insert(1, np.nan) result = index.length expected = Index(iv.length if notna(iv) else iv for iv in index) tm.assert_index_equal(result, expected) def test_with_nans(self, closed): index = self.create_index(closed=closed) assert index.hasnans is False result = index.isna() expected = np.repeat(False, len(index)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.repeat(True, len(index)) tm.assert_numpy_array_equal(result, expected) index = self.create_index_with_nan(closed=closed) assert index.hasnans is True result = index.isna() expected = np.array([False, True] + [False] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.array([True, False] + [True] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) def test_copy(self, closed): expected = self.create_index(closed=closed) result = expected.copy() assert result.equals(expected) result = expected.copy(deep=True) assert result.equals(expected) assert result.left is not expected.left def test_ensure_copied_data(self, closed): # exercise the copy flag in the constructor # not copying index = self.create_index(closed=closed) result = IntervalIndex(index, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="same") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="same") # by-definition make a copy result = IntervalIndex(index._ndarray_values, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="copy") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="copy") def test_equals(self, closed): expected = IntervalIndex.from_breaks(np.arange(5), closed=closed) assert expected.equals(expected) assert expected.equals(expected.copy()) assert not expected.equals(expected.astype(object)) assert not expected.equals(np.array(expected)) assert not expected.equals(list(expected)) assert not expected.equals([1, 2]) assert not expected.equals(np.array([1, 2])) assert not expected.equals(pd.date_range("20130101", periods=2)) expected_name1 = IntervalIndex.from_breaks(np.arange(5), closed=closed, name="foo") expected_name2 = IntervalIndex.from_breaks(np.arange(5), closed=closed, name="bar") assert expected.equals(expected_name1) assert expected_name1.equals(expected_name2) for other_closed in {"left", "right", "both", "neither"} - {closed}: expected_other_closed = IntervalIndex.from_breaks( np.arange(5), closed=other_closed) assert not expected.equals(expected_other_closed) @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series]) def test_where(self, closed, klass): idx = self.create_index(closed=closed) cond = [True] * len(idx) expected = idx result = expected.where(klass(cond)) tm.assert_index_equal(result, expected) cond = [False] + [True] * len(idx[1:]) expected = IntervalIndex([np.nan] + idx[1:].tolist()) result = idx.where(klass(cond)) tm.assert_index_equal(result, expected) def test_delete(self, closed): expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) result = self.create_index(closed=closed).delete(0) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "data", [ interval_range(0, periods=10, closed="neither"), interval_range(1.7, periods=8, freq=2.5, closed="both"), interval_range(Timestamp("20170101"), periods=12, closed="left"), interval_range(Timedelta("1 day"), periods=6, closed="right"), ], ) def test_insert(self, data): item = data[0] idx_item = IntervalIndex([item]) # start expected = idx_item.append(data) result = data.insert(0, item) tm.assert_index_equal(result, expected) # end expected = data.append(idx_item) result = data.insert(len(data), item) tm.assert_index_equal(result, expected) # mid expected = data[:3].append(idx_item).append(data[3:]) result = data.insert(3, item) tm.assert_index_equal(result, expected) # invalid type msg = "can only insert Interval objects and NA into an IntervalIndex" with pytest.raises(ValueError, match=msg): data.insert(1, "foo") # invalid closed msg = "inserted item must be closed on the same side as the index" for closed in {"left", "right", "both", "neither"} - {item.closed}: with pytest.raises(ValueError, match=msg): bad_item = Interval(item.left, item.right, closed=closed) data.insert(1, bad_item) # GH 18295 (test missing) na_idx = IntervalIndex([np.nan], closed=data.closed) for na in (np.nan, pd.NaT, None): expected = data[:1].append(na_idx).append(data[1:]) result = data.insert(1, na) tm.assert_index_equal(result, expected) def test_take(self, closed): index = self.create_index(closed=closed) result = index.take(range(10)) tm.assert_index_equal(result, index) result = index.take([0, 0, 1]) expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed) tm.assert_index_equal(result, expected) def test_is_unique_interval(self, closed): """ Interval specific tests for is_unique in addition to base class tests """ # unique overlapping - distinct endpoints idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) assert idx.is_unique is True # unique overlapping - shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_unique is True # unique nested idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) assert idx.is_unique is True def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing non-overlapping idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered overlapping idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # stationary idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False # empty idx = IntervalIndex([], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True @pytest.mark.skip(reason="not a valid repr as we use interval notation") def test_repr(self): i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed="right") expected = ("IntervalIndex(left=[0, 1]," "\n right=[1, 2]," "\n closed='right'," "\n dtype='interval[int64]')") assert repr(i) == expected i = IntervalIndex.from_tuples( (Timestamp("20130101"), Timestamp("20130102")), (Timestamp("20130102"), Timestamp("20130103")), closed="right", ) expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02']," "\n right=['2013-01-02', '2013-01-03']," "\n closed='right'," "\n dtype='interval[datetime64[ns]]')") assert repr(i) == expected @pytest.mark.skip(reason="not a valid repr as we use interval notation") def test_repr_max_seq_item_setting(self): super().test_repr_max_seq_item_setting() @pytest.mark.skip(reason="not a valid repr as we use interval notation") def test_repr_roundtrip(self): super().test_repr_roundtrip() def test_frame_repr(self): # https://github.com/pandas-dev/pandas/pull/24134/files df = pd.DataFrame({"A": [1, 2, 3, 4]}, index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])) result = repr(df) expected = " A\n(0, 1] 1\n(1, 2] 2\n(2, 3] 3\n(3, 4] 4" assert result == expected @pytest.mark.parametrize( "constructor,expected", [ ( pd.Series, ("(0.0, 1.0] a\n" "NaN b\n" "(2.0, 3.0] c\n" "dtype: object"), ), ( pd.DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c"), ), ], ) def test_repr_missing(self, constructor, expected): # GH 25984 index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) obj = constructor(list("abc"), index=index) result = repr(obj) assert result == expected @pytest.mark.parametrize( "tuples, closed, expected_data", [ ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]), ( [(0.5, 1.0), np.nan, (2.0, 3.0)], "right", ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"], ), ( [ (Timestamp("20180101"), Timestamp("20180102")), np.nan, ((Timestamp("20180102"), Timestamp("20180103"))), ], "both", [ "[2018-01-01, 2018-01-02]", "NaN", "[2018-01-02, 2018-01-03]" ], ), ( [ (Timedelta("0 days"), Timedelta("1 days")), (Timedelta("1 days"), Timedelta("2 days")), np.nan, ], "neither", [ "(0 days 00:00:00, 1 days 00:00:00)", "(1 days 00:00:00, 2 days 00:00:00)", "NaN", ], ), ], ) def test_to_native_types(self, tuples, closed, expected_data): # GH 28210 index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.to_native_types() expected = np.array(expected_data) tm.assert_numpy_array_equal(result, expected) def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) assert i[1] == Interval(1.0, 2.0, closed=closed) assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed) tm.assert_index_equal(result, expected) result = i[0:2] expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed) tm.assert_index_equal(result, expected) result = i[1:3] expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan), closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6]) def test_get_loc_length_one_scalar(self, scalar, closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) if scalar in index[0]: result = index.get_loc(scalar) assert result == 0 else: with pytest.raises(KeyError, match=str(scalar)): index.get_loc(scalar) @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"]) @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)]) def test_get_loc_length_one_interval(self, left, right, closed, other_closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) interval = Interval(left, right, closed=other_closed) if interval == index[0]: result = index.get_loc(interval) assert result == 0 else: with pytest.raises( KeyError, match=re.escape( "Interval({left}, {right}, closed='{other_closed}')". format(left=left, right=right, other_closed=other_closed)), ): index.get_loc(interval) # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_get_loc_datetimelike_nonoverlapping(self, breaks): # GH 20636 # nonoverlapping = IntervalIndex method and no i8 conversion index = IntervalIndex.from_breaks(breaks) value = index[0].mid result = index.get_loc(value) expected = 0 assert result == expected interval = Interval(index[0].left, index[0].right) result = index.get_loc(interval) expected = 0 assert result == expected @pytest.mark.parametrize( "arrays", [ (date_range("20180101", periods=4), date_range("20180103", periods=4)), ( date_range("20180101", periods=4, tz="US/Eastern"), date_range("20180103", periods=4, tz="US/Eastern"), ), ( timedelta_range("0 days", periods=4), timedelta_range("2 days", periods=4), ), ], ids=lambda x: str(x[0].dtype), ) def test_get_loc_datetimelike_overlapping(self, arrays): # GH 20636 index = IntervalIndex.from_arrays(*arrays) value = index[0].mid + Timedelta("12 hours") result = index.get_loc(value) expected = slice(0, 2, None) assert result == expected interval = Interval(index[0].left, index[0].right) result = index.get_loc(interval) expected = 0 assert result == expected @pytest.mark.parametrize( "values", [ date_range("2018-01-04", periods=4, freq="-1D"), date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"), timedelta_range("3 days", periods=4, freq="-1D"), np.arange(3.0, -1.0, -1.0), np.arange(3, -1, -1), ], ids=lambda x: str(x.dtype), ) def test_get_loc_decreasing(self, values): # GH 25860 index = IntervalIndex.from_arrays(values[1:], values[:-1]) result = index.get_loc(index[0]) expected = 0 assert result == expected @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)]) def test_get_indexer_length_one(self, item, closed): # GH 17284 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_indexer(item) expected = np.array([0] * len(item), dtype="intp") tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize("size", [1, 5]) def test_get_indexer_length_one_interval(self, size, closed): # GH 17284 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_indexer([Interval(0, 5, closed)] * size) expected = np.array([0] * size, dtype="intp") tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_maybe_convert_i8(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) # intervalindex result = index._maybe_convert_i8(index) expected = IntervalIndex.from_breaks(breaks.asi8) tm.assert_index_equal(result, expected) # interval interval = Interval(breaks[0], breaks[1]) result = index._maybe_convert_i8(interval) expected = Interval(breaks[0].value, breaks[1].value) assert result == expected # datetimelike index result = index._maybe_convert_i8(breaks) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) # datetimelike scalar result = index._maybe_convert_i8(breaks[0]) expected = breaks[0].value assert result == expected # list-like of datetimelike scalars result = index._maybe_convert_i8(list(breaks)) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5) ], ) def test_maybe_convert_i8_nat(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) to_convert = breaks._constructor([pd.NaT] * 3) expected = pd.Float64Index([np.nan] * 3) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) to_convert = to_convert.insert(0, breaks[0]) expected = expected.insert(0, float(breaks[0].value)) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")], ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_numeric(self, breaks, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks) key = make_key(breaks) # no conversion occurs for numeric result = index._maybe_convert_i8(key) assert result is key @pytest.mark.parametrize( "breaks1, breaks2", permutations( [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], 2, ), ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks1) key = make_key(breaks2) msg = ("Cannot index an IntervalIndex of subtype {dtype1} with " "values of dtype {dtype2}") msg = re.escape(msg.format(dtype1=breaks1.dtype, dtype2=breaks2.dtype)) with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) def test_contains_method(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) expected = np.array([False, False], dtype="bool") actual = i.contains(0) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(3) tm.assert_numpy_array_equal(actual, expected) expected = np.array([True, False], dtype="bool") actual = i.contains(0.5) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(1) tm.assert_numpy_array_equal(actual, expected) # __contains__ not implemented for "interval in interval", follow # that for the contains method for now with pytest.raises(NotImplementedError, match="contains not implemented for two"): i.contains(Interval(0, 1)) def test_dropna(self, closed): expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) def test_non_contiguous(self, closed): index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) target = [0.5, 1.5, 2.5] actual = index.get_indexer(target) expected = np.array([0, -1, 1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) assert 1.5 not in index def test_isin(self, closed): index = self.create_index(closed=closed) expected = np.array([True] + [False] * (len(index) - 1)) result = index.isin(index[:1]) tm.assert_numpy_array_equal(result, expected) result = index.isin([index[0]]) tm.assert_numpy_array_equal(result, expected) other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) expected = np.array([True] * (len(index) - 1) + [False]) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) for other_closed in {"right", "left", "both", "neither"}: other = self.create_index(closed=other_closed) expected = np.repeat(closed == other_closed, len(index)) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) def test_comparison(self): actual = Interval(0, 1) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = Interval(0.5, 1.5) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > Interval(0.5, 1.5) tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index expected = np.array([True, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index <= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index >= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index < self.index expected = np.array([False, False]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index.values == self.index tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index <= self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index != self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index > self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index.values > self.index tm.assert_numpy_array_equal(actual, np.array([False, False])) # invalid comparisons actual = self.index == 0 tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) with pytest.raises(TypeError, match="unorderable types"): self.index > 0 with pytest.raises(TypeError, match="unorderable types"): self.index <= 0 msg = r"unorderable types: Interval\(\) > int\(\)" with pytest.raises(TypeError, match=msg): self.index > np.arange(2) msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) def test_missing_values(self, closed): idx = Index([ np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed) ]) idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) assert idx.equals(idx2) msg = ("missing values must be missing in the same location both left" " and right sides") with pytest.raises(ValueError, match=msg): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]), closed=closed) tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) def test_sort_values(self, closed): index = self.create_index(closed=closed) result = index.sort_values() tm.assert_index_equal(result, index) result = index.sort_values(ascending=False) tm.assert_index_equal(result, index[::-1]) # with nan index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) result = index.sort_values() expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) tm.assert_index_equal(result, expected) result = index.sort_values(ascending=False) expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_datetime(self, tz): start = Timestamp("2000-01-01", tz=tz) dates = date_range(start=start, periods=10) index = IntervalIndex.from_breaks(dates) # test mid start = Timestamp("2000-01-01T12:00", tz=tz) expected = date_range(start=start, periods=9) tm.assert_index_equal(index.mid, expected) # __contains__ doesn't check individual points assert Timestamp("2000-01-01", tz=tz) not in index assert Timestamp("2000-01-01T12", tz=tz) not in index assert Timestamp("2000-01-02", tz=tz) not in index iv_true = Interval(Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)) iv_false = Interval(Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)) assert iv_true in index assert iv_false not in index # .contains does check individual points assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() assert index.contains(Timestamp("2000-01-02", tz=tz)).any() # test get_indexer start = Timestamp("1999-12-31T12:00", tz=tz) target = date_range(start=start, periods=7, freq="12H") actual = index.get_indexer(target) expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") tm.assert_numpy_array_equal(actual, expected) start = Timestamp("2000-01-08T18:00", tz=tz) target = date_range(start=start, periods=7, freq="6H") actual = index.get_indexer(target) expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) def test_append(self, closed): index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) result = index1.append(index2) expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) result = index1.append([index1, index2]) expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) msg = ("can only append two IntervalIndex objects that are closed " "on the same side") for other_closed in {"left", "right", "both", "neither"} - {closed}: index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2], closed=other_closed) with pytest.raises(ValueError, match=msg): index1.append(index_other_closed) def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', otherwise True (GH16560) if closed == "both": idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is False else: idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True @pytest.mark.parametrize( "start, shift, na_value", [ (0, 1, np.nan), (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), ], ) def test_is_overlapping(self, start, shift, na_value, closed): # GH 23309 # see test_interval_tree.py for extensive tests; interface tests here # non-overlapping tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # non-overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # overlapping tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # common endpoints tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping expected = closed == "both" assert result is expected # common endpoints with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping assert result is expected @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))), list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )), list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )), ], ) def test_to_tuples(self, tuples): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))) + [np.nan], list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )) + [np.nan], list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )) + [np.nan], ], ) @pytest.mark.parametrize("na_tuple", [True, False]) def test_to_tuples_na(self, tuples, na_tuple): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples(na_tuple=na_tuple) # check the non-NA portion expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) result_notna = result[:-1] tm.assert_index_equal(result_notna, expected_notna) # check the NA portion result_na = result[-1] if na_tuple: assert isinstance(result_na, tuple) assert len(result_na) == 2 assert all(isna(x) for x in result_na) else: assert isna(result_na) def test_nbytes(self): # GH 19209 left = np.arange(0, 4, dtype="i8") right = np.arange(1, 5, dtype="i8") result = IntervalIndex.from_arrays(left, right).nbytes expected = 64 # 4 * 8 * 2 assert result == expected def test_itemsize(self): # GH 19209 left = np.arange(0, 4, dtype="i8") right = np.arange(1, 5, dtype="i8") expected = 16 # 8 * 2 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = IntervalIndex.from_arrays(left, right).itemsize assert result == expected @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) def test_set_closed(self, name, closed, new_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) result = index.set_closed(new_closed) expected = interval_range(0, 5, closed=new_closed, name=name) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) def test_is_all_dates(self): # GH 23576 year_2017 = pd.Interval(pd.Timestamp("2017-01-01 00:00:00"), pd.Timestamp("2018-01-01 00:00:00")) year_2017_index = pd.IntervalIndex([year_2017]) assert not year_2017_index.is_all_dates
class TestClassConstructors(ConstructorTests): """Tests specific to the IntervalIndex/Index constructors""" @pytest.fixture( params=[IntervalIndex, partial(Index, dtype="interval")], ids=["IntervalIndex", "Index"], ) def constructor(self, request): return request.param def get_kwargs_from_breaks(self, breaks, closed="right"): """ converts intervals in breaks format to a dictionary of kwargs to specific to the format expected by the IntervalIndex/Index constructors """ if len(breaks) == 0: return {"data": breaks} ivs = [ Interval(left, right, closed) if notna(left) else left for left, right in zip(breaks[:-1], breaks[1:]) ] if isinstance(breaks, list): return {"data": ivs} elif is_categorical_dtype(breaks): return {"data": breaks._constructor(ivs)} return {"data": np.array(ivs, dtype=object)} def test_generic_errors(self, constructor): """ override the base class implementation since errors are handled differently; checks unnecessary since caught at the Interval level """ pass def test_constructor_string(self): # GH23013 # When forming the interval from breaks, # the interval of strings is already forbidden. pass def test_constructor_errors(self, constructor): # mismatched closed within intervals with no constructor override ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")] msg = "intervals must all be closed on the same side" with pytest.raises(ValueError, match=msg): constructor(ivs) # scalar msg = ( r"IntervalIndex\(...\) must be called with a collection of " "some kind, 5 was passed" ) with pytest.raises(TypeError, match=msg): constructor(5) # not an interval; dtype depends on 32bit/windows builds msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval" with pytest.raises(TypeError, match=msg): constructor([0, 1]) @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") @pytest.mark.parametrize( "data, closed", [ ([], "both"), ([np.nan, np.nan], "neither"), ( [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")], "left", ), ( [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")], "neither", ), (IntervalIndex.from_breaks(range(5), closed="both"), "right"), ], ) def test_override_inferred_closed(self, constructor, data, closed): # GH 19370 if isinstance(data, IntervalIndex): tuples = data.to_tuples() else: tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data] expected = IntervalIndex.from_tuples(tuples, closed=closed) result = constructor(data, closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "values_constructor", [list, np.array, IntervalIndex, IntervalArray] ) def test_index_object_dtype(self, values_constructor): # Index(intervals, dtype=object) is an Index (not an IntervalIndex) intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)] values = values_constructor(intervals) result = Index(values, dtype=object) assert type(result) is Index tm.assert_numpy_array_equal(result.values, np.array(values)) def test_index_mixed_closed(self): # GH27172 intervals = [ Interval(0, 1, closed="left"), Interval(1, 2, closed="right"), Interval(2, 3, closed="neither"), Interval(3, 4, closed="both"), ] result = Index(intervals) expected = Index(intervals, dtype=object) tm.assert_index_equal(result, expected)
def test_single_quantile(self): # issue 15431 expected = Series([0, 0]) s = Series([9., 9.]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex( [Interval(8.999, 9.0), Interval(8.999, 9.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([-9., -9.]) expected = Series([0, 0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex( [Interval(-9.001, -9.0), Interval(-9.001, -9.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([0., 0.]) expected = Series([0, 0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex( [Interval(-0.001, 0.0), Interval(-0.001, 0.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([9]) expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([-9]) expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected) s = Series([0]) expected = Series([0]) result = qcut(s, 1, labels=False) tm.assert_series_equal(result, expected) result = qcut(s, 1) intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right') expected = Series(intervals).astype(CDT(ordered=True)) tm.assert_series_equal(result, expected)
def test_is_scalar_pandas_scalars(self): assert is_scalar(Timestamp('2014-01-01')) assert is_scalar(Timedelta(hours=1)) assert is_scalar(Period('2014-01-01')) assert is_scalar(Interval(left=0, right=1)) assert is_scalar(DateOffset(days=1))
class TestIntervalIndex: index = IntervalIndex.from_arrays([0, 1], [1, 2]) def create_index(self, closed="right"): return IntervalIndex.from_breaks(range(11), closed=closed) def create_index_with_nan(self, closed="right"): mask = [True, False] + [True] * 8 return IntervalIndex.from_arrays( np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed, ) def test_properties(self, closed): index = self.create_index(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) tm.assert_index_equal(index.left, Index(np.arange(10))) tm.assert_index_equal(index.right, Index(np.arange(1, 11))) tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) assert index.closed == closed ivs = [ Interval(left, right, closed) for left, right in zip(range(10), range(1, 11)) ] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) # with nans index = self.create_index_with_nan(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) expected_right = expected_left + 1 expected_mid = expected_left + 0.5 tm.assert_index_equal(index.left, expected_left) tm.assert_index_equal(index.right, expected_right) tm.assert_index_equal(index.mid, expected_mid) assert index.closed == closed ivs = [ Interval(left, right, closed) if notna(left) else np.nan for left, right in zip(expected_left, expected_right) ] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) @pytest.mark.parametrize( "breaks", [ [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]), pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]), ], ) def test_length(self, closed, breaks): # GH 18789 index = IntervalIndex.from_breaks(breaks, closed=closed) result = index.length expected = Index(iv.length for iv in index) tm.assert_index_equal(result, expected) # with NA index = index.insert(1, np.nan) result = index.length expected = Index(iv.length if notna(iv) else iv for iv in index) tm.assert_index_equal(result, expected) def test_with_nans(self, closed): index = self.create_index(closed=closed) assert index.hasnans is False result = index.isna() expected = np.zeros(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.ones(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) index = self.create_index_with_nan(closed=closed) assert index.hasnans is True result = index.isna() expected = np.array([False, True] + [False] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.array([True, False] + [True] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) def test_copy(self, closed): expected = self.create_index(closed=closed) result = expected.copy() assert result.equals(expected) result = expected.copy(deep=True) assert result.equals(expected) assert result.left is not expected.left def test_ensure_copied_data(self, closed): # exercise the copy flag in the constructor # not copying index = self.create_index(closed=closed) result = IntervalIndex(index, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="same") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="same") # by-definition make a copy result = IntervalIndex(np.array(index), copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="copy") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="copy") def test_delete(self, closed): expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) result = self.create_index(closed=closed).delete(0) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "data", [ interval_range(0, periods=10, closed="neither"), interval_range(1.7, periods=8, freq=2.5, closed="both"), interval_range(Timestamp("20170101"), periods=12, closed="left"), interval_range(Timedelta("1 day"), periods=6, closed="right"), ], ) def test_insert(self, data): item = data[0] idx_item = IntervalIndex([item]) # start expected = idx_item.append(data) result = data.insert(0, item) tm.assert_index_equal(result, expected) # end expected = data.append(idx_item) result = data.insert(len(data), item) tm.assert_index_equal(result, expected) # mid expected = data[:3].append(idx_item).append(data[3:]) result = data.insert(3, item) tm.assert_index_equal(result, expected) # invalid type res = data.insert(1, "foo") expected = data.astype(object).insert(1, "foo") tm.assert_index_equal(res, expected) msg = "can only insert Interval objects and NA into an IntervalArray" with pytest.raises(TypeError, match=msg): data._data.insert(1, "foo") # invalid closed msg = "'value.closed' is 'left', expected 'right'." for closed in {"left", "right", "both", "neither"} - {item.closed}: msg = f"'value.closed' is '{closed}', expected '{item.closed}'." bad_item = Interval(item.left, item.right, closed=closed) res = data.insert(1, bad_item) expected = data.astype(object).insert(1, bad_item) tm.assert_index_equal(res, expected) with pytest.raises(ValueError, match=msg): data._data.insert(1, bad_item) # GH 18295 (test missing) na_idx = IntervalIndex([np.nan], closed=data.closed) for na in [np.nan, None, pd.NA]: expected = data[:1].append(na_idx).append(data[1:]) result = data.insert(1, na) tm.assert_index_equal(result, expected) if data.left.dtype.kind not in ["m", "M"]: # trying to insert pd.NaT into a numeric-dtyped Index should cast expected = data.astype(object).insert(1, pd.NaT) msg = "can only insert Interval objects and NA into an IntervalArray" with pytest.raises(TypeError, match=msg): data._data.insert(1, pd.NaT) result = data.insert(1, pd.NaT) tm.assert_index_equal(result, expected) def test_is_unique_interval(self, closed): """ Interval specific tests for is_unique in addition to base class tests """ # unique overlapping - distinct endpoints idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) assert idx.is_unique is True # unique overlapping - shared endpoints idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_unique is True # unique nested idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) assert idx.is_unique is True # unique NaN idx = IntervalIndex.from_tuples([(np.NaN, np.NaN)], closed=closed) assert idx.is_unique is True # non-unique NaN idx = IntervalIndex.from_tuples([(np.NaN, np.NaN), (np.NaN, np.NaN)], closed=closed) assert idx.is_unique is False def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing non-overlapping idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered overlapping idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping shared endpoints idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping shared endpoints idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # stationary idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False # empty idx = IntervalIndex([], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True def test_is_monotonic_with_nans(self): # GH#41831 index = IntervalIndex([np.nan, np.nan]) assert not index.is_monotonic assert not index._is_strictly_monotonic_increasing assert not index.is_monotonic_increasing assert not index._is_strictly_monotonic_decreasing assert not index.is_monotonic_decreasing def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) assert i[1] == Interval(1.0, 2.0, closed=closed) assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed) tm.assert_index_equal(result, expected) result = i[0:2] expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed) tm.assert_index_equal(result, expected) result = i[1:3] expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan), closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_maybe_convert_i8(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) # intervalindex result = index._maybe_convert_i8(index) expected = IntervalIndex.from_breaks(breaks.asi8) tm.assert_index_equal(result, expected) # interval interval = Interval(breaks[0], breaks[1]) result = index._maybe_convert_i8(interval) expected = Interval(breaks[0].value, breaks[1].value) assert result == expected # datetimelike index result = index._maybe_convert_i8(breaks) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) # datetimelike scalar result = index._maybe_convert_i8(breaks[0]) expected = breaks[0].value assert result == expected # list-like of datetimelike scalars result = index._maybe_convert_i8(list(breaks)) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5) ], ) def test_maybe_convert_i8_nat(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) to_convert = breaks._constructor([pd.NaT] * 3) expected = pd.Float64Index([np.nan] * 3) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) to_convert = to_convert.insert(0, breaks[0]) expected = expected.insert(0, float(breaks[0].value)) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")], ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_numeric(self, breaks, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks) key = make_key(breaks) # no conversion occurs for numeric result = index._maybe_convert_i8(key) assert result is key @pytest.mark.parametrize( "breaks1, breaks2", permutations( [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], 2, ), ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks1) key = make_key(breaks2) msg = ( f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with " f"values of dtype {breaks2.dtype}") msg = re.escape(msg) with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) def test_contains_method(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) expected = np.array([False, False], dtype="bool") actual = i.contains(0) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(3) tm.assert_numpy_array_equal(actual, expected) expected = np.array([True, False], dtype="bool") actual = i.contains(0.5) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(1) tm.assert_numpy_array_equal(actual, expected) # __contains__ not implemented for "interval in interval", follow # that for the contains method for now with pytest.raises(NotImplementedError, match="contains not implemented for two"): i.contains(Interval(0, 1)) def test_contains_dunder(self): index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right") # __contains__ requires perfect matches to intervals. assert 0 not in index assert 1 not in index assert 2 not in index assert Interval(0, 1, closed="right") in index assert Interval(0, 2, closed="right") not in index assert Interval(0, 0.5, closed="right") not in index assert Interval(3, 5, closed="right") not in index assert Interval(-1, 0, closed="left") not in index assert Interval(0, 1, closed="left") not in index assert Interval(0, 1, closed="both") not in index def test_dropna(self, closed): expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) def test_non_contiguous(self, closed): index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) target = [0.5, 1.5, 2.5] actual = index.get_indexer(target) expected = np.array([0, -1, 1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) assert 1.5 not in index def test_isin(self, closed): index = self.create_index(closed=closed) expected = np.array([True] + [False] * (len(index) - 1)) result = index.isin(index[:1]) tm.assert_numpy_array_equal(result, expected) result = index.isin([index[0]]) tm.assert_numpy_array_equal(result, expected) other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) expected = np.array([True] * (len(index) - 1) + [False]) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) for other_closed in {"right", "left", "both", "neither"}: other = self.create_index(closed=other_closed) expected = np.repeat(closed == other_closed, len(index)) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) def test_comparison(self): actual = Interval(0, 1) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = Interval(0.5, 1.5) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > Interval(0.5, 1.5) tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index expected = np.array([True, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index <= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index >= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index < self.index expected = np.array([False, False]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index.values == self.index tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index <= self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index != self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index > self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index.values > self.index tm.assert_numpy_array_equal(actual, np.array([False, False])) # invalid comparisons actual = self.index == 0 tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) msg = "|".join([ "not supported between instances of 'int' and '.*.Interval'", r"Invalid comparison between dtype=interval\[int64, right\] and ", ]) with pytest.raises(TypeError, match=msg): self.index > 0 with pytest.raises(TypeError, match=msg): self.index <= 0 with pytest.raises(TypeError, match=msg): self.index > np.arange(2) msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) def test_missing_values(self, closed): idx = Index([ np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed) ]) idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) assert idx.equals(idx2) msg = ("missing values must be missing in the same location both left " "and right sides") with pytest.raises(ValueError, match=msg): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]), closed=closed) tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) def test_sort_values(self, closed): index = self.create_index(closed=closed) result = index.sort_values() tm.assert_index_equal(result, index) result = index.sort_values(ascending=False) tm.assert_index_equal(result, index[::-1]) # with nan index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) result = index.sort_values() expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) tm.assert_index_equal(result, expected) result = index.sort_values(ascending=False, na_position="first") expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_datetime(self, tz): start = Timestamp("2000-01-01", tz=tz) dates = date_range(start=start, periods=10) index = IntervalIndex.from_breaks(dates) # test mid start = Timestamp("2000-01-01T12:00", tz=tz) expected = date_range(start=start, periods=9) tm.assert_index_equal(index.mid, expected) # __contains__ doesn't check individual points assert Timestamp("2000-01-01", tz=tz) not in index assert Timestamp("2000-01-01T12", tz=tz) not in index assert Timestamp("2000-01-02", tz=tz) not in index iv_true = Interval(Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)) iv_false = Interval(Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)) assert iv_true in index assert iv_false not in index # .contains does check individual points assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() assert index.contains(Timestamp("2000-01-02", tz=tz)).any() # test get_indexer start = Timestamp("1999-12-31T12:00", tz=tz) target = date_range(start=start, periods=7, freq="12H") actual = index.get_indexer(target) expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") tm.assert_numpy_array_equal(actual, expected) start = Timestamp("2000-01-08T18:00", tz=tz) target = date_range(start=start, periods=7, freq="6H") actual = index.get_indexer(target) expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) def test_append(self, closed): index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) result = index1.append(index2) expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) result = index1.append([index1, index2]) expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) for other_closed in {"left", "right", "both", "neither"} - {closed}: index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2], closed=other_closed) result = index1.append(index_other_closed) expected = index1.astype(object).append( index_other_closed.astype(object)) tm.assert_index_equal(result, expected) def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', otherwise True (GH16560) if closed == "both": idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is False else: idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True @pytest.mark.parametrize( "start, shift, na_value", [ (0, 1, np.nan), (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), ], ) def test_is_overlapping(self, start, shift, na_value, closed): # GH 23309 # see test_interval_tree.py for extensive tests; interface tests here # non-overlapping tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # non-overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # overlapping tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # common endpoints tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping expected = closed == "both" assert result is expected # common endpoints with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping assert result is expected @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))), list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )), list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )), ], ) def test_to_tuples(self, tuples): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))) + [np.nan], list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )) + [np.nan], list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )) + [np.nan], ], ) @pytest.mark.parametrize("na_tuple", [True, False]) def test_to_tuples_na(self, tuples, na_tuple): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples(na_tuple=na_tuple) # check the non-NA portion expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) result_notna = result[:-1] tm.assert_index_equal(result_notna, expected_notna) # check the NA portion result_na = result[-1] if na_tuple: assert isinstance(result_na, tuple) assert len(result_na) == 2 assert all(isna(x) for x in result_na) else: assert isna(result_na) def test_nbytes(self): # GH 19209 left = np.arange(0, 4, dtype="i8") right = np.arange(1, 5, dtype="i8") result = IntervalIndex.from_arrays(left, right).nbytes expected = 64 # 4 * 8 * 2 assert result == expected @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) def test_set_closed(self, name, closed, new_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) result = index.set_closed(new_closed) expected = interval_range(0, 5, closed=new_closed, name=name) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = f"invalid option for 'closed': {bad_closed}" with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) def test_is_all_dates(self): # GH 23576 year_2017 = Interval(Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")) year_2017_index = IntervalIndex([year_2017]) assert not year_2017_index._is_all_dates @pytest.mark.parametrize("key", [[5], (2, 3)]) def test_get_value_non_scalar_errors(self, key): # GH 31117 idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)]) s = pd.Series(range(len(idx)), index=idx) msg = str(key) with pytest.raises(InvalidIndexError, match=msg): with tm.assert_produces_warning(FutureWarning): idx.get_value(s, key) @pytest.mark.parametrize("closed", ["left", "right", "both"]) def test_pickle_round_trip_closed(self, closed): # https://github.com/pandas-dev/pandas/issues/35658 idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed) result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx)
class TestInterval: def test_properties(self, interval): assert interval.closed == "right" assert interval.left == 0 assert interval.right == 1 assert interval.mid == 0.5 def test_repr(self, interval): assert repr(interval) == "Interval(0, 1, closed='right')" assert str(interval) == "(0, 1]" interval_left = Interval(0, 1, closed="left") assert repr(interval_left) == "Interval(0, 1, closed='left')" assert str(interval_left) == "[0, 1)" def test_contains(self, interval): assert 0.5 in interval assert 1 in interval assert 0 not in interval msg = "__contains__ not defined for two intervals" with pytest.raises(TypeError, match=msg): interval in interval interval_both = Interval(0, 1, closed="both") assert 0 in interval_both assert 1 in interval_both interval_neither = Interval(0, 1, closed="neither") assert 0 not in interval_neither assert 0.5 in interval_neither assert 1 not in interval_neither def test_equal(self): assert Interval(0, 1) == Interval(0, 1, closed="right") assert Interval(0, 1) != Interval(0, 1, closed="left") assert Interval(0, 1) != 0 def test_comparison(self): msg = ("'<' not supported between instances of " "'pandas._libs.interval.Interval' and 'int'") with pytest.raises(TypeError, match=msg): Interval(0, 1) < 2 assert Interval(0, 1) < Interval(1, 2) assert Interval(0, 1) < Interval(0, 2) assert Interval(0, 1) < Interval(0.5, 1.5) assert Interval(0, 1) <= Interval(0, 1) assert Interval(0, 1) > Interval(-1, 2) assert Interval(0, 1) >= Interval(0, 1) def test_hash(self, interval): # should not raise hash(interval) @pytest.mark.parametrize( "left, right, expected", [ (0, 5, 5), (-2, 5.5, 7.5), (10, 10, 0), (10, np.inf, np.inf), (-np.inf, -5, np.inf), (-np.inf, np.inf, np.inf), (Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")), (Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")), (Timedelta("1H10M"), Timedelta("5H5M"), Timedelta("3H55M")), (Timedelta("5S"), Timedelta("1H"), Timedelta("59M55S")), ], ) def test_length(self, left, right, expected): # GH 18789 iv = Interval(left, right) result = iv.length assert result == expected @pytest.mark.parametrize( "left, right, expected", [ ("2017-01-01", "2017-01-06", "5 days"), ("2017-01-01", "2017-01-01 12:00:00", "12 hours"), ("2017-01-01 12:00", "2017-01-01 12:00:00", "0 days"), ("2017-01-01 12:01", "2017-01-05 17:31:00", "4 days 5 hours 30 min"), ], ) @pytest.mark.parametrize("tz", (None, "UTC", "CET", "US/Eastern")) def test_length_timestamp(self, tz, left, right, expected): # GH 18789 iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz)) result = iv.length expected = Timedelta(expected) assert result == expected @pytest.mark.parametrize( "left, right", [ (0, 1), (Timedelta("0 days"), Timedelta("1 day")), (Timestamp("2018-01-01"), Timestamp("2018-01-02")), ( Timestamp("2018-01-01", tz="US/Eastern"), Timestamp("2018-01-02", tz="US/Eastern"), ), ], ) def test_is_empty(self, left, right, closed): # GH27219 # non-empty always return False iv = Interval(left, right, closed) assert iv.is_empty is False # same endpoint is empty except when closed='both' (contains one point) iv = Interval(left, left, closed) result = iv.is_empty expected = closed != "both" assert result is expected @pytest.mark.parametrize( "left, right", [ ("a", "z"), (("a", "b"), ("c", "d")), (list("AB"), list("ab")), (Interval(0, 1), Interval(1, 2)), (Period("2018Q1", freq="Q"), Period("2018Q1", freq="Q")), ], ) def test_construct_errors(self, left, right): # GH 23013 msg = "Only numeric, Timestamp and Timedelta endpoints are allowed" with pytest.raises(ValueError, match=msg): Interval(left, right) def test_math_add(self, closed): interval = Interval(0, 1, closed=closed) expected = Interval(1, 2, closed=closed) result = interval + 1 assert result == expected result = 1 + interval assert result == expected result = interval result += 1 assert result == expected msg = r"unsupported operand type\(s\) for \+" with pytest.raises(TypeError, match=msg): interval + interval with pytest.raises(TypeError, match=msg): interval + "foo" def test_math_sub(self, closed): interval = Interval(0, 1, closed=closed) expected = Interval(-1, 0, closed=closed) result = interval - 1 assert result == expected result = interval result -= 1 assert result == expected msg = r"unsupported operand type\(s\) for -" with pytest.raises(TypeError, match=msg): interval - interval with pytest.raises(TypeError, match=msg): interval - "foo" def test_math_mult(self, closed): interval = Interval(0, 1, closed=closed) expected = Interval(0, 2, closed=closed) result = interval * 2 assert result == expected result = 2 * interval assert result == expected result = interval result *= 2 assert result == expected msg = r"unsupported operand type\(s\) for \*" with pytest.raises(TypeError, match=msg): interval * interval msg = r"can\'t multiply sequence by non-int" with pytest.raises(TypeError, match=msg): interval * "foo" def test_math_div(self, closed): interval = Interval(0, 1, closed=closed) expected = Interval(0, 0.5, closed=closed) result = interval / 2.0 assert result == expected result = interval result /= 2.0 assert result == expected msg = r"unsupported operand type\(s\) for /" with pytest.raises(TypeError, match=msg): interval / interval with pytest.raises(TypeError, match=msg): interval / "foo" def test_math_floordiv(self, closed): interval = Interval(1, 2, closed=closed) expected = Interval(0, 1, closed=closed) result = interval // 2 assert result == expected result = interval result //= 2 assert result == expected msg = r"unsupported operand type\(s\) for //" with pytest.raises(TypeError, match=msg): interval // interval with pytest.raises(TypeError, match=msg): interval // "foo" def test_constructor_errors(self): msg = "invalid option for 'closed': foo" with pytest.raises(ValueError, match=msg): Interval(0, 1, closed="foo") msg = "left side of interval must be <= right side" with pytest.raises(ValueError, match=msg): Interval(1, 0) @pytest.mark.parametrize("tz_left, tz_right", [(None, "UTC"), ("UTC", None), ("UTC", "US/Eastern")]) def test_constructor_errors_tz(self, tz_left, tz_right): # GH 18538 left = Timestamp("2017-01-01", tz=tz_left) right = Timestamp("2017-01-02", tz=tz_right) if com.any_none(tz_left, tz_right): error = TypeError msg = "Cannot compare tz-naive and tz-aware timestamps" else: error = ValueError msg = "left and right must have the same time zone" with pytest.raises(error, match=msg): Interval(left, right) def test_equality_comparison_broadcasts_over_array(self): # https://github.com/pandas-dev/pandas/issues/35931 interval = Interval(0, 1) arr = np.array([interval, interval]) result = interval == arr expected = np.array([True, True]) tm.assert_numpy_array_equal(result, expected)
def make_data(): N = 100 left = np.random.uniform(size=N).cumsum() right = left + np.random.uniform(size=N) return [Interval(l, r) for l, r in zip(left, right)]
def interval(): return Interval(0, 1)
def test_length(self, left, right, expected): # GH 18789 iv = Interval(left, right) result = iv.length assert result == expected
def test_comparison(self): msg = ("'<' not supported between instances of " "'pandas._libs.interval.Interval' and 'int'") with pytest.raises(TypeError, match=msg): Interval(0, 1) < 2 assert Interval(0, 1) < Interval(1, 2) assert Interval(0, 1) < Interval(0, 2) assert Interval(0, 1) < Interval(0.5, 1.5) assert Interval(0, 1) <= Interval(0, 1) assert Interval(0, 1) > Interval(-1, 2) assert Interval(0, 1) >= Interval(0, 1)
def test_equal(self): assert Interval(0, 1) == Interval(0, 1, closed="right") assert Interval(0, 1) != Interval(0, 1, closed="left") assert Interval(0, 1) != 0
def test_qcut_index(self): result = qcut([0, 2], 2) intervals = [Interval(-0.001, 1), Interval(1, 2)] expected = Categorical(intervals, ordered=True) tm.assert_categorical_equal(result, expected)
def test_is_all_dates(self): # GH 23576 year_2017 = Interval(Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")) year_2017_index = IntervalIndex([year_2017]) assert not year_2017_index._is_all_dates
def test_value_counts_bins(self): klasses = [Index, Series] for klass in klasses: s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a'] s = klass(s_values) # bins pytest.raises(TypeError, lambda bins: s.value_counts(bins=bins), 1) s1 = Series([1, 1, 2, 3]) res1 = s1.value_counts(bins=1) exp1 = Series({Interval(0.997, 3.0): 4}) tm.assert_series_equal(res1, exp1) res1n = s1.value_counts(bins=1, normalize=True) exp1n = Series({Interval(0.997, 3.0): 1.0}) tm.assert_series_equal(res1n, exp1n) if isinstance(s1, Index): tm.assert_index_equal(s1.unique(), Index([1, 2, 3])) else: exp = np.array([1, 2, 3], dtype=np.int64) tm.assert_numpy_array_equal(s1.unique(), exp) assert s1.nunique() == 3 # these return the same res4 = s1.value_counts(bins=4, dropna=True) intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) tm.assert_series_equal(res4, exp4) res4 = s1.value_counts(bins=4, dropna=False) intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) tm.assert_series_equal(res4, exp4) res4n = s1.value_counts(bins=4, normalize=True) exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 3, 1, 2])) tm.assert_series_equal(res4n, exp4n) # handle NA's properly s_values = [ 'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b' ] s = klass(s_values) expected = Series([4, 3, 2], index=['b', 'a', 'd']) tm.assert_series_equal(s.value_counts(), expected) if isinstance(s, Index): exp = Index(['a', 'b', np.nan, 'd']) tm.assert_index_equal(s.unique(), exp) else: exp = np.array(['a', 'b', np.nan, 'd'], dtype=object) tm.assert_numpy_array_equal(s.unique(), exp) assert s.nunique() == 3 s = klass({}) expected = Series([], dtype=np.int64) tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) # returned dtype differs depending on original if isinstance(s, Index): tm.assert_index_equal(s.unique(), Index([]), exact=False) else: tm.assert_numpy_array_equal(s.unique(), np.array([]), check_dtype=False) assert s.nunique() == 0
def test_searchsorted_invalid_argument(arg): values = IntervalIndex([Interval(0, 1), Interval(1, 2)]) msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and " with pytest.raises(TypeError, match=msg): values.searchsorted(arg)
def test_intervals(self): # 19967 for i in [Interval(0, 1), Interval(0, 1, "left"), Interval(10, 25.0, "right")]: i_rec = self.encode_decode(i) assert i == i_rec
def decode(obj): """ Decoder for deserializing numpy data types. """ typ = obj.get('typ') if typ is None: return obj elif typ == 'timestamp': freq = obj['freq'] if 'freq' in obj else obj['offset'] return Timestamp(obj['value'], tz=obj['tz'], freq=freq) elif typ == 'nat': return NaT elif typ == 'period': return Period(ordinal=obj['ordinal'], freq=obj['freq']) elif typ == 'index': dtype = dtype_for(obj['dtype']) data = unconvert(obj['data'], dtype, obj.get('compress')) return Index(data, dtype=dtype, name=obj['name']) elif typ == 'range_index': return RangeIndex(obj['start'], obj['stop'], obj['step'], name=obj['name']) elif typ == 'multi_index': dtype = dtype_for(obj['dtype']) data = unconvert(obj['data'], dtype, obj.get('compress')) data = [tuple(x) for x in data] return MultiIndex.from_tuples(data, names=obj['names']) elif typ == 'period_index': data = unconvert(obj['data'], np.int64, obj.get('compress')) d = dict(name=obj['name'], freq=obj['freq']) freq = d.pop('freq', None) return PeriodIndex(PeriodArray(data, freq), **d) elif typ == 'datetime_index': data = unconvert(obj['data'], np.int64, obj.get('compress')) d = dict(name=obj['name'], freq=obj['freq']) result = DatetimeIndex(data, **d) tz = obj['tz'] # reverse tz conversion if tz is not None: result = result.tz_localize('UTC').tz_convert(tz) return result elif typ in ('interval_index', 'interval_array'): return globals()[obj['klass']].from_arrays(obj['left'], obj['right'], obj['closed'], name=obj['name']) elif typ == 'category': from_codes = globals()[obj['klass']].from_codes return from_codes(codes=obj['codes'], categories=obj['categories'], ordered=obj['ordered']) elif typ == 'interval': return Interval(obj['left'], obj['right'], obj['closed']) elif typ == 'series': dtype = dtype_for(obj['dtype']) pd_dtype = pandas_dtype(dtype) index = obj['index'] result = Series(unconvert(obj['data'], dtype, obj['compress']), index=index, dtype=pd_dtype, name=obj['name']) return result elif typ == 'block_manager': axes = obj['axes'] def create_block(b): values = _safe_reshape( unconvert(b['values'], dtype_for(b['dtype']), b['compress']), b['shape']) # locs handles duplicate column names, and should be used instead # of items; see GH 9618 if 'locs' in b: placement = b['locs'] else: placement = axes[0].get_indexer(b['items']) if is_datetime64tz_dtype(b['dtype']): assert isinstance(values, np.ndarray), type(values) assert values.dtype == 'M8[ns]', values.dtype values = DatetimeArray(values, dtype=b['dtype']) return make_block(values=values, klass=getattr(internals, b['klass']), placement=placement, dtype=b['dtype']) blocks = [create_block(b) for b in obj['blocks']] return globals()[obj['klass']](BlockManager(blocks, axes)) elif typ == 'datetime': return parse(obj['data']) elif typ == 'datetime64': return np.datetime64(parse(obj['data'])) elif typ == 'date': return parse(obj['data']).date() elif typ == 'timedelta': return timedelta(*obj['data']) elif typ == 'timedelta64': return np.timedelta64(int(obj['data'])) # elif typ == 'sparse_series': # dtype = dtype_for(obj['dtype']) # return SparseSeries( # unconvert(obj['sp_values'], dtype, obj['compress']), # sparse_index=obj['sp_index'], index=obj['index'], # fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name']) # elif typ == 'sparse_dataframe': # return SparseDataFrame( # obj['data'], columns=obj['columns'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind'] # ) # elif typ == 'sparse_panel': # return SparsePanel( # obj['data'], items=obj['items'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind']) elif typ == 'block_index': return globals()[obj['klass']](obj['length'], obj['blocs'], obj['blengths']) elif typ == 'int_index': return globals()[obj['klass']](obj['length'], obj['indices']) elif typ == 'ndarray': return unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress')).reshape(obj['shape']) elif typ == 'np_scalar': if obj.get('sub_typ') == 'np_complex': return c2f(obj['real'], obj['imag'], obj['dtype']) else: dtype = dtype_for(obj['dtype']) try: return dtype(obj['data']) except (ValueError, TypeError): return dtype.type(obj['data']) elif typ == 'np_complex': return complex(obj['real'] + '+' + obj['imag'] + 'j') elif isinstance(obj, (dict, list, set)): return obj else: return obj
def test_get_indexer_length_one_interval(self, size, closed): # GH 17284 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_indexer([Interval(0, 5, closed)] * size) expected = np.array([0] * size, dtype="intp") tm.assert_numpy_array_equal(result, expected)
datetime(2000, 1, 3), datetime(2000, 1, 3), datetime(2000, 1, 4), datetime(2000, 1, 4), datetime(2000, 1, 4), datetime(2000, 1, 5), ] return Series(np.random.randn(len(dates)), index=dates) # ---------------------------------------------------------------- # Scalars # ---------------------------------------------------------------- @pytest.fixture(params=[ (Interval(left=0, right=5), IntervalDtype("int64", "right")), (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")), (Period("2012-01", freq="M"), "period[M]"), (Period("2012-02-01", freq="D"), "period[D]"), ( Timestamp("2011-01-01", tz="US/Eastern"), DatetimeTZDtype(tz="US/Eastern"), ), (Timedelta(seconds=500), "timedelta64[ns]"), ]) def ea_scalar_and_dtype(request): return request.param # ---------------------------------------------------------------- # Operators & Operations
class TestClassConstructors(Base): """Tests specific to the IntervalIndex/Index constructors""" @pytest.fixture(params=[IntervalIndex, partial(Index, dtype='interval')], ids=['IntervalIndex', 'Index']) def constructor(self, request): return request.param def get_kwargs_from_breaks(self, breaks, closed='right'): """ converts intervals in breaks format to a dictionary of kwargs to specific to the format expected by the IntervalIndex/Index constructors """ if len(breaks) == 0: return {'data': breaks} ivs = [ Interval(l, r, closed) if notna(l) else l for l, r in zip(breaks[:-1], breaks[1:]) ] if isinstance(breaks, list): return {'data': ivs} elif is_categorical_dtype(breaks): return {'data': breaks._constructor(ivs)} return {'data': np.array(ivs, dtype=object)} def test_generic_errors(self, constructor): """ override the base class implementation since errors are handled differently; checks unnecessary since caught at the Interval level """ pass def test_constructor_errors(self, constructor): # mismatched closed within intervals with no constructor override ivs = [Interval(0, 1, closed='right'), Interval(2, 3, closed='left')] msg = 'intervals must all be closed on the same side' with tm.assert_raises_regex(ValueError, msg): constructor(ivs) # scalar msg = (r'IntervalIndex\(...\) must be called with a collection of ' 'some kind, 5 was passed') with tm.assert_raises_regex(TypeError, msg): constructor(5) # not an interval msg = ("type <(class|type) 'numpy.int64'> with value 0 " "is not an interval") with tm.assert_raises_regex(TypeError, msg): constructor([0, 1]) @pytest.mark.parametrize( 'data, closed', [([], 'both'), ([np.nan, np.nan], 'neither'), ([Interval(0, 3, closed='neither'), Interval(2, 5, closed='neither')], 'left'), ([Interval(0, 3, closed='left'), Interval(2, 5, closed='right')], 'neither'), (IntervalIndex.from_breaks(range(5), closed='both'), 'right')]) def test_override_inferred_closed(self, constructor, data, closed): # GH 19370 if isinstance(data, IntervalIndex): tuples = data.to_tuples() else: tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data] expected = IntervalIndex.from_tuples(tuples, closed=closed) result = constructor(data, closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize('values_constructor', [list, np.array, IntervalIndex, IntervalArray]) def test_index_object_dtype(self, values_constructor): # Index(intervals, dtype=object) is an Index (not an IntervalIndex) intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)] values = values_constructor(intervals) result = Index(values, dtype=object) assert type(result) is Index tm.assert_numpy_array_equal(result.values, np.array(values))
def test_slice_interval_step(self, series_with_interval_index): # GH#31658 allows for integer step!=1, not Interval step ser = series_with_interval_index.copy() msg = "label-based slicing with step!=1 is not supported for IntervalIndex" with pytest.raises(ValueError, match=msg): ser[0 : 4 : Interval(0, 1)]
class TestDataFrameSetItem: def test_setitem_str_subclass(self): # GH#37366 class mystring(str): pass data = ["2020-10-22 01:21:00+00:00"] index = DatetimeIndex(data) df = DataFrame({"a": [1]}, index=index) df["b"] = 2 df[mystring("c")] = 3 expected = DataFrame({ "a": [1], "b": [2], mystring("c"): [3] }, index=index) tm.assert_equal(df, expected) @pytest.mark.parametrize( "dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"]) def test_setitem_dtype(self, dtype, float_frame): arr = np.random.randn(len(float_frame)) float_frame[dtype] = np.array(arr, dtype=dtype) assert float_frame[dtype].dtype.name == dtype def test_setitem_list_not_dataframe(self, float_frame): data = np.random.randn(len(float_frame), 2) float_frame[["A", "B"]] = data tm.assert_almost_equal(float_frame[["A", "B"]].values, data) def test_setitem_error_msmgs(self): # GH 7432 df = DataFrame( { "bar": [1, 2, 3], "baz": ["d", "e", "f"] }, index=Index(["a", "b", "c"], name="foo"), ) ser = Series( ["g", "h", "i", "j"], index=Index(["a", "b", "c", "a"], name="foo"), name="fiz", ) msg = "cannot reindex on an axis with duplicate labels" with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(FutureWarning, match="non-unique"): df["newcol"] = ser # GH 4107, more descriptive error message df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"]) msg = "incompatible index of inserted column with frame index" with pytest.raises(TypeError, match=msg): df["gr"] = df.groupby(["b", "c"]).count() def test_setitem_benchmark(self): # from the vb_suite/frame_methods/frame_insert_columns N = 10 K = 5 df = DataFrame(index=range(N)) new_col = np.random.randn(N) for i in range(K): df[i] = new_col expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N)) tm.assert_frame_equal(df, expected) def test_setitem_different_dtype(self): df = DataFrame(np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]) df.insert(0, "foo", df["a"]) df.insert(2, "bar", df["c"]) # diff dtype # new item df["x"] = df["a"].astype("float32") result = df.dtypes expected = Series( [np.dtype("float64")] * 5 + [np.dtype("float32")], index=["foo", "c", "bar", "b", "a", "x"], ) tm.assert_series_equal(result, expected) # replacing current (in different block) df["a"] = df["a"].astype("float32") result = df.dtypes expected = Series( [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2, index=["foo", "c", "bar", "b", "a", "x"], ) tm.assert_series_equal(result, expected) df["y"] = df["a"].astype("int32") result = df.dtypes expected = Series( [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 + [np.dtype("int32")], index=["foo", "c", "bar", "b", "a", "x", "y"], ) tm.assert_series_equal(result, expected) def test_setitem_empty_columns(self): # GH 13522 df = DataFrame(index=["A", "B", "C"]) df["X"] = df.index df["X"] = ["x", "y", "z"] exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"]) tm.assert_frame_equal(df, exp) def test_setitem_dt64_index_empty_columns(self): rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") df = DataFrame(index=np.arange(len(rng))) df["A"] = rng assert df["A"].dtype == np.dtype("M8[ns]") def test_setitem_timestamp_empty_columns(self): # GH#19843 df = DataFrame(index=range(3)) df["now"] = Timestamp("20130101", tz="UTC") expected = DataFrame([[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"]) tm.assert_frame_equal(df, expected) def test_setitem_wrong_length_categorical_dtype_raises(self): # GH#29523 cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"]) df = DataFrame(range(10), columns=["bar"]) msg = (rf"Length of values \({len(cat)}\) " rf"does not match length of index \({len(df)}\)") with pytest.raises(ValueError, match=msg): df["foo"] = cat def test_setitem_with_sparse_value(self): # GH#8131 df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) sp_array = SparseArray([0, 0, 1]) df["new_column"] = sp_array expected = Series(sp_array, name="new_column") tm.assert_series_equal(df["new_column"], expected) def test_setitem_with_unaligned_sparse_value(self): df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0]) df["new_column"] = sp_series expected = Series(SparseArray([1, 0, 0]), name="new_column") tm.assert_series_equal(df["new_column"], expected) def test_setitem_period_preserves_dtype(self): # GH: 26861 data = [Period("2003-12", "D")] result = DataFrame([]) result["a"] = data expected = DataFrame({"a": data}) tm.assert_frame_equal(result, expected) def test_setitem_dict_preserves_dtypes(self): # https://github.com/pandas-dev/pandas/issues/34573 expected = DataFrame({ "a": Series([0, 1, 2], dtype="int64"), "b": Series([1, 2, 3], dtype=float), "c": Series([1, 2, 3], dtype=float), "d": Series([1, 2, 3], dtype="uint32"), }) df = DataFrame({ "a": Series([], dtype="int64"), "b": Series([], dtype=float), "c": Series([], dtype=float), "d": Series([], dtype="uint32"), }) for idx, b in enumerate([1, 2, 3]): df.loc[df.shape[0]] = { "a": int(idx), "b": float(b), "c": float(b), "d": np.uint32(b), } tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( "obj,dtype", [ (Period("2020-01"), PeriodDtype("M")), ( Interval(left=0, right=5, inclusive="right"), IntervalDtype("int64", "right"), ), ( Timestamp("2011-01-01", tz="US/Eastern"), DatetimeTZDtype(tz="US/Eastern"), ), ], ) def test_setitem_extension_types(self, obj, dtype): # GH: 34832 expected = DataFrame({ "idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype) }) df = DataFrame({"idx": [1, 2, 3]}) df["obj"] = obj tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( "ea_name", [ dtype.name for dtype in ea_registry.dtypes # property would require instantiation if not isinstance(dtype.name, property) ] # mypy doesn't allow adding lists of different types # https://github.com/python/mypy/issues/5492 + ["datetime64[ns, UTC]", "period[D]"], # type: ignore[list-item] ) def test_setitem_with_ea_name(self, ea_name): # GH 38386 result = DataFrame([0]) result[ea_name] = [1] expected = DataFrame({0: [0], ea_name: [1]}) tm.assert_frame_equal(result, expected) def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self): # GH#7492 data_ns = np.array([1, "nat"], dtype="datetime64[ns]") result = Series(data_ns).to_frame() result["new"] = data_ns expected = DataFrame({ 0: [1, None], "new": [1, None] }, dtype="datetime64[ns]") tm.assert_frame_equal(result, expected) # OutOfBoundsDatetime error shouldn't occur data_s = np.array([1, "nat"], dtype="datetime64[s]") result["new"] = data_s expected = DataFrame({ 0: [1, None], "new": [1e9, None] }, dtype="datetime64[ns]") tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) def test_frame_setitem_datetime64_col_other_units(self, unit): # Check that non-nano dt64 values get cast to dt64 on setitem # into a not-yet-existing column n = 100 dtype = np.dtype(f"M8[{unit}]") vals = np.arange(n, dtype=np.int64).view(dtype) ex_vals = vals.astype("datetime64[ns]") df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) df[unit] = vals assert df[unit].dtype == np.dtype("M8[ns]") assert (df[unit].values == ex_vals).all() @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) def test_frame_setitem_existing_datetime64_col_other_units(self, unit): # Check that non-nano dt64 values get cast to dt64 on setitem # into an already-existing dt64 column n = 100 dtype = np.dtype(f"M8[{unit}]") vals = np.arange(n, dtype=np.int64).view(dtype) ex_vals = vals.astype("datetime64[ns]") df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]") # We overwrite existing dt64 column with new, non-nano dt64 vals df["dates"] = vals assert (df["dates"].values == ex_vals).all() def test_setitem_dt64tz(self, timezone_frame): df = timezone_frame idx = df["B"].rename("foo") # setitem df["C"] = idx tm.assert_series_equal(df["C"], Series(idx, name="C")) df["D"] = "foo" df["D"] = idx tm.assert_series_equal(df["D"], Series(idx, name="D")) del df["D"] # assert that A & C are not sharing the same base (e.g. they # are copies) v1 = df._mgr.arrays[1] v2 = df._mgr.arrays[2] tm.assert_extension_array_equal(v1, v2) v1base = v1._data.base v2base = v2._data.base assert v1base is None or (id(v1base) != id(v2base)) # with nan df2 = df.copy() df2.iloc[1, 1] = NaT df2.iloc[1, 2] = NaT result = df2["B"] tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) tm.assert_series_equal(df2.dtypes, df.dtypes) def test_setitem_periodindex(self): rng = period_range("1/1/2000", periods=5, name="index") df = DataFrame(np.random.randn(5, 3), index=rng) df["Index"] = rng rs = Index(df["Index"]) tm.assert_index_equal(rs, rng, check_names=False) assert rs.name == "Index" assert rng.name == "index" rs = df.reset_index().set_index("index") assert isinstance(rs.index, PeriodIndex) tm.assert_index_equal(rs.index, rng) def test_setitem_complete_column_with_array(self): # GH#37954 df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]}) arr = np.array([[1, 1], [3, 1], [5, 1]]) df[["c", "d"]] = arr expected = DataFrame({ "a": ["one", "two", "three"], "b": [1, 2, 3], "c": [1, 3, 5], "d": [1, 1, 1], }) expected["c"] = expected["c"].astype(arr.dtype) expected["d"] = expected["d"].astype(arr.dtype) assert expected["c"].dtype == arr.dtype assert expected["d"].dtype == arr.dtype tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) def test_setitem_bool_with_numeric_index(self, dtype): # GH#36319 cols = Index([1, 2, 3], dtype=dtype) df = DataFrame(np.random.randn(3, 3), columns=cols) df[False] = ["a", "b", "c"] expected_cols = Index([1, 2, 3, False], dtype=object) if dtype == "f8": expected_cols = Index([1.0, 2.0, 3.0, False], dtype=object) tm.assert_index_equal(df.columns, expected_cols) @pytest.mark.parametrize("indexer", ["B", ["B"]]) def test_setitem_frame_length_0_str_key(self, indexer): # GH#38831 df = DataFrame(columns=["A", "B"]) other = DataFrame({"B": [1, 2]}) df[indexer] = other expected = DataFrame({"A": [np.nan] * 2, "B": [1, 2]}) expected["A"] = expected["A"].astype("object") tm.assert_frame_equal(df, expected) def test_setitem_frame_duplicate_columns(self, using_array_manager): # GH#15695 warn = FutureWarning if using_array_manager else None msg = "will attempt to set the values inplace" cols = ["A", "B", "C"] * 2 df = DataFrame(index=range(3), columns=cols) df.loc[0, "A"] = (0, 3) with tm.assert_produces_warning(warn, match=msg): df.loc[:, "B"] = (1, 4) df["C"] = (2, 5) expected = DataFrame( [ [0, 1, 2, 3, 4, 5], [np.nan, 1, 2, np.nan, 4, 5], [np.nan, 1, 2, np.nan, 4, 5], ], dtype="object", ) if using_array_manager: # setitem replaces column so changes dtype expected.columns = cols expected["C"] = expected["C"].astype("int64") # TODO(ArrayManager) .loc still overwrites expected["B"] = expected["B"].astype("int64") else: # set these with unique columns to be extra-unambiguous expected[2] = expected[2].astype(np.int64) expected[5] = expected[5].astype(np.int64) expected.columns = cols tm.assert_frame_equal(df, expected) def test_setitem_frame_duplicate_columns_size_mismatch(self): # GH#39510 cols = ["A", "B", "C"] * 2 df = DataFrame(index=range(3), columns=cols) with pytest.raises(ValueError, match="Columns must be same length as key"): df[["A"]] = (0, 3, 5) df2 = df.iloc[:, :3] # unique columns with pytest.raises(ValueError, match="Columns must be same length as key"): df2[["A"]] = (0, 3, 5) @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]]) def test_setitem_df_wrong_column_number(self, cols): # GH#38604 df = DataFrame([[1, 2, 3]], columns=cols) rhs = DataFrame([[10, 11]], columns=["d", "e"]) msg = "Columns must be same length as key" with pytest.raises(ValueError, match=msg): df["a"] = rhs def test_setitem_listlike_indexer_duplicate_columns(self): # GH#38604 df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"]) rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"]) df[["a", "b"]] = rhs expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"]) tm.assert_frame_equal(df, expected) df[["c", "b"]] = rhs expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"]) tm.assert_frame_equal(df, expected) def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self): # GH#39403 df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"]) rhs = DataFrame([[10, 11]], columns=["a", "b"]) msg = "Columns must be same length as key" with pytest.raises(ValueError, match=msg): df[["a", "b"]] = rhs def test_setitem_intervals(self): df = DataFrame({"A": range(10)}) ser = cut(df["A"], 5) assert isinstance(ser.cat.categories, IntervalIndex) # B & D end up as Categoricals # the remainder are converted to in-line objects # containing an IntervalIndex.values df["B"] = ser df["C"] = np.array(ser) df["D"] = ser.values df["E"] = np.array(ser.values) df["F"] = ser.astype(object) assert is_categorical_dtype(df["B"].dtype) assert is_interval_dtype(df["B"].cat.categories) assert is_categorical_dtype(df["D"].dtype) assert is_interval_dtype(df["D"].cat.categories) # These go through the Series constructor and so get inferred back # to IntervalDtype assert is_interval_dtype(df["C"]) assert is_interval_dtype(df["E"]) # But the Series constructor doesn't do inference on Series objects, # so setting df["F"] doesn't get cast back to IntervalDtype assert is_object_dtype(df["F"]) # they compare equal as Index # when converted to numpy objects c = lambda x: Index(np.array(x)) tm.assert_index_equal(c(df.B), c(df.B)) tm.assert_index_equal(c(df.B), c(df.C), check_names=False) tm.assert_index_equal(c(df.B), c(df.D), check_names=False) tm.assert_index_equal(c(df.C), c(df.D), check_names=False) # B & D are the same Series tm.assert_series_equal(df["B"], df["B"]) tm.assert_series_equal(df["B"], df["D"], check_names=False) # C & E are the same Series tm.assert_series_equal(df["C"], df["C"]) tm.assert_series_equal(df["C"], df["E"], check_names=False) def test_setitem_categorical(self): # GH#35369 df = DataFrame({"h": Series(list("mn")).astype("category")}) df.h = df.h.cat.reorder_categories(["n", "m"]) expected = DataFrame( {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])}) tm.assert_frame_equal(df, expected) def test_setitem_with_empty_listlike(self): # GH#17101 index = Index([], name="idx") result = DataFrame(columns=["A"], index=index) result["A"] = [] expected = DataFrame(columns=["A"], index=index) tm.assert_index_equal(result.index, expected.index) @pytest.mark.parametrize( "cols, values, expected", [ (["C", "D", "D", "a"], [1, 2, 3, 4], 4), # with duplicates (["D", "C", "D", "a"], [1, 2, 3, 4], 4), # mixed order (["C", "B", "B", "a"], [1, 2, 3, 4], 4), # other duplicate cols (["C", "B", "a"], [1, 2, 3], 3), # no duplicates (["B", "C", "a"], [3, 2, 1], 1), # alphabetical order (["C", "a", "B"], [3, 2, 1], 2), # in the middle ], ) def test_setitem_same_column(self, cols, values, expected): # GH#23239 df = DataFrame([values], columns=cols) df["a"] = df["a"] result = df["a"].values[0] assert result == expected def test_setitem_multi_index(self): # GH#7655, test that assigning to a sub-frame of a frame # with multi-index columns aligns both rows and columns it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"] cols = MultiIndex.from_product(it) index = date_range("20141006", periods=20) vals = np.random.randint(1, 1000, (len(index), len(cols))) df = DataFrame(vals, columns=cols, index=index) i, j = df.index.values.copy(), it[-1][:] np.random.shuffle(i) df["jim"] = df["jolie"].loc[i, ::-1] tm.assert_frame_equal(df["jim"], df["jolie"]) np.random.shuffle(j) df[("joe", "first")] = df[("jolie", "last")].loc[i, j] tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")]) np.random.shuffle(j) df[("joe", "last")] = df[("jolie", "first")].loc[i, j] tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) @pytest.mark.parametrize( "columns,box,expected", [ ( ["A", "B", "C", "D"], 7, DataFrame( [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]], columns=["A", "B", "C", "D"], ), ), ( ["C", "D"], [7, 8], DataFrame( [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]], columns=["A", "B", "C", "D"], ), ), ( ["A", "B", "C"], np.array([7, 8, 9], dtype=np.int64), DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]), ), ( ["B", "C", "D"], [[7, 8, 9], [10, 11, 12], [13, 14, 15]], DataFrame( [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]], columns=["A", "B", "C", "D"], ), ), ( ["C", "A", "D"], np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64), DataFrame( [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]], columns=["A", "B", "C", "D"], ), ), ( ["A", "C"], DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]), DataFrame([[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]), ), ], ) def test_setitem_list_missing_columns(self, columns, box, expected): # GH#29334 df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"]) df[columns] = box tm.assert_frame_equal(df, expected) def test_setitem_list_of_tuples(self, float_frame): tuples = list(zip(float_frame["A"], float_frame["B"])) float_frame["tuples"] = tuples result = float_frame["tuples"] expected = Series(tuples, index=float_frame.index, name="tuples") tm.assert_series_equal(result, expected) def test_setitem_iloc_generator(self): # GH#39614 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) indexer = (x for x in [1, 2]) df.iloc[indexer] = 1 expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]}) tm.assert_frame_equal(df, expected) def test_setitem_iloc_two_dimensional_generator(self): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) indexer = (x for x in [1, 2]) df.iloc[indexer, 1] = 1 expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]}) tm.assert_frame_equal(df, expected) def test_setitem_dtypes_bytes_type_to_object(self): # GH 20734 index = Series(name="id", dtype="S24") df = DataFrame(index=index) df["a"] = Series(name="a", index=index, dtype=np.uint32) df["b"] = Series(name="b", index=index, dtype="S64") df["c"] = Series(name="c", index=index, dtype="S64") df["d"] = Series(name="d", index=index, dtype=np.uint8) result = df.dtypes expected = Series([np.uint32, object, object, np.uint8], index=list("abcd")) tm.assert_series_equal(result, expected) def test_boolean_mask_nullable_int64(self): # GH 28928 result = DataFrame({ "a": [3, 4], "b": [5, 6] }).astype({ "a": "int64", "b": "Int64" }) mask = Series(False, index=result.index) result.loc[mask, "a"] = result["a"] result.loc[mask, "b"] = result["b"] expected = DataFrame({ "a": [3, 4], "b": [5, 6] }).astype({ "a": "int64", "b": "Int64" }) tm.assert_frame_equal(result, expected) def test_setitem_ea_dtype_rhs_series(self): # GH#47425 df = DataFrame({"a": [1, 2]}) df["a"] = Series([1, 2], dtype="Int64") expected = DataFrame({"a": [1, 2]}, dtype="Int64") tm.assert_frame_equal(df, expected) # TODO(ArrayManager) set column with 2d column array, see #44788 @td.skip_array_manager_not_yet_implemented def test_setitem_npmatrix_2d(self): # GH#42376 # for use-case df["x"] = sparse.random(10, 10).mean(axis=1) expected = DataFrame( { "np-array": np.ones(10), "np-matrix": np.ones(10) }, index=np.arange(10)) a = np.ones((10, 1)) df = DataFrame(index=np.arange(10)) df["np-array"] = a # Instantiation of `np.matrix` gives PendingDeprecationWarning with tm.assert_produces_warning(PendingDeprecationWarning): df["np-matrix"] = np.matrix(a) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("vals", [{}, {"d": "a"}]) def test_setitem_aligning_dict_with_index(self, vals): # GH#47216 df = DataFrame({"a": [1, 2], "b": [3, 4], **vals}) df.loc[:, "a"] = {1: 100, 0: 200} df.loc[:, "c"] = {0: 5, 1: 6} df.loc[:, "e"] = {1: 5} expected = DataFrame({ "a": [200, 100], "b": [3, 4], **vals, "c": [5, 6], "e": [np.nan, 5] }) tm.assert_frame_equal(df, expected) def test_setitem_rhs_dataframe(self): # GH#47578 df = DataFrame({"a": [1, 2]}) df["a"] = DataFrame({"a": [10, 11]}, index=[1, 2]) expected = DataFrame({"a": [np.nan, 10]}) tm.assert_frame_equal(df, expected) df = DataFrame({"a": [1, 2]}) df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2])) tm.assert_frame_equal(df, expected)
class TestIntervalIndex(object): def _compare_tuple_of_numpy_array(self, result, expected): lidx, ridx = result lidx_expected, ridx_expected = expected tm.assert_numpy_array_equal(lidx, lidx_expected) tm.assert_numpy_array_equal(ridx, ridx_expected) @pytest.mark.parametrize("side", ['right', 'left', 'both', 'neither']) def test_get_loc_interval(self, closed, side): idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]: # if get_loc is supplied an interval, it should only search # for exact matches, not overlaps or covers, else KeyError. if closed == side: if bound == [0, 1]: assert idx.get_loc(Interval(0, 1, closed=side)) == 0 elif bound == [2, 3]: assert idx.get_loc(Interval(2, 3, closed=side)) == 1 else: with pytest.raises(KeyError): idx.get_loc(Interval(*bound, closed=side)) else: with pytest.raises(KeyError): idx.get_loc(Interval(*bound, closed=side)) @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5]) def test_get_loc_scalar(self, closed, scalar): # correct = {side: {query: answer}}. # If query is not in the dict, that query should raise a KeyError correct = { 'right': { 0.5: 0, 1: 0, 2.5: 1, 3: 1 }, 'left': { 0: 0, 0.5: 0, 2: 1, 2.5: 1 }, 'both': { 0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1 }, 'neither': { 0.5: 0, 2.5: 1 } } idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) # if get_loc is supplied a scalar, it should return the index of # the interval which contains the scalar, or KeyError. if scalar in correct[closed].keys(): assert idx.get_loc(scalar) == correct[closed][scalar] else: pytest.raises(KeyError, idx.get_loc, scalar) def test_slice_locs_with_interval(self): # increasing monotonically index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)]) assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) assert index.slice_locs(start=Interval(0, 2)) == (0, 3) assert index.slice_locs(end=Interval(2, 4)) == (0, 3) assert index.slice_locs(end=Interval(0, 2)) == (0, 1) assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 1) # decreasing monotonically index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)]) assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (2, 1) assert index.slice_locs(start=Interval(0, 2)) == (2, 3) assert index.slice_locs(end=Interval(2, 4)) == (0, 1) assert index.slice_locs(end=Interval(0, 2)) == (0, 3) assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (0, 3) # sorted duplicates index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)]) assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) assert index.slice_locs(start=Interval(0, 2)) == (0, 3) assert index.slice_locs(end=Interval(2, 4)) == (0, 3) assert index.slice_locs(end=Interval(0, 2)) == (0, 2) assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2) # unsorted duplicates index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)]) pytest.raises( KeyError, index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))) pytest.raises(KeyError, index.slice_locs(start=Interval(0, 2))) assert index.slice_locs(end=Interval(2, 4)) == (0, 2) pytest.raises(KeyError, index.slice_locs(end=Interval(0, 2))) pytest.raises( KeyError, index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))) # another unsorted duplicates index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)]) assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) assert index.slice_locs(start=Interval(0, 2)) == (0, 4) assert index.slice_locs(end=Interval(2, 4)) == (0, 3) assert index.slice_locs(end=Interval(0, 2)) == (0, 2) assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2) def test_slice_locs_with_ints_and_floats_succeeds(self): # increasing non-overlapping index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)]) assert index.slice_locs(0, 1) == (0, 1) assert index.slice_locs(0, 2) == (0, 2) assert index.slice_locs(0, 3) == (0, 2) assert index.slice_locs(3, 1) == (2, 1) assert index.slice_locs(3, 4) == (2, 3) assert index.slice_locs(0, 4) == (0, 3) # decreasing non-overlapping index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)]) assert index.slice_locs(0, 1) == (3, 2) assert index.slice_locs(0, 2) == (3, 1) assert index.slice_locs(0, 3) == (3, 1) assert index.slice_locs(3, 1) == (1, 2) assert index.slice_locs(3, 4) == (1, 0) assert index.slice_locs(0, 4) == (3, 0) @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [3, 1], [3, 4], [0, 4]]) @pytest.mark.parametrize("tuples", [[(0, 2), (1, 3), (2, 4)], [(2, 4), (1, 3), (0, 2)], [(0, 2), (0, 2), (2, 4)], [(0, 2), (2, 4), (0, 2)], [(0, 2), (0, 2), (2, 4), (1, 3)]]) def test_slice_locs_with_ints_and_floats_errors(self, tuples, query): index = IntervalIndex.from_tuples(tuples) with pytest.raises(KeyError): index.slice_locs(query) @pytest.mark.parametrize('query, expected', [(Interval(1, 3, closed='right'), 1), (Interval(1, 3, closed='left'), -1), (Interval(1, 3, closed='both'), -1), (Interval(1, 3, closed='neither'), -1), (Interval(1, 4, closed='right'), -1), (Interval(0, 4, closed='right'), -1), (Interval(1, 2, closed='right'), -1)]) def test_get_indexer_with_interval_single_queries(self, query, expected): index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed='right') result = index.get_indexer([query]) expected = np.array([expected], dtype='intp') tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( 'query, expected', [([Interval(2, 4, closed='right'), Interval(1, 3, closed='right')], [2, 1]), ([Interval(1, 3, closed='right'), Interval(0, 2, closed='right')], [1, -1]), ([Interval(1, 3, closed='right'), Interval(1, 3, closed='left')], [1, -1])]) def test_get_indexer_with_interval_multiple_queries(self, query, expected): index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed='right') result = index.get_indexer(query) expected = np.array(expected, dtype='intp') tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize('query, expected', [(-0.5, -1), (0, -1), (0.5, 0), (1, 0), (1.5, 1), (2, 1), (2.5, -1), (3, -1), (3.5, 2), (4, 2), (4.5, -1)]) def test_get_indexer_with_ints_and_floats_single_queries( self, query, expected): index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed='right') result = index.get_indexer([query]) expected = np.array([expected], dtype='intp') tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize('query, expected', [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, -1]), ([1, 2, 3, 4], [0, 1, -1, 2]), ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1])]) def test_get_indexer_with_ints_and_floats_multiple_queries( self, query, expected): index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed='right') result = index.get_indexer(query) expected = np.array(expected, dtype='intp') tm.assert_numpy_array_equal(result, expected) index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)]) # TODO: @shoyer believes this should raise, master branch doesn't @pytest.mark.parametrize( 'query, expected', [(-0.5, (Int64Index([], dtype='int64'), np.array([0]))), (0, (Int64Index([0], dtype='int64'), np.array([]))), (0.5, (Int64Index([0], dtype='int64'), np.array([]))), (1, (Int64Index([0, 1], dtype='int64'), np.array([]))), (1.5, (Int64Index([0, 1], dtype='int64'), np.array([]))), (2, (Int64Index([0, 1, 2], dtype='int64'), np.array([]))), (2.5, (Int64Index([1, 2], dtype='int64'), np.array([]))), (3, (Int64Index([2], dtype='int64'), np.array([]))), (3.5, (Int64Index([2], dtype='int64'), np.array([]))), (4, (Int64Index([], dtype='int64'), np.array([0]))), (4.5, (Int64Index([], dtype='int64'), np.array([0])))]) def test_get_indexer_non_unique_with_ints_and_floats_single_queries( self, query, expected): index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed='left') result = index.get_indexer_non_unique([query]) tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( 'query, expected', [([1, 2], (Int64Index([0, 1, 0, 1, 2], dtype='int64'), np.array([]))), ([1, 2, 3], (Int64Index([0, 1, 0, 1, 2, 2], dtype='int64'), np.array([]))), ([1, 2, 3, 4], (Int64Index([0, 1, 0, 1, 2, 2, -1], dtype='int64'), np.array([3]))), ([1, 2, 3, 4, 2], (Int64Index([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], dtype='int64'), np.array([3])))]) def test_get_indexer_non_unique_with_ints_and_floats_multiple_queries( self, query, expected): index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)], closed='left') result = index.get_indexer_non_unique(query) tm.assert_numpy_array_equal(result, expected) # TODO we may also want to test get_indexer for the case when # the intervals are duplicated, decreasing, non-monotonic, etc.. def test_contains(self): index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right') # __contains__ requires perfect matches to intervals. assert 0 not in index assert 1 not in index assert 2 not in index assert Interval(0, 1, closed='right') in index assert Interval(0, 2, closed='right') not in index assert Interval(0, 0.5, closed='right') not in index assert Interval(3, 5, closed='right') not in index assert Interval(-1, 0, closed='left') not in index assert Interval(0, 1, closed='left') not in index assert Interval(0, 1, closed='both') not in index def test_contains_method(self): index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right') assert not index.contains(0) assert index.contains(0.1) assert index.contains(0.5) assert index.contains(1) assert index.contains(Interval(0, 1, closed='right')) assert not index.contains(Interval(0, 1, closed='left')) assert not index.contains(Interval(0, 1, closed='both')) assert not index.contains(Interval(0, 2, closed='right')) assert not index.contains(Interval(0, 3, closed='right')) assert not index.contains(Interval(1, 3, closed='right')) assert not index.contains(20) assert not index.contains(-20)
def test_overlaps_self(self, start_shift, closed): start, shift = start_shift interval = Interval(start, start + shift, closed) assert interval.overlaps(interval)
def test_slice_locs_with_interval(self): # increasing monotonically index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)]) assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) assert index.slice_locs(start=Interval(0, 2)) == (0, 3) assert index.slice_locs(end=Interval(2, 4)) == (0, 3) assert index.slice_locs(end=Interval(0, 2)) == (0, 1) assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 1) # decreasing monotonically index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)]) assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (2, 1) assert index.slice_locs(start=Interval(0, 2)) == (2, 3) assert index.slice_locs(end=Interval(2, 4)) == (0, 1) assert index.slice_locs(end=Interval(0, 2)) == (0, 3) assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (0, 3) # sorted duplicates index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)]) assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) assert index.slice_locs(start=Interval(0, 2)) == (0, 3) assert index.slice_locs(end=Interval(2, 4)) == (0, 3) assert index.slice_locs(end=Interval(0, 2)) == (0, 2) assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2) # unsorted duplicates index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)]) pytest.raises( KeyError, index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))) pytest.raises(KeyError, index.slice_locs(start=Interval(0, 2))) assert index.slice_locs(end=Interval(2, 4)) == (0, 2) pytest.raises(KeyError, index.slice_locs(end=Interval(0, 2))) pytest.raises( KeyError, index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))) # another unsorted duplicates index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)]) assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3) assert index.slice_locs(start=Interval(0, 2)) == (0, 4) assert index.slice_locs(end=Interval(2, 4)) == (0, 3) assert index.slice_locs(end=Interval(0, 2)) == (0, 2) assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)
def test_errors(self): # not enough params msg = ( "Of the four parameters: start, end, periods, and freq, " "exactly three must be specified" ) with pytest.raises(ValueError, match=msg): interval_range(start=0) with pytest.raises(ValueError, match=msg): interval_range(end=5) with pytest.raises(ValueError, match=msg): interval_range(periods=2) with pytest.raises(ValueError, match=msg): interval_range() # too many params with pytest.raises(ValueError, match=msg): interval_range(start=0, end=5, periods=6, freq=1.5) # mixed units msg = "start, end, freq need to be type compatible" with pytest.raises(TypeError, match=msg): interval_range(start=0, end=Timestamp("20130101"), freq=2) with pytest.raises(TypeError, match=msg): interval_range(start=0, end=Timedelta("1 day"), freq=2) with pytest.raises(TypeError, match=msg): interval_range(start=0, end=10, freq="D") with pytest.raises(TypeError, match=msg): interval_range(start=Timestamp("20130101"), end=10, freq="D") with pytest.raises(TypeError, match=msg): interval_range( start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D" ) with pytest.raises(TypeError, match=msg): interval_range( start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2 ) with pytest.raises(TypeError, match=msg): interval_range(start=Timedelta("1 day"), end=10, freq="D") with pytest.raises(TypeError, match=msg): interval_range( start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D" ) with pytest.raises(TypeError, match=msg): interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2) # invalid periods msg = "periods must be a number, got foo" with pytest.raises(TypeError, match=msg): interval_range(start=0, periods="foo") # invalid start msg = "start must be numeric or datetime-like, got foo" with pytest.raises(ValueError, match=msg): interval_range(start="foo", periods=10) # invalid end msg = r"end must be numeric or datetime-like, got \(0, 1\]" with pytest.raises(ValueError, match=msg): interval_range(end=Interval(0, 1), periods=10) # invalid freq for datetime-like msg = "freq must be numeric or convertible to DateOffset, got foo" with pytest.raises(ValueError, match=msg): interval_range(start=0, end=10, freq="foo") with pytest.raises(ValueError, match=msg): interval_range(start=Timestamp("20130101"), periods=10, freq="foo") with pytest.raises(ValueError, match=msg): interval_range(end=Timedelta("1 day"), periods=10, freq="foo") # mixed tz start = Timestamp("2017-01-01", tz="US/Eastern") end = Timestamp("2017-01-07", tz="US/Pacific") msg = "Start and end cannot both be tz-aware with different timezones" with pytest.raises(TypeError, match=msg): interval_range(start=start, end=end)
class TestAstype: @pytest.mark.parametrize("dtype", np.typecodes["All"]) def test_astype_empty_constructor_equality(self, dtype): # see GH#15524 if dtype not in ( "S", "V", # poor support (if any) currently "M", "m", # Generic timestamps raise a ValueError. Already tested. ): init_empty = Series([], dtype=dtype) with tm.assert_produces_warning(DeprecationWarning): as_type_empty = Series([]).astype(dtype) tm.assert_series_equal(init_empty, as_type_empty) @pytest.mark.parametrize("dtype", [str, np.str_]) @pytest.mark.parametrize( "series", [ Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]), ], ) def test_astype_str_map(self, dtype, series): # see GH#4405 result = series.astype(dtype) expected = series.map(str) tm.assert_series_equal(result, expected) def test_astype_float_to_period(self): result = Series([np.nan]).astype("period[D]") expected = Series([NaT], dtype="period[D]") tm.assert_series_equal(result, expected) def test_astype_no_pandas_dtype(self): # https://github.com/pandas-dev/pandas/pull/24866 ser = Series([1, 2], dtype="int64") # Don't have PandasDtype in the public API, so we use `.array.dtype`, # which is a PandasDtype. result = ser.astype(ser.array.dtype) tm.assert_series_equal(result, ser) @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64]) def test_astype_generic_timestamp_no_frequency(self, dtype, request): # see GH#15524, GH#15987 data = [1] s = Series(data) if np.dtype(dtype).name not in ["timedelta64", "datetime64"]: mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit") request.node.add_marker(mark) msg = ( fr"The '{dtype.__name__}' dtype has no unit\. " fr"Please pass in '{dtype.__name__}\[ns\]' instead." ) with pytest.raises(ValueError, match=msg): s.astype(dtype) def test_astype_dt64_to_str(self): # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex dti = date_range("2012-01-01", periods=3) result = Series(dti).astype(str) expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object) tm.assert_series_equal(result, expected) def test_astype_dt64tz_to_str(self): # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern") result = Series(dti_tz).astype(str) expected = Series( [ "2012-01-01 00:00:00-05:00", "2012-01-02 00:00:00-05:00", "2012-01-03 00:00:00-05:00", ], dtype=object, ) tm.assert_series_equal(result, expected) def test_astype_datetime(self): s = Series(iNaT, dtype="M8[ns]", index=range(5)) s = s.astype("O") assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0)]) s = s.astype("O") assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)]) s[1] = np.nan assert s.dtype == "M8[ns]" s = s.astype("O") assert s.dtype == np.object_ def test_astype_datetime64tz(self): s = Series(date_range("20130101", periods=3, tz="US/Eastern")) # astype result = s.astype(object) expected = Series(s.astype(object), dtype=object) tm.assert_series_equal(result, expected) result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz) tm.assert_series_equal(result, s) # astype - object, preserves on construction result = Series(s.astype(object)) expected = s.astype(object) tm.assert_series_equal(result, expected) # astype - datetime64[ns, tz] with tm.assert_produces_warning(FutureWarning): # dt64->dt64tz astype deprecated result = Series(s.values).astype("datetime64[ns, US/Eastern]") tm.assert_series_equal(result, s) with tm.assert_produces_warning(FutureWarning): # dt64->dt64tz astype deprecated result = Series(s.values).astype(s.dtype) tm.assert_series_equal(result, s) result = s.astype("datetime64[ns, CET]") expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET")) tm.assert_series_equal(result, expected) def test_astype_str_cast_dt64(self): # see GH#9757 ts = Series([Timestamp("2010-01-04 00:00:00")]) s = ts.astype(str) expected = Series(["2010-01-04"]) tm.assert_series_equal(s, expected) ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")]) s = ts.astype(str) expected = Series(["2010-01-04 00:00:00-05:00"]) tm.assert_series_equal(s, expected) def test_astype_str_cast_td64(self): # see GH#9757 td = Series([Timedelta(1, unit="d")]) ser = td.astype(str) expected = Series(["1 days"]) tm.assert_series_equal(ser, expected) def test_dt64_series_astype_object(self): dt64ser = Series(date_range("20130101", periods=3)) result = dt64ser.astype(object) assert isinstance(result.iloc[0], datetime) assert result.dtype == np.object_ def test_td64_series_astype_object(self): tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]") result = tdser.astype(object) assert isinstance(result.iloc[0], timedelta) assert result.dtype == np.object_ @pytest.mark.parametrize( "values", [ Series(["x", "y", "z"], dtype="string"), Series(["x", "y", "z"], dtype="category"), Series(3 * [Timestamp("2020-01-01", tz="UTC")]), Series(3 * [Interval(0, 1)]), ], ) @pytest.mark.parametrize("errors", ["raise", "ignore"]) def test_astype_ignores_errors_for_extension_dtypes(self, values, errors): # https://github.com/pandas-dev/pandas/issues/35471 if errors == "ignore": expected = values result = values.astype(float, errors="ignore") tm.assert_series_equal(result, expected) else: msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): values.astype(float, errors=errors) @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) def test_astype_from_float_to_str(self, dtype): # https://github.com/pandas-dev/pandas/issues/36451 s = Series([0.1], dtype=dtype) result = s.astype(str) expected = Series(["0.1"]) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "value, string_value", [ (None, "None"), (np.nan, "nan"), (NA, "<NA>"), ], ) def test_astype_to_str_preserves_na(self, value, string_value): # https://github.com/pandas-dev/pandas/issues/36904 s = Series(["a", "b", value], dtype=object) result = s.astype(str) expected = Series(["a", "b", string_value], dtype=object) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) def test_astype(self, dtype): s = Series(np.random.randn(5), name="foo") as_typed = s.astype(dtype) assert as_typed.dtype == dtype assert as_typed.name == s.name @pytest.mark.parametrize("value", [np.nan, np.inf]) @pytest.mark.parametrize("dtype", [np.int32, np.int64]) def test_astype_cast_nan_inf_int(self, dtype, value): # gh-14265: check NaN and inf raise error when converting to int msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" s = Series([value]) with pytest.raises(ValueError, match=msg): s.astype(dtype) @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) def test_astype_cast_object_int_fail(self, dtype): arr = Series(["car", "house", "tree", "1"]) msg = r"invalid literal for int\(\) with base 10: 'car'" with pytest.raises(ValueError, match=msg): arr.astype(dtype) def test_astype_cast_object_int(self): arr = Series(["1", "2", "3", "4"], dtype=object) result = arr.astype(int) tm.assert_series_equal(result, Series(np.arange(1, 5))) def test_astype_unicode(self): # see GH#7758: A bit of magic is required to set # default encoding to utf-8 digits = string.digits test_series = [ Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), Series(["データーサイエンス、お前はもう死んでいる"]), ] former_encoding = None if sys.getdefaultencoding() == "utf-8": test_series.append(Series(["野菜食べないとやばい".encode()])) for s in test_series: res = s.astype("unicode") expec = s.map(str) tm.assert_series_equal(res, expec) # Restore the former encoding if former_encoding is not None and former_encoding != "utf-8": reload(sys) sys.setdefaultencoding(former_encoding) def test_astype_bytes(self): # GH#39474 result = Series(["foo", "bar", "baz"]).astype(bytes) assert result.dtypes == np.dtype("S3")
def test_overlaps_invalid_type(self, other): interval = Interval(0, 1) msg = '`other` must be an Interval, got {other}'.format( other=type(other).__name__) with tm.assert_raises_regex(TypeError, msg): interval.overlaps(other)
def test_length_timestamp(self, tz, left, right, expected): # GH 18789 iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz)) result = iv.length expected = Timedelta(expected) assert result == expected