Exemplo n.º 1
0
    def test_overlaps_nested(self, start_shift, closed, other_closed):
        start, shift = start_shift
        interval1 = Interval(start, start + 3 * shift, other_closed)
        interval2 = Interval(start + shift, start + 2 * shift, closed)

        # nested intervals should always overlap
        assert interval1.overlaps(interval2)
Exemplo n.º 2
0
    def test_overlaps_disjoint(self, start_shift, closed, other_closed):
        start, shift = start_shift
        interval1 = Interval(start, start + shift, other_closed)
        interval2 = Interval(start + 2 * shift, start + 3 * shift, closed)

        # disjoint intervals should never overlap
        assert not interval1.overlaps(interval2)
Exemplo n.º 3
0
    def test_overlaps_endpoint(self, start_shift, closed, other_closed):
        start, shift = start_shift
        interval1 = Interval(start, start + shift, other_closed)
        interval2 = Interval(start + shift, start + 2 * shift, closed)

        # overlap if shared endpoint is closed for both (overlap at a point)
        result = interval1.overlaps(interval2)
        expected = interval1.closed_right and interval2.closed_left
        assert result == expected
Exemplo n.º 4
0
 def test_construct_errors(self, left, right):
     # GH 23013
     msg = "Only numeric, Timestamp and Timedelta endpoints are allowed"
     with pytest.raises(ValueError, match=msg):
         Interval(left, right)
Exemplo n.º 5
0
    def test_comparison(self):
        actual = Interval(0, 1) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)

        actual = Interval(0.5, 1.5) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > Interval(0.5, 1.5)
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index <= self.index
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index >= self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index < self.index
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index.values == self.index
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index <= self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index != self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index > self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index.values > self.index
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # invalid comparisons
        actual = self.index == 0
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index == self.index.left
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        msg = "|".join([
            "not supported between instances of 'int' and '.*.Interval'",
            r"Invalid comparison between dtype=interval\[int64, right\] and ",
        ])
        with pytest.raises(TypeError, match=msg):
            self.index > 0
        with pytest.raises(TypeError, match=msg):
            self.index <= 0
        with pytest.raises(TypeError, match=msg):
            self.index > np.arange(2)

        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            self.index > np.arange(3)
Exemplo n.º 6
0
 def test_deprecated(self):
     ivs = [Interval(0, 1), Interval(1, 2)]
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         IntervalIndex.from_intervals(ivs)
Exemplo n.º 7
0
 def test_overlaps_invalid_type(self, other):
     interval = Interval(0, 1)
     msg = '`other` must be an Interval, got {other}'.format(
         other=type(other).__name__)
     with pytest.raises(TypeError, match=msg):
         interval.overlaps(other)
Exemplo n.º 8
0
class TestIntervalIndex(Base):
    _holder = IntervalIndex

    def setup_method(self, method):
        self.index = IntervalIndex.from_arrays([0, 1], [1, 2])
        self.index_with_nan = IntervalIndex.from_tuples([(0, 1), np.nan,
                                                         (1, 2)])
        self.indices = dict(intervalIndex=tm.makeIntervalIndex(10))

    def create_index(self, closed="right"):
        return IntervalIndex.from_breaks(range(11), closed=closed)

    def create_index_with_nan(self, closed="right"):
        mask = [True, False] + [True] * 8
        return IntervalIndex.from_arrays(
            np.where(mask, np.arange(10), np.nan),
            np.where(mask, np.arange(1, 11), np.nan),
            closed=closed,
        )

    def test_properties(self, closed):
        index = self.create_index(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        tm.assert_index_equal(index.left, Index(np.arange(10)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))

        assert index.closed == closed

        ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        ivs = [
            Interval(l, r, closed) if notna(l) else np.nan
            for l, r in zip(expected_left, expected_right)
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
            pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        # GH 18789
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)

    def test_with_nans(self, closed):
        index = self.create_index(closed=closed)
        assert index.hasnans is False

        result = index.isna()
        expected = np.repeat(False, len(index))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.repeat(True, len(index))
        tm.assert_numpy_array_equal(result, expected)

        index = self.create_index_with_nan(closed=closed)
        assert index.hasnans is True

        result = index.isna()
        expected = np.array([False, True] + [False] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.array([True, False] + [True] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

    def test_copy(self, closed):
        expected = self.create_index(closed=closed)

        result = expected.copy()
        assert result.equals(expected)

        result = expected.copy(deep=True)
        assert result.equals(expected)
        assert result.left is not expected.left

    def test_ensure_copied_data(self, closed):
        # exercise the copy flag in the constructor

        # not copying
        index = self.create_index(closed=closed)
        result = IntervalIndex(index, copy=False)
        tm.assert_numpy_array_equal(index.left.values,
                                    result.left.values,
                                    check_same="same")
        tm.assert_numpy_array_equal(index.right.values,
                                    result.right.values,
                                    check_same="same")

        # by-definition make a copy
        result = IntervalIndex(index._ndarray_values, copy=False)
        tm.assert_numpy_array_equal(index.left.values,
                                    result.left.values,
                                    check_same="copy")
        tm.assert_numpy_array_equal(index.right.values,
                                    result.right.values,
                                    check_same="copy")

    def test_equals(self, closed):
        expected = IntervalIndex.from_breaks(np.arange(5), closed=closed)
        assert expected.equals(expected)
        assert expected.equals(expected.copy())

        assert not expected.equals(expected.astype(object))
        assert not expected.equals(np.array(expected))
        assert not expected.equals(list(expected))

        assert not expected.equals([1, 2])
        assert not expected.equals(np.array([1, 2]))
        assert not expected.equals(pd.date_range("20130101", periods=2))

        expected_name1 = IntervalIndex.from_breaks(np.arange(5),
                                                   closed=closed,
                                                   name="foo")
        expected_name2 = IntervalIndex.from_breaks(np.arange(5),
                                                   closed=closed,
                                                   name="bar")
        assert expected.equals(expected_name1)
        assert expected_name1.equals(expected_name2)

        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            expected_other_closed = IntervalIndex.from_breaks(
                np.arange(5), closed=other_closed)
            assert not expected.equals(expected_other_closed)

    @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
    def test_where(self, closed, klass):
        idx = self.create_index(closed=closed)
        cond = [True] * len(idx)
        expected = idx
        result = expected.where(klass(cond))
        tm.assert_index_equal(result, expected)

        cond = [False] + [True] * len(idx[1:])
        expected = IntervalIndex([np.nan] + idx[1:].tolist())
        result = idx.where(klass(cond))
        tm.assert_index_equal(result, expected)

    def test_delete(self, closed):
        expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
        result = self.create_index(closed=closed).delete(0)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            interval_range(0, periods=10, closed="neither"),
            interval_range(1.7, periods=8, freq=2.5, closed="both"),
            interval_range(Timestamp("20170101"), periods=12, closed="left"),
            interval_range(Timedelta("1 day"), periods=6, closed="right"),
        ],
    )
    def test_insert(self, data):
        item = data[0]
        idx_item = IntervalIndex([item])

        # start
        expected = idx_item.append(data)
        result = data.insert(0, item)
        tm.assert_index_equal(result, expected)

        # end
        expected = data.append(idx_item)
        result = data.insert(len(data), item)
        tm.assert_index_equal(result, expected)

        # mid
        expected = data[:3].append(idx_item).append(data[3:])
        result = data.insert(3, item)
        tm.assert_index_equal(result, expected)

        # invalid type
        msg = "can only insert Interval objects and NA into an IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            data.insert(1, "foo")

        # invalid closed
        msg = "inserted item must be closed on the same side as the index"
        for closed in {"left", "right", "both", "neither"} - {item.closed}:
            with pytest.raises(ValueError, match=msg):
                bad_item = Interval(item.left, item.right, closed=closed)
                data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        for na in (np.nan, pd.NaT, None):
            expected = data[:1].append(na_idx).append(data[1:])
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_take(self, closed):
        index = self.create_index(closed=closed)

        result = index.take(range(10))
        tm.assert_index_equal(result, index)

        result = index.take([0, 0, 1])
        expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2],
                                             closed=closed)
        tm.assert_index_equal(result, expected)

    def test_is_unique_interval(self, closed):
        """
        Interval specific tests for is_unique in addition to base class tests
        """
        # unique overlapping - distinct endpoints
        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
        assert idx.is_unique is True

        # unique overlapping - shared endpoints
        idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)],
                                           closed=closed)
        assert idx.is_unique is True

        # unique nested
        idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
        assert idx.is_unique is True

    def test_monotonic(self, closed):
        # increasing non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)],
                                        closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing non-overlapping
        idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping
        idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)],
                                        closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping
        idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered overlapping
        idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping shared endpoints
        idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)],
                                           closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping shared endpoints
        idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)],
                                           closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # stationary
        idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is False

        # empty
        idx = IntervalIndex([], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

    @pytest.mark.skip(reason="not a valid repr as we use interval notation")
    def test_repr(self):
        i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed="right")
        expected = ("IntervalIndex(left=[0, 1],"
                    "\n              right=[1, 2],"
                    "\n              closed='right',"
                    "\n              dtype='interval[int64]')")
        assert repr(i) == expected

        i = IntervalIndex.from_tuples(
            (Timestamp("20130101"), Timestamp("20130102")),
            (Timestamp("20130102"), Timestamp("20130103")),
            closed="right",
        )
        expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02'],"
                    "\n              right=['2013-01-02', '2013-01-03'],"
                    "\n              closed='right',"
                    "\n              dtype='interval[datetime64[ns]]')")
        assert repr(i) == expected

    @pytest.mark.skip(reason="not a valid repr as we use interval notation")
    def test_repr_max_seq_item_setting(self):
        super().test_repr_max_seq_item_setting()

    @pytest.mark.skip(reason="not a valid repr as we use interval notation")
    def test_repr_roundtrip(self):
        super().test_repr_roundtrip()

    def test_frame_repr(self):
        # https://github.com/pandas-dev/pandas/pull/24134/files
        df = pd.DataFrame({"A": [1, 2, 3, 4]},
                          index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4]))
        result = repr(df)
        expected = "        A\n(0, 1]  1\n(1, 2]  2\n(2, 3]  3\n(3, 4]  4"
        assert result == expected

    @pytest.mark.parametrize(
        "constructor,expected",
        [
            (
                pd.Series,
                ("(0.0, 1.0]    a\n"
                 "NaN           b\n"
                 "(2.0, 3.0]    c\n"
                 "dtype: object"),
            ),
            (
                pd.DataFrame,
                ("            0\n(0.0, 1.0]  a\nNaN         b\n(2.0, 3.0]  c"),
            ),
        ],
    )
    def test_repr_missing(self, constructor, expected):
        # GH 25984
        index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        obj = constructor(list("abc"), index=index)
        result = repr(obj)
        assert result == expected

    @pytest.mark.parametrize(
        "tuples, closed, expected_data",
        [
            ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
            (
                [(0.5, 1.0), np.nan, (2.0, 3.0)],
                "right",
                ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
            ),
            (
                [
                    (Timestamp("20180101"), Timestamp("20180102")),
                    np.nan,
                    ((Timestamp("20180102"), Timestamp("20180103"))),
                ],
                "both",
                [
                    "[2018-01-01, 2018-01-02]", "NaN",
                    "[2018-01-02, 2018-01-03]"
                ],
            ),
            (
                [
                    (Timedelta("0 days"), Timedelta("1 days")),
                    (Timedelta("1 days"), Timedelta("2 days")),
                    np.nan,
                ],
                "neither",
                [
                    "(0 days 00:00:00, 1 days 00:00:00)",
                    "(1 days 00:00:00, 2 days 00:00:00)",
                    "NaN",
                ],
            ),
        ],
    )
    def test_to_native_types(self, tuples, closed, expected_data):
        # GH 28210
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.to_native_types()
        expected = np.array(expected_data)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_item(self, closed):
        i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
                                      closed=closed)
        assert i[0] == Interval(0.0, 1.0, closed=closed)
        assert i[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(i[2])

        result = i[0:1]
        expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[0:2]
        expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[1:3]
        expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        if scalar in index[0]:
            result = index.get_loc(scalar)
            assert result == 0
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)

    @pytest.mark.parametrize("other_closed",
                             ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed,
                                         other_closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        interval = Interval(left, right, closed=other_closed)
        if interval == index[0]:
            result = index.get_loc(interval)
            assert result == 0
        else:
            with pytest.raises(
                    KeyError,
                    match=re.escape(
                        "Interval({left}, {right}, closed='{other_closed}')".
                        format(left=left,
                               right=right,
                               other_closed=other_closed)),
            ):
                index.get_loc(interval)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)

        value = index[0].mid
        result = index.get_loc(value)
        expected = 0
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101",
                        periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        # GH 20636
        index = IntervalIndex.from_arrays(*arrays)

        value = index[0].mid + Timedelta("12 hours")
        result = index.get_loc(value)
        expected = slice(0, 2, None)
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        # GH 25860
        index = IntervalIndex.from_arrays(values[1:], values[:-1])
        result = index.get_loc(index[0])
        expected = 0
        assert result == expected

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        result = index.get_indexer(item)
        expected = np.array([0] * len(item), dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        result = index.get_indexer([Interval(0, 5, closed)] * size)
        expected = np.array([0] * size, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_maybe_convert_i8(self, breaks):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        # intervalindex
        result = index._maybe_convert_i8(index)
        expected = IntervalIndex.from_breaks(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # interval
        interval = Interval(breaks[0], breaks[1])
        result = index._maybe_convert_i8(interval)
        expected = Interval(breaks[0].value, breaks[1].value)
        assert result == expected

        # datetimelike index
        result = index._maybe_convert_i8(breaks)
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # datetimelike scalar
        result = index._maybe_convert_i8(breaks[0])
        expected = breaks[0].value
        assert result == expected

        # list-like of datetimelike scalars
        result = index._maybe_convert_i8(list(breaks))
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("2018-01-01", periods=5),
            timedelta_range("0 days", periods=5)
        ],
    )
    def test_maybe_convert_i8_nat(self, breaks):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        to_convert = breaks._constructor([pd.NaT] * 3)
        expected = pd.Float64Index([np.nan] * 3)
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

        to_convert = to_convert.insert(0, breaks[0])
        expected = expected.insert(0, float(breaks[0].value))
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [np.arange(5, dtype="int64"),
         np.arange(5, dtype="float64")],
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_numeric(self, breaks, make_key):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)
        key = make_key(breaks)

        # no conversion occurs for numeric
        result = index._maybe_convert_i8(key)
        assert result is key

    @pytest.mark.parametrize(
        "breaks1, breaks2",
        permutations(
            [
                date_range("20180101", periods=4),
                date_range("20180101", periods=4, tz="US/Eastern"),
                timedelta_range("0 days", periods=4),
            ],
            2,
        ),
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks1)
        key = make_key(breaks2)

        msg = ("Cannot index an IntervalIndex of subtype {dtype1} with "
               "values of dtype {dtype2}")
        msg = re.escape(msg.format(dtype1=breaks1.dtype, dtype2=breaks2.dtype))
        with pytest.raises(ValueError, match=msg):
            index._maybe_convert_i8(key)

    def test_contains_method(self):
        # can select values that are IN the range of a value
        i = IntervalIndex.from_arrays([0, 1], [1, 2])

        expected = np.array([False, False], dtype="bool")
        actual = i.contains(0)
        tm.assert_numpy_array_equal(actual, expected)
        actual = i.contains(3)
        tm.assert_numpy_array_equal(actual, expected)

        expected = np.array([True, False], dtype="bool")
        actual = i.contains(0.5)
        tm.assert_numpy_array_equal(actual, expected)
        actual = i.contains(1)
        tm.assert_numpy_array_equal(actual, expected)

        # __contains__ not implemented for "interval in interval", follow
        # that for the contains method for now
        with pytest.raises(NotImplementedError,
                           match="contains not implemented for two"):
            i.contains(Interval(0, 1))

    def test_dropna(self, closed):

        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)],
                                             closed=closed)

        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan],
                                       closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

    def test_non_contiguous(self, closed):
        index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        target = [0.5, 1.5, 2.5]
        actual = index.get_indexer(target)
        expected = np.array([0, -1, 1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        assert 1.5 not in index

    def test_isin(self, closed):
        index = self.create_index(closed=closed)

        expected = np.array([True] + [False] * (len(index) - 1))
        result = index.isin(index[:1])
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin([index[0]])
        tm.assert_numpy_array_equal(result, expected)

        other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
        expected = np.array([True] * (len(index) - 1) + [False])
        result = index.isin(other)
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin(other.tolist())
        tm.assert_numpy_array_equal(result, expected)

        for other_closed in {"right", "left", "both", "neither"}:
            other = self.create_index(closed=other_closed)
            expected = np.repeat(closed == other_closed, len(index))
            result = index.isin(other)
            tm.assert_numpy_array_equal(result, expected)

            result = index.isin(other.tolist())
            tm.assert_numpy_array_equal(result, expected)

    def test_comparison(self):
        actual = Interval(0, 1) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)

        actual = Interval(0.5, 1.5) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > Interval(0.5, 1.5)
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index <= self.index
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index >= self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index < self.index
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index.values == self.index
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index <= self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index != self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index > self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index.values > self.index
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # invalid comparisons
        actual = self.index == 0
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index == self.index.left
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        with pytest.raises(TypeError, match="unorderable types"):
            self.index > 0
        with pytest.raises(TypeError, match="unorderable types"):
            self.index <= 0
        msg = r"unorderable types: Interval\(\) > int\(\)"
        with pytest.raises(TypeError, match=msg):
            self.index > np.arange(2)
        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            self.index > np.arange(3)

    def test_missing_values(self, closed):
        idx = Index([
            np.nan,
            Interval(0, 1, closed=closed),
            Interval(1, 2, closed=closed)
        ])
        idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2],
                                         closed=closed)
        assert idx.equals(idx2)

        msg = ("missing values must be missing in the same location both left"
               " and right sides")
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays([np.nan, 0, 1],
                                      np.array([0, 1, 2]),
                                      closed=closed)

        tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False]))

    def test_sort_values(self, closed):
        index = self.create_index(closed=closed)

        result = index.sort_values()
        tm.assert_index_equal(result, index)

        result = index.sort_values(ascending=False)
        tm.assert_index_equal(result, index[::-1])

        # with nan
        index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

        result = index.sort_values()
        expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
        tm.assert_index_equal(result, expected)

        result = index.sort_values(ascending=False)
        expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_datetime(self, tz):
        start = Timestamp("2000-01-01", tz=tz)
        dates = date_range(start=start, periods=10)
        index = IntervalIndex.from_breaks(dates)

        # test mid
        start = Timestamp("2000-01-01T12:00", tz=tz)
        expected = date_range(start=start, periods=9)
        tm.assert_index_equal(index.mid, expected)

        # __contains__ doesn't check individual points
        assert Timestamp("2000-01-01", tz=tz) not in index
        assert Timestamp("2000-01-01T12", tz=tz) not in index
        assert Timestamp("2000-01-02", tz=tz) not in index
        iv_true = Interval(Timestamp("2000-01-02", tz=tz),
                           Timestamp("2000-01-03", tz=tz))
        iv_false = Interval(Timestamp("1999-12-31", tz=tz),
                            Timestamp("2000-01-01", tz=tz))
        assert iv_true in index
        assert iv_false not in index

        # .contains does check individual points
        assert not index.contains(Timestamp("2000-01-01", tz=tz)).any()
        assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any()
        assert index.contains(Timestamp("2000-01-02", tz=tz)).any()

        # test get_indexer
        start = Timestamp("1999-12-31T12:00", tz=tz)
        target = date_range(start=start, periods=7, freq="12H")
        actual = index.get_indexer(target)
        expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        start = Timestamp("2000-01-08T18:00", tz=tz)
        target = date_range(start=start, periods=7, freq="6H")
        actual = index.get_indexer(target)
        expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

    def test_append(self, closed):

        index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
        index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)

        result = index1.append(index2)
        expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3],
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        result = index1.append([index1, index2])
        expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2],
                                             [1, 2, 1, 2, 2, 3],
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        msg = ("can only append two IntervalIndex objects that are closed "
               "on the same side")
        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2],
                                                           closed=other_closed)
            with pytest.raises(ValueError, match=msg):
                index1.append(index_other_closed)

    def test_is_non_overlapping_monotonic(self, closed):
        # Should be True in all cases
        tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        # Should be False in all cases (overlapping)
        tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False in all cases (non-monotonic)
        tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False for closed='both', otherwise True (GH16560)
        if closed == "both":
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is False
        else:
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is True

    @pytest.mark.parametrize(
        "start, shift, na_value",
        [
            (0, 1, np.nan),
            (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
            (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
        ],
    )
    def test_is_overlapping(self, start, shift, na_value, closed):
        # GH 23309
        # see test_interval_tree.py for extensive tests; interface tests here

        # non-overlapping
        tuples = [(start + n * shift, start + (n + 1) * shift)
                  for n in (0, 2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # non-overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # overlapping
        tuples = [(start + n * shift, start + (n + 2) * shift)
                  for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # common endpoints
        tuples = [(start + n * shift, start + (n + 1) * shift)
                  for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        expected = closed == "both"
        assert result is expected

        # common endpoints with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        assert result is expected

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))),
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )),
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )),
        ],
    )
    def test_to_tuples(self, tuples):
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples()
        expected = Index(com.asarray_tuplesafe(tuples))
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))) + [np.nan],
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )) + [np.nan],
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )) + [np.nan],
        ],
    )
    @pytest.mark.parametrize("na_tuple", [True, False])
    def test_to_tuples_na(self, tuples, na_tuple):
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples(na_tuple=na_tuple)

        # check the non-NA portion
        expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
        result_notna = result[:-1]
        tm.assert_index_equal(result_notna, expected_notna)

        # check the NA portion
        result_na = result[-1]
        if na_tuple:
            assert isinstance(result_na, tuple)
            assert len(result_na) == 2
            assert all(isna(x) for x in result_na)
        else:
            assert isna(result_na)

    def test_nbytes(self):
        # GH 19209
        left = np.arange(0, 4, dtype="i8")
        right = np.arange(1, 5, dtype="i8")

        result = IntervalIndex.from_arrays(left, right).nbytes
        expected = 64  # 4 * 8 * 2
        assert result == expected

    def test_itemsize(self):
        # GH 19209
        left = np.arange(0, 4, dtype="i8")
        right = np.arange(1, 5, dtype="i8")
        expected = 16  # 8 * 2

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = IntervalIndex.from_arrays(left, right).itemsize

        assert result == expected

    @pytest.mark.parametrize("new_closed",
                             ["left", "right", "both", "neither"])
    def test_set_closed(self, name, closed, new_closed):
        # GH 21670
        index = interval_range(0, 5, closed=closed, name=name)
        result = index.set_closed(new_closed)
        expected = interval_range(0, 5, closed=new_closed, name=name)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
    def test_set_closed_errors(self, bad_closed):
        # GH 21670
        index = interval_range(0, 5)
        msg = "invalid option for 'closed': {closed}".format(closed=bad_closed)
        with pytest.raises(ValueError, match=msg):
            index.set_closed(bad_closed)

    def test_is_all_dates(self):
        # GH 23576
        year_2017 = pd.Interval(pd.Timestamp("2017-01-01 00:00:00"),
                                pd.Timestamp("2018-01-01 00:00:00"))
        year_2017_index = pd.IntervalIndex([year_2017])
        assert not year_2017_index.is_all_dates
Exemplo n.º 9
0
class TestClassConstructors(ConstructorTests):
    """Tests specific to the IntervalIndex/Index constructors"""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def constructor(self, request):
        return request.param

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by the IntervalIndex/Index constructors
        """
        if len(breaks) == 0:
            return {"data": breaks}

        ivs = [
            Interval(left, right, closed) if notna(left) else left
            for left, right in zip(breaks[:-1], breaks[1:])
        ]

        if isinstance(breaks, list):
            return {"data": ivs}
        elif is_categorical_dtype(breaks):
            return {"data": breaks._constructor(ivs)}
        return {"data": np.array(ivs, dtype=object)}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """
        pass

    def test_constructor_string(self):
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, constructor):
        # mismatched closed within intervals with no constructor override
        ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        msg = "intervals must all be closed on the same side"
        with pytest.raises(ValueError, match=msg):
            constructor(ivs)

        # scalar
        msg = (
            r"IntervalIndex\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(5)

        # not an interval; dtype depends on 32bit/windows builds
        msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        with pytest.raises(TypeError, match=msg):
            constructor([0, 1])

    @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning")
    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370
        if isinstance(data, IntervalIndex):
            tuples = data.to_tuples()
        else:
            tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        result = constructor(data, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
        values = values_constructor(intervals)
        result = Index(values, dtype=object)

        assert type(result) is Index
        tm.assert_numpy_array_equal(result.values, np.array(values))

    def test_index_mixed_closed(self):
        # GH27172
        intervals = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        result = Index(intervals)
        expected = Index(intervals, dtype=object)
        tm.assert_index_equal(result, expected)
Exemplo n.º 10
0
    def test_single_quantile(self):
        # issue 15431
        expected = Series([0, 0])

        s = Series([9., 9.])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex(
            [Interval(8.999, 9.0), Interval(8.999, 9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([-9., -9.])
        expected = Series([0, 0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex(
            [Interval(-9.001, -9.0),
             Interval(-9.001, -9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([0., 0.])
        expected = Series([0, 0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex(
            [Interval(-0.001, 0.0),
             Interval(-0.001, 0.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([9])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([-9])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([0])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)
Exemplo n.º 11
0
 def test_is_scalar_pandas_scalars(self):
     assert is_scalar(Timestamp('2014-01-01'))
     assert is_scalar(Timedelta(hours=1))
     assert is_scalar(Period('2014-01-01'))
     assert is_scalar(Interval(left=0, right=1))
     assert is_scalar(DateOffset(days=1))
Exemplo n.º 12
0
class TestIntervalIndex:
    index = IntervalIndex.from_arrays([0, 1], [1, 2])

    def create_index(self, closed="right"):
        return IntervalIndex.from_breaks(range(11), closed=closed)

    def create_index_with_nan(self, closed="right"):
        mask = [True, False] + [True] * 8
        return IntervalIndex.from_arrays(
            np.where(mask, np.arange(10), np.nan),
            np.where(mask, np.arange(1, 11), np.nan),
            closed=closed,
        )

    def test_properties(self, closed):
        index = self.create_index(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        tm.assert_index_equal(index.left, Index(np.arange(10)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))

        assert index.closed == closed

        ivs = [
            Interval(left, right, closed)
            for left, right in zip(range(10), range(1, 11))
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        ivs = [
            Interval(left, right, closed) if notna(left) else np.nan
            for left, right in zip(expected_left, expected_right)
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
            pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        # GH 18789
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)

    def test_with_nans(self, closed):
        index = self.create_index(closed=closed)
        assert index.hasnans is False

        result = index.isna()
        expected = np.zeros(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.ones(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        index = self.create_index_with_nan(closed=closed)
        assert index.hasnans is True

        result = index.isna()
        expected = np.array([False, True] + [False] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.array([True, False] + [True] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

    def test_copy(self, closed):
        expected = self.create_index(closed=closed)

        result = expected.copy()
        assert result.equals(expected)

        result = expected.copy(deep=True)
        assert result.equals(expected)
        assert result.left is not expected.left

    def test_ensure_copied_data(self, closed):
        # exercise the copy flag in the constructor

        # not copying
        index = self.create_index(closed=closed)
        result = IntervalIndex(index, copy=False)
        tm.assert_numpy_array_equal(index.left.values,
                                    result.left.values,
                                    check_same="same")
        tm.assert_numpy_array_equal(index.right.values,
                                    result.right.values,
                                    check_same="same")

        # by-definition make a copy
        result = IntervalIndex(np.array(index), copy=False)
        tm.assert_numpy_array_equal(index.left.values,
                                    result.left.values,
                                    check_same="copy")
        tm.assert_numpy_array_equal(index.right.values,
                                    result.right.values,
                                    check_same="copy")

    def test_delete(self, closed):
        expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
        result = self.create_index(closed=closed).delete(0)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            interval_range(0, periods=10, closed="neither"),
            interval_range(1.7, periods=8, freq=2.5, closed="both"),
            interval_range(Timestamp("20170101"), periods=12, closed="left"),
            interval_range(Timedelta("1 day"), periods=6, closed="right"),
        ],
    )
    def test_insert(self, data):
        item = data[0]
        idx_item = IntervalIndex([item])

        # start
        expected = idx_item.append(data)
        result = data.insert(0, item)
        tm.assert_index_equal(result, expected)

        # end
        expected = data.append(idx_item)
        result = data.insert(len(data), item)
        tm.assert_index_equal(result, expected)

        # mid
        expected = data[:3].append(idx_item).append(data[3:])
        result = data.insert(3, item)
        tm.assert_index_equal(result, expected)

        # invalid type
        res = data.insert(1, "foo")
        expected = data.astype(object).insert(1, "foo")
        tm.assert_index_equal(res, expected)

        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(TypeError, match=msg):
            data._data.insert(1, "foo")

        # invalid closed
        msg = "'value.closed' is 'left', expected 'right'."
        for closed in {"left", "right", "both", "neither"} - {item.closed}:
            msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
            bad_item = Interval(item.left, item.right, closed=closed)
            res = data.insert(1, bad_item)
            expected = data.astype(object).insert(1, bad_item)
            tm.assert_index_equal(res, expected)
            with pytest.raises(ValueError, match=msg):
                data._data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        for na in [np.nan, None, pd.NA]:
            expected = data[:1].append(na_idx).append(data[1:])
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

        if data.left.dtype.kind not in ["m", "M"]:
            # trying to insert pd.NaT into a numeric-dtyped Index should cast
            expected = data.astype(object).insert(1, pd.NaT)

            msg = "can only insert Interval objects and NA into an IntervalArray"
            with pytest.raises(TypeError, match=msg):
                data._data.insert(1, pd.NaT)

        result = data.insert(1, pd.NaT)
        tm.assert_index_equal(result, expected)

    def test_is_unique_interval(self, closed):
        """
        Interval specific tests for is_unique in addition to base class tests
        """
        # unique overlapping - distinct endpoints
        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
        assert idx.is_unique is True

        # unique overlapping - shared endpoints
        idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)],
                                        closed=closed)
        assert idx.is_unique is True

        # unique nested
        idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
        assert idx.is_unique is True

        # unique NaN
        idx = IntervalIndex.from_tuples([(np.NaN, np.NaN)], closed=closed)
        assert idx.is_unique is True

        # non-unique NaN
        idx = IntervalIndex.from_tuples([(np.NaN, np.NaN), (np.NaN, np.NaN)],
                                        closed=closed)
        assert idx.is_unique is False

    def test_monotonic(self, closed):
        # increasing non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)],
                                        closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing non-overlapping
        idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping
        idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)],
                                        closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping
        idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered overlapping
        idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping shared endpoints
        idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)],
                                        closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping shared endpoints
        idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # stationary
        idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is False

        # empty
        idx = IntervalIndex([], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

    def test_is_monotonic_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, np.nan])

        assert not index.is_monotonic
        assert not index._is_strictly_monotonic_increasing
        assert not index.is_monotonic_increasing
        assert not index._is_strictly_monotonic_decreasing
        assert not index.is_monotonic_decreasing

    def test_get_item(self, closed):
        i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
                                      closed=closed)
        assert i[0] == Interval(0.0, 1.0, closed=closed)
        assert i[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(i[2])

        result = i[0:1]
        expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[0:2]
        expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[1:3]
        expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_maybe_convert_i8(self, breaks):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        # intervalindex
        result = index._maybe_convert_i8(index)
        expected = IntervalIndex.from_breaks(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # interval
        interval = Interval(breaks[0], breaks[1])
        result = index._maybe_convert_i8(interval)
        expected = Interval(breaks[0].value, breaks[1].value)
        assert result == expected

        # datetimelike index
        result = index._maybe_convert_i8(breaks)
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # datetimelike scalar
        result = index._maybe_convert_i8(breaks[0])
        expected = breaks[0].value
        assert result == expected

        # list-like of datetimelike scalars
        result = index._maybe_convert_i8(list(breaks))
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("2018-01-01", periods=5),
            timedelta_range("0 days", periods=5)
        ],
    )
    def test_maybe_convert_i8_nat(self, breaks):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        to_convert = breaks._constructor([pd.NaT] * 3)
        expected = pd.Float64Index([np.nan] * 3)
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

        to_convert = to_convert.insert(0, breaks[0])
        expected = expected.insert(0, float(breaks[0].value))
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [np.arange(5, dtype="int64"),
         np.arange(5, dtype="float64")],
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_numeric(self, breaks, make_key):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)
        key = make_key(breaks)

        # no conversion occurs for numeric
        result = index._maybe_convert_i8(key)
        assert result is key

    @pytest.mark.parametrize(
        "breaks1, breaks2",
        permutations(
            [
                date_range("20180101", periods=4),
                date_range("20180101", periods=4, tz="US/Eastern"),
                timedelta_range("0 days", periods=4),
            ],
            2,
        ),
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks1)
        key = make_key(breaks2)

        msg = (
            f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
            f"values of dtype {breaks2.dtype}")
        msg = re.escape(msg)
        with pytest.raises(ValueError, match=msg):
            index._maybe_convert_i8(key)

    def test_contains_method(self):
        # can select values that are IN the range of a value
        i = IntervalIndex.from_arrays([0, 1], [1, 2])

        expected = np.array([False, False], dtype="bool")
        actual = i.contains(0)
        tm.assert_numpy_array_equal(actual, expected)
        actual = i.contains(3)
        tm.assert_numpy_array_equal(actual, expected)

        expected = np.array([True, False], dtype="bool")
        actual = i.contains(0.5)
        tm.assert_numpy_array_equal(actual, expected)
        actual = i.contains(1)
        tm.assert_numpy_array_equal(actual, expected)

        # __contains__ not implemented for "interval in interval", follow
        # that for the contains method for now
        with pytest.raises(NotImplementedError,
                           match="contains not implemented for two"):
            i.contains(Interval(0, 1))

    def test_contains_dunder(self):

        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        assert 0 not in index
        assert 1 not in index
        assert 2 not in index

        assert Interval(0, 1, closed="right") in index
        assert Interval(0, 2, closed="right") not in index
        assert Interval(0, 0.5, closed="right") not in index
        assert Interval(3, 5, closed="right") not in index
        assert Interval(-1, 0, closed="left") not in index
        assert Interval(0, 1, closed="left") not in index
        assert Interval(0, 1, closed="both") not in index

    def test_dropna(self, closed):

        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)],
                                             closed=closed)

        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan],
                                       closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

    def test_non_contiguous(self, closed):
        index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        target = [0.5, 1.5, 2.5]
        actual = index.get_indexer(target)
        expected = np.array([0, -1, 1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        assert 1.5 not in index

    def test_isin(self, closed):
        index = self.create_index(closed=closed)

        expected = np.array([True] + [False] * (len(index) - 1))
        result = index.isin(index[:1])
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin([index[0]])
        tm.assert_numpy_array_equal(result, expected)

        other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
        expected = np.array([True] * (len(index) - 1) + [False])
        result = index.isin(other)
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin(other.tolist())
        tm.assert_numpy_array_equal(result, expected)

        for other_closed in {"right", "left", "both", "neither"}:
            other = self.create_index(closed=other_closed)
            expected = np.repeat(closed == other_closed, len(index))
            result = index.isin(other)
            tm.assert_numpy_array_equal(result, expected)

            result = index.isin(other.tolist())
            tm.assert_numpy_array_equal(result, expected)

    def test_comparison(self):
        actual = Interval(0, 1) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)

        actual = Interval(0.5, 1.5) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > Interval(0.5, 1.5)
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index <= self.index
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index >= self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index < self.index
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index.values == self.index
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index <= self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index != self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index > self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index.values > self.index
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # invalid comparisons
        actual = self.index == 0
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index == self.index.left
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        msg = "|".join([
            "not supported between instances of 'int' and '.*.Interval'",
            r"Invalid comparison between dtype=interval\[int64, right\] and ",
        ])
        with pytest.raises(TypeError, match=msg):
            self.index > 0
        with pytest.raises(TypeError, match=msg):
            self.index <= 0
        with pytest.raises(TypeError, match=msg):
            self.index > np.arange(2)

        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            self.index > np.arange(3)

    def test_missing_values(self, closed):
        idx = Index([
            np.nan,
            Interval(0, 1, closed=closed),
            Interval(1, 2, closed=closed)
        ])
        idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2],
                                         closed=closed)
        assert idx.equals(idx2)

        msg = ("missing values must be missing in the same location both left "
               "and right sides")
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays([np.nan, 0, 1],
                                      np.array([0, 1, 2]),
                                      closed=closed)

        tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False]))

    def test_sort_values(self, closed):
        index = self.create_index(closed=closed)

        result = index.sort_values()
        tm.assert_index_equal(result, index)

        result = index.sort_values(ascending=False)
        tm.assert_index_equal(result, index[::-1])

        # with nan
        index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

        result = index.sort_values()
        expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
        tm.assert_index_equal(result, expected)

        result = index.sort_values(ascending=False, na_position="first")
        expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_datetime(self, tz):
        start = Timestamp("2000-01-01", tz=tz)
        dates = date_range(start=start, periods=10)
        index = IntervalIndex.from_breaks(dates)

        # test mid
        start = Timestamp("2000-01-01T12:00", tz=tz)
        expected = date_range(start=start, periods=9)
        tm.assert_index_equal(index.mid, expected)

        # __contains__ doesn't check individual points
        assert Timestamp("2000-01-01", tz=tz) not in index
        assert Timestamp("2000-01-01T12", tz=tz) not in index
        assert Timestamp("2000-01-02", tz=tz) not in index
        iv_true = Interval(Timestamp("2000-01-02", tz=tz),
                           Timestamp("2000-01-03", tz=tz))
        iv_false = Interval(Timestamp("1999-12-31", tz=tz),
                            Timestamp("2000-01-01", tz=tz))
        assert iv_true in index
        assert iv_false not in index

        # .contains does check individual points
        assert not index.contains(Timestamp("2000-01-01", tz=tz)).any()
        assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any()
        assert index.contains(Timestamp("2000-01-02", tz=tz)).any()

        # test get_indexer
        start = Timestamp("1999-12-31T12:00", tz=tz)
        target = date_range(start=start, periods=7, freq="12H")
        actual = index.get_indexer(target)
        expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        start = Timestamp("2000-01-08T18:00", tz=tz)
        target = date_range(start=start, periods=7, freq="6H")
        actual = index.get_indexer(target)
        expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

    def test_append(self, closed):

        index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
        index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)

        result = index1.append(index2)
        expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3],
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        result = index1.append([index1, index2])
        expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2],
                                             [1, 2, 1, 2, 2, 3],
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2],
                                                           closed=other_closed)
            result = index1.append(index_other_closed)
            expected = index1.astype(object).append(
                index_other_closed.astype(object))
            tm.assert_index_equal(result, expected)

    def test_is_non_overlapping_monotonic(self, closed):
        # Should be True in all cases
        tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        # Should be False in all cases (overlapping)
        tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False in all cases (non-monotonic)
        tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False for closed='both', otherwise True (GH16560)
        if closed == "both":
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is False
        else:
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is True

    @pytest.mark.parametrize(
        "start, shift, na_value",
        [
            (0, 1, np.nan),
            (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
            (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
        ],
    )
    def test_is_overlapping(self, start, shift, na_value, closed):
        # GH 23309
        # see test_interval_tree.py for extensive tests; interface tests here

        # non-overlapping
        tuples = [(start + n * shift, start + (n + 1) * shift)
                  for n in (0, 2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # non-overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # overlapping
        tuples = [(start + n * shift, start + (n + 2) * shift)
                  for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # common endpoints
        tuples = [(start + n * shift, start + (n + 1) * shift)
                  for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        expected = closed == "both"
        assert result is expected

        # common endpoints with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        assert result is expected

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))),
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )),
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )),
        ],
    )
    def test_to_tuples(self, tuples):
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples()
        expected = Index(com.asarray_tuplesafe(tuples))
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))) + [np.nan],
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )) + [np.nan],
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )) + [np.nan],
        ],
    )
    @pytest.mark.parametrize("na_tuple", [True, False])
    def test_to_tuples_na(self, tuples, na_tuple):
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples(na_tuple=na_tuple)

        # check the non-NA portion
        expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
        result_notna = result[:-1]
        tm.assert_index_equal(result_notna, expected_notna)

        # check the NA portion
        result_na = result[-1]
        if na_tuple:
            assert isinstance(result_na, tuple)
            assert len(result_na) == 2
            assert all(isna(x) for x in result_na)
        else:
            assert isna(result_na)

    def test_nbytes(self):
        # GH 19209
        left = np.arange(0, 4, dtype="i8")
        right = np.arange(1, 5, dtype="i8")

        result = IntervalIndex.from_arrays(left, right).nbytes
        expected = 64  # 4 * 8 * 2
        assert result == expected

    @pytest.mark.parametrize("new_closed",
                             ["left", "right", "both", "neither"])
    def test_set_closed(self, name, closed, new_closed):
        # GH 21670
        index = interval_range(0, 5, closed=closed, name=name)
        result = index.set_closed(new_closed)
        expected = interval_range(0, 5, closed=new_closed, name=name)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
    def test_set_closed_errors(self, bad_closed):
        # GH 21670
        index = interval_range(0, 5)
        msg = f"invalid option for 'closed': {bad_closed}"
        with pytest.raises(ValueError, match=msg):
            index.set_closed(bad_closed)

    def test_is_all_dates(self):
        # GH 23576
        year_2017 = Interval(Timestamp("2017-01-01 00:00:00"),
                             Timestamp("2018-01-01 00:00:00"))
        year_2017_index = IntervalIndex([year_2017])
        assert not year_2017_index._is_all_dates

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_value_non_scalar_errors(self, key):
        # GH 31117
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10),
                                         (3, 10)])
        s = pd.Series(range(len(idx)), index=idx)

        msg = str(key)
        with pytest.raises(InvalidIndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning):
                idx.get_value(s, key)

    @pytest.mark.parametrize("closed", ["left", "right", "both"])
    def test_pickle_round_trip_closed(self, closed):
        # https://github.com/pandas-dev/pandas/issues/35658
        idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        result = tm.round_trip_pickle(idx)
        tm.assert_index_equal(result, idx)
Exemplo n.º 13
0
class TestInterval:
    def test_properties(self, interval):
        assert interval.closed == "right"
        assert interval.left == 0
        assert interval.right == 1
        assert interval.mid == 0.5

    def test_repr(self, interval):
        assert repr(interval) == "Interval(0, 1, closed='right')"
        assert str(interval) == "(0, 1]"

        interval_left = Interval(0, 1, closed="left")
        assert repr(interval_left) == "Interval(0, 1, closed='left')"
        assert str(interval_left) == "[0, 1)"

    def test_contains(self, interval):
        assert 0.5 in interval
        assert 1 in interval
        assert 0 not in interval

        msg = "__contains__ not defined for two intervals"
        with pytest.raises(TypeError, match=msg):
            interval in interval

        interval_both = Interval(0, 1, closed="both")
        assert 0 in interval_both
        assert 1 in interval_both

        interval_neither = Interval(0, 1, closed="neither")
        assert 0 not in interval_neither
        assert 0.5 in interval_neither
        assert 1 not in interval_neither

    def test_equal(self):
        assert Interval(0, 1) == Interval(0, 1, closed="right")
        assert Interval(0, 1) != Interval(0, 1, closed="left")
        assert Interval(0, 1) != 0

    def test_comparison(self):
        msg = ("'<' not supported between instances of "
               "'pandas._libs.interval.Interval' and 'int'")
        with pytest.raises(TypeError, match=msg):
            Interval(0, 1) < 2

        assert Interval(0, 1) < Interval(1, 2)
        assert Interval(0, 1) < Interval(0, 2)
        assert Interval(0, 1) < Interval(0.5, 1.5)
        assert Interval(0, 1) <= Interval(0, 1)
        assert Interval(0, 1) > Interval(-1, 2)
        assert Interval(0, 1) >= Interval(0, 1)

    def test_hash(self, interval):
        # should not raise
        hash(interval)

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (0, 5, 5),
            (-2, 5.5, 7.5),
            (10, 10, 0),
            (10, np.inf, np.inf),
            (-np.inf, -5, np.inf),
            (-np.inf, np.inf, np.inf),
            (Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")),
            (Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")),
            (Timedelta("1H10M"), Timedelta("5H5M"), Timedelta("3H55M")),
            (Timedelta("5S"), Timedelta("1H"), Timedelta("59M55S")),
        ],
    )
    def test_length(self, left, right, expected):
        # GH 18789
        iv = Interval(left, right)
        result = iv.length
        assert result == expected

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            ("2017-01-01", "2017-01-06", "5 days"),
            ("2017-01-01", "2017-01-01 12:00:00", "12 hours"),
            ("2017-01-01 12:00", "2017-01-01 12:00:00", "0 days"),
            ("2017-01-01 12:01", "2017-01-05 17:31:00",
             "4 days 5 hours 30 min"),
        ],
    )
    @pytest.mark.parametrize("tz", (None, "UTC", "CET", "US/Eastern"))
    def test_length_timestamp(self, tz, left, right, expected):
        # GH 18789
        iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz))
        result = iv.length
        expected = Timedelta(expected)
        assert result == expected

    @pytest.mark.parametrize(
        "left, right",
        [
            (0, 1),
            (Timedelta("0 days"), Timedelta("1 day")),
            (Timestamp("2018-01-01"), Timestamp("2018-01-02")),
            (
                Timestamp("2018-01-01", tz="US/Eastern"),
                Timestamp("2018-01-02", tz="US/Eastern"),
            ),
        ],
    )
    def test_is_empty(self, left, right, closed):
        # GH27219
        # non-empty always return False
        iv = Interval(left, right, closed)
        assert iv.is_empty is False

        # same endpoint is empty except when closed='both' (contains one point)
        iv = Interval(left, left, closed)
        result = iv.is_empty
        expected = closed != "both"
        assert result is expected

    @pytest.mark.parametrize(
        "left, right",
        [
            ("a", "z"),
            (("a", "b"), ("c", "d")),
            (list("AB"), list("ab")),
            (Interval(0, 1), Interval(1, 2)),
            (Period("2018Q1", freq="Q"), Period("2018Q1", freq="Q")),
        ],
    )
    def test_construct_errors(self, left, right):
        # GH 23013
        msg = "Only numeric, Timestamp and Timedelta endpoints are allowed"
        with pytest.raises(ValueError, match=msg):
            Interval(left, right)

    def test_math_add(self, closed):
        interval = Interval(0, 1, closed=closed)
        expected = Interval(1, 2, closed=closed)

        result = interval + 1
        assert result == expected

        result = 1 + interval
        assert result == expected

        result = interval
        result += 1
        assert result == expected

        msg = r"unsupported operand type\(s\) for \+"
        with pytest.raises(TypeError, match=msg):
            interval + interval

        with pytest.raises(TypeError, match=msg):
            interval + "foo"

    def test_math_sub(self, closed):
        interval = Interval(0, 1, closed=closed)
        expected = Interval(-1, 0, closed=closed)

        result = interval - 1
        assert result == expected

        result = interval
        result -= 1
        assert result == expected

        msg = r"unsupported operand type\(s\) for -"
        with pytest.raises(TypeError, match=msg):
            interval - interval

        with pytest.raises(TypeError, match=msg):
            interval - "foo"

    def test_math_mult(self, closed):
        interval = Interval(0, 1, closed=closed)
        expected = Interval(0, 2, closed=closed)

        result = interval * 2
        assert result == expected

        result = 2 * interval
        assert result == expected

        result = interval
        result *= 2
        assert result == expected

        msg = r"unsupported operand type\(s\) for \*"
        with pytest.raises(TypeError, match=msg):
            interval * interval

        msg = r"can\'t multiply sequence by non-int"
        with pytest.raises(TypeError, match=msg):
            interval * "foo"

    def test_math_div(self, closed):
        interval = Interval(0, 1, closed=closed)
        expected = Interval(0, 0.5, closed=closed)

        result = interval / 2.0
        assert result == expected

        result = interval
        result /= 2.0
        assert result == expected

        msg = r"unsupported operand type\(s\) for /"
        with pytest.raises(TypeError, match=msg):
            interval / interval

        with pytest.raises(TypeError, match=msg):
            interval / "foo"

    def test_math_floordiv(self, closed):
        interval = Interval(1, 2, closed=closed)
        expected = Interval(0, 1, closed=closed)

        result = interval // 2
        assert result == expected

        result = interval
        result //= 2
        assert result == expected

        msg = r"unsupported operand type\(s\) for //"
        with pytest.raises(TypeError, match=msg):
            interval // interval

        with pytest.raises(TypeError, match=msg):
            interval // "foo"

    def test_constructor_errors(self):
        msg = "invalid option for 'closed': foo"
        with pytest.raises(ValueError, match=msg):
            Interval(0, 1, closed="foo")

        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            Interval(1, 0)

    @pytest.mark.parametrize("tz_left, tz_right", [(None, "UTC"),
                                                   ("UTC", None),
                                                   ("UTC", "US/Eastern")])
    def test_constructor_errors_tz(self, tz_left, tz_right):
        # GH 18538
        left = Timestamp("2017-01-01", tz=tz_left)
        right = Timestamp("2017-01-02", tz=tz_right)

        if com.any_none(tz_left, tz_right):
            error = TypeError
            msg = "Cannot compare tz-naive and tz-aware timestamps"
        else:
            error = ValueError
            msg = "left and right must have the same time zone"
        with pytest.raises(error, match=msg):
            Interval(left, right)

    def test_equality_comparison_broadcasts_over_array(self):
        # https://github.com/pandas-dev/pandas/issues/35931
        interval = Interval(0, 1)
        arr = np.array([interval, interval])
        result = interval == arr
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)
Exemplo n.º 14
0
def make_data():
    N = 100
    left = np.random.uniform(size=N).cumsum()
    right = left + np.random.uniform(size=N)
    return [Interval(l, r) for l, r in zip(left, right)]
Exemplo n.º 15
0
def interval():
    return Interval(0, 1)
Exemplo n.º 16
0
 def test_length(self, left, right, expected):
     # GH 18789
     iv = Interval(left, right)
     result = iv.length
     assert result == expected
Exemplo n.º 17
0
    def test_comparison(self):
        msg = ("'<' not supported between instances of "
               "'pandas._libs.interval.Interval' and 'int'")
        with pytest.raises(TypeError, match=msg):
            Interval(0, 1) < 2

        assert Interval(0, 1) < Interval(1, 2)
        assert Interval(0, 1) < Interval(0, 2)
        assert Interval(0, 1) < Interval(0.5, 1.5)
        assert Interval(0, 1) <= Interval(0, 1)
        assert Interval(0, 1) > Interval(-1, 2)
        assert Interval(0, 1) >= Interval(0, 1)
Exemplo n.º 18
0
 def test_equal(self):
     assert Interval(0, 1) == Interval(0, 1, closed="right")
     assert Interval(0, 1) != Interval(0, 1, closed="left")
     assert Interval(0, 1) != 0
Exemplo n.º 19
0
 def test_qcut_index(self):
     result = qcut([0, 2], 2)
     intervals = [Interval(-0.001, 1), Interval(1, 2)]
     expected = Categorical(intervals, ordered=True)
     tm.assert_categorical_equal(result, expected)
Exemplo n.º 20
0
 def test_is_all_dates(self):
     # GH 23576
     year_2017 = Interval(Timestamp("2017-01-01 00:00:00"),
                          Timestamp("2018-01-01 00:00:00"))
     year_2017_index = IntervalIndex([year_2017])
     assert not year_2017_index._is_all_dates
Exemplo n.º 21
0
    def test_value_counts_bins(self):
        klasses = [Index, Series]
        for klass in klasses:
            s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
            s = klass(s_values)

            # bins
            pytest.raises(TypeError, lambda bins: s.value_counts(bins=bins), 1)

            s1 = Series([1, 1, 2, 3])
            res1 = s1.value_counts(bins=1)
            exp1 = Series({Interval(0.997, 3.0): 4})
            tm.assert_series_equal(res1, exp1)
            res1n = s1.value_counts(bins=1, normalize=True)
            exp1n = Series({Interval(0.997, 3.0): 1.0})
            tm.assert_series_equal(res1n, exp1n)

            if isinstance(s1, Index):
                tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))
            else:
                exp = np.array([1, 2, 3], dtype=np.int64)
                tm.assert_numpy_array_equal(s1.unique(), exp)

            assert s1.nunique() == 3

            # these return the same
            res4 = s1.value_counts(bins=4, dropna=True)
            intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
            exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
            tm.assert_series_equal(res4, exp4)

            res4 = s1.value_counts(bins=4, dropna=False)
            intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
            exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
            tm.assert_series_equal(res4, exp4)

            res4n = s1.value_counts(bins=4, normalize=True)
            exp4n = Series([0.5, 0.25, 0.25, 0],
                           index=intervals.take([0, 3, 1, 2]))
            tm.assert_series_equal(res4n, exp4n)

            # handle NA's properly
            s_values = [
                'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
            ]
            s = klass(s_values)
            expected = Series([4, 3, 2], index=['b', 'a', 'd'])
            tm.assert_series_equal(s.value_counts(), expected)

            if isinstance(s, Index):
                exp = Index(['a', 'b', np.nan, 'd'])
                tm.assert_index_equal(s.unique(), exp)
            else:
                exp = np.array(['a', 'b', np.nan, 'd'], dtype=object)
                tm.assert_numpy_array_equal(s.unique(), exp)
            assert s.nunique() == 3

            s = klass({})
            expected = Series([], dtype=np.int64)
            tm.assert_series_equal(s.value_counts(),
                                   expected,
                                   check_index_type=False)
            # returned dtype differs depending on original
            if isinstance(s, Index):
                tm.assert_index_equal(s.unique(), Index([]), exact=False)
            else:
                tm.assert_numpy_array_equal(s.unique(),
                                            np.array([]),
                                            check_dtype=False)

            assert s.nunique() == 0
Exemplo n.º 22
0
def test_searchsorted_invalid_argument(arg):
    values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    with pytest.raises(TypeError, match=msg):
        values.searchsorted(arg)
Exemplo n.º 23
0
 def test_intervals(self):
     # 19967
     for i in [Interval(0, 1), Interval(0, 1, "left"), Interval(10, 25.0, "right")]:
         i_rec = self.encode_decode(i)
         assert i == i_rec
Exemplo n.º 24
0
def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """

    typ = obj.get('typ')
    if typ is None:
        return obj
    elif typ == 'timestamp':
        freq = obj['freq'] if 'freq' in obj else obj['offset']
        return Timestamp(obj['value'], tz=obj['tz'], freq=freq)
    elif typ == 'nat':
        return NaT
    elif typ == 'period':
        return Period(ordinal=obj['ordinal'], freq=obj['freq'])
    elif typ == 'index':
        dtype = dtype_for(obj['dtype'])
        data = unconvert(obj['data'], dtype, obj.get('compress'))
        return Index(data, dtype=dtype, name=obj['name'])
    elif typ == 'range_index':
        return RangeIndex(obj['start'],
                          obj['stop'],
                          obj['step'],
                          name=obj['name'])
    elif typ == 'multi_index':
        dtype = dtype_for(obj['dtype'])
        data = unconvert(obj['data'], dtype, obj.get('compress'))
        data = [tuple(x) for x in data]
        return MultiIndex.from_tuples(data, names=obj['names'])
    elif typ == 'period_index':
        data = unconvert(obj['data'], np.int64, obj.get('compress'))
        d = dict(name=obj['name'], freq=obj['freq'])
        freq = d.pop('freq', None)
        return PeriodIndex(PeriodArray(data, freq), **d)

    elif typ == 'datetime_index':
        data = unconvert(obj['data'], np.int64, obj.get('compress'))
        d = dict(name=obj['name'], freq=obj['freq'])
        result = DatetimeIndex(data, **d)
        tz = obj['tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ in ('interval_index', 'interval_array'):
        return globals()[obj['klass']].from_arrays(obj['left'],
                                                   obj['right'],
                                                   obj['closed'],
                                                   name=obj['name'])
    elif typ == 'category':
        from_codes = globals()[obj['klass']].from_codes
        return from_codes(codes=obj['codes'],
                          categories=obj['categories'],
                          ordered=obj['ordered'])

    elif typ == 'interval':
        return Interval(obj['left'], obj['right'], obj['closed'])
    elif typ == 'series':
        dtype = dtype_for(obj['dtype'])
        pd_dtype = pandas_dtype(dtype)

        index = obj['index']
        result = Series(unconvert(obj['data'], dtype, obj['compress']),
                        index=index,
                        dtype=pd_dtype,
                        name=obj['name'])
        return result

    elif typ == 'block_manager':
        axes = obj['axes']

        def create_block(b):
            values = _safe_reshape(
                unconvert(b['values'], dtype_for(b['dtype']), b['compress']),
                b['shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if 'locs' in b:
                placement = b['locs']
            else:
                placement = axes[0].get_indexer(b['items'])

            if is_datetime64tz_dtype(b['dtype']):
                assert isinstance(values, np.ndarray), type(values)
                assert values.dtype == 'M8[ns]', values.dtype
                values = DatetimeArray(values, dtype=b['dtype'])

            return make_block(values=values,
                              klass=getattr(internals, b['klass']),
                              placement=placement,
                              dtype=b['dtype'])

        blocks = [create_block(b) for b in obj['blocks']]
        return globals()[obj['klass']](BlockManager(blocks, axes))
    elif typ == 'datetime':
        return parse(obj['data'])
    elif typ == 'datetime64':
        return np.datetime64(parse(obj['data']))
    elif typ == 'date':
        return parse(obj['data']).date()
    elif typ == 'timedelta':
        return timedelta(*obj['data'])
    elif typ == 'timedelta64':
        return np.timedelta64(int(obj['data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return SparseSeries(
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return SparseDataFrame(
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return SparsePanel(
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == 'block_index':
        return globals()[obj['klass']](obj['length'], obj['blocs'],
                                       obj['blengths'])
    elif typ == 'int_index':
        return globals()[obj['klass']](obj['length'], obj['indices'])
    elif typ == 'ndarray':
        return unconvert(obj['data'], np.typeDict[obj['dtype']],
                         obj.get('compress')).reshape(obj['shape'])
    elif typ == 'np_scalar':
        if obj.get('sub_typ') == 'np_complex':
            return c2f(obj['real'], obj['imag'], obj['dtype'])
        else:
            dtype = dtype_for(obj['dtype'])
            try:
                return dtype(obj['data'])
            except (ValueError, TypeError):
                return dtype.type(obj['data'])
    elif typ == 'np_complex':
        return complex(obj['real'] + '+' + obj['imag'] + 'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
Exemplo n.º 25
0
 def test_get_indexer_length_one_interval(self, size, closed):
     # GH 17284
     index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
     result = index.get_indexer([Interval(0, 5, closed)] * size)
     expected = np.array([0] * size, dtype="intp")
     tm.assert_numpy_array_equal(result, expected)
Exemplo n.º 26
0
        datetime(2000, 1, 3),
        datetime(2000, 1, 3),
        datetime(2000, 1, 4),
        datetime(2000, 1, 4),
        datetime(2000, 1, 4),
        datetime(2000, 1, 5),
    ]

    return Series(np.random.randn(len(dates)), index=dates)


# ----------------------------------------------------------------
# Scalars
# ----------------------------------------------------------------
@pytest.fixture(params=[
    (Interval(left=0, right=5), IntervalDtype("int64", "right")),
    (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")),
    (Period("2012-01", freq="M"), "period[M]"),
    (Period("2012-02-01", freq="D"), "period[D]"),
    (
        Timestamp("2011-01-01", tz="US/Eastern"),
        DatetimeTZDtype(tz="US/Eastern"),
    ),
    (Timedelta(seconds=500), "timedelta64[ns]"),
])
def ea_scalar_and_dtype(request):
    return request.param


# ----------------------------------------------------------------
# Operators & Operations
Exemplo n.º 27
0
class TestClassConstructors(Base):
    """Tests specific to the IntervalIndex/Index constructors"""
    @pytest.fixture(params=[IntervalIndex,
                            partial(Index, dtype='interval')],
                    ids=['IntervalIndex', 'Index'])
    def constructor(self, request):
        return request.param

    def get_kwargs_from_breaks(self, breaks, closed='right'):
        """
        converts intervals in breaks format to a dictionary of kwargs to
        specific to the format expected by the IntervalIndex/Index constructors
        """
        if len(breaks) == 0:
            return {'data': breaks}

        ivs = [
            Interval(l, r, closed) if notna(l) else l
            for l, r in zip(breaks[:-1], breaks[1:])
        ]

        if isinstance(breaks, list):
            return {'data': ivs}
        elif is_categorical_dtype(breaks):
            return {'data': breaks._constructor(ivs)}
        return {'data': np.array(ivs, dtype=object)}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """
        pass

    def test_constructor_errors(self, constructor):
        # mismatched closed within intervals with no constructor override
        ivs = [Interval(0, 1, closed='right'), Interval(2, 3, closed='left')]
        msg = 'intervals must all be closed on the same side'
        with tm.assert_raises_regex(ValueError, msg):
            constructor(ivs)

        # scalar
        msg = (r'IntervalIndex\(...\) must be called with a collection of '
               'some kind, 5 was passed')
        with tm.assert_raises_regex(TypeError, msg):
            constructor(5)

        # not an interval
        msg = ("type <(class|type) 'numpy.int64'> with value 0 "
               "is not an interval")
        with tm.assert_raises_regex(TypeError, msg):
            constructor([0, 1])

    @pytest.mark.parametrize(
        'data, closed',
        [([], 'both'), ([np.nan, np.nan], 'neither'),
         ([Interval(0, 3, closed='neither'),
           Interval(2, 5, closed='neither')], 'left'),
         ([Interval(0, 3, closed='left'),
           Interval(2, 5, closed='right')], 'neither'),
         (IntervalIndex.from_breaks(range(5), closed='both'), 'right')])
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370
        if isinstance(data, IntervalIndex):
            tuples = data.to_tuples()
        else:
            tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        result = constructor(data, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('values_constructor',
                             [list, np.array, IntervalIndex, IntervalArray])
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
        values = values_constructor(intervals)
        result = Index(values, dtype=object)

        assert type(result) is Index
        tm.assert_numpy_array_equal(result.values, np.array(values))
Exemplo n.º 28
0
 def test_slice_interval_step(self, series_with_interval_index):
     # GH#31658 allows for integer step!=1, not Interval step
     ser = series_with_interval_index.copy()
     msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
     with pytest.raises(ValueError, match=msg):
         ser[0 : 4 : Interval(0, 1)]
Exemplo n.º 29
0
class TestDataFrameSetItem:
    def test_setitem_str_subclass(self):
        # GH#37366
        class mystring(str):
            pass

        data = ["2020-10-22 01:21:00+00:00"]
        index = DatetimeIndex(data)
        df = DataFrame({"a": [1]}, index=index)
        df["b"] = 2
        df[mystring("c")] = 3
        expected = DataFrame({
            "a": [1],
            "b": [2],
            mystring("c"): [3]
        },
                             index=index)
        tm.assert_equal(df, expected)

    @pytest.mark.parametrize(
        "dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"])
    def test_setitem_dtype(self, dtype, float_frame):
        arr = np.random.randn(len(float_frame))

        float_frame[dtype] = np.array(arr, dtype=dtype)
        assert float_frame[dtype].dtype.name == dtype

    def test_setitem_list_not_dataframe(self, float_frame):
        data = np.random.randn(len(float_frame), 2)
        float_frame[["A", "B"]] = data
        tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

    def test_setitem_error_msmgs(self):

        # GH 7432
        df = DataFrame(
            {
                "bar": [1, 2, 3],
                "baz": ["d", "e", "f"]
            },
            index=Index(["a", "b", "c"], name="foo"),
        )
        ser = Series(
            ["g", "h", "i", "j"],
            index=Index(["a", "b", "c", "a"], name="foo"),
            name="fiz",
        )
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg):
            with tm.assert_produces_warning(FutureWarning, match="non-unique"):
                df["newcol"] = ser

        # GH 4107, more descriptive error message
        df = DataFrame(np.random.randint(0, 2, (4, 4)),
                       columns=["a", "b", "c", "d"])

        msg = "incompatible index of inserted column with frame index"
        with pytest.raises(TypeError, match=msg):
            df["gr"] = df.groupby(["b", "c"]).count()

    def test_setitem_benchmark(self):
        # from the vb_suite/frame_methods/frame_insert_columns
        N = 10
        K = 5
        df = DataFrame(index=range(N))
        new_col = np.random.randn(N)
        for i in range(K):
            df[i] = new_col
        expected = DataFrame(np.repeat(new_col, K).reshape(N, K),
                             index=range(N))
        tm.assert_frame_equal(df, expected)

    def test_setitem_different_dtype(self):
        df = DataFrame(np.random.randn(5, 3),
                       index=np.arange(5),
                       columns=["c", "b", "a"])
        df.insert(0, "foo", df["a"])
        df.insert(2, "bar", df["c"])

        # diff dtype

        # new item
        df["x"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 5 + [np.dtype("float32")],
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        # replacing current (in different block)
        df["a"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2,
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        df["y"] = df["a"].astype("int32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 +
            [np.dtype("int32")],
            index=["foo", "c", "bar", "b", "a", "x", "y"],
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_empty_columns(self):
        # GH 13522
        df = DataFrame(index=["A", "B", "C"])
        df["X"] = df.index
        df["X"] = ["x", "y", "z"]
        exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
        tm.assert_frame_equal(df, exp)

    def test_setitem_dt64_index_empty_columns(self):
        rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
        df = DataFrame(index=np.arange(len(rng)))

        df["A"] = rng
        assert df["A"].dtype == np.dtype("M8[ns]")

    def test_setitem_timestamp_empty_columns(self):
        # GH#19843
        df = DataFrame(index=range(3))
        df["now"] = Timestamp("20130101", tz="UTC")

        expected = DataFrame([[Timestamp("20130101", tz="UTC")]] * 3,
                             index=[0, 1, 2],
                             columns=["now"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_wrong_length_categorical_dtype_raises(self):
        # GH#29523
        cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"])
        df = DataFrame(range(10), columns=["bar"])

        msg = (rf"Length of values \({len(cat)}\) "
               rf"does not match length of index \({len(df)}\)")
        with pytest.raises(ValueError, match=msg):
            df["foo"] = cat

    def test_setitem_with_sparse_value(self):
        # GH#8131
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_array = SparseArray([0, 0, 1])
        df["new_column"] = sp_array

        expected = Series(sp_array, name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_with_unaligned_sparse_value(self):
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0])

        df["new_column"] = sp_series
        expected = Series(SparseArray([1, 0, 0]), name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_period_preserves_dtype(self):
        # GH: 26861
        data = [Period("2003-12", "D")]
        result = DataFrame([])
        result["a"] = data

        expected = DataFrame({"a": data})

        tm.assert_frame_equal(result, expected)

    def test_setitem_dict_preserves_dtypes(self):
        # https://github.com/pandas-dev/pandas/issues/34573
        expected = DataFrame({
            "a": Series([0, 1, 2], dtype="int64"),
            "b": Series([1, 2, 3], dtype=float),
            "c": Series([1, 2, 3], dtype=float),
            "d": Series([1, 2, 3], dtype="uint32"),
        })
        df = DataFrame({
            "a": Series([], dtype="int64"),
            "b": Series([], dtype=float),
            "c": Series([], dtype=float),
            "d": Series([], dtype="uint32"),
        })
        for idx, b in enumerate([1, 2, 3]):
            df.loc[df.shape[0]] = {
                "a": int(idx),
                "b": float(b),
                "c": float(b),
                "d": np.uint32(b),
            }
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "obj,dtype",
        [
            (Period("2020-01"), PeriodDtype("M")),
            (
                Interval(left=0, right=5, inclusive="right"),
                IntervalDtype("int64", "right"),
            ),
            (
                Timestamp("2011-01-01", tz="US/Eastern"),
                DatetimeTZDtype(tz="US/Eastern"),
            ),
        ],
    )
    def test_setitem_extension_types(self, obj, dtype):
        # GH: 34832
        expected = DataFrame({
            "idx": [1, 2, 3],
            "obj": Series([obj] * 3, dtype=dtype)
        })

        df = DataFrame({"idx": [1, 2, 3]})
        df["obj"] = obj

        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "ea_name",
        [
            dtype.name for dtype in ea_registry.dtypes
            # property would require instantiation
            if not isinstance(dtype.name, property)
        ]
        # mypy doesn't allow adding lists of different types
        # https://github.com/python/mypy/issues/5492
        + ["datetime64[ns, UTC]", "period[D]"],  # type: ignore[list-item]
    )
    def test_setitem_with_ea_name(self, ea_name):
        # GH 38386
        result = DataFrame([0])
        result[ea_name] = [1]
        expected = DataFrame({0: [0], ea_name: [1]})
        tm.assert_frame_equal(result, expected)

    def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self):
        # GH#7492
        data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
        result = Series(data_ns).to_frame()
        result["new"] = data_ns
        expected = DataFrame({
            0: [1, None],
            "new": [1, None]
        },
                             dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

        # OutOfBoundsDatetime error shouldn't occur
        data_s = np.array([1, "nat"], dtype="datetime64[s]")
        result["new"] = data_s
        expected = DataFrame({
            0: [1, None],
            "new": [1e9, None]
        },
                             dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_datetime64_col_other_units(self, unit):
        # Check that non-nano dt64 values get cast to dt64 on setitem
        #  into a not-yet-existing column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df[unit] = vals

        assert df[unit].dtype == np.dtype("M8[ns]")
        assert (df[unit].values == ex_vals).all()

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_existing_datetime64_col_other_units(self, unit):
        # Check that non-nano dt64 values get cast to dt64 on setitem
        #  into an already-existing dt64 column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]")

        # We overwrite existing dt64 column with new, non-nano dt64 vals
        df["dates"] = vals
        assert (df["dates"].values == ex_vals).all()

    def test_setitem_dt64tz(self, timezone_frame):

        df = timezone_frame
        idx = df["B"].rename("foo")

        # setitem
        df["C"] = idx
        tm.assert_series_equal(df["C"], Series(idx, name="C"))

        df["D"] = "foo"
        df["D"] = idx
        tm.assert_series_equal(df["D"], Series(idx, name="D"))
        del df["D"]

        # assert that A & C are not sharing the same base (e.g. they
        # are copies)
        v1 = df._mgr.arrays[1]
        v2 = df._mgr.arrays[2]
        tm.assert_extension_array_equal(v1, v2)
        v1base = v1._data.base
        v2base = v2._data.base
        assert v1base is None or (id(v1base) != id(v2base))

        # with nan
        df2 = df.copy()
        df2.iloc[1, 1] = NaT
        df2.iloc[1, 2] = NaT
        result = df2["B"]
        tm.assert_series_equal(notna(result),
                               Series([True, False, True], name="B"))
        tm.assert_series_equal(df2.dtypes, df.dtypes)

    def test_setitem_periodindex(self):
        rng = period_range("1/1/2000", periods=5, name="index")
        df = DataFrame(np.random.randn(5, 3), index=rng)

        df["Index"] = rng
        rs = Index(df["Index"])
        tm.assert_index_equal(rs, rng, check_names=False)
        assert rs.name == "Index"
        assert rng.name == "index"

        rs = df.reset_index().set_index("index")
        assert isinstance(rs.index, PeriodIndex)
        tm.assert_index_equal(rs.index, rng)

    def test_setitem_complete_column_with_array(self):
        # GH#37954
        df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]})
        arr = np.array([[1, 1], [3, 1], [5, 1]])
        df[["c", "d"]] = arr
        expected = DataFrame({
            "a": ["one", "two", "three"],
            "b": [1, 2, 3],
            "c": [1, 3, 5],
            "d": [1, 1, 1],
        })
        expected["c"] = expected["c"].astype(arr.dtype)
        expected["d"] = expected["d"].astype(arr.dtype)
        assert expected["c"].dtype == arr.dtype
        assert expected["d"].dtype == arr.dtype
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
    def test_setitem_bool_with_numeric_index(self, dtype):
        # GH#36319
        cols = Index([1, 2, 3], dtype=dtype)
        df = DataFrame(np.random.randn(3, 3), columns=cols)

        df[False] = ["a", "b", "c"]

        expected_cols = Index([1, 2, 3, False], dtype=object)
        if dtype == "f8":
            expected_cols = Index([1.0, 2.0, 3.0, False], dtype=object)

        tm.assert_index_equal(df.columns, expected_cols)

    @pytest.mark.parametrize("indexer", ["B", ["B"]])
    def test_setitem_frame_length_0_str_key(self, indexer):
        # GH#38831
        df = DataFrame(columns=["A", "B"])
        other = DataFrame({"B": [1, 2]})
        df[indexer] = other
        expected = DataFrame({"A": [np.nan] * 2, "B": [1, 2]})
        expected["A"] = expected["A"].astype("object")
        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns(self, using_array_manager):
        # GH#15695
        warn = FutureWarning if using_array_manager else None
        msg = "will attempt to set the values inplace"

        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        df.loc[0, "A"] = (0, 3)
        with tm.assert_produces_warning(warn, match=msg):
            df.loc[:, "B"] = (1, 4)
        df["C"] = (2, 5)
        expected = DataFrame(
            [
                [0, 1, 2, 3, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
            ],
            dtype="object",
        )

        if using_array_manager:
            # setitem replaces column so changes dtype

            expected.columns = cols
            expected["C"] = expected["C"].astype("int64")
            # TODO(ArrayManager) .loc still overwrites
            expected["B"] = expected["B"].astype("int64")

        else:
            # set these with unique columns to be extra-unambiguous
            expected[2] = expected[2].astype(np.int64)
            expected[5] = expected[5].astype(np.int64)
            expected.columns = cols

        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns_size_mismatch(self):
        # GH#39510
        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        with pytest.raises(ValueError,
                           match="Columns must be same length as key"):
            df[["A"]] = (0, 3, 5)

        df2 = df.iloc[:, :3]  # unique columns
        with pytest.raises(ValueError,
                           match="Columns must be same length as key"):
            df2[["A"]] = (0, 3, 5)

    @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
    def test_setitem_df_wrong_column_number(self, cols):
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=cols)
        rhs = DataFrame([[10, 11]], columns=["d", "e"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df["a"] = rhs

    def test_setitem_listlike_indexer_duplicate_columns(self):
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        df[["a", "b"]] = rhs
        expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        tm.assert_frame_equal(df, expected)

        df[["c", "b"]] = rhs
        expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
        # GH#39403
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11]], columns=["a", "b"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df[["a", "b"]] = rhs

    def test_setitem_intervals(self):

        df = DataFrame({"A": range(10)})
        ser = cut(df["A"], 5)
        assert isinstance(ser.cat.categories, IntervalIndex)

        # B & D end up as Categoricals
        # the remainder are converted to in-line objects
        # containing an IntervalIndex.values
        df["B"] = ser
        df["C"] = np.array(ser)
        df["D"] = ser.values
        df["E"] = np.array(ser.values)
        df["F"] = ser.astype(object)

        assert is_categorical_dtype(df["B"].dtype)
        assert is_interval_dtype(df["B"].cat.categories)
        assert is_categorical_dtype(df["D"].dtype)
        assert is_interval_dtype(df["D"].cat.categories)

        # These go through the Series constructor and so get inferred back
        #  to IntervalDtype
        assert is_interval_dtype(df["C"])
        assert is_interval_dtype(df["E"])

        # But the Series constructor doesn't do inference on Series objects,
        #  so setting df["F"] doesn't get cast back to IntervalDtype
        assert is_object_dtype(df["F"])

        # they compare equal as Index
        # when converted to numpy objects
        c = lambda x: Index(np.array(x))
        tm.assert_index_equal(c(df.B), c(df.B))
        tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
        tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
        tm.assert_index_equal(c(df.C), c(df.D), check_names=False)

        # B & D are the same Series
        tm.assert_series_equal(df["B"], df["B"])
        tm.assert_series_equal(df["B"], df["D"], check_names=False)

        # C & E are the same Series
        tm.assert_series_equal(df["C"], df["C"])
        tm.assert_series_equal(df["C"], df["E"], check_names=False)

    def test_setitem_categorical(self):
        # GH#35369
        df = DataFrame({"h": Series(list("mn")).astype("category")})
        df.h = df.h.cat.reorder_categories(["n", "m"])
        expected = DataFrame(
            {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])})
        tm.assert_frame_equal(df, expected)

    def test_setitem_with_empty_listlike(self):
        # GH#17101
        index = Index([], name="idx")
        result = DataFrame(columns=["A"], index=index)
        result["A"] = []
        expected = DataFrame(columns=["A"], index=index)
        tm.assert_index_equal(result.index, expected.index)

    @pytest.mark.parametrize(
        "cols, values, expected",
        [
            (["C", "D", "D", "a"], [1, 2, 3, 4], 4),  # with duplicates
            (["D", "C", "D", "a"], [1, 2, 3, 4], 4),  # mixed order
            (["C", "B", "B", "a"], [1, 2, 3, 4], 4),  # other duplicate cols
            (["C", "B", "a"], [1, 2, 3], 3),  # no duplicates
            (["B", "C", "a"], [3, 2, 1], 1),  # alphabetical order
            (["C", "a", "B"], [3, 2, 1], 2),  # in the middle
        ],
    )
    def test_setitem_same_column(self, cols, values, expected):
        # GH#23239
        df = DataFrame([values], columns=cols)
        df["a"] = df["a"]
        result = df["a"].values[0]
        assert result == expected

    def test_setitem_multi_index(self):
        # GH#7655, test that assigning to a sub-frame of a frame
        # with multi-index columns aligns both rows and columns
        it = ["jim", "joe", "jolie"], ["first",
                                       "last"], ["left", "center", "right"]

        cols = MultiIndex.from_product(it)
        index = date_range("20141006", periods=20)
        vals = np.random.randint(1, 1000, (len(index), len(cols)))
        df = DataFrame(vals, columns=cols, index=index)

        i, j = df.index.values.copy(), it[-1][:]

        np.random.shuffle(i)
        df["jim"] = df["jolie"].loc[i, ::-1]
        tm.assert_frame_equal(df["jim"], df["jolie"])

        np.random.shuffle(j)
        df[("joe", "first")] = df[("jolie", "last")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")])

        np.random.shuffle(j)
        df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])

    @pytest.mark.parametrize(
        "columns,box,expected",
        [
            (
                ["A", "B", "C", "D"],
                7,
                DataFrame(
                    [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "D"],
                [7, 8],
                DataFrame(
                    [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "B", "C"],
                np.array([7, 8, 9], dtype=np.int64),
                DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]],
                          columns=["A", "B", "C"]),
            ),
            (
                ["B", "C", "D"],
                [[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                DataFrame(
                    [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "A", "D"],
                np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                         dtype=np.int64),
                DataFrame(
                    [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "C"],
                DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
                DataFrame([[7, 2, 8], [9, 4, 10], [11, 6, 12]],
                          columns=["A", "B", "C"]),
            ),
        ],
    )
    def test_setitem_list_missing_columns(self, columns, box, expected):
        # GH#29334
        df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
        df[columns] = box
        tm.assert_frame_equal(df, expected)

    def test_setitem_list_of_tuples(self, float_frame):
        tuples = list(zip(float_frame["A"], float_frame["B"]))
        float_frame["tuples"] = tuples

        result = float_frame["tuples"]
        expected = Series(tuples, index=float_frame.index, name="tuples")
        tm.assert_series_equal(result, expected)

    def test_setitem_iloc_generator(self):
        # GH#39614
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer] = 1
        expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_iloc_two_dimensional_generator(self):
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer, 1] = 1
        expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_dtypes_bytes_type_to_object(self):
        # GH 20734
        index = Series(name="id", dtype="S24")
        df = DataFrame(index=index)
        df["a"] = Series(name="a", index=index, dtype=np.uint32)
        df["b"] = Series(name="b", index=index, dtype="S64")
        df["c"] = Series(name="c", index=index, dtype="S64")
        df["d"] = Series(name="d", index=index, dtype=np.uint8)
        result = df.dtypes
        expected = Series([np.uint32, object, object, np.uint8],
                          index=list("abcd"))
        tm.assert_series_equal(result, expected)

    def test_boolean_mask_nullable_int64(self):
        # GH 28928
        result = DataFrame({
            "a": [3, 4],
            "b": [5, 6]
        }).astype({
            "a": "int64",
            "b": "Int64"
        })
        mask = Series(False, index=result.index)
        result.loc[mask, "a"] = result["a"]
        result.loc[mask, "b"] = result["b"]
        expected = DataFrame({
            "a": [3, 4],
            "b": [5, 6]
        }).astype({
            "a": "int64",
            "b": "Int64"
        })
        tm.assert_frame_equal(result, expected)

    def test_setitem_ea_dtype_rhs_series(self):
        # GH#47425
        df = DataFrame({"a": [1, 2]})
        df["a"] = Series([1, 2], dtype="Int64")
        expected = DataFrame({"a": [1, 2]}, dtype="Int64")
        tm.assert_frame_equal(df, expected)

    # TODO(ArrayManager) set column with 2d column array, see #44788
    @td.skip_array_manager_not_yet_implemented
    def test_setitem_npmatrix_2d(self):
        # GH#42376
        # for use-case df["x"] = sparse.random(10, 10).mean(axis=1)
        expected = DataFrame(
            {
                "np-array": np.ones(10),
                "np-matrix": np.ones(10)
            },
            index=np.arange(10))

        a = np.ones((10, 1))
        df = DataFrame(index=np.arange(10))
        df["np-array"] = a

        # Instantiation of `np.matrix` gives PendingDeprecationWarning
        with tm.assert_produces_warning(PendingDeprecationWarning):
            df["np-matrix"] = np.matrix(a)

        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("vals", [{}, {"d": "a"}])
    def test_setitem_aligning_dict_with_index(self, vals):
        # GH#47216
        df = DataFrame({"a": [1, 2], "b": [3, 4], **vals})
        df.loc[:, "a"] = {1: 100, 0: 200}
        df.loc[:, "c"] = {0: 5, 1: 6}
        df.loc[:, "e"] = {1: 5}
        expected = DataFrame({
            "a": [200, 100],
            "b": [3, 4],
            **vals, "c": [5, 6],
            "e": [np.nan, 5]
        })
        tm.assert_frame_equal(df, expected)

    def test_setitem_rhs_dataframe(self):
        # GH#47578
        df = DataFrame({"a": [1, 2]})
        df["a"] = DataFrame({"a": [10, 11]}, index=[1, 2])
        expected = DataFrame({"a": [np.nan, 10]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({"a": [1, 2]})
        df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2]))
        tm.assert_frame_equal(df, expected)
Exemplo n.º 30
0
class TestIntervalIndex(object):
    def _compare_tuple_of_numpy_array(self, result, expected):
        lidx, ridx = result
        lidx_expected, ridx_expected = expected

        tm.assert_numpy_array_equal(lidx, lidx_expected)
        tm.assert_numpy_array_equal(ridx, ridx_expected)

    @pytest.mark.parametrize("side", ['right', 'left', 'both', 'neither'])
    def test_get_loc_interval(self, closed, side):

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3],
                      [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            if closed == side:
                if bound == [0, 1]:
                    assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                elif bound == [2, 3]:
                    assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                else:
                    with pytest.raises(KeyError):
                        idx.get_loc(Interval(*bound, closed=side))
            else:
                with pytest.raises(KeyError):
                    idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):

        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            'right': {
                0.5: 0,
                1: 0,
                2.5: 1,
                3: 1
            },
            'left': {
                0: 0,
                0.5: 0,
                2: 1,
                2.5: 1
            },
            'both': {
                0: 0,
                0.5: 0,
                1: 0,
                2: 1,
                2.5: 1,
                3: 1
            },
            'neither': {
                0.5: 0,
                2.5: 1
            }
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
        if scalar in correct[closed].keys():
            assert idx.get_loc(scalar) == correct[closed][scalar]
        else:
            pytest.raises(KeyError, idx.get_loc, scalar)

    def test_slice_locs_with_interval(self):

        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2),
                                end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
        assert index.slice_locs(start=Interval(2, 4),
                                end=Interval(0, 2)) == (2, 1)

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

        assert index.slice_locs(start=Interval(0, 2),
                                end=Interval(2, 4)) == (2, 1)
        assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(start=Interval(2, 4),
                                end=Interval(0, 2)) == (0, 3)

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2),
                                end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4),
                                end=Interval(0, 2)) == (2, 2)

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

        pytest.raises(
            KeyError, index.slice_locs(start=Interval(0, 2),
                                       end=Interval(2, 4)))
        pytest.raises(KeyError, index.slice_locs(start=Interval(0, 2)))
        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)
        pytest.raises(KeyError, index.slice_locs(end=Interval(0, 2)))
        pytest.raises(
            KeyError, index.slice_locs(start=Interval(2, 4),
                                       end=Interval(0, 2)))

        # another unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

        assert index.slice_locs(start=Interval(0, 2),
                                end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4),
                                end=Interval(0, 2)) == (2, 2)

    def test_slice_locs_with_ints_and_floats_succeeds(self):

        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])

        assert index.slice_locs(0, 1) == (0, 1)
        assert index.slice_locs(0, 2) == (0, 2)
        assert index.slice_locs(0, 3) == (0, 2)
        assert index.slice_locs(3, 1) == (2, 1)
        assert index.slice_locs(3, 4) == (2, 3)
        assert index.slice_locs(0, 4) == (0, 3)

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        assert index.slice_locs(0, 1) == (3, 2)
        assert index.slice_locs(0, 2) == (3, 1)
        assert index.slice_locs(0, 3) == (3, 1)
        assert index.slice_locs(3, 1) == (1, 2)
        assert index.slice_locs(3, 4) == (1, 0)
        assert index.slice_locs(0, 4) == (3, 0)

    @pytest.mark.parametrize("query",
                             [[0, 1], [0, 2], [0, 3], [3, 1], [3, 4], [0, 4]])
    @pytest.mark.parametrize("tuples",
                             [[(0, 2), (1, 3),
                               (2, 4)], [(2, 4), (1, 3),
                                         (0, 2)], [(0, 2), (0, 2), (2, 4)],
                              [(0, 2), (2, 4),
                               (0, 2)], [(0, 2), (0, 2), (2, 4), (1, 3)]])
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        index = IntervalIndex.from_tuples(tuples)
        with pytest.raises(KeyError):
            index.slice_locs(query)

    @pytest.mark.parametrize('query, expected',
                             [(Interval(1, 3, closed='right'), 1),
                              (Interval(1, 3, closed='left'), -1),
                              (Interval(1, 3, closed='both'), -1),
                              (Interval(1, 3, closed='neither'), -1),
                              (Interval(1, 4, closed='right'), -1),
                              (Interval(0, 4, closed='right'), -1),
                              (Interval(1, 2, closed='right'), -1)])
    def test_get_indexer_with_interval_single_queries(self, query, expected):

        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)],
                                          closed='right')

        result = index.get_indexer([query])
        expected = np.array([expected], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        'query, expected',
        [([Interval(2, 4, closed='right'),
           Interval(1, 3, closed='right')], [2, 1]),
         ([Interval(1, 3, closed='right'),
           Interval(0, 2, closed='right')], [1, -1]),
         ([Interval(1, 3, closed='right'),
           Interval(1, 3, closed='left')], [1, -1])])
    def test_get_indexer_with_interval_multiple_queries(self, query, expected):

        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)],
                                          closed='right')

        result = index.get_indexer(query)
        expected = np.array(expected, dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize('query, expected', [(-0.5, -1), (0, -1), (0.5, 0),
                                                 (1, 0), (1.5, 1), (2, 1),
                                                 (2.5, -1), (3, -1), (3.5, 2),
                                                 (4, 2), (4.5, -1)])
    def test_get_indexer_with_ints_and_floats_single_queries(
            self, query, expected):

        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)],
                                          closed='right')

        result = index.get_indexer([query])
        expected = np.array([expected], dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize('query, expected',
                             [([1, 2], [0, 1]), ([1, 2, 3], [0, 1, -1]),
                              ([1, 2, 3, 4], [0, 1, -1, 2]),
                              ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1])])
    def test_get_indexer_with_ints_and_floats_multiple_queries(
            self, query, expected):

        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)],
                                          closed='right')

        result = index.get_indexer(query)
        expected = np.array(expected, dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])
        # TODO: @shoyer believes this should raise, master branch doesn't

    @pytest.mark.parametrize(
        'query, expected',
        [(-0.5, (Int64Index([], dtype='int64'), np.array([0]))),
         (0, (Int64Index([0], dtype='int64'), np.array([]))),
         (0.5, (Int64Index([0], dtype='int64'), np.array([]))),
         (1, (Int64Index([0, 1], dtype='int64'), np.array([]))),
         (1.5, (Int64Index([0, 1], dtype='int64'), np.array([]))),
         (2, (Int64Index([0, 1, 2], dtype='int64'), np.array([]))),
         (2.5, (Int64Index([1, 2], dtype='int64'), np.array([]))),
         (3, (Int64Index([2], dtype='int64'), np.array([]))),
         (3.5, (Int64Index([2], dtype='int64'), np.array([]))),
         (4, (Int64Index([], dtype='int64'), np.array([0]))),
         (4.5, (Int64Index([], dtype='int64'), np.array([0])))])
    def test_get_indexer_non_unique_with_ints_and_floats_single_queries(
            self, query, expected):

        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)],
                                          closed='left')

        result = index.get_indexer_non_unique([query])
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        'query, expected',
        [([1, 2], (Int64Index([0, 1, 0, 1, 2], dtype='int64'), np.array([]))),
         ([1, 2, 3],
          (Int64Index([0, 1, 0, 1, 2, 2], dtype='int64'), np.array([]))),
         ([1, 2, 3, 4],
          (Int64Index([0, 1, 0, 1, 2, 2, -1], dtype='int64'), np.array([3]))),
         ([1, 2, 3, 4, 2], (Int64Index([0, 1, 0, 1, 2, 2, -1, 0, 1, 2],
                                       dtype='int64'), np.array([3])))])
    def test_get_indexer_non_unique_with_ints_and_floats_multiple_queries(
            self, query, expected):

        index = IntervalIndex.from_tuples([(0, 2.5), (1, 3), (2, 4)],
                                          closed='left')

        result = index.get_indexer_non_unique(query)
        tm.assert_numpy_array_equal(result, expected)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_contains(self):

        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right')

        # __contains__ requires perfect matches to intervals.
        assert 0 not in index
        assert 1 not in index
        assert 2 not in index

        assert Interval(0, 1, closed='right') in index
        assert Interval(0, 2, closed='right') not in index
        assert Interval(0, 0.5, closed='right') not in index
        assert Interval(3, 5, closed='right') not in index
        assert Interval(-1, 0, closed='left') not in index
        assert Interval(0, 1, closed='left') not in index
        assert Interval(0, 1, closed='both') not in index

    def test_contains_method(self):

        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed='right')

        assert not index.contains(0)
        assert index.contains(0.1)
        assert index.contains(0.5)
        assert index.contains(1)

        assert index.contains(Interval(0, 1, closed='right'))
        assert not index.contains(Interval(0, 1, closed='left'))
        assert not index.contains(Interval(0, 1, closed='both'))
        assert not index.contains(Interval(0, 2, closed='right'))

        assert not index.contains(Interval(0, 3, closed='right'))
        assert not index.contains(Interval(1, 3, closed='right'))

        assert not index.contains(20)
        assert not index.contains(-20)
Exemplo n.º 31
0
 def test_overlaps_self(self, start_shift, closed):
     start, shift = start_shift
     interval = Interval(start, start + shift, closed)
     assert interval.overlaps(interval)
Exemplo n.º 32
0
    def test_slice_locs_with_interval(self):

        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2),
                                end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
        assert index.slice_locs(start=Interval(2, 4),
                                end=Interval(0, 2)) == (2, 1)

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

        assert index.slice_locs(start=Interval(0, 2),
                                end=Interval(2, 4)) == (2, 1)
        assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(start=Interval(2, 4),
                                end=Interval(0, 2)) == (0, 3)

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2),
                                end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4),
                                end=Interval(0, 2)) == (2, 2)

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

        pytest.raises(
            KeyError, index.slice_locs(start=Interval(0, 2),
                                       end=Interval(2, 4)))
        pytest.raises(KeyError, index.slice_locs(start=Interval(0, 2)))
        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)
        pytest.raises(KeyError, index.slice_locs(end=Interval(0, 2)))
        pytest.raises(
            KeyError, index.slice_locs(start=Interval(2, 4),
                                       end=Interval(0, 2)))

        # another unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

        assert index.slice_locs(start=Interval(0, 2),
                                end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4),
                                end=Interval(0, 2)) == (2, 2)
Exemplo n.º 33
0
    def test_errors(self):
        # not enough params
        msg = (
            "Of the four parameters: start, end, periods, and freq, "
            "exactly three must be specified"
        )

        with pytest.raises(ValueError, match=msg):
            interval_range(start=0)

        with pytest.raises(ValueError, match=msg):
            interval_range(end=5)

        with pytest.raises(ValueError, match=msg):
            interval_range(periods=2)

        with pytest.raises(ValueError, match=msg):
            interval_range()

        # too many params
        with pytest.raises(ValueError, match=msg):
            interval_range(start=0, end=5, periods=6, freq=1.5)

        # mixed units
        msg = "start, end, freq need to be type compatible"
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=Timestamp("20130101"), freq=2)

        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=Timedelta("1 day"), freq=2)

        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=10, freq="D")

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timestamp("20130101"), end=10, freq="D")

        with pytest.raises(TypeError, match=msg):
            interval_range(
                start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D"
            )

        with pytest.raises(TypeError, match=msg):
            interval_range(
                start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2
            )

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timedelta("1 day"), end=10, freq="D")

        with pytest.raises(TypeError, match=msg):
            interval_range(
                start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D"
            )

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2)

        # invalid periods
        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, periods="foo")

        # invalid start
        msg = "start must be numeric or datetime-like, got foo"
        with pytest.raises(ValueError, match=msg):
            interval_range(start="foo", periods=10)

        # invalid end
        msg = r"end must be numeric or datetime-like, got \(0, 1\]"
        with pytest.raises(ValueError, match=msg):
            interval_range(end=Interval(0, 1), periods=10)

        # invalid freq for datetime-like
        msg = "freq must be numeric or convertible to DateOffset, got foo"
        with pytest.raises(ValueError, match=msg):
            interval_range(start=0, end=10, freq="foo")

        with pytest.raises(ValueError, match=msg):
            interval_range(start=Timestamp("20130101"), periods=10, freq="foo")

        with pytest.raises(ValueError, match=msg):
            interval_range(end=Timedelta("1 day"), periods=10, freq="foo")

        # mixed tz
        start = Timestamp("2017-01-01", tz="US/Eastern")
        end = Timestamp("2017-01-07", tz="US/Pacific")
        msg = "Start and end cannot both be tz-aware with different timezones"
        with pytest.raises(TypeError, match=msg):
            interval_range(start=start, end=end)
Exemplo n.º 34
0
class TestAstype:
    @pytest.mark.parametrize("dtype", np.typecodes["All"])
    def test_astype_empty_constructor_equality(self, dtype):
        # see GH#15524

        if dtype not in (
            "S",
            "V",  # poor support (if any) currently
            "M",
            "m",  # Generic timestamps raise a ValueError. Already tested.
        ):
            init_empty = Series([], dtype=dtype)
            with tm.assert_produces_warning(DeprecationWarning):
                as_type_empty = Series([]).astype(dtype)
            tm.assert_series_equal(init_empty, as_type_empty)

    @pytest.mark.parametrize("dtype", [str, np.str_])
    @pytest.mark.parametrize(
        "series",
        [
            Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]),
        ],
    )
    def test_astype_str_map(self, dtype, series):
        # see GH#4405
        result = series.astype(dtype)
        expected = series.map(str)
        tm.assert_series_equal(result, expected)

    def test_astype_float_to_period(self):
        result = Series([np.nan]).astype("period[D]")
        expected = Series([NaT], dtype="period[D]")
        tm.assert_series_equal(result, expected)

    def test_astype_no_pandas_dtype(self):
        # https://github.com/pandas-dev/pandas/pull/24866
        ser = Series([1, 2], dtype="int64")
        # Don't have PandasDtype in the public API, so we use `.array.dtype`,
        # which is a PandasDtype.
        result = ser.astype(ser.array.dtype)
        tm.assert_series_equal(result, ser)

    @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
    def test_astype_generic_timestamp_no_frequency(self, dtype, request):
        # see GH#15524, GH#15987
        data = [1]
        s = Series(data)

        if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
            mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
            request.node.add_marker(mark)

        msg = (
            fr"The '{dtype.__name__}' dtype has no unit\. "
            fr"Please pass in '{dtype.__name__}\[ns\]' instead."
        )
        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)

    def test_astype_dt64_to_str(self):
        # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
        dti = date_range("2012-01-01", periods=3)
        result = Series(dti).astype(str)
        expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
        tm.assert_series_equal(result, expected)

    def test_astype_dt64tz_to_str(self):
        # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
        dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern")
        result = Series(dti_tz).astype(str)
        expected = Series(
            [
                "2012-01-01 00:00:00-05:00",
                "2012-01-02 00:00:00-05:00",
                "2012-01-03 00:00:00-05:00",
            ],
            dtype=object,
        )
        tm.assert_series_equal(result, expected)

    def test_astype_datetime(self):
        s = Series(iNaT, dtype="M8[ns]", index=range(5))

        s = s.astype("O")
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0)])

        s = s.astype("O")
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])

        s[1] = np.nan
        assert s.dtype == "M8[ns]"

        s = s.astype("O")
        assert s.dtype == np.object_

    def test_astype_datetime64tz(self):
        s = Series(date_range("20130101", periods=3, tz="US/Eastern"))

        # astype
        result = s.astype(object)
        expected = Series(s.astype(object), dtype=object)
        tm.assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
        tm.assert_series_equal(result, s)

        # astype - object, preserves on construction
        result = Series(s.astype(object))
        expected = s.astype(object)
        tm.assert_series_equal(result, expected)

        # astype - datetime64[ns, tz]
        with tm.assert_produces_warning(FutureWarning):
            # dt64->dt64tz astype deprecated
            result = Series(s.values).astype("datetime64[ns, US/Eastern]")
        tm.assert_series_equal(result, s)

        with tm.assert_produces_warning(FutureWarning):
            # dt64->dt64tz astype deprecated
            result = Series(s.values).astype(s.dtype)
        tm.assert_series_equal(result, s)

        result = s.astype("datetime64[ns, CET]")
        expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
        tm.assert_series_equal(result, expected)

    def test_astype_str_cast_dt64(self):
        # see GH#9757
        ts = Series([Timestamp("2010-01-04 00:00:00")])
        s = ts.astype(str)

        expected = Series(["2010-01-04"])
        tm.assert_series_equal(s, expected)

        ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
        s = ts.astype(str)

        expected = Series(["2010-01-04 00:00:00-05:00"])
        tm.assert_series_equal(s, expected)

    def test_astype_str_cast_td64(self):
        # see GH#9757

        td = Series([Timedelta(1, unit="d")])
        ser = td.astype(str)

        expected = Series(["1 days"])
        tm.assert_series_equal(ser, expected)

    def test_dt64_series_astype_object(self):
        dt64ser = Series(date_range("20130101", periods=3))
        result = dt64ser.astype(object)
        assert isinstance(result.iloc[0], datetime)
        assert result.dtype == np.object_

    def test_td64_series_astype_object(self):
        tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
        result = tdser.astype(object)
        assert isinstance(result.iloc[0], timedelta)
        assert result.dtype == np.object_

    @pytest.mark.parametrize(
        "values",
        [
            Series(["x", "y", "z"], dtype="string"),
            Series(["x", "y", "z"], dtype="category"),
            Series(3 * [Timestamp("2020-01-01", tz="UTC")]),
            Series(3 * [Interval(0, 1)]),
        ],
    )
    @pytest.mark.parametrize("errors", ["raise", "ignore"])
    def test_astype_ignores_errors_for_extension_dtypes(self, values, errors):
        # https://github.com/pandas-dev/pandas/issues/35471
        if errors == "ignore":
            expected = values
            result = values.astype(float, errors="ignore")
            tm.assert_series_equal(result, expected)
        else:
            msg = "(Cannot cast)|(could not convert)"
            with pytest.raises((ValueError, TypeError), match=msg):
                values.astype(float, errors=errors)

    @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
    def test_astype_from_float_to_str(self, dtype):
        # https://github.com/pandas-dev/pandas/issues/36451
        s = Series([0.1], dtype=dtype)
        result = s.astype(str)
        expected = Series(["0.1"])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "value, string_value",
        [
            (None, "None"),
            (np.nan, "nan"),
            (NA, "<NA>"),
        ],
    )
    def test_astype_to_str_preserves_na(self, value, string_value):
        # https://github.com/pandas-dev/pandas/issues/36904
        s = Series(["a", "b", value], dtype=object)
        result = s.astype(str)
        expected = Series(["a", "b", string_value], dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
    def test_astype(self, dtype):
        s = Series(np.random.randn(5), name="foo")
        as_typed = s.astype(dtype)

        assert as_typed.dtype == dtype
        assert as_typed.name == s.name

    @pytest.mark.parametrize("value", [np.nan, np.inf])
    @pytest.mark.parametrize("dtype", [np.int32, np.int64])
    def test_astype_cast_nan_inf_int(self, dtype, value):
        # gh-14265: check NaN and inf raise error when converting to int
        msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
        s = Series([value])

        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)

    @pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
    def test_astype_cast_object_int_fail(self, dtype):
        arr = Series(["car", "house", "tree", "1"])
        msg = r"invalid literal for int\(\) with base 10: 'car'"
        with pytest.raises(ValueError, match=msg):
            arr.astype(dtype)

    def test_astype_cast_object_int(self):
        arr = Series(["1", "2", "3", "4"], dtype=object)
        result = arr.astype(int)

        tm.assert_series_equal(result, Series(np.arange(1, 5)))

    def test_astype_unicode(self):
        # see GH#7758: A bit of magic is required to set
        # default encoding to utf-8
        digits = string.digits
        test_series = [
            Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series(["データーサイエンス、お前はもう死んでいる"]),
        ]

        former_encoding = None

        if sys.getdefaultencoding() == "utf-8":
            test_series.append(Series(["野菜食べないとやばい".encode()]))

        for s in test_series:
            res = s.astype("unicode")
            expec = s.map(str)
            tm.assert_series_equal(res, expec)

        # Restore the former encoding
        if former_encoding is not None and former_encoding != "utf-8":
            reload(sys)
            sys.setdefaultencoding(former_encoding)

    def test_astype_bytes(self):
        # GH#39474
        result = Series(["foo", "bar", "baz"]).astype(bytes)
        assert result.dtypes == np.dtype("S3")
Exemplo n.º 35
0
 def test_overlaps_invalid_type(self, other):
     interval = Interval(0, 1)
     msg = '`other` must be an Interval, got {other}'.format(
         other=type(other).__name__)
     with tm.assert_raises_regex(TypeError, msg):
         interval.overlaps(other)
Exemplo n.º 36
0
 def test_length_timestamp(self, tz, left, right, expected):
     # GH 18789
     iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz))
     result = iv.length
     expected = Timedelta(expected)
     assert result == expected