示例#1
0
    def test_float_subtype(self, start, end, freq):
        # Has float subtype if any of start/end/freq are float, even if all
        # resulting endpoints can safely be upcast to integers

        # defined from start/end/freq
        index = interval_range(start=start, end=end, freq=freq)
        result = index.dtype.subtype
        expected = 'int64' if is_integer(start + end + freq) else 'float64'
        assert result == expected

        # defined from start/periods/freq
        index = interval_range(start=start, periods=5, freq=freq)
        result = index.dtype.subtype
        expected = 'int64' if is_integer(start + freq) else 'float64'
        assert result == expected

        # defined from end/periods/freq
        index = interval_range(end=end, periods=5, freq=freq)
        result = index.dtype.subtype
        expected = 'int64' if is_integer(end + freq) else 'float64'
        assert result == expected

        # GH 20976: linspace behavior defined from start/end/periods
        index = interval_range(start=start, end=end, periods=5)
        result = index.dtype.subtype
        expected = 'int64' if is_integer(start + end) else 'float64'
        assert result == expected
示例#2
0
    def test_constructor_timestamp(self, closed, name, freq, periods, tz):
        """Compare ``interval_range`` over 2018 against ``from_breaks``.

        The expected index is built from ``date_range`` breaks running from
        2018-01-01 to 2018-12-31 (both created with ``tz``) at ``freq``.
        """
        first = Timestamp('20180101', tz=tz)
        last = Timestamp('20181231', tz=tz)
        breaks = date_range(start=first, end=last, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        endpoint_specs = (
            # defined from start/end/freq
            dict(start=first, end=last, freq=freq),
            # defined from start/periods/freq
            dict(start=first, periods=periods, freq=freq),
            # defined from end/periods/freq
            dict(end=last, periods=periods, freq=freq),
        )
        for spec in endpoint_specs:
            result = interval_range(name=name, closed=closed, **spec)
            tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        # matches expected only for non-anchored offsets and tz naive
        # (anchored/DST transitions cause unequal spacing in expected)
        if breaks.freq.isAnchored() or tz is not None:
            return
        result = interval_range(start=first, end=last, periods=periods,
                                name=name, closed=closed)
        tm.assert_index_equal(result, expected)
示例#3
0
    def test_interval_array_equal_message(self):
        """Check the error raised for interval arrays with different left sides.

        Two four-interval arrays starting at 0 and at 1 are compared; the
        ``AssertionError`` text must match the expected report (a regex).
        """
        left = pd.interval_range(0, periods=4).values
        right = pd.interval_range(1, periods=4).values

        msg = textwrap.dedent("""\
            IntervalArray.left are different

            IntervalArray.left values are different \\(100.0 %\\)
            \\[left\\]:  Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\)
            \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""")
        # pytest.raises(match=...) replaces the removed tm.assert_raises_regex
        # helper; both apply the pattern with re.search semantics.
        with pytest.raises(AssertionError, match=msg):
            tm.assert_interval_array_equal(left, right)
def test_interval_array_equal_periods_mismatch():
    """Arrays of 5 and 6 intervals starting at 0 report a length mismatch."""
    shorter = interval_range(start=0, periods=5).values
    longer = interval_range(start=0, periods=6).values

    # Regex for the expected assertion text; brackets/parens are escaped.
    expected_msg = (
        "IntervalArray.left are different\n"
        "\n"
        "IntervalArray.left length are different\n"
        r"\[left\]:  5, Int64Index\(\[0, 1, 2, 3, 4\], dtype='int64'\)" "\n"
        r"\[right\]: 6, Int64Index\(\[0, 1, 2, 3, 4, 5\], dtype='int64'\)"
    )

    with pytest.raises(AssertionError, match=expected_msg):
        assert_interval_array_equal(shorter, longer)
def test_interval_array_equal_end_mismatch():
    """Arrays from 0 with 5 periods but ends 10 and 20 report differing values."""
    ends_at_10 = interval_range(start=0, periods=5, end=10).values
    ends_at_20 = interval_range(start=0, periods=5, end=20).values

    # Regex for the expected assertion text; brackets/parens are escaped.
    expected_msg = (
        "IntervalArray.left are different\n"
        "\n"
        r"IntervalArray.left values are different \(80.0 %\)" "\n"
        r"\[left\]:  Int64Index\(\[0, 2, 4, 6, 8\], dtype='int64'\)" "\n"
        r"\[right\]: Int64Index\(\[0, 4, 8, 12, 16\], dtype='int64'\)"
    )

    with pytest.raises(AssertionError, match=expected_msg):
        assert_interval_array_equal(ends_at_10, ends_at_20)
def test_interval_array_equal_start_mismatch():
    """Four-period arrays starting at 0 and at 1 report differing values."""
    from_zero = interval_range(start=0, periods=4).values
    from_one = interval_range(start=1, periods=4).values

    # Regex for the expected assertion text; brackets/parens are escaped.
    expected_msg = (
        "IntervalArray.left are different\n"
        "\n"
        r"IntervalArray.left values are different \(100.0 %\)" "\n"
        r"\[left\]:  Int64Index\(\[0, 1, 2, 3\], dtype='int64'\)" "\n"
        r"\[right\]: Int64Index\(\[1, 2, 3, 4\], dtype='int64'\)"
    )

    with pytest.raises(AssertionError, match=expected_msg):
        assert_interval_array_equal(from_zero, from_one)
def test_interval_array_equal_closed_mismatch():
    """Arrays differing only in ``closed`` report the attribute mismatch."""
    left_closed = interval_range(start=0, periods=5, closed="left").values
    right_closed = interval_range(start=0, periods=5, closed="right").values

    # Regex for the expected assertion text; brackets are escaped.
    expected_msg = (
        "IntervalArray are different\n"
        "\n"
        'Attribute "closed" are different\n'
        r"\[left\]:  left" "\n"
        r"\[right\]: right"
    )

    with pytest.raises(AssertionError, match=expected_msg):
        assert_interval_array_equal(left_closed, right_closed)
示例#8
0
    def test_set_incompatible_types(self, closed, op_name, sort):
        """Exercise a set operation against incompatible operands.

        Covers a non-IntervalIndex operand, IntervalIndex operands closed on
        a different side, and an IntervalIndex with an incompatible dtype.
        """
        index = monotonic_index(0, 11, closed=closed)
        set_op = getattr(index, op_name)

        # TODO: standardize return type of non-union setops type(self vs other)
        # non-IntervalIndex
        if op_name != 'difference':
            object_op = getattr(index.astype('O'), op_name)
            expected = object_op(Index([1, 2, 3]))
        else:
            expected = index
        result = set_op(Index([1, 2, 3]), sort=sort)
        tm.assert_index_equal(result, expected)

        # mixed closed
        closed_msg = ('can only do set operations between two IntervalIndex '
                      'objects that are closed on the same side')
        every_closed = {'right', 'left', 'both', 'neither'}
        for other_closed in every_closed - {closed}:
            mismatched = monotonic_index(0, 11, closed=other_closed)
            with pytest.raises(ValueError, match=closed_msg):
                set_op(mismatched, sort=sort)

        # GH 19016: incompatible dtypes
        datetime_index = interval_range(Timestamp('20180101'), periods=9,
                                        closed=closed)
        dtype_msg = ('can only do {op} between two IntervalIndex objects that '
                     'have compatible dtypes').format(op=op_name)
        with pytest.raises(TypeError, match=dtype_msg):
            set_op(datetime_index, sort=sort)
示例#9
0
    def test_subtype_integer_errors(self):
        """Float interval indexes that cannot become integer-like raise."""
        # float64 -> uint64 fails with negative values
        negative = interval_range(-10.0, 10.0)
        unsigned = IntervalDtype('uint64')
        with pytest.raises(ValueError):
            negative.astype(unsigned)

        # float64 -> integer-like fails with non-integer valued floats
        fractional = interval_range(0.0, 10.0, freq=0.25)
        for subtype in ('int64', 'uint64'):
            # build the dtype outside the context manager so only the
            # conversion itself is expected to raise
            target = IntervalDtype(subtype)
            with pytest.raises(ValueError):
                fractional.astype(target)
示例#10
0
def test_cython_agg_empty_buckets_nanops(observed):
    """Check cython 'add' and 'prod' aggregation over binned groups.

    GH-18869: can't call nanops on empty groups, so the expected values
    for these are hardcoded.
    """
    df = pd.DataFrame([11, 12, 13], columns=['a'])
    grps = range(0, 25, 5)
    intervals = pd.interval_range(0, 20, freq=5)

    # (cython op, expected value per bucket, value expected for empty buckets)
    scenarios = [
        ('add', [0, 0, 36, 0], 0),      # add / sum
        ('prod', [1, 1, 1716, 1], 1),   # prod
    ]
    for how, per_bucket, empty_value in scenarios:
        grouped = df.groupby(pd.cut(df['a'], grps), observed=observed)
        result = grouped._cython_agg_general(how)

        bucket_index = pd.CategoricalIndex(intervals, name='a', ordered=True)
        expected = pd.DataFrame({"a": per_bucket}, index=bucket_index)
        if observed:
            # with observed=True the rows for empty buckets are dropped
            expected = expected[expected.a != empty_value]

        tm.assert_frame_equal(result, expected)
示例#11
0
def _fully_loaded_dataframe_example():
    from distutils.version import LooseVersion

    index = pd.MultiIndex.from_arrays([
        pd.date_range('2000-01-01', periods=5).repeat(2),
        np.tile(np.array(['foo', 'bar'], dtype=object), 5)
    ])

    c1 = pd.date_range('2000-01-01', periods=10)
    data = {
        0: c1,
        1: c1.tz_localize('utc'),
        2: c1.tz_localize('US/Eastern'),
        3: c1[::2].tz_localize('utc').repeat(2).astype('category'),
        4: ['foo', 'bar'] * 5,
        5: pd.Series(['foo', 'bar'] * 5).astype('category').values,
        6: [True, False] * 5,
        7: np.random.randn(10),
        8: np.random.randint(0, 100, size=10),
        9: pd.period_range('2013', periods=10, freq='M')
    }

    if LooseVersion(pd.__version__) >= '0.21':
        # There is an issue with pickling IntervalIndex in pandas 0.20.x
        data[10] = pd.interval_range(start=1, freq=1, periods=10)

    return pd.DataFrame(data, index=index)
示例#12
0
    def test_subtype_datetimelike(self):
        """Converting between datetime and timedelta interval subtypes raises."""
        msg = 'Cannot convert .* to .*; subtypes are incompatible'

        # datetime -> timedelta raises (tz-naive first, then tz-aware)
        timedelta_dtype = IntervalDtype('timedelta64[ns]')
        for tz in (None, 'CET'):
            index = interval_range(Timestamp('2018-01-01', tz=tz), periods=10)
            with pytest.raises(TypeError, match=msg):
                index.astype(timedelta_dtype)

        # timedelta -> datetime raises
        datetime_dtype = IntervalDtype('datetime64[ns]')
        index = interval_range(Timedelta('0 days'), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(datetime_dtype)
示例#13
0
    def test_no_invalid_float_truncation(self, start, end, freq):
        # GH 21161
        if freq is None:
            breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
        else:
            breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
        expected = IntervalIndex.from_breaks(breaks)

        result = interval_range(start=start, end=end, periods=4, freq=freq)
        tm.assert_index_equal(result, expected)
示例#14
0
    def test_constructor_coverage(self):
        """Equivalent spellings of the arguments produce equal interval ranges."""
        # float value for periods
        expected = interval_range(start=0, periods=10)
        tm.assert_index_equal(interval_range(start=0, periods=10.5), expected)

        # equivalent timestamp-like start/end
        start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15')
        expected = interval_range(start=start, end=end)
        timestamp_likes = [
            (start.to_pydatetime(), end.to_pydatetime()),
            (start.asm8, end.asm8),
        ]
        for lo, hi in timestamp_likes:
            tm.assert_index_equal(interval_range(start=lo, end=hi), expected)

        # equivalent freq with timestamp
        daily = ['D', Day(), Timedelta(days=1), timedelta(days=1),
                 DateOffset(days=1)]
        for freq in daily:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

        # equivalent timedelta-like start/end
        start, end = Timedelta(days=1), Timedelta(days=10)
        expected = interval_range(start=start, end=end)
        timedelta_likes = [
            (start.to_pytimedelta(), end.to_pytimedelta()),
            (start.asm8, end.asm8),
        ]
        for lo, hi in timedelta_likes:
            tm.assert_index_equal(interval_range(start=lo, end=hi), expected)

        # equivalent freq with timedelta
        daily = ['D', Day(), Timedelta(days=1), timedelta(days=1)]
        for freq in daily:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)
示例#15
0
    def test_subtype_integer(self, subtype):
        """A float interval index converts to the requested integer subtype."""
        index = interval_range(0.0, 10.0)
        dtype = IntervalDtype(subtype)

        result = index.astype(dtype)
        converted_left = index.left.astype(subtype)
        converted_right = index.right.astype(subtype)
        expected = IntervalIndex.from_arrays(converted_left, converted_right,
                                             closed=index.closed)
        tm.assert_index_equal(result, expected)

        # raises with NA
        na_msg = 'Cannot convert NA to integer'
        with pytest.raises(ValueError, match=na_msg):
            index.insert(0, np.nan).astype(dtype)
示例#16
0
    def test_constructor_numeric(self, closed, name, freq, periods):
        start, end = 0, 100
        breaks = np.arange(101, step=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq
        result = interval_range(
            start=start, end=end, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from start/periods/freq
        result = interval_range(
            start=start, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from end/periods/freq
        result = interval_range(
            end=end, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed)
        tm.assert_index_equal(result, expected)
示例#17
0
    def test_constructor_timedelta(self, closed, name, freq, periods):
        start, end = Timedelta('0 days'), Timedelta('100 days')
        breaks = timedelta_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq
        result = interval_range(
            start=start, end=end, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from start/periods/freq
        result = interval_range(
            start=start, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # defined from end/periods/freq
        result = interval_range(
            end=end, periods=periods, freq=freq, name=name, closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed)
        tm.assert_index_equal(result, expected)
示例#18
0
    def test_set_operation_errors(self, closed, op_name):
        """Set operations with incompatible operands raise informative errors.

        Covers a non-IntervalIndex operand, IntervalIndex operands closed on
        a different side, and an IntervalIndex with an incompatible dtype.
        ``pytest.raises(match=...)`` replaces the removed
        ``tm.assert_raises_regex`` helper; both search the message with the
        given regex.
        """
        index = self.create_index(closed=closed)
        set_op = getattr(index, op_name)

        # non-IntervalIndex
        msg = ('can only do set operations between two IntervalIndex objects '
               'that are closed on the same side')
        with pytest.raises(ValueError, match=msg):
            set_op(Index([1, 2, 3]))

        # mixed closed
        for other_closed in {'right', 'left', 'both', 'neither'} - {closed}:
            other = self.create_index(closed=other_closed)
            with pytest.raises(ValueError, match=msg):
                set_op(other)

        # GH 19016: incompatible dtypes
        other = interval_range(Timestamp('20180101'), periods=9, closed=closed)
        msg = ('can only do {op} between two IntervalIndex objects that have '
               'compatible dtypes').format(op=op_name)
        with pytest.raises(TypeError, match=msg):
            set_op(other)
示例#19
0
    def test_construction_from_timedelta(self, closed):
        # combinations of start/end/periods without freq
        start, end = Timedelta('1 day'), Timedelta('6 days')
        breaks = timedelta_range(start=start, end=end)
        expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed)

        result = interval_range(start=start, end=end, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start, periods=5, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(end=end, periods=5, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        # combinations of start/end/periods with fixed freq
        freq = '2D'
        start, end = Timedelta('1 day'), Timedelta('7 days')
        breaks = timedelta_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed)

        result = interval_range(start=start, end=end, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start, periods=3, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(end=end, periods=3, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        # output truncates early if freq causes end to be skipped.
        end = Timedelta('7 days 1 hour')
        result = interval_range(start=start, end=end, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)
示例#20
0
    def test_construction_from_numeric(self, closed):
        # combinations of start/end/periods without freq
        expected = IntervalIndex.from_breaks(
            np.arange(0, 6), name='foo', closed=closed)

        result = interval_range(start=0, end=5, name='foo', closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(start=0, periods=5, name='foo', closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(end=5, periods=5, name='foo', closed=closed)
        tm.assert_index_equal(result, expected)

        # combinations of start/end/periods with freq
        expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)],
                                             name='foo', closed=closed)

        result = interval_range(start=0, end=6, freq=2, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(start=0, periods=3, freq=2, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(end=6, periods=3, freq=2, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        # output truncates early if freq causes end to be skipped.
        expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)],
                                             name='foo', closed=closed)
        result = interval_range(start=0, end=4, freq=1.5, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)
示例#21
0
class TestIntervalIndex(Base):
    _holder = IntervalIndex

    def setup_method(self, method):
        """Create the index fixtures stored on the test instance."""
        plain = IntervalIndex.from_arrays([0, 1], [1, 2])
        with_nan = IntervalIndex.from_tuples([(0, 1), np.nan, (1, 2)])

        self.index = plain
        self.index_with_nan = with_nan
        self.indices = {"intervalIndex": tm.makeIntervalIndex(10)}

    def create_index(self, closed="right"):
        """Return the ten-interval index built from the breaks 0 through 10."""
        breaks = range(11)
        return IntervalIndex.from_breaks(breaks, closed=closed)

    def create_index_with_nan(self, closed="right"):
        """Return a ten-element index whose second entry has NaN endpoints."""
        # False marks the position whose endpoints are replaced by NaN
        keep = [True, False] + [True] * 8
        left = np.where(keep, np.arange(10), np.nan)
        right = np.where(keep, np.arange(1, 11), np.nan)
        return IntervalIndex.from_arrays(left, right, closed=closed)

    def test_properties(self, closed):
        """Check size, endpoints, midpoints, ``closed`` and array conversion.

        Runs once on the plain index and once on the index containing NaN.
        """

        def assert_ten_elements(idx):
            assert len(idx) == 10
            assert idx.size == 10
            assert idx.shape == (10, )

        index = self.create_index(closed=closed)
        assert_ten_elements(index)

        tm.assert_index_equal(index.left, Index(np.arange(10)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))

        assert index.closed == closed

        intervals = [Interval(low, low + 1, closed) for low in range(10)]
        expected = np.array(intervals, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert_ten_elements(index)

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        endpoint_checks = (
            (index.left, expected_left),
            (index.right, expected_right),
            (index.mid, expected_mid),
        )
        for actual, wanted in endpoint_checks:
            tm.assert_index_equal(actual, wanted)

        assert index.closed == closed

        intervals = []
        for low, high in zip(expected_left, expected_right):
            if notna(low):
                intervals.append(Interval(low, high, closed))
            else:
                intervals.append(np.nan)
        expected = np.array(intervals, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
            pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        """``IntervalIndex.length`` equals the per-interval lengths (GH 18789)."""
        # GH 18789
        without_na = IntervalIndex.from_breaks(breaks, closed=closed)
        lengths = without_na.length
        per_interval = Index(interval.length for interval in without_na)
        tm.assert_index_equal(lengths, per_interval)

        # with NA
        with_na = without_na.insert(1, np.nan)
        lengths = with_na.length
        # a missing entry is expected to pass through unchanged
        per_interval = Index(
            interval.length if notna(interval) else interval
            for interval in with_na
        )
        tm.assert_index_equal(lengths, per_interval)

    def test_with_nans(self, closed):
        """Check ``hasnans``, ``isna`` and ``notna`` with and without NaN."""
        clean = self.create_index(closed=closed)
        assert clean.hasnans is False

        size = len(clean)
        tm.assert_numpy_array_equal(clean.isna(), np.repeat(False, size))
        tm.assert_numpy_array_equal(clean.notna(), np.repeat(True, size))

        dirty = self.create_index_with_nan(closed=closed)
        assert dirty.hasnans is True

        # only the second position is missing
        remaining = len(dirty) - 2
        missing_mask = np.array([False, True] + [False] * remaining)
        tm.assert_numpy_array_equal(dirty.isna(), missing_mask)

        present_mask = np.array([True, False] + [True] * remaining)
        tm.assert_numpy_array_equal(dirty.notna(), present_mask)

    def test_copy(self, closed):
        """Copies equal the original; a deep copy gets a distinct ``left``."""
        original = self.create_index(closed=closed)

        default_copy = original.copy()
        assert default_copy.equals(original)

        deep_copy = original.copy(deep=True)
        assert deep_copy.equals(original)
        assert deep_copy.left is not original.left

    def test_ensure_copied_data(self, closed):
        """Exercise the ``copy`` flag of the ``IntervalIndex`` constructor."""
        # exercise the copy flag in the constructor
        index = self.create_index(closed=closed)

        def assert_sides(result, check_same):
            # compare the endpoint arrays on both sides of the intervals
            for side in ("left", "right"):
                tm.assert_numpy_array_equal(getattr(index, side).values,
                                            getattr(result, side).values,
                                            check_same=check_same)

        # not copying
        assert_sides(IntervalIndex(index, copy=False), "same")

        # by-definition make a copy
        assert_sides(IntervalIndex(index._ndarray_values, copy=False), "copy")

    def test_equals(self, closed):
        """``equals`` ignores names but not type, values or ``closed``."""
        breaks = np.arange(5)
        expected = IntervalIndex.from_breaks(breaks, closed=closed)
        assert expected.equals(expected)
        assert expected.equals(expected.copy())

        unequal_objects = [
            expected.astype(object),
            np.array(expected),
            list(expected),
            [1, 2],
            np.array([1, 2]),
            pd.date_range("20130101", periods=2),
        ]
        for other in unequal_objects:
            assert not expected.equals(other)

        # differing names do not affect equality
        named_foo = IntervalIndex.from_breaks(np.arange(5), closed=closed,
                                              name="foo")
        named_bar = IntervalIndex.from_breaks(np.arange(5), closed=closed,
                                              name="bar")
        assert expected.equals(named_foo)
        assert named_foo.equals(named_bar)

        every_closed = {"left", "right", "both", "neither"}
        for other_closed in every_closed - {closed}:
            differently_closed = IntervalIndex.from_breaks(
                np.arange(5), closed=other_closed)
            assert not expected.equals(differently_closed)

    @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
    def test_where(self, closed, klass):
        """``where`` keeps entries under True and inserts NaN under False."""
        idx = self.create_index(closed=closed)
        size = len(idx)

        # an all-True condition leaves the index unchanged
        keep_all = klass([True] * size)
        tm.assert_index_equal(idx.where(keep_all), idx)

        # masking the first entry replaces it with NaN
        drop_first = klass([False] + [True] * (size - 1))
        expected = IntervalIndex([np.nan] + idx[1:].tolist())
        tm.assert_index_equal(idx.where(drop_first), expected)

    def test_delete(self, closed):
        """Deleting position 0 leaves the intervals built from breaks 1-10."""
        index = self.create_index(closed=closed)
        remaining = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
        tm.assert_index_equal(index.delete(0), remaining)

    @pytest.mark.parametrize(
        "data",
        [
            interval_range(0, periods=10, closed="neither"),
            interval_range(1.7, periods=8, freq=2.5, closed="both"),
            interval_range(Timestamp("20170101"), periods=12, closed="left"),
            interval_range(Timedelta("1 day"), periods=6, closed="right"),
        ],
    )
    def test_insert(self, data):
        """Check ``insert`` at several positions, with bad items and with NA."""
        item = data[0]
        idx_item = IntervalIndex([item])

        # (insert position, expected index)
        placements = [
            # start
            (0, idx_item.append(data)),
            # end
            (len(data), data.append(idx_item)),
            # mid
            (3, data[:3].append(idx_item).append(data[3:])),
        ]
        for position, expected in placements:
            result = data.insert(position, item)
            tm.assert_index_equal(result, expected)

        # invalid type
        type_msg = ("can only insert Interval objects and NA into an "
                    "IntervalIndex")
        with pytest.raises(ValueError, match=type_msg):
            data.insert(1, "foo")

        # invalid closed
        closed_msg = ("inserted item must be closed on the same side as the "
                      "index")
        every_closed = {"left", "right", "both", "neither"}
        for other_closed in every_closed - {item.closed}:
            with pytest.raises(ValueError, match=closed_msg):
                mismatched = Interval(item.left, item.right,
                                      closed=other_closed)
                data.insert(1, mismatched)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        expected = data[:1].append(na_idx).append(data[1:])
        for na in (np.nan, pd.NaT, None):
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_take(self, closed):
        """``take`` returns the selected intervals, including repeats."""
        index = self.create_index(closed=closed)

        # taking every position reproduces the index
        tm.assert_index_equal(index.take(range(10)), index)

        # repeated positions repeat the interval
        repeated = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2],
                                             closed=closed)
        tm.assert_index_equal(index.take([0, 0, 1]), repeated)

    def test_is_unique_interval(self, closed):
        """
        Interval specific tests for is_unique in addition to base class tests
        """
        unique_cases = [
            # unique overlapping - distinct endpoints
            [(0, 1), (0.5, 1.5)],
            # unique overlapping - shared endpoints
            [(1, 2), (1, 3), (2, 3)],
            # unique nested
            [(-1, 1), (-2, 2)],
        ]
        for tuples in unique_cases:
            idx = IntervalIndex.from_tuples(tuples, closed=closed)
            assert idx.is_unique is True

    def test_monotonic(self, closed):
        """Check the four monotonicity flags for a range of interval layouts."""

        def assert_flags(idx, increasing, strictly_increasing,
                         decreasing, strictly_decreasing):
            assert idx.is_monotonic is increasing
            assert idx._is_strictly_monotonic_increasing is strictly_increasing
            assert idx.is_monotonic_decreasing is decreasing
            assert idx._is_strictly_monotonic_decreasing is strictly_decreasing

        # Each row: tuples, then the expected flags in the order
        # (is_monotonic, strictly increasing, is_monotonic_decreasing,
        #  strictly decreasing).
        scenarios = [
            # increasing non-overlapping
            ([(0, 1), (2, 3), (4, 5)], True, True, False, False),
            # decreasing non-overlapping
            ([(4, 5), (2, 3), (1, 2)], False, False, True, True),
            # unordered non-overlapping
            ([(0, 1), (4, 5), (2, 3)], False, False, False, False),
            # increasing overlapping
            ([(0, 2), (0.5, 2.5), (1, 3)], True, True, False, False),
            # decreasing overlapping
            ([(1, 3), (0.5, 2.5), (0, 2)], False, False, True, True),
            # unordered overlapping
            ([(0.5, 2.5), (0, 2), (1, 3)], False, False, False, False),
            # increasing overlapping shared endpoints
            ([(1, 2), (1, 3), (2, 3)], True, True, False, False),
            # decreasing overlapping shared endpoints
            ([(2, 3), (1, 3), (1, 2)], False, False, True, True),
            # stationary
            ([(0, 1), (0, 1)], True, False, True, False),
        ]
        for tuples, *flags in scenarios:
            idx = IntervalIndex.from_tuples(tuples, closed=closed)
            assert_flags(idx, *flags)

        # empty
        assert_flags(IntervalIndex([], closed=closed), True, True, True, True)

    @pytest.mark.skip(reason="not a valid repr as we use interval notation")
    def test_repr(self):
        """Compare ``repr`` of integer and datetime IntervalIndexes to fixed text.

        The test is skipped through the decorator; the expected strings spell
        out separate ``left=`` / ``right=`` arrays.
        """
        i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed="right")
        expected = ("IntervalIndex(left=[0, 1],"
                    "\n              right=[1, 2],"
                    "\n              closed='right',"
                    "\n              dtype='interval[int64]')")
        assert repr(i) == expected

        # from_tuples takes ONE list-like of tuples.  Handing it the tuples as
        # separate positional arguments would bind the second tuple to
        # ``closed`` and clash with the ``closed="right"`` keyword.
        i = IntervalIndex.from_tuples(
            [
                (Timestamp("20130101"), Timestamp("20130102")),
                (Timestamp("20130102"), Timestamp("20130103")),
            ],
            closed="right",
        )
        expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02'],"
                    "\n              right=['2013-01-02', '2013-01-03'],"
                    "\n              closed='right',"
                    "\n              dtype='interval[datetime64[ns]]')")
        assert repr(i) == expected

    @pytest.mark.skip(reason="not a valid repr as we use interval notation")
    def test_repr_max_seq_item_setting(self):
        # Skipped override (reason given in the decorator).  If it were run it
        # would only delegate to the parent class's test of the same name.
        super().test_repr_max_seq_item_setting()

    @pytest.mark.skip(reason="not a valid repr as we use interval notation")
    def test_repr_roundtrip(self):
        # Skipped override (reason given in the decorator).  If it were run it
        # would only delegate to the parent class's test of the same name.
        super().test_repr_roundtrip()

    def test_frame_repr(self):
        # https://github.com/pandas-dev/pandas/pull/24134/files
        df = pd.DataFrame({"A": [1, 2, 3, 4]},
                          index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4]))
        result = repr(df)
        expected = "        A\n(0, 1]  1\n(1, 2]  2\n(2, 3]  3\n(3, 4]  4"
        assert result == expected

    @pytest.mark.parametrize(
        "constructor,expected",
        [
            (
                pd.Series,
                ("(0.0, 1.0]    a\n"
                 "NaN           b\n"
                 "(2.0, 3.0]    c\n"
                 "dtype: object"),
            ),
            (
                pd.DataFrame,
                ("            0\n(0.0, 1.0]  a\nNaN         b\n(2.0, 3.0]  c"),
            ),
        ],
    )
    def test_repr_missing(self, constructor, expected):
        """``repr`` of a Series/DataFrame whose interval index has a missing entry."""
        # GH 25984
        labels = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        container = constructor(list("abc"), index=labels)
        assert repr(container) == expected

    def test_get_item(self, closed):
        i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
                                      closed=closed)
        assert i[0] == Interval(0.0, 1.0, closed=closed)
        assert i[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(i[2])

        result = i[0:1]
        expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[0:2]
        expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[1:3]
        expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        """``get_loc`` of a scalar on a one-interval index: position 0 or KeyError."""
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        if scalar not in index[0]:
            # scalars outside the single interval are reported as missing keys
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)
            return
        assert index.get_loc(scalar) == 0

    @pytest.mark.parametrize("other_closed",
                             ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed,
                                         other_closed):
        """``get_loc`` of an Interval on a one-interval index needs an exact match."""
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        key = Interval(left, right, closed=other_closed)
        if key == index[0]:
            assert index.get_loc(key) == 0
            return

        # any non-identical interval must be reported by its repr in the KeyError
        pattern = re.escape(
            "Interval({left}, {right}, closed='{other_closed}')".format(
                left=left, right=right, other_closed=other_closed))
        with pytest.raises(KeyError, match=pattern):
            index.get_loc(key)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        """``get_loc`` with datetime-like keys on an index built from breaks."""
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)
        first = index[0]

        # scalar key: midpoint of the first interval
        assert index.get_loc(first.mid) == 0

        # interval key: rebuilt from the first interval's endpoints
        assert index.get_loc(Interval(first.left, first.right)) == 0

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101",
                        periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        """``get_loc`` with datetime-like keys on an index built from two arrays."""
        # GH 20636
        left, right = arrays
        index = IntervalIndex.from_arrays(left, right)
        first = index[0]

        # a point 12 hours past the first midpoint is located as a slice
        point = first.mid + Timedelta("12 hours")
        assert index.get_loc(point) == slice(0, 2, None)

        # an interval key equal to the first element is located at position 0
        assert index.get_loc(Interval(first.left, first.right)) == 0

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        """``get_loc`` finds the first interval of an index built from descending values."""
        # GH 25860
        lefts, rights = values[1:], values[:-1]
        index = IntervalIndex.from_arrays(lefts, rights)
        assert index.get_loc(index[0]) == 0

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        """Every target inside the single interval maps to position 0."""
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        all_zero = np.zeros(len(item), dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(item), all_zero)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        """Repeated copies of the single interval all map to position 0."""
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        target = [Interval(0, 5, closed)] * size
        all_zero = np.zeros(size, dtype="intp")
        tm.assert_numpy_array_equal(index.get_indexer(target), all_zero)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_maybe_convert_i8(self, breaks):
        """``_maybe_convert_i8`` on datetime-like keys of several container kinds."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)
        convert = index._maybe_convert_i8
        first, second = breaks[0], breaks[1]

        # intervalindex
        tm.assert_index_equal(convert(index),
                              IntervalIndex.from_breaks(breaks.asi8))

        # interval
        assert convert(Interval(first, second)) == Interval(first.value,
                                                            second.value)

        # datetimelike index
        tm.assert_index_equal(convert(breaks), Index(breaks.asi8))

        # datetimelike scalar
        assert convert(first) == first.value

        # list-like of datetimelike scalars
        tm.assert_index_equal(convert(list(breaks)), Index(breaks.asi8))

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("2018-01-01", periods=5),
            timedelta_range("0 days", periods=5)
        ],
    )
    def test_maybe_convert_i8_nat(self, breaks):
        """``_maybe_convert_i8`` on keys containing NaT is compared to a float index."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        # all-NaT key
        nat_key = breaks._constructor([pd.NaT] * 3)
        nan_expected = pd.Float64Index([np.nan] * 3)
        tm.assert_index_equal(index._maybe_convert_i8(nat_key), nan_expected)

        # same key with one real value prepended
        first = breaks[0]
        mixed_key = nat_key.insert(0, first)
        mixed_expected = nan_expected.insert(0, float(first.value))
        tm.assert_index_equal(index._maybe_convert_i8(mixed_key),
                              mixed_expected)

    @pytest.mark.parametrize(
        "breaks",
        [np.arange(5, dtype="int64"),
         np.arange(5, dtype="float64")],
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_numeric(self, breaks, make_key):
        """Numeric keys come back from ``_maybe_convert_i8`` as the same object."""
        # GH 20636
        numeric_index = IntervalIndex.from_breaks(breaks)
        key = make_key(breaks)

        # no conversion occurs for numeric, so identity (not just equality) holds
        assert numeric_index._maybe_convert_i8(key) is key

    @pytest.mark.parametrize(
        "breaks1, breaks2",
        permutations(
            [
                date_range("20180101", periods=4),
                date_range("20180101", periods=4, tz="US/Eastern"),
                timedelta_range("0 days", periods=4),
            ],
            2,
        ),
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
        """A key of a different datetime-like dtype raises ValueError naming both dtypes."""
        # GH 20636
        index = IntervalIndex.from_breaks(breaks1)
        mismatched_key = make_key(breaks2)

        template = ("Cannot index an IntervalIndex of subtype {dtype1} with "
                    "values of dtype {dtype2}")
        # dtype reprs are escaped before being used as a regex
        pattern = re.escape(
            template.format(dtype1=breaks1.dtype, dtype2=breaks2.dtype))
        with pytest.raises(ValueError, match=pattern):
            index._maybe_convert_i8(mismatched_key)

    def test_contains_method(self):
        """``contains`` returns a per-interval boolean mask for a scalar."""
        # can select values that are IN the range of a value
        index = IntervalIndex.from_arrays([0, 1], [1, 2])

        # (scalar, expected mask) pairs, checked in this order
        cases = [
            (0, [False, False]),
            (3, [False, False]),
            (0.5, [True, False]),
            (1, [True, False]),
        ]
        for scalar, mask in cases:
            expected = np.array(mask, dtype="bool")
            tm.assert_numpy_array_equal(index.contains(scalar), expected)

        # __contains__ not implemented for "interval in interval", follow
        # that for the contains method for now
        with pytest.raises(NotImplementedError,
                           match="contains not implemented for two"):
            index.contains(Interval(0, 1))

    def test_dropna(self, closed):

        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)],
                                             closed=closed)

        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan],
                                       closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

    def test_non_contiguous(self, closed):
        index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        target = [0.5, 1.5, 2.5]
        actual = index.get_indexer(target)
        expected = np.array([0, -1, 1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        assert 1.5 not in index

    def test_isin(self, closed):
        """``isin`` against index-like and list-like values, including other ``closed`` sides."""
        index = self.create_index(closed=closed)
        size = len(index)

        # only the first element is among the values
        first_only = np.array([True] + [False] * (size - 1))
        for values in (index[:1], [index[0]]):
            tm.assert_numpy_array_equal(index.isin(values), first_only)

        # every element except the last is among the values
        other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
        all_but_last = np.array([True] * (size - 1) + [False])
        for values in (other, other.tolist()):
            tm.assert_numpy_array_equal(index.isin(values), all_but_last)

        # membership holds only when both sides use the same ``closed``
        for other_closed in {"right", "left", "both", "neither"}:
            other = self.create_index(closed=other_closed)
            expected = np.repeat(closed == other_closed, size)
            for values in (other, other.tolist()):
                tm.assert_numpy_array_equal(index.isin(values), expected)

    def test_comparison(self):
        """Element-wise comparisons of ``self.index`` with intervals, itself, arrays and invalid operands."""
        index = self.index
        check = tm.assert_numpy_array_equal

        # scalar Interval against each element
        check(Interval(0, 1) < index, np.array([False, True]))
        second_only = np.array([False, True])
        check(Interval(0.5, 1.5) < index, second_only)
        check(index > Interval(0.5, 1.5), second_only)

        # the index against itself
        all_true = np.array([True, True])
        check(index == index, all_true)
        check(index <= index, all_true)
        check(index >= index, all_true)

        all_false = np.array([False, False])
        check(index < index, all_false)
        check(index > index, all_false)

        # same breaks but closed on the other side
        check(index == IntervalIndex.from_breaks([0, 1, 2], "left"), all_false)

        # the index against its own values, in both operand orders
        check(index == index.values, np.array([True, True]))
        check(index.values == index, np.array([True, True]))
        check(index <= index.values, np.array([True, True]))
        check(index != index.values, np.array([False, False]))
        check(index > index.values, np.array([False, False]))
        check(index.values > index, np.array([False, False]))

        # invalid comparisons
        check(index == 0, np.array([False, False]))
        check(index == index.left, np.array([False, False]))

        with pytest.raises(TypeError, match="unorderable types"):
            index > 0
        with pytest.raises(TypeError, match="unorderable types"):
            index <= 0
        with pytest.raises(
                TypeError,
                match=r"unorderable types: Interval\(\) > int\(\)"):
            index > np.arange(2)
        with pytest.raises(ValueError, match="Lengths must match to compare"):
            index > np.arange(3)

    def test_missing_values(self, closed):
        """Missing entries compare equal across constructors and must align left/right."""
        intervals = [Interval(lo, lo + 1, closed=closed) for lo in (0, 1)]
        from_objects = Index([np.nan] + intervals)
        from_endpoints = IntervalIndex.from_arrays([np.nan, 0, 1],
                                                   [np.nan, 1, 2],
                                                   closed=closed)
        assert from_objects.equals(from_endpoints)

        # a NaN on the left only is rejected
        msg = ("missing values must be missing in the same location both left"
               " and right sides")
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays([np.nan, 0, 1],
                                      np.array([0, 1, 2]),
                                      closed=closed)

        na_mask = np.array([True, False, False])
        tm.assert_numpy_array_equal(isna(from_objects), na_mask)

    def test_sort_values(self, closed):
        """``sort_values`` in both directions, without and with a missing entry."""
        index = self.create_index(closed=closed)

        tm.assert_index_equal(index.sort_values(), index)
        tm.assert_index_equal(index.sort_values(ascending=False), index[::-1])

        # with nan
        low, high = Interval(0, 1), Interval(1, 2)
        index = IntervalIndex([high, np.nan, low])

        ascending = IntervalIndex([low, high, np.nan])
        tm.assert_index_equal(index.sort_values(), ascending)

        descending = IntervalIndex([np.nan, high, low])
        tm.assert_index_equal(index.sort_values(ascending=False), descending)

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_datetime(self, tz):
        """Midpoints, membership, ``contains`` and ``get_indexer`` on a daily datetime index."""

        def stamp(text):
            # every timestamp in this test shares the parametrized timezone
            return Timestamp(text, tz=tz)

        dates = date_range(start=stamp("2000-01-01"), periods=10)
        index = IntervalIndex.from_breaks(dates)

        # test mid
        expected_mid = date_range(start=stamp("2000-01-01T12:00"), periods=9)
        tm.assert_index_equal(index.mid, expected_mid)

        # __contains__ doesn't check individual points
        for point in ("2000-01-01", "2000-01-01T12", "2000-01-02"):
            assert stamp(point) not in index
        iv_true = Interval(stamp("2000-01-02"), stamp("2000-01-03"))
        iv_false = Interval(stamp("1999-12-31"), stamp("2000-01-01"))
        assert iv_true in index
        assert iv_false not in index

        # .contains does check individual points
        assert not index.contains(stamp("2000-01-01")).any()
        assert index.contains(stamp("2000-01-01T12")).any()
        assert index.contains(stamp("2000-01-02")).any()

        # test get_indexer: (first target, target frequency, expected positions)
        indexer_cases = [
            ("1999-12-31T12:00", "12H", [-1, -1, 0, 0, 1, 1, 2]),
            ("2000-01-08T18:00", "6H", [7, 7, 8, 8, 8, 8, -1]),
        ]
        for first, freq, positions in indexer_cases:
            target = date_range(start=stamp(first), periods=7, freq=freq)
            expected = np.array(positions, dtype="intp")
            tm.assert_numpy_array_equal(index.get_indexer(target), expected)

    def test_append(self, closed):
        """``append`` concatenates same-``closed`` indexes and rejects a different ``closed``."""
        first = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
        second = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)

        # single index
        expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3],
                                             closed=closed)
        tm.assert_index_equal(first.append(second), expected)

        # list of indexes
        expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2],
                                             [1, 2, 1, 2, 2, 3],
                                             closed=closed)
        tm.assert_index_equal(first.append([first, second]), expected)

        # every other ``closed`` value must be refused
        msg = ("can only append two IntervalIndex objects that are closed "
               "on the same side")
        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            mismatched = IntervalIndex.from_arrays([0, 1], [1, 2],
                                                   closed=other_closed)
            with pytest.raises(ValueError, match=msg):
                first.append(mismatched)

    def test_is_non_overlapping_monotonic(self, closed):
        # Should be True in all cases
        tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        # Should be False in all cases (overlapping)
        tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False in all cases (non-monotonic)
        tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False for closed='both', otherwise True (GH16560)
        if closed == "both":
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is False
        else:
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is True

    @pytest.mark.parametrize(
        "start, shift, na_value",
        [
            (0, 1, np.nan),
            (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
            (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
        ],
    )
    def test_is_overlapping(self, start, shift, na_value, closed):
        """``is_overlapping`` for disjoint, overlapping and touching intervals, with and without NA."""
        # GH 23309
        # see test_interval_tree.py for extensive tests; interface tests here
        na_pair = [(na_value, na_value)]

        # (offsets, interval width in shifts, expected result)
        cases = [
            # non-overlapping
            ((0, 2, 4), 1, False),
            # overlapping
            (range(3), 2, True),
            # common endpoints
            (range(3), 1, closed == "both"),
        ]
        for offsets, width, expected in cases:
            tuples = [(start + n * shift, start + (n + width) * shift)
                      for n in offsets]
            # the same intervals again, padded with NA at both ends
            padded = na_pair + tuples + na_pair
            for data in (tuples, padded):
                index = IntervalIndex.from_tuples(data, closed=closed)
                assert index.is_overlapping is expected

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))),
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )),
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )),
        ],
    )
    def test_to_tuples(self, tuples):
        """``to_tuples`` round-trips the tuples the index was built from."""
        # GH 18756
        round_tripped = IntervalIndex.from_tuples(tuples).to_tuples()
        expected = Index(com.asarray_tuplesafe(tuples))
        tm.assert_index_equal(round_tripped, expected)

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))) + [np.nan],
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )) + [np.nan],
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )) + [np.nan],
        ],
    )
    @pytest.mark.parametrize("na_tuple", [True, False])
    def test_to_tuples_na(self, tuples, na_tuple):
        """``to_tuples`` with a trailing missing entry, for both ``na_tuple`` settings."""
        # GH 18756
        converted = IntervalIndex.from_tuples(tuples).to_tuples(
            na_tuple=na_tuple)

        # check the non-NA portion
        expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
        tm.assert_index_equal(converted[:-1], expected_notna)

        # check the NA portion
        last = converted[-1]
        if not na_tuple:
            assert isna(last)
            return
        assert isinstance(last, tuple)
        assert len(last) == 2
        assert all(isna(x) for x in last)

    def test_nbytes(self):
        # GH 19209
        left = np.arange(0, 4, dtype="i8")
        right = np.arange(1, 5, dtype="i8")

        result = IntervalIndex.from_arrays(left, right).nbytes
        expected = 64  # 4 * 8 * 2
        assert result == expected

    def test_itemsize(self):
        """``itemsize`` is 16 for int64 endpoints and emits a FutureWarning."""
        # GH 19209
        lefts = np.arange(0, 4, dtype="i8")
        rights = np.arange(1, 5, dtype="i8")

        # construction stays inside the context so any warning is observed there
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            observed = IntervalIndex.from_arrays(lefts, rights).itemsize

        # 8 * 2
        assert observed == 16

    @pytest.mark.parametrize("new_closed",
                             ["left", "right", "both", "neither"])
    def test_set_closed(self, name, closed, new_closed):
        """``set_closed`` yields the index that would be built with the new side directly."""
        # GH 21670
        original = interval_range(0, 5, closed=closed, name=name)
        expected = interval_range(0, 5, closed=new_closed, name=name)
        tm.assert_index_equal(original.set_closed(new_closed), expected)

    @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
    def test_set_closed_errors(self, bad_closed):
        """``set_closed`` raises ValueError naming the invalid option."""
        # GH 21670
        pattern = "invalid option for 'closed': {closed}".format(
            closed=bad_closed)
        index = interval_range(0, 5)
        with pytest.raises(ValueError, match=pattern):
            index.set_closed(bad_closed)

    def test_is_all_dates(self):
        """An IntervalIndex with Timestamp endpoints does not report ``is_all_dates``."""
        # GH 23576
        begin = pd.Timestamp("2017-01-01 00:00:00")
        finish = pd.Timestamp("2018-01-01 00:00:00")
        single_year = pd.IntervalIndex([pd.Interval(begin, finish)])
        assert not single_year.is_all_dates
示例#22
0
 def test_linspace_dst_transition(self, start, mid, end):
     # GH 20976: linspace behavior defined from start/end/periods
     # accounts for the hour gained/lost during DST transition
     result = interval_range(start=start, end=end, periods=2)
     expected = IntervalIndex.from_breaks([start, mid, end])
     tm.assert_index_equal(result, expected)
示例#23
0
 def f():
     """Call ``interval_range`` with an integer start and a Timestamp end."""
     # mixed endpoint types: a number for ``start``, a Timestamp for ``end``
     interval_range(start=0, end=Timestamp('20130101'), freq=2)
示例#24
0
    [["foo", 100], ["bar", 200]],
    index=pd.Float64Index([1.23, 2.34]),
)
st._arrow_table(df)

"## Int64Index"
# Each section below builds the same two-row DataFrame with a different kind
# of index and passes it to st._arrow_table; the bare string before a section
# names the index type being exercised.
# NOTE(review): the bare strings are presumably rendered as headings by
# Streamlit "magic" -- confirm.  They are runtime text and are left untouched.
df = pd.DataFrame(
    [["foo", 100], ["bar", 200]],
    index=[1, 2],
)
st._arrow_table(df)

"## IntervalIndex"
df = pd.DataFrame(
    [["foo", 100], ["bar", 200]],
    index=pd.interval_range(start=0, end=2),
)
st._arrow_table(df)

"## MultiIndex"
# index given as a list of two lists (one per level)
df = pd.DataFrame(
    [["foo", 100], ["bar", 200]],
    index=[["a", "b"], [1, 2]],
)
st._arrow_table(df)

"## PeriodIndex"
# NOTE(review): no st._arrow_table(df) call follows this frame in the visible
# text -- the example appears to be cut off here.
df = pd.DataFrame(
    [["foo", 100], ["bar", 200]],
    index=pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3]),
)
示例#25
0
def main(argv):
    """Produce the diagnostic figures for one saved estimation result.

    Loads the spikes, the estimation metadata (``.ini``) and the pickled
    estimation results identified by ``estResNumber``, then writes under
    ``figures/``: lower-bound histories, the estimated latent, the
    embedding, the CIF of one trial/neuron, a KS time-rescaling test, a ROC
    analysis, and the embedding and kernel parameters.

    Parameters
    ----------
    argv : list of str
        Accepted for the conventional ``main(sys.argv)`` call. It is not
        consulted: ``parser.parse_args()`` is called without arguments, so
        the options are read from ``sys.argv``.

    Raises
    ------
    ValueError
        If ``--neuronToPlot`` is not among the neuron indices stored with
        the estimation results.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("estResNumber", help="estimation result number", type=int)
    parser.add_argument("--latentToPlot", help="latent to plot", type=int, default=0)
    parser.add_argument("--neuronToPlot", help="neuron to plot", type=int, default=0)
    parser.add_argument("--trialToPlot", help="trial to plot", type=int, default=0)
    parser.add_argument("--dtCIF", help="time step used to sample the CIF", type=float, default=1.0)
    parser.add_argument("--ksTestGamma", help="gamma value for KS test", type=int, default=10)
    parser.add_argument("--nTestPoints", help="number of test points where to plot latents", type=int, default=2000)
    parser.add_argument("--location", help="location to analyze", type=int,
                        default=0)
    parser.add_argument("--trials_indices", help="trials indices to analyze",
                        default="[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]")
    parser.add_argument("--from_time", help="starting spike analysis time",
                        type=float, default=750.0)
    parser.add_argument("--to_time", help="ending spike analysis time",
                        type=float, default=2500.0)
    parser.add_argument("--min_nSpikes_perNeuron_perTrial",
                        help="min number of spikes per neuron per trial",
                        type=int, default=1)
    parser.add_argument("--data_filename", help="data filename",
                        default="~/dev/research/gatsby-swc/datasets/george20040123_hnlds.mat")
    args = parser.parse_args()

    estResNumber = args.estResNumber
    latentToPlot = args.latentToPlot
    neuronToPlot = args.neuronToPlot
    trialToPlot = args.trialToPlot
    dtCIF = args.dtCIF
    ksTestGamma = args.ksTestGamma
    nTestPoints = args.nTestPoints
    location = args.location
    # "[0,1,2]" -> [0, 1, 2]: strip the brackets, then split on commas
    trials_indices = [int(item) for item in args.trials_indices[1:-1].split(",")]
    from_time = args.from_time
    to_time = args.to_time
    min_nSpikes_perNeuron_perTrial = args.min_nSpikes_perNeuron_perTrial
    data_filename = args.data_filename

    # Input and output filenames. Patterns ending in "{{:s}}" keep a "{:s}"
    # placeholder that is filled with the file extension when saving.
    estimResMetaDataFilename = "results/{:08d}_estimation_metaData.ini".format(estResNumber)
    modelSaveFilename = "results/{:08d}_estimatedModel.pickle".format(estResNumber)
    lowerBoundHistVsIterNoFigFilenamePattern = "figures/{:08d}_lowerBoundHistVSIterNo.{{:s}}".format(estResNumber)
    lowerBoundHistVsElapsedTimeFigFilenamePattern = "figures/{:08d}_lowerBoundHistVsElapsedTime.{{:s}}".format(estResNumber)
    latentsFigFilenamePattern = "figures/{:08d}_estimatedLatent_latent{:03d}.{{:s}}".format(estResNumber, latentToPlot)
    embeddingsFigFilenamePattern = "figures/{:08d}_estimatedEmbedding_neuron{:d}.{{:s}}".format(estResNumber, neuronToPlot)
    embeddingParamsFigFilenamePattern = "figures/{:08d}_estimatedEmbeddingParams.{{:s}}".format(estResNumber)
    CIFFigFilenamePattern = "figures/{:08d}_CIF_trial{:03d}_neuron{:03d}.{{:s}}".format(estResNumber, trialToPlot, neuronToPlot)
    ksTestTimeRescalingNumericalCorrectionFigFilename = "figures/{:08d}_ksTestTimeRescaling_numericalCorrection_trial{:03d}_neuron{:03d}.png".format(estResNumber, trialToPlot, neuronToPlot)
    rocFigFilename = "figures/{:08d}_rocAnalysis_trial{:03d}_neuron{:03d}.png".format(estResNumber, trialToPlot, neuronToPlot)
    kernelsParamsFigFilenamePattern = "figures/{:08d}_estimatedKernelsParams.{{:s}}".format(estResNumber)

    # Spikes are selected with the command-line time window and trials.
    spikes_times, neurons_indices = shenoyUtils.getSpikesTimes(
        data_filename=data_filename,
        trials_indices=trials_indices,
        location=location,
        from_time=from_time,
        to_time=to_time,
        min_nSpikes_perNeuron_perTrial=min_nSpikes_perNeuron_perTrial)

    # From here on the time window comes from the estimation metadata and
    # replaces the command-line values used above.
    estimResConfig = configparser.ConfigParser()
    estimResConfig.read(estimResMetaDataFilename)
    nLatents = int(estimResConfig["data_params"]["nLatents"])
    from_time = float(estimResConfig["data_params"]["from_time"])
    to_time = float(estimResConfig["data_params"]["to_time"])
    trials = [float(item) for item in
              estimResConfig["data_params"]["trials_indices"][1:-1].split(",")]
    nTrials = len(trials)
    trial_times = torch.arange(from_time, to_time, dtCIF)
    trial_times_numpy = trial_times.detach().numpy()

    # NOTE: pickle.load executes arbitrary code; only open trusted results.
    with open(modelSaveFilename, "rb") as f:
        estResults = pickle.load(f)
    lowerBoundHist = estResults["lowerBoundHist"]
    elapsedTimeHist = estResults["elapsedTimeHist"]
    model = estResults["model"]
    # The neuron indices saved with the model replace those returned above.
    neurons_indices = estResults["neurons_indices"]
    neuronToPlot_index = torch.nonzero(torch.tensor(neurons_indices)==neuronToPlot)
    neurons_indices_str = "".join(str(i)+" " for i in neurons_indices)
    if len(neuronToPlot_index)==0:
        raise ValueError("Neuron {:d} is not valid. Valid neurons are ".format(neuronToPlot) + neurons_indices_str)
    # NOTE(review): the indexing below uses neuronToPlot itself rather than
    # neuronToPlot_index -- confirm that neuron labels and positions agree.

    # plot lower bound history
    fig = plot.svGPFA.plotUtilsPlotly.getPlotLowerBoundHist(lowerBoundHist=lowerBoundHist)
    fig.write_image(lowerBoundHistVsIterNoFigFilenamePattern.format("png"))
    fig.write_html(lowerBoundHistVsIterNoFigFilenamePattern.format("html"))

    fig = plot.svGPFA.plotUtilsPlotly.getPlotLowerBoundHist(elapsedTimeHist=elapsedTimeHist, lowerBoundHist=lowerBoundHist)
    fig.write_image(lowerBoundHistVsElapsedTimeFigFilenamePattern.format("png"))
    fig.write_html(lowerBoundHistVsElapsedTimeFigFilenamePattern.format("html"))

    # plot estimated latent across trials
    testMuK, testVarK = model.predictLatents(times=trial_times)
    indPointsLocs = model.getIndPointsLocs()
    fig = plot.svGPFA.plotUtilsPlotly.getPlotLatentAcrossTrials(times=trial_times, latentsMeans=testMuK, latentsSTDs=torch.sqrt(testVarK), indPointsLocs=indPointsLocs, latentToPlot=latentToPlot, xlabel="Time (msec)")
    fig.write_image(latentsFigFilenamePattern.format("png"))
    fig.write_html(latentsFigFilenamePattern.format("html"))

    # plot embedding
    embeddingMeans, embeddingVars = model.predictEmbedding(times=trial_times)
    embeddingMeans = embeddingMeans.detach().numpy()
    embeddingVars = embeddingVars.detach().numpy()
    title = "Neuron {:d}".format(neuronToPlot)
    fig = plot.svGPFA.plotUtilsPlotly.getPlotEmbeddingAcrossTrials(times=trial_times_numpy, embeddingsMeans=embeddingMeans[:,:,neuronToPlot], embeddingsSTDs=np.sqrt(embeddingVars[:,:,neuronToPlot]), title=title)
    fig.write_image(embeddingsFigFilenamePattern.format("png"))
    fig.write_html(embeddingsFigFilenamePattern.format("html"))

    # calculate expected CIF values (for KS test and CIF plots)
    with torch.no_grad():
        ePosCIFValues = model.computeExpectedPosteriorCIFs(times=trial_times)
    spikesTimesKS = spikes_times[trialToPlot][neuronToPlot]
    cifValuesKS = ePosCIFValues[trialToPlot][neuronToPlot]
    title = "Trial {:d}, Neuron {:d} ({:d} spikes)".format(trialToPlot, neuronToPlot, len(spikesTimesKS))

    # CIF
    fig = plot.svGPFA.plotUtilsPlotly.getPlotCIF(times=trial_times, values=ePosCIFValues[trialToPlot][neuronToPlot], title=title)
    fig.write_image(CIFFigFilenamePattern.format("png"))
    fig.write_html(CIFFigFilenamePattern.format("html"))

    # plot KS test time rescaling (numerical correction)
    diffECDFsX, diffECDFsY, estECDFx, estECDFy, simECDFx, simECDFy, cb = stats.pointProcess.tests.KSTestTimeRescalingNumericalCorrection(spikesTimes=spikesTimesKS, cifTimes=trial_times, cifValues=cifValuesKS, gamma=ksTestGamma)
    plot.svGPFA.plotUtils.plotResKSTestTimeRescalingNumericalCorrection(diffECDFsX=diffECDFsX, diffECDFsY=diffECDFsY, estECDFx=estECDFx, estECDFy=estECDFy, simECDFx=simECDFx, simECDFy=simECDFy, cb=cb, figFilename=ksTestTimeRescalingNumericalCorrectionFigFilename, title=title)
    plt.close("all")

    # ROC predictive analysis
    # pk scales each CIF sample by the time step; the spikes are counted in
    # len(pk) equal-width bins so both arrays passed to roc_curve line up.
    pk = cifValuesKS*dtCIF
    bins = pd.interval_range(start=int(min(trial_times)),
                             end=int(max(trial_times)), periods=len(pk))
    cutRes, _ = pd.cut(spikesTimesKS, bins=bins, retbins=True)
    Y = torch.from_numpy(cutRes.value_counts().values)
    fpr, tpr, thresholds = sklearn.metrics.roc_curve(Y, pk, pos_label=1)
    roc_auc = sklearn.metrics.auc(fpr, tpr)
    plot.svGPFA.plotUtils.plotResROCAnalysis(fpr=fpr, tpr=tpr, auc=roc_auc, title=title, figFilename=rocFigFilename)
    plt.close("all")

    # plot embedding parameters
    estimatedC, estimatedD = model.getSVEmbeddingParams()
    fig = plot.svGPFA.plotUtilsPlotly.getPlotEmbeddingParams(C=estimatedC.numpy(), d=estimatedD.numpy())
    fig.write_image(embeddingParamsFigFilenamePattern.format("png"))
    fig.write_html(embeddingParamsFigFilenamePattern.format("html"))

    # plot kernel parameters
    kernelsParams = model.getKernelsParams()
    kernelsTypes = [type(kernel).__name__ for kernel in model.getKernels()]
    fig = plot.svGPFA.plotUtilsPlotly.getPlotKernelsParams(
        kernelsTypes=kernelsTypes, kernelsParams=kernelsParams)
    fig.write_image(kernelsParamsFigFilenamePattern.format("png"))
    fig.write_html(kernelsParamsFigFilenamePattern.format("html"))
def test_interval_array_equal(kwargs):
    arr = interval_range(**kwargs).values
    assert_interval_array_equal(arr, arr)
示例#27
0
    def testDataSerialize(self):
        """Round-trip a range of objects through ``dataserializer``.

        Each section dumps an object and loads it back, then checks the
        restored value against the original: dense arrays for every
        serial-type/compress-type combination, a non-serializable object,
        structured arrays, file-based dump/load, sparse matrices and
        vectors, wrapped groupby objects, categoricals, an IntervalIndex,
        complex scalars, Arrow-backed frames and SparseDtype data.
        """
        # None is included next to the enum members so the serializer's
        # default is exercised as well.
        for type_, compress in itertools.product(
            (None, ) + tuple(dataserializer.SerialType.__members__.values()),
            (None, ) +
                tuple(dataserializer.CompressType.__members__.values())):
            # in-memory round trip via dumps/loads
            array = np.random.rand(1000, 100)
            assert_array_equal(
                array,
                dataserializer.loads(
                    dataserializer.dumps(array,
                                         serial_type=type_,
                                         compress=compress)))

            # same payload, read back through a file-like object
            array = np.random.rand(1000, 100)
            assert_array_equal(
                array,
                dataserializer.load(
                    BytesIO(
                        dataserializer.dumps(array,
                                             serial_type=type_,
                                             compress=compress))))

            array = np.random.rand(1000, 100).T  # test non c-contiguous
            assert_array_equal(
                array,
                dataserializer.loads(
                    dataserializer.dumps(array,
                                         serial_type=type_,
                                         compress=compress)))

            # numpy scalar
            array = np.float64(0.2345)
            assert_array_equal(
                array,
                dataserializer.loads(
                    dataserializer.dumps(array,
                                         serial_type=type_,
                                         compress=compress)))

        # test non-serializable object
        if pyarrow:
            non_serial = type('non_serial', (object, ), dict(nbytes=10))
            with self.assertRaises(SerializationFailed):
                dataserializer.dumps(non_serial())

        # test structured arrays.
        rec_dtype = np.dtype([('a', 'int64'), ('b', 'double'), ('c', '<U8')])
        array = np.ones((100, ), dtype=rec_dtype)
        array_loaded = dataserializer.loads(dataserializer.dumps(array))
        self.assertEqual(array.dtype, array_loaded.dtype)
        assert_array_equal(array, array_loaded)

        # file-based dump/load: default settings, then LZ4, then GZIP;
        # the temporary file is removed in the finally clause
        fn = os.path.join(tempfile.gettempdir(),
                          f'test_dump_file_{id(self)}.bin')
        try:
            array = np.random.rand(1000, 100).T  # test non c-contiguous
            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array, dump_file)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))

            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array,
                                    dump_file,
                                    compress=dataserializer.CompressType.LZ4)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))

            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array,
                                    dump_file,
                                    compress=dataserializer.CompressType.GZIP)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))
        finally:
            if os.path.exists(fn):
                os.unlink(fn)

        # test sparse
        if sps:
            # equality is checked by counting the entries where original
            # and restored matrices differ
            mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
            des_mat = dataserializer.loads(dataserializer.dumps(mat))
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

            des_mat = dataserializer.loads(
                dataserializer.dumps(mat,
                                     compress=dataserializer.CompressType.LZ4))
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

            des_mat = dataserializer.loads(
                dataserializer.dumps(
                    mat, compress=dataserializer.CompressType.GZIP))
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

            vector = sparse.SparseVector(sps.csr_matrix(np.random.rand(2)),
                                         shape=(2, ))
            des_vector = dataserializer.loads(dataserializer.dumps(vector))
            self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0)

            des_vector = dataserializer.loads(
                dataserializer.dumps(vector,
                                     compress=dataserializer.CompressType.LZ4))
            self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0)

            des_vector = dataserializer.loads(
                dataserializer.dumps(
                    vector, compress=dataserializer.CompressType.GZIP))
            self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0)

        # test groupby
        df1 = pd.DataFrame({
            'a': [3, 4, 5, 3, 5, 4, 1, 2, 3],
            'b': [1, 3, 4, 5, 6, 5, 4, 4, 4],
            'c': list('aabaaddce')
        })
        # frame grouped by a column
        grouped = wrapped_groupby(df1, 'b')
        restored = dataserializer.loads(dataserializer.dumps(grouped))
        assert_groupby_equal(grouped, restored.groupby_obj)

        # single column selected from the grouped frame
        grouped = wrapped_groupby(df1, 'b').c
        restored = dataserializer.loads(dataserializer.dumps(grouped))
        assert_groupby_equal(grouped, restored.groupby_obj)

        # `indices` is read before dumping -- presumably so the object is
        # serialized after that attribute has been computed; confirm
        grouped = wrapped_groupby(df1, 'b')
        getattr(grouped, 'indices')
        restored = dataserializer.loads(dataserializer.dumps(grouped))
        assert_groupby_equal(grouped, restored.groupby_obj)

        # series grouped by a callable
        grouped = wrapped_groupby(df1.b, lambda x: x % 2)
        restored = dataserializer.loads(dataserializer.dumps(grouped))
        assert_groupby_equal(grouped, restored.groupby_obj)

        grouped = wrapped_groupby(df1.b, lambda x: x % 2)
        getattr(grouped, 'indices')
        restored = dataserializer.loads(dataserializer.dumps(grouped))
        assert_groupby_equal(grouped, restored.groupby_obj)

        # test categorical
        s = np.random.RandomState(0).random(10)
        cat = pd.cut(s, [0.3, 0.5, 0.8])
        self.assertIsInstance(cat, pd.Categorical)
        des_cat = dataserializer.loads(dataserializer.dumps(cat))
        self.assertEqual(len(cat), len(des_cat))
        for c, dc in zip(cat, des_cat):
            np.testing.assert_equal(c, dc)

        # test IntervalIndex
        # positional arguments are start=10, end=100, periods=3
        s = pd.interval_range(10, 100, 3)
        dest_s = dataserializer.loads((dataserializer.dumps(s)))
        pd.testing.assert_index_equal(s, dest_s)

        # test complex
        s = complex(10 + 5j)
        dest_s = dataserializer.loads((dataserializer.dumps(s)))
        self.assertIs(type(s), type(dest_s))
        self.assertEqual(s, dest_s)

        s = np.complex64(10 + 5j)
        dest_s = dataserializer.loads((dataserializer.dumps(s)))
        self.assertIs(type(s), type(dest_s))
        self.assertEqual(s, dest_s)

        # test ArrowArray
        df = pd.DataFrame({
            'a': ['s1', 's2', 's3'],
            'b': [['s1', 's2'], ['s3'], ['s4', 's5']]
        })
        df['a'] = df['a'].astype(ArrowStringDtype())
        df['b'] = df['b'].astype(ArrowListDtype(str))
        dest_df = dataserializer.loads(dataserializer.dumps(df))
        self.assertIs(type(df), type(dest_df))
        pd.testing.assert_frame_equal(df, dest_df)

        # test DataFrame with SparseDtype
        s = pd.Series([1, 2, np.nan, np.nan,
                       3]).astype(pd.SparseDtype(np.dtype(np.float64), np.nan))
        dest_s = dataserializer.loads((dataserializer.dumps(s)))
        pd.testing.assert_series_equal(s, dest_s)
        df = pd.DataFrame({'s': s})
        dest_df = dataserializer.loads((dataserializer.dumps(df)))
        pd.testing.assert_frame_equal(df, dest_df)
示例#28
0
 def test_construction(self):
     result = interval_range(0, 5, name='foo', closed='both')
     expected = IntervalIndex.from_breaks(np.arange(0, 5),
                                          name='foo',
                                          closed='both')
     tm.assert_index_equal(result, expected)
percentage_half_more = 100 - percentile_rank_half_less

## 7. Finding Percentiles with pandas ##

wnba = pd.read_csv('wnba.csv')
# describe() returns a Series labelled 'count', 'mean', ..., '50%', '75%',
# 'max'. The entries are looked up by label: integer keys on a
# string-labelled Series rely on positional fallback, which recent pandas
# deprecates, and positions shift when the percentile list changes.
age_upper_quartile = wnba["Age"].describe()["75%"]
age_middle_quartile = wnba["Age"].describe()["50%"]
age_95th_percentile = wnba["Age"].describe(percentiles=[.95])["95%"]

question1 = True
question2 = False
question3 = True

## 8. Grouped Frequency Distribution Tables ##

wnba = pd.read_csv('wnba.csv')

# Share (in percent) of players in each of 10 equal-width point bins,
# listed from the highest bin to the lowest.
grouped_freq_table = wnba["PTS"].value_counts(
    bins=10, normalize=True).sort_index(ascending=False) * 100

## 10. Readability for Grouped Frequency Tables ##

wnba = pd.read_csv('wnba.csv')
# Ten intervals of width 60 covering 0 to 600, each starting with a count
# of zero.
interval = pd.interval_range(start=0, end=600, freq=60)
gr_freq_table_10 = pd.Series(0, index=interval)

# Count every score in the first interval that contains it; a score that
# falls in no interval is not counted.
for row in wnba["PTS"]:
    for i in interval:
        if row in i:
            gr_freq_table_10.loc[i] += 1
            break
示例#30
0
class TestIntervalIndex:
    index = IntervalIndex.from_arrays([0, 1], [1, 2])

    def create_index(self, closed="right"):
        """Return the ten unit intervals between 0 and 10, closed on ``closed``."""
        breaks = range(11)
        return IntervalIndex.from_breaks(breaks, closed=closed)

    def create_index_with_nan(self, closed="right"):
        """Return ten unit intervals where the second one is missing (NaN)."""
        keep = [True, False] + [True] * 8
        left = np.where(keep, np.arange(10), np.nan)
        right = np.where(keep, np.arange(1, 11), np.nan)
        return IntervalIndex.from_arrays(left, right, closed=closed)

    def test_properties(self, closed):
        index = self.create_index(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        tm.assert_index_equal(index.left, Index(np.arange(10)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))

        assert index.closed == closed

        ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        ivs = [
            Interval(l, r, closed) if notna(l) else np.nan
            for l, r in zip(expected_left, expected_right)
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
            # "min" and "h" are used instead of "M" and "H": the uppercase
            # aliases are ambiguous/deprecated timedelta units in pandas.
            pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5h", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        """``IntervalIndex.length`` must match the length of each interval.

        Covers integer, float (including infinite), datetime and timedelta
        breaks, with and without a missing interval (GH 18789).
        """
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA: a missing interval contributes the missing value itself
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)

    def test_with_nans(self, closed):
        index = self.create_index(closed=closed)
        assert index.hasnans is False

        result = index.isna()
        expected = np.zeros(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.ones(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        index = self.create_index_with_nan(closed=closed)
        assert index.hasnans is True

        result = index.isna()
        expected = np.array([False, True] + [False] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.array([True, False] + [True] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

    def test_copy(self, closed):
        expected = self.create_index(closed=closed)

        result = expected.copy()
        assert result.equals(expected)

        result = expected.copy(deep=True)
        assert result.equals(expected)
        assert result.left is not expected.left

    def test_ensure_copied_data(self, closed):
        # exercise the copy flag in the constructor

        # not copying
        index = self.create_index(closed=closed)
        result = IntervalIndex(index, copy=False)
        tm.assert_numpy_array_equal(index.left.values,
                                    result.left.values,
                                    check_same="same")
        tm.assert_numpy_array_equal(index.right.values,
                                    result.right.values,
                                    check_same="same")

        # by-definition make a copy
        result = IntervalIndex(np.array(index), copy=False)
        tm.assert_numpy_array_equal(index.left.values,
                                    result.left.values,
                                    check_same="copy")
        tm.assert_numpy_array_equal(index.right.values,
                                    result.right.values,
                                    check_same="copy")

    def test_delete(self, closed):
        expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
        result = self.create_index(closed=closed).delete(0)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            interval_range(0, periods=10, closed="neither"),
            interval_range(1.7, periods=8, freq=2.5, closed="both"),
            interval_range(Timestamp("20170101"), periods=12, closed="left"),
            interval_range(Timedelta("1 day"), periods=6, closed="right"),
        ],
    )
    def test_insert(self, data):
        """Check ``insert`` at several positions and with invalid or NA items."""
        item = data[0]
        single = IntervalIndex([item])

        # inserting the first interval again at the start, end and middle
        placements = [
            (0, single.append(data)),
            (len(data), data.append(single)),
            (3, data[:3].append(single).append(data[3:])),
        ]
        for position, expected in placements:
            tm.assert_index_equal(data.insert(position, item), expected)

        # invalid type
        msg = "can only insert Interval objects and NA into an IntervalIndex"
        with pytest.raises(ValueError, match=msg):
            data.insert(1, "foo")

        # invalid closed: every side other than the index's own
        msg = "inserted item must be closed on the same side as the index"
        for closed in ("left", "right", "both", "neither"):
            if closed == item.closed:
                continue
            with pytest.raises(ValueError, match=msg):
                bad_item = Interval(item.left, item.right, closed=closed)
                data.insert(1, bad_item)

        # GH 18295 (test missing): every NA flavour yields a missing interval
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        expected = data[:1].append(na_idx).append(data[1:])
        for na in (np.nan, pd.NaT, None):
            tm.assert_index_equal(data.insert(1, na), expected)

    def test_is_unique_interval(self, closed):
        """
        Interval specific tests for is_unique in addition to base class tests
        """
        # unique overlapping - distinct endpoints
        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
        assert idx.is_unique is True

        # unique overlapping - shared endpoints
        idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)],
                                           closed=closed)
        assert idx.is_unique is True

        # unique nested
        idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
        assert idx.is_unique is True

    def test_monotonic(self, closed):
        # increasing non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)],
                                        closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing non-overlapping
        idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping
        idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)],
                                        closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping
        idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered overlapping
        idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)],
                                        closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping shared endpoints
        idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)],
                                           closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping shared endpoints
        idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)],
                                           closed=closed)
        assert idx.is_monotonic is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # stationary
        idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is False

        # empty
        idx = IntervalIndex([], closed=closed)
        assert idx.is_monotonic is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

    def test_get_item(self, closed):
        i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
                                      closed=closed)
        assert i[0] == Interval(0.0, 1.0, closed=closed)
        assert i[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(i[2])

        result = i[0:1]
        expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[0:2]
        expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

        result = i[1:3]
        expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan),
                                             closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_maybe_convert_i8(self, breaks):
        """``_maybe_convert_i8`` maps datetime-like keys to their i8 values.

        GH 20636. Checked for an IntervalIndex, an Interval, a
        datetime-like index, a scalar and a list of scalars.
        """
        idx = IntervalIndex.from_breaks(breaks)
        i8_breaks = Index(breaks.asi8)

        # intervalindex
        converted = idx._maybe_convert_i8(idx)
        tm.assert_index_equal(converted,
                              IntervalIndex.from_breaks(breaks.asi8))

        # interval
        first, second = breaks[0], breaks[1]
        converted = idx._maybe_convert_i8(Interval(first, second))
        assert converted == Interval(first.value, second.value)

        # datetimelike index
        converted = idx._maybe_convert_i8(breaks)
        tm.assert_index_equal(converted, i8_breaks)

        # datetimelike scalar
        converted = idx._maybe_convert_i8(first)
        assert converted == first.value

        # list-like of datetimelike scalars
        converted = idx._maybe_convert_i8(list(breaks))
        tm.assert_index_equal(converted, i8_breaks)

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("2018-01-01", periods=5),
            timedelta_range("0 days", periods=5)
        ],
    )
    def test_maybe_convert_i8_nat(self, breaks):
        """``_maybe_convert_i8`` must turn NaT into NaN in a float64 index.

        GH 20636. The expected index is built with
        ``pd.Index(..., dtype="float64")`` because the dedicated
        ``Float64Index`` class is not available in pandas 2.0 and later.
        """
        index = IntervalIndex.from_breaks(breaks)

        # all-NaT input converts to all-NaN
        to_convert = breaks._constructor([pd.NaT] * 3)
        expected = pd.Index([np.nan] * 3, dtype="float64")
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

        # a valid value next to NaT converts to its i8 value as a float
        to_convert = to_convert.insert(0, breaks[0])
        expected = expected.insert(0, float(breaks[0].value))
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [np.arange(5, dtype="int64"),
         np.arange(5, dtype="float64")],
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_numeric(self, breaks, make_key):
        """Numeric keys must be handed back untouched (GH 20636)."""
        index = IntervalIndex.from_breaks(breaks)
        key = make_key(breaks)

        # identity rather than equality: no conversion may take place
        assert index._maybe_convert_i8(key) is key

    @pytest.mark.parametrize(
        "breaks1, breaks2",
        permutations(
            [
                date_range("20180101", periods=4),
                date_range("20180101", periods=4, tz="US/Eastern"),
                timedelta_range("0 days", periods=4),
            ],
            2,
        ),
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
        """A key whose dtype differs from the index subtype is rejected.

        GH 20636.  ``breaks1`` builds the index and ``breaks2`` builds the
        key; the expected message names both dtypes.
        """
        index = IntervalIndex.from_breaks(breaks1)
        key = make_key(breaks2)

        # the dtype reprs can contain regex metacharacters, hence the escape
        expected_msg = re.escape(
            f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
            f"values of dtype {breaks2.dtype}")
        with pytest.raises(ValueError, match=expected_msg):
            index._maybe_convert_i8(key)

    def test_contains_method(self):
        """``contains`` reports, per interval, whether a point lies in it."""
        index = IntervalIndex.from_arrays([0, 1], [1, 2])

        # (point, expected mask over the two intervals)
        cases = [
            (0, [False, False]),
            (3, [False, False]),
            (0.5, [True, False]),
            (1, [True, False]),
        ]
        for point, mask in cases:
            tm.assert_numpy_array_equal(index.contains(point),
                                        np.array(mask, dtype="bool"))

        # __contains__ not implemented for "interval in interval", follow
        # that for the contains method for now
        with pytest.raises(NotImplementedError,
                           match="contains not implemented for two"):
            index.contains(Interval(0, 1))

    def test_contains_dunder(self):

        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        assert 0 not in index
        assert 1 not in index
        assert 2 not in index

        assert Interval(0, 1, closed="right") in index
        assert Interval(0, 2, closed="right") not in index
        assert Interval(0, 0.5, closed="right") not in index
        assert Interval(3, 5, closed="right") not in index
        assert Interval(-1, 0, closed="left") not in index
        assert Interval(0, 1, closed="left") not in index
        assert Interval(0, 1, closed="both") not in index

    def test_dropna(self, closed):

        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)],
                                             closed=closed)

        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan],
                                       closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

    def test_non_contiguous(self, closed):
        index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        target = [0.5, 1.5, 2.5]
        actual = index.get_indexer(target)
        expected = np.array([0, -1, 1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        assert 1.5 not in index

    def test_isin(self, closed):
        """``isin`` accepts index and list inputs and respects ``closed``."""
        index = self.create_index(closed=closed)
        size = len(index)

        # values holding only the first interval
        first_only = np.array([True] + [False] * (size - 1))
        for values in (index[:1], [index[0]]):
            tm.assert_numpy_array_equal(index.isin(values), first_only)

        # values built from other breaks: all but the last interval match
        other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
        all_but_last = np.array([True] * (size - 1) + [False])
        for values in (other, other.tolist()):
            tm.assert_numpy_array_equal(index.isin(values), all_but_last)

        # same create_index data with every closed side: a match is expected
        # only when the closed side agrees
        for other_closed in {"right", "left", "both", "neither"}:
            other = self.create_index(closed=other_closed)
            expected = np.repeat(closed == other_closed, size)
            for values in (other, other.tolist()):
                tm.assert_numpy_array_equal(index.isin(values), expected)

    def test_comparison(self):
        """Elementwise comparison operators on ``self.index``."""
        index = self.index
        values = index.values
        both_true = np.array([True, True])
        both_false = np.array([False, False])
        second_only = np.array([False, True])

        # a scalar Interval on either side of the operator
        tm.assert_numpy_array_equal(Interval(0, 1) < index, second_only)
        tm.assert_numpy_array_equal(Interval(0.5, 1.5) < index, second_only)
        tm.assert_numpy_array_equal(index > Interval(0.5, 1.5), second_only)

        # the index against itself
        tm.assert_numpy_array_equal(index == index, both_true)
        tm.assert_numpy_array_equal(index <= index, both_true)
        tm.assert_numpy_array_equal(index >= index, both_true)
        tm.assert_numpy_array_equal(index < index, both_false)
        tm.assert_numpy_array_equal(index > index, both_false)

        # an index built from breaks [0, 1, 2] but closed on the left
        left_closed = IntervalIndex.from_breaks([0, 1, 2], "left")
        tm.assert_numpy_array_equal(index == left_closed, both_false)

        # the index against its own values, in both operand orders
        tm.assert_numpy_array_equal(index == values, both_true)
        tm.assert_numpy_array_equal(values == index, both_true)
        tm.assert_numpy_array_equal(index <= values, both_true)
        tm.assert_numpy_array_equal(index != values, both_false)
        tm.assert_numpy_array_equal(index > values, both_false)
        tm.assert_numpy_array_equal(values > index, both_false)

        # invalid comparisons: equality is simply False elementwise ...
        tm.assert_numpy_array_equal(index == 0, both_false)
        tm.assert_numpy_array_equal(index == index.left, both_false)

        # ... whereas ordering against non-interval values raises
        msg = ("not supported between instances of 'int' and "
               "'pandas._libs.interval.Interval'")
        with pytest.raises(TypeError, match=msg):
            index > 0
        with pytest.raises(TypeError, match=msg):
            index <= 0
        with pytest.raises(TypeError, match=msg):
            index > np.arange(2)

        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            index > np.arange(3)

    def test_missing_values(self, closed):
        """NaN is accepted only when both sides are missing together."""
        from_intervals = Index([
            np.nan,
            Interval(0, 1, closed=closed),
            Interval(1, 2, closed=closed)
        ])
        from_sides = IntervalIndex.from_arrays([np.nan, 0, 1],
                                               [np.nan, 1, 2],
                                               closed=closed)
        assert from_intervals.equals(from_sides)

        # NaN on the left only must be rejected
        msg = ("missing values must be missing in the same location both left "
               "and right sides")
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays([np.nan, 0, 1],
                                      np.array([0, 1, 2]),
                                      closed=closed)

        na_mask = np.array([True, False, False])
        tm.assert_numpy_array_equal(isna(from_intervals), na_mask)

    def test_sort_values(self, closed):
        """Sorting in both directions, with and without a missing entry."""
        index = self.create_index(closed=closed)

        # the created index is expected to be sorted already
        tm.assert_index_equal(index.sort_values(), index)
        tm.assert_index_equal(index.sort_values(ascending=False), index[::-1])

        # with nan: last when ascending, first when descending
        low, high = Interval(0, 1), Interval(1, 2)
        with_nan = IntervalIndex([high, np.nan, low])

        tm.assert_index_equal(with_nan.sort_values(),
                              IntervalIndex([low, high, np.nan]))
        tm.assert_index_equal(with_nan.sort_values(ascending=False),
                              IntervalIndex([np.nan, high, low]))

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_datetime(self, tz):
        """mid, membership, ``contains`` and ``get_indexer`` on datetimes."""

        def ts(stamp):
            # every timestamp in this test shares the parametrized timezone
            return Timestamp(stamp, tz=tz)

        dates = date_range(start=ts("2000-01-01"), periods=10)
        index = IntervalIndex.from_breaks(dates)

        # test mid
        expected_mid = date_range(start=ts("2000-01-01T12:00"), periods=9)
        tm.assert_index_equal(index.mid, expected_mid)

        # __contains__ doesn't check individual points
        for stamp in ("2000-01-01", "2000-01-01T12", "2000-01-02"):
            assert ts(stamp) not in index
        assert Interval(ts("2000-01-02"), ts("2000-01-03")) in index
        assert Interval(ts("1999-12-31"), ts("2000-01-01")) not in index

        # .contains does check individual points
        assert not index.contains(ts("2000-01-01")).any()
        assert index.contains(ts("2000-01-01T12")).any()
        assert index.contains(ts("2000-01-02")).any()

        # test get_indexer: (first target stamp, step, expected positions)
        cases = [
            ("1999-12-31T12:00", "12H", [-1, -1, 0, 0, 1, 1, 2]),
            ("2000-01-08T18:00", "6H", [7, 7, 8, 8, 8, 8, -1]),
        ]
        for first, freq, positions in cases:
            target = date_range(start=ts(first), periods=7, freq=freq)
            tm.assert_numpy_array_equal(index.get_indexer(target),
                                        np.array(positions, dtype="intp"))

    def test_append(self, closed):
        """Appending one index, a list of indexes, and a mismatched one."""

        def build(left, right, side=closed):
            return IntervalIndex.from_arrays(left, right, closed=side)

        index1 = build([0, 1], [1, 2])
        index2 = build([1, 2], [2, 3])

        # single index
        tm.assert_index_equal(index1.append(index2),
                              build([0, 1, 1, 2], [1, 2, 2, 3]))

        # list of indexes
        tm.assert_index_equal(index1.append([index1, index2]),
                              build([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3]))

        # any other closed side must be refused
        msg = "Intervals must all be closed on the same side"
        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            index_other_closed = build([0, 1], [1, 2], side=other_closed)
            with pytest.raises(ValueError, match=msg):
                index1.append(index_other_closed)

    def test_is_non_overlapping_monotonic(self, closed):
        # Should be True in all cases
        tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        # Should be False in all cases (overlapping)
        tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False in all cases (non-monotonic)
        tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False for closed='both', otherwise True (GH16560)
        if closed == "both":
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is False
        else:
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is True

    @pytest.mark.parametrize(
        "start, shift, na_value",
        [
            (0, 1, np.nan),
            (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
            (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
        ],
    )
    def test_is_overlapping(self, start, shift, na_value, closed):
        # GH 23309
        # see test_interval_tree.py for extensive tests; interface tests here

        # non-overlapping
        tuples = [(start + n * shift, start + (n + 1) * shift)
                  for n in (0, 2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # non-overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is False

        # overlapping
        tuples = [(start + n * shift, start + (n + 2) * shift)
                  for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # overlapping with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        assert index.is_overlapping is True

        # common endpoints
        tuples = [(start + n * shift, start + (n + 1) * shift)
                  for n in range(3)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        expected = closed == "both"
        assert result is expected

        # common endpoints with NA
        tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index.is_overlapping
        assert result is expected

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))),
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )),
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )),
        ],
    )
    def test_to_tuples(self, tuples):
        """``to_tuples`` round-trips the tuples the index was built from.

        GH 18756.
        """
        expected = Index(com.asarray_tuplesafe(tuples))
        converted = IntervalIndex.from_tuples(tuples).to_tuples()
        tm.assert_index_equal(converted, expected)

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))) + [np.nan],
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )) + [np.nan],
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )) + [np.nan],
        ],
    )
    @pytest.mark.parametrize("na_tuple", [True, False])
    def test_to_tuples_na(self, tuples, na_tuple):
        """``na_tuple`` selects how the missing entry is rendered.

        GH 18756.  The last element of ``tuples`` is the NA entry.
        """
        index = IntervalIndex.from_tuples(tuples)
        converted = index.to_tuples(na_tuple=na_tuple)

        # check the non-NA portion
        expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
        tm.assert_index_equal(converted[:-1], expected_notna)

        # check the NA portion
        last = converted[-1]
        if not na_tuple:
            assert isna(last)
            return
        assert isinstance(last, tuple)
        assert len(last) == 2
        assert all(isna(x) for x in last)

    def test_nbytes(self):
        # GH 19209
        left = np.arange(0, 4, dtype="i8")
        right = np.arange(1, 5, dtype="i8")

        result = IntervalIndex.from_arrays(left, right).nbytes
        expected = 64  # 4 * 8 * 2
        assert result == expected

    @pytest.mark.parametrize("new_closed",
                             ["left", "right", "both", "neither"])
    def test_set_closed(self, name, closed, new_closed):
        # GH 21670
        index = interval_range(0, 5, closed=closed, name=name)
        result = index.set_closed(new_closed)
        expected = interval_range(0, 5, closed=new_closed, name=name)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
    def test_set_closed_errors(self, bad_closed):
        """An unrecognised ``closed`` value raises ValueError (GH 21670)."""
        msg = f"invalid option for 'closed': {bad_closed}"
        index = interval_range(0, 5)
        with pytest.raises(ValueError, match=msg):
            index.set_closed(bad_closed)

    def test_is_all_dates(self):
        """An IntervalIndex over timestamps is not ``is_all_dates``.

        GH 23576.
        """
        start = pd.Timestamp("2017-01-01 00:00:00")
        end = pd.Timestamp("2018-01-01 00:00:00")
        year_2017_index = pd.IntervalIndex([pd.Interval(start, end)])
        assert not year_2017_index.is_all_dates

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_value_non_scalar_errors(self, key):
        """Non-scalar keys passed to ``get_value`` raise InvalidIndexError.

        GH 31117.  The call is wrapped in ``assert_produces_warning`` because
        it is also expected to emit a FutureWarning.
        """
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10),
                                         (3, 10)])
        s = pd.Series(range(len(idx)), index=idx)

        # Escape the key text: "[5]" and "(2, 3)" contain regex
        # metacharacters, so used raw as a pattern they would match far more
        # than the literal key.
        msg = re.escape(str(key))
        with pytest.raises(InvalidIndexError, match=msg):
            with tm.assert_produces_warning(FutureWarning):
                idx.get_value(s, key)
示例#31
0
 def f():
     # Calls interval_range with only ``periods`` given.
     # NOTE(review): presumably the enclosing test (not visible here)
     # expects this call to raise -- confirm against the caller.
     interval_range(periods=2)
(the prefix "uni-" means "only one").
When a distribution has more than two modes, we say that the distribution is multimodal 
(the prefix "multi-" means many).
We can also have cases when there is no mode at all.
Consider the number of bedrooms in houses: [1, 1, 2, 2, 3, 3, 4, 4]
Each unique value occurs twice in the distribution above, so there's no value 
(or values) that occurs more often than others. For this reason, this distribution 
doesn't have a mode. Contextually, we could say that there's no typical house 
on the market with respect to the number of bedrooms.
The workaround is to organize the continuous variable in a grouped frequency table, 
and select for the mode the midpoint of the class interval (the bin) with the highest 
frequency. This method has its limitations, but it generally gives reasonable answers
'''

# Grouped frequency table: bins 100,000 wide covering 0 to 800,000
intervals = pd.interval_range(start=0, end=800000, freq=100000)
gr_freq_table = pd.Series([0] * 8, index=intervals)

# Count each sale price in the first bin that contains it; a price that
# falls in no bin is left uncounted.
for value in houses['SalePrice']:
    for interval in intervals:
        if value not in interval:
            continue
        gr_freq_table.loc[interval] += 1
        break

print(gr_freq_table)

# NOTE(review): the mode is hard-coded; presumably it is the midpoint of the
# most frequent bin in the table printed above -- confirm against the data.
mode = 150000
mean = houses['SalePrice'].mean()
median = houses['SalePrice'].median()
'''
When we plot a histogram or a kernel density plot to visualize the shape of a distribution,
the mode will always be the peak of the distribution
示例#33
0
# Label list taken from the model configuration; printed for reference.
labels = model_config_dict["mappings"]["labels"]
print(labels)

# Abort early when the camera object was not created.
if body_cam is None:
    raise RuntimeError("Error initializing body camera")

# Geometry constants.
# NOTE(review): units are not stated here; width/height look like a frame
# size in pixels and max_dist a distance limit -- confirm.
decimate = 20
max_dist = 4000.0
height = 400.0
width = 640.0
# Centre of the decimated frame: half of (size / decimate).
cx = width / decimate / 2
cy = height / decimate / 2
fx = 1.4  # values found by measuring known sized objects at known distances
fy = 2.05

# Equal-width bins: 40 bins across [-2000, 2000] and 8 bins across [0, 800],
# i.e. each bin is 100 wide.
x_bins = pd.interval_range(start=-2000, end=2000, periods=40)
y_bins = pd.interval_range(start=0, end=800, periods=8)

while True:  # main loop until 'q' is pressed

    # Fetch whatever neural-network and data packets are currently available.
    nnet_packets, data_packets = body_cam.get_available_nnet_and_data_packets()

    # Report the depth of every detection carrying label 5.
    for nnet_packet in nnet_packets:
        detections = list(nnet_packet.getDetectedObjects())
        for detection in detections:
            if detection.label == 5:  # we're looking for a bottle...
                print('Bottle is ' + '{:.2f}'.format(detection.depth_z) +
                      'm away.')

    # NOTE(review): the snippet is cut off here; the loop body is missing.
    for packet in data_packets:
示例#34
0
 def f():
     # Calls interval_range with numeric endpoints and a Timedelta frequency.
     # NOTE(review): presumably the enclosing test (not visible here)
     # expects this mismatch to raise -- confirm against the caller.
     interval_range(0, 10, freq=Timedelta('1day'))
示例#35
0
 def test_subtype_integer_errors(self):
     """Casting an index with negative endpoints to uint64 must fail."""
     # int64 -> uint64 fails with negative values
     unsigned = IntervalDtype('uint64')
     signed_index = interval_range(-10, 10)
     with pytest.raises(ValueError):
         signed_index.astype(unsigned)
def create_data():
    """Create the pandas objects that make up the pickle data.

    Returns
    -------
    dict
        Nested dictionary with the top-level keys ``series``, ``frame``,
        ``index``, ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``,
        ``timestamp`` and ``offsets``; each maps a short label to one
        constructed object.
    """
    # Raw column data shared by several of the series and frames below.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    # One index of each flavour, ten elements long.
    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    # interval_range is imported lazily and only for versions >= 0.21.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    # Two-level MultiIndex built from parallel label lists.
    mi = dict(
        reg2=MultiIndex.from_tuples(
            tuple(
                zip(
                    *[
                        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                        ["one", "two", "one", "two", "one", "two", "one", "two"],
                    ]
                )
            ),
            names=["first", "second"],
        )
    )

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(
            np.arange(10).astype(np.int64), index=date_range("20130101", periods=10)
        ),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(
                tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"]
            ),
        ),
        dup=Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    # Frame whose column labels contain "A" twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({"A": series["float"], "B": series["float"] + 1}),
        int=DataFrame({"A": series["int"], "B": series["int"] + 1}),
        mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {"A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64)},
            index=MultiIndex.from_tuples(
                tuple(
                    zip(
                        *[
                            ["bar", "bar", "baz", "baz", "baz"],
                            ["one", "two", "one", "two", "three"],
                        ]
                    )
                ),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(
            np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"]
        ),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame(
            {
                "A": Categorical(["foo", "bar", "baz"]),
                "B": np.arange(3).astype(np.int64),
            }
        ),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    # Categoricals with 7, 1000 and 10000 categories.
    # NOTE(review): the key names suggest these sizes are chosen to produce
    # int8/int16/int32 codes -- confirm.
    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    # Timestamps carrying a freq, without and with a timezone.
    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance per offset class, some with non-default arguments.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    # The sparse entries come from helper functions defined elsewhere.
    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
def test_interval_array_equal(kwargs):
    """An interval array built from ``kwargs`` compares equal to itself."""
    values = interval_range(**kwargs).values
    assert_interval_array_equal(values, values)
示例#38
0
# Locate config.toml next to this script and read the data file location
this_dir = os.path.dirname(os.path.realpath(__file__))
config = toml.load(os.path.join(this_dir, 'config.toml'))
u.set_full_paths(config, this_dir)
csv_loc = config['file_locations']['data']

# Load the data, drop the helper column and normalise the dtypes
df: pd.DataFrame = pd.read_csv(csv_loc)  # type: ignore
df.drop(columns='should_delete', inplace=True)
df = df.convert_dtypes()
df['date'] = pd.to_datetime(df['date'])

# Show every row when printing
pd.set_option('display.max_rows', df.shape[0] + 1)
pd.set_option('display.max_columns', 175)

# Ten month-start bins beginning July 2020
date_bins = pd.interval_range(
    start=pd.Timestamp('2020-07-01'),
    periods=10,
    freq='MS',
)
df['date_range'] = pd.cut(df.date, date_bins)

# Split the morning weight into muscle, body fat and everything else
df['muscle_weight'] = df.morning_weight * df.muscle_mass_percentage / 100
df['body_fat_weight'] = df.morning_weight * df.body_fat_percentage / 100
df['other_weight'] = df.morning_weight - df.muscle_weight - df.body_fat_weight

# Every "*_weight" column except morning_weight itself
weights = Enumerable(
    df.drop(columns='morning_weight').columns
).where(lambda c: re.match('.*_weight', c, re.I)).to_list()
# exps = Enumerable(df.columns).where(lambda c: re.match('.*_exp',c,re.I)).to_list()

# Long format: one row per (date, weight category)
mdf: pd.DataFrame = pd.melt(
    df,
    id_vars=['date'],
    value_vars=weights,
    value_name='lbs',
    var_name='weight_category',
)  # type: ignore
mdf.dropna(inplace=True)
示例#39
0
# Inspect ``c`` (defined earlier, outside this snippet), its categories and
# the per-category counts.
# NOTE(review): ``c`` is presumably a Categorical produced by pd.cut -- confirm.
c
c.categories
c.value_counts()

# Reuse those categories as the bins for 100 random normal draws.
pd.cut(np.random.randn(100), bins=c.categories).value_counts()

#%%
#%% Generating ranges of intervals
"""
If we need intervals on a regular frequency, we can use the interval_range() function
to create an IntervalIndex using various combinations of start, end, and periods.
The default frequency for interval_range is 1 for numeric intervals,
and calendar day for datetime-like intervals:
"""

# start + end, start + periods, end + periods: frequency left at its default
pd.interval_range(start=0, end=5)
pd.interval_range(start=pd.Timestamp('2020-05-04'), periods=4)
pd.interval_range(end=pd.Timedelta('3 days'), periods=3)

"""
Specifying start, end, and periods will generate a range of evenly spaced intervals
from start to end inclusively,
with periods number of elements in the resulting IntervalIndex:
"""
# start + end + periods, for numeric and for timestamp endpoints
pd.interval_range(start=0, end=6, periods=4)
pd.interval_range(pd.Timestamp('2018-01-01'),
                  pd.Timestamp('2018-02-28'), periods=3)

"""
Additionally, the closed parameter can be used to specify which side(s)
the intervals are closed on.
示例#40
0
    tm.assert_frame_equal(result, expected, check_exact=True)


def test_func_duplicates_raises():
    """Passing the same function name twice to ``agg`` is rejected.

    GH28426
    """
    frame = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
    grouped = frame.groupby("A")
    with pytest.raises(SpecificationError, match="Function names"):
        grouped.agg(["min", "min"])


@pytest.mark.parametrize(
    "index",
    [
        pd.CategoricalIndex(list("abc")),
        pd.interval_range(0, 3),
        pd.period_range("2020", periods=3, freq="D"),
        pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
    ],
)
def test_agg_index_has_complex_internals(index):
    """``agg`` with a dict spec gives the same result for every index type.

    GH 31223
    """
    frame = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index)
    expected = DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group")

    aggregated = frame.groupby("group").agg({"value": Series.nunique})
    tm.assert_frame_equal(aggregated, expected)


def test_agg_split_block():
    # https://github.com/pandas-dev/pandas/issues/31522
    df = pd.DataFrame(
def recording(row_begining, col_begining, df):
    """Stack the two (score, number) column pairs of one table vertically.

    The first pair is read from columns ``col_begining`` and
    ``col_begining + 1`` over 51 rows; the second pair from columns
    ``col_begining + 7`` and ``col_begining + 8`` over 50 rows.  Both start
    at row ``row_begining``.  The pairs are relabelled ``score``/``number``
    and concatenated with a fresh 0..N-1 index.
    """
    labels = ['score', 'number']
    # (row count, column offset of the pair's first column)
    layout = ((51, 0), (50, 7))

    pieces = []
    for n_rows, offset in layout:
        first_col = col_begining + offset
        rows = slice(row_begining, row_begining + n_rows)
        piece = pd.DataFrame(df.iloc[rows, [first_col, first_col + 1]])
        piece.columns = labels
        pieces.append(piece)

    return pd.concat(pieces, axis=0, ignore_index=True)


# One score table per input file, keyed by the matching entry of ``file_num``.
AllScores={}

subjects=['Chinese','English','MathA','MathB','Chemistry','Physics','Biology','History','Geography','Civics']

# Starting spreadsheet row of each subject's table: row 3, then every 55 rows.
read_excel_rows=np.arange(3,3+55*len(subjects),55)
for name_num in range(len(file_name)):
    read_score=pd.read_excel(file_name[name_num])
    # Row index: interval_range(start=0, end=101, periods=101) re-closed on the
    # left and reversed, so the last interval comes first.
    Scores_temp=pd.DataFrame(index=pd.IntervalIndex(pd.interval_range(0.,101.,101),closed='left')[::-1])
    for i in range(len(subjects)):
        # Only the 'number' column of the stacked table is stored per subject.
        Scores_temp[subjects[i]]=recording(read_excel_rows[i],0,read_score).number.values
    Scores_temp=Scores_temp.astype(int)
    AllScores[file_num[name_num]]=Scores_temp
# Remove the temporaries, the input file list and the helper from the namespace.
del read_excel_rows,read_score,file_name,Scores_temp,recording



'---------------------------- Important DataFrames ----------------------------'


# For every score table: divide each column by its column total, then
# accumulate the resulting shares down the rows.
AllCum = {}
for key, scores in AllScores.items():
    summation = scores.sum(axis=0)
    AllCum[key] = scores.div(summation, axis='columns').cumsum(axis=0)
示例#42
0
def binning2index(binning):
    """Translate a binning description into an equivalent pandas index.

    The binning type, together with which under-/over-/nan-flow bins exist,
    decides the result: a ``RangeIndex``, an ``IntervalIndex`` or a
    ``CategoricalIndex``.  Binnings that cannot be expressed directly are
    first converted (``toCategoryBinning``, ``toEdgesBinning``,
    ``toIrregularBinning``) and the function recurses on the converted form.

    Parameters
    ----------
    binning : binning object or None
        ``None`` yields a single-category index ``["all"]``.

    Returns
    -------
    pandas.CategoricalIndex, pandas.RangeIndex or pandas.IntervalIndex

    Raises
    ------
    NotImplementedError
        For ``HexagonalBinning``.
    AssertionError
        For any type not handled below.
    """
    if binning is None:
        return pandas.CategoricalIndex(["all"])

    elif isinstance(binning, IntegerBinning):
        # Without flow bins the integers min..max (inclusive) are the index.
        if (binning.loc_underflow == BinLocation.nonexistent
                and binning.loc_overflow == BinLocation.nonexistent):
            return pandas.RangeIndex(binning.min, binning.max + 1)
        else:
            return binning2index(binning.toCategoryBinning())

    elif isinstance(binning, RegularBinning):
        # No flow bins of any kind: evenly spaced intervals describe it fully.
        if binning.overflow is None or (
                binning.overflow.loc_underflow == BinLocation.nonexistent
                and binning.overflow.loc_overflow == BinLocation.nonexistent
                and binning.overflow.loc_nanflow == BinLocation.nonexistent):
            # Positional arguments are interval_range's start, end, periods.
            return pandas.interval_range(
                binning.interval.low,
                binning.interval.high,
                binning.num,
                closed=("left" if binning.interval.low_inclusive else "right"),
            )
        # Under/overflow but no nanflow: handled by the edges branch.
        elif (binning.overflow is None
              or binning.overflow.loc_nanflow == BinLocation.nonexistent):
            return binning2index(binning.toEdgesBinning())
        else:
            return binning2index(binning.toCategoryBinning())

    elif isinstance(binning, HexagonalBinning):
        raise NotImplementedError

    elif isinstance(binning, EdgesBinning):
        # A nanflow bin cannot be an interval, so only proceed without one.
        if (binning.overflow is None
                or binning.overflow.loc_nanflow == BinLocation.nonexistent):
            if binning.overflow is None or (binning.overflow.loc_underflow
                                            == BinLocation.nonexistent
                                            and binning.overflow.loc_overflow
                                            == BinLocation.nonexistent):
                return pandas.IntervalIndex.from_breaks(
                    binning.edges,
                    closed=("left" if binning.low_inclusive else "right"))
            # Underflow value <= nonexistent and overflow value >= nonexistent
            # (presumably: underflow sits before the bins, overflow after --
            # TODO confirm against BinLocation): pad the edges with infinities.
            elif (binning.overflow is not None
                  and binning.overflow.loc_underflow.value <=
                  BinLocation.nonexistent.value
                  and binning.overflow.loc_overflow.value >=
                  BinLocation.nonexistent.value):
                edges = numpy.empty(binning._binshape()[0] + 1,
                                    dtype=numpy.float64)
                # The real edges start one slot later when an underflow bin exists.
                shift = int(
                    binning.overflow.loc_underflow != BinLocation.nonexistent)
                edges[shift:shift + len(binning.edges)] = binning.edges
                if binning.overflow.loc_underflow != BinLocation.nonexistent:
                    edges[0] = -numpy.inf
                if binning.overflow.loc_overflow != BinLocation.nonexistent:
                    edges[-1] = numpy.inf
                return pandas.IntervalIndex.from_breaks(
                    edges,
                    closed=("left" if binning.low_inclusive else "right"))
            else:
                return binning2index(binning.toIrregularBinning())
        else:
            return binning2index(binning.toCategoryBinning())

    elif isinstance(binning, IrregularBinning):
        # Usable as an IntervalIndex only when there is no nanflow bin, there
        # is at least one interval, each interval is closed on exactly one
        # side, and all intervals agree on which side that is.
        if ((binning.overflow is None
             or binning.overflow.loc_nanflow == BinLocation.nonexistent)
                and len(binning.intervals) != 0
                and binning.intervals[0].low_inclusive !=
                binning.intervals[0].high_inclusive and all(
                    x.low_inclusive == binning.intervals[0].low_inclusive
                    and x.high_inclusive == binning.intervals[0].high_inclusive
                    for x in binning.intervals)):
            left = numpy.empty(binning._binshape(), dtype=numpy.float64)
            right = numpy.empty(binning._binshape(), dtype=numpy.float64)
            # Each flow location is paired with the infinite endpoint of its bin.
            flows = ([] if binning.overflow is None else [
                (binning.overflow.loc_underflow, -numpy.inf),
                (binning.overflow.loc_overflow, numpy.inf),
            ])
            # Smallest low and largest high over all intervals; these bound
            # the finite side of the flow bins.
            low = numpy.inf
            high = -numpy.inf
            for interval in binning.intervals:
                if interval.low <= low:
                    low = interval.low
                if interval.high >= high:
                    high = interval.high
            i = 0
            # Flow bins yielded by _belows come first ...
            for loc, val in BinLocation._belows(flows):
                if val == -numpy.inf:
                    left[i], right[i] = val, low
                if val == numpy.inf:
                    left[i], right[i] = high, val
                i += 1
            # ... then the declared intervals, in order ...
            for interval in binning.intervals:
                left[i] = interval.low
                right[i] = interval.high
                i += 1
            # ... then the flow bins yielded by _aboves.
            for loc, val in BinLocation._aboves(flows):
                if val == -numpy.inf:
                    left[i], right[i] = val, low
                if val == numpy.inf:
                    left[i], right[i] = high, val
                i += 1
            return pandas.IntervalIndex.from_arrays(
                left,
                right,
                closed=("left"
                        if binning.intervals[0].low_inclusive else "right"),
            )
        else:
            return binning2index(binning.toCategoryBinning())

    elif isinstance(binning, CategoryBinning):
        categories = []
        flows = [(binning.loc_overflow, )]
        # An "(other)" label is added before or after the named categories,
        # depending on whether _belows or _aboves yields the overflow location.
        for (loc, ) in BinLocation._belows(flows):
            categories.append("(other)")
        categories.extend(binning.categories)
        for (loc, ) in BinLocation._aboves(flows):
            categories.append("(other)")
        return pandas.CategoricalIndex(categories)

    elif isinstance(binning, SparseRegularBinning):
        return binning2index(binning.toIrregularBinning())

    elif isinstance(binning, FractionBinning):
        return binning2index(binning.toCategoryBinning())

    elif isinstance(binning, PredicateBinning):
        return binning2index(binning.toCategoryBinning())

    elif isinstance(binning, VariationBinning):
        return binning2index(binning.toCategoryBinning())

    else:
        # Unknown binning type: fail loudly and report the type.
        raise AssertionError(type(binning))
示例#43
0
    def _get(self,
             ep,
             symbol,
             start_date,
             end_date,
             retry,
             retry_wait,
             freq='6H'):
        """Yield successive pages of JSON data from the ``ep`` endpoint.

        With a truthy ``start_date`` the span up to ``end_date`` (the current
        UTC time when ``end_date`` is falsy) is cut into ``freq``-sized
        intervals, and every interval is paged through in turn.  Without a
        ``start_date`` one un-dated request sequence is issued.

        ``retry`` and ``retry_wait`` are passed to ``request_retry``;
        ``retry_wait`` is also the pause after a 502/504 response.
        """
        windows = [None]
        if start_date:
            if not end_date:
                end_date = pd.Timestamp.utcnow()
            windows = pd.interval_range(API._timestamp(start_date),
                                        API._timestamp(end_date),
                                        freq=freq).tolist()
            if not windows:
                # interval_range came back empty: query the span as one window
                whole_span = pd.Interval(left=API._timestamp(start_date),
                                         right=API._timestamp(end_date))
                windows.append(whole_span)
            elif windows[-1].right < API._timestamp(end_date):
                # add the tail that the last full interval does not reach
                tail = pd.Interval(windows[-1].right,
                                   API._timestamp(end_date))
                windows.append(tail)

        @request_retry(self.ID, retry, retry_wait)
        def helper(start, start_date, end_date):
            dated = start_date and end_date
            if dated:
                endpoint = f'/api/v1/{ep}?symbol={symbol}&count={API_MAX}&reverse=false&start={start}&startTime={start_date}&endTime={end_date}'
            else:
                endpoint = f'/api/v1/{ep}?symbol={symbol}&reverse=true'
            # sign the request only when both credentials are configured
            signed = self.key_id and self.key_secret
            header = self._generate_signature("GET", endpoint) if signed else {}
            header['Accept'] = 'application/json'
            url = '{}{}'.format(self.api, endpoint)
            return requests.get(url, headers=header)

        for window in windows:
            offset = 0
            if window is not None:
                # stop one nanosecond short of the window's right edge
                last_instant = window.right - pd.Timedelta(nanoseconds=1)

                start_date = str(window.left).replace(" ", "T") + "Z"
                end_date = str(last_instant).replace(" ", "T") + "Z"

            while True:
                response = helper(offset, start_date, end_date)
                status = response.status_code

                if status in {502, 504}:
                    LOG.warning("%s: %d for URL %s - %s", self.ID,
                                status, response.url, response.text)
                    sleep(retry_wait)
                    continue
                if status == 429:
                    sleep(API_REFRESH)
                    continue

                if status != 200:
                    self._handle_error(response, LOG)
                else:
                    sleep(RATE_LIMIT_SLEEP)

                remaining = int(response.headers['X-RateLimit-Remaining'])
                page = response.json()

                yield page

                # anything other than a full page ends this window
                if len(page) != API_MAX:
                    break

                if remaining < 1:
                    sleep(API_REFRESH)

                offset += len(page)
示例#44
0
class TestIntervalIndex(Base):
    _holder = IntervalIndex

    def setup_method(self, method):
        """Create the indexes shared by the tests of this class."""
        self.index = IntervalIndex.from_arrays([0, 1], [1, 2])
        tuples_with_gap = [(0, 1), np.nan, (1, 2)]
        self.index_with_nan = IntervalIndex.from_tuples(tuples_with_gap)
        self.indices = {'intervalIndex': tm.makeIntervalIndex(10)}

    def create_index(self, closed='right'):
        """Return the ten unit intervals defined by the breaks 0..10."""
        breaks = range(11)
        return IntervalIndex.from_breaks(breaks, closed=closed)

    def create_index_with_nan(self, closed='right'):
        """Return ten unit intervals whose second entry is missing."""
        present = [True, False] + [True] * 8
        left = np.where(present, np.arange(10), np.nan)
        right = np.where(present, np.arange(1, 11), np.nan)
        return IntervalIndex.from_arrays(left, right, closed=closed)

    def test_properties(self, closed):
        """Check size, endpoints, midpoints, ``closed`` and array conversion.

        The same checks run on an index without and with a missing entry.
        """
        nan_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        nan_right = nan_left + 1

        # (index, expected left, expected right, expected mid, expected items)
        cases = [
            (self.create_index(closed=closed),
             Index(np.arange(10)),
             Index(np.arange(1, 11)),
             Index(np.arange(0.5, 10.5)),
             [Interval(lo, hi, closed)
              for lo, hi in zip(range(10), range(1, 11))]),
            # with nans
            (self.create_index_with_nan(closed=closed),
             nan_left,
             nan_right,
             nan_left + 0.5,
             [Interval(lo, hi, closed) if notna(lo) else np.nan
              for lo, hi in zip(nan_left, nan_right)]),
        ]

        for index, left, right, mid, items in cases:
            assert len(index) == 10
            assert index.size == 10
            assert index.shape == (10, )

            tm.assert_index_equal(index.left, left)
            tm.assert_index_equal(index.right, right)
            tm.assert_index_equal(index.mid, mid)

            assert index.closed == closed

            expected = np.array(items, dtype=object)
            tm.assert_numpy_array_equal(np.asarray(index), expected)

    @pytest.mark.parametrize(
        'breaks',
        [[1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
         [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
         pd.to_datetime(['20170101', '20170202', '20170303', '20170404']),
         pd.to_timedelta(['1ns', '2ms', '3s', '4M', '5H', '6D'])])
    def test_length(self, closed, breaks):
        """``IntervalIndex.length`` must match the per-interval lengths.

        GH 18789.  Checked without and with an inserted NA entry; an NA
        entry is expected to appear unchanged in the result.
        """
        without_na = IntervalIndex.from_breaks(breaks, closed=closed)
        with_na = without_na.insert(1, np.nan)

        for index in (without_na, with_na):
            result = index.length
            expected = Index(iv.length if notna(iv) else iv for iv in index)
            tm.assert_index_equal(result, expected)

    def test_with_nans(self, closed):
        """Check ``hasnans``, ``isna`` and ``notna`` without and with a NaN."""
        complete = self.create_index(closed=closed)
        assert not complete.hasnans
        tm.assert_numpy_array_equal(complete.isna(),
                                    np.repeat(False, len(complete)))
        tm.assert_numpy_array_equal(complete.notna(),
                                    np.repeat(True, len(complete)))

        holed = self.create_index_with_nan(closed=closed)
        assert holed.hasnans
        # only the second entry is missing
        missing = np.array([False, True] + [False] * (len(holed) - 2))
        tm.assert_numpy_array_equal(holed.isna(), missing)
        tm.assert_numpy_array_equal(holed.notna(), ~missing)

    def test_copy(self, closed):
        """Shallow and deep copies equal the source; a deep copy has its own ``left``."""
        source = self.create_index(closed=closed)

        shallow = source.copy()
        assert shallow.equals(source)

        deep = source.copy(deep=True)
        assert deep.equals(source)
        assert deep.left is not source.left

    def test_ensure_copied_data(self, closed):
        """Exercise the ``copy`` flag of the constructor.

        Built from an IntervalIndex with ``copy=False`` the endpoints are
        expected to share memory; built from ``_ndarray_values`` they are
        expected to be copies.
        """
        index = self.create_index(closed=closed)

        # (constructor input, expected memory relation of the endpoints)
        scenarios = [
            (index, 'same'),                  # not copying
            (index._ndarray_values, 'copy'),  # by-definition make a copy
        ]
        for data, relation in scenarios:
            result = IntervalIndex(data, copy=False)
            for side in ('left', 'right'):
                tm.assert_numpy_array_equal(getattr(index, side).values,
                                            getattr(result, side).values,
                                            check_same=relation)

    def test_equals(self, closed):
        """``equals`` ignores names but not type, values or ``closed``."""
        expected = IntervalIndex.from_breaks(np.arange(5), closed=closed)

        for twin in (expected, expected.copy()):
            assert expected.equals(twin)

        different = [
            expected.astype(object),
            np.array(expected),
            list(expected),
            [1, 2],
            np.array([1, 2]),
            pd.date_range('20130101', periods=2),
        ]
        for other in different:
            assert not expected.equals(other)

        # names do not take part in the comparison
        named_foo = IntervalIndex.from_breaks(np.arange(5),
                                              closed=closed,
                                              name='foo')
        named_bar = IntervalIndex.from_breaks(np.arange(5),
                                              closed=closed,
                                              name='bar')
        assert expected.equals(named_foo)
        assert named_foo.equals(named_bar)

        every_side = {'left', 'right', 'both', 'neither'}
        for other_closed in every_side - {closed}:
            differently_closed = IntervalIndex.from_breaks(
                np.arange(5), closed=other_closed)
            assert not expected.equals(differently_closed)

    @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
    def test_where(self, closed, klass):
        """``where`` keeps entries under a true mask and blanks the rest."""
        idx = self.create_index(closed=closed)

        keep_all = [True] * len(idx)
        result = idx.where(klass(keep_all))
        tm.assert_index_equal(result, idx)

        drop_first = [False] + [True] * len(idx[1:])
        expected = IntervalIndex([np.nan] + idx[1:].tolist())
        result = idx.where(klass(drop_first))
        tm.assert_index_equal(result, expected)

    def test_delete(self, closed):
        """Deleting position 0 leaves the intervals built from breaks 1..10."""
        full = self.create_index(closed=closed)
        remaining = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
        tm.assert_index_equal(full.delete(0), remaining)

    @pytest.mark.parametrize('data', [
        interval_range(0, periods=10, closed='neither'),
        interval_range(1.7, periods=8, freq=2.5, closed='both'),
        interval_range(Timestamp('20170101'), periods=12, closed='left'),
        interval_range(Timedelta('1 day'), periods=6, closed='right')
    ])
    def test_insert(self, data):
        """Insert intervals and NA values; reject other items.

        Covers insertion at the start, end and middle, a non-Interval item,
        an Interval closed on a different side, and the NA-likes
        ``np.nan``, ``pd.NaT`` and ``None`` (GH 18295).
        """
        item = data[0]
        idx_item = IntervalIndex([item])

        # (insert position, expected index): start, end, mid
        placements = [
            (0, idx_item.append(data)),
            (len(data), data.append(idx_item)),
            (3, data[:3].append(idx_item).append(data[3:])),
        ]
        for position, expected in placements:
            result = data.insert(position, item)
            tm.assert_index_equal(result, expected)

        # invalid type
        type_msg = ('can only insert Interval objects and NA into an '
                    'IntervalIndex')
        with tm.assert_raises_regex(ValueError, type_msg):
            data.insert(1, 'foo')

        # invalid closed
        closed_msg = ('inserted item must be closed on the same side as the '
                      'index')
        other_sides = {'left', 'right', 'both', 'neither'} - {item.closed}
        for closed in other_sides:
            with tm.assert_raises_regex(ValueError, closed_msg):
                bad_item = Interval(item.left, item.right, closed=closed)
                data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        expected = data[:1].append(na_idx).append(data[1:])
        for na in (np.nan, pd.NaT, None):
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_take(self, closed):
        """``take`` returns the requested positions, repeats included."""
        index = self.create_index(closed=closed)

        everything = index.take(range(10))
        tm.assert_index_equal(everything, index)

        repeated = index.take([0, 0, 1])
        expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2],
                                             closed=closed)
        tm.assert_index_equal(repeated, expected)

    def test_unique(self, closed):
        """``is_unique`` is false only when an identical interval repeats."""
        unique_layouts = [
            # unique non-overlapping
            [(0, 1), (2, 3), (4, 5)],
            # unique overlapping - distinct endpoints
            [(0, 1), (0.5, 1.5)],
            # unique overlapping - shared endpoints
            [(1, 2), (1, 3), (2, 3)],
            # unique nested
            [(-1, 1), (-2, 2)],
        ]
        for tuples in unique_layouts:
            idx = IntervalIndex.from_tuples(tuples, closed=closed)
            assert idx.is_unique

        # duplicate
        duplicated = IntervalIndex.from_tuples([(0, 1), (0, 1), (2, 3)],
                                               closed=closed)
        assert not duplicated.is_unique

        # empty
        empty = IntervalIndex([], closed=closed)
        assert empty.is_unique

    def test_monotonic(self, closed):
        """Check the four monotonicity flags over a table of layouts.

        Each case lists the expected truth of ``is_monotonic``,
        ``_is_strictly_monotonic_increasing``, ``is_monotonic_decreasing``
        and ``_is_strictly_monotonic_decreasing``, in that order.
        """
        def build(tuples):
            return IntervalIndex.from_tuples(tuples, closed=closed)

        rising = (True, True, False, False)
        falling = (False, False, True, True)
        unordered = (False, False, False, False)

        cases = [
            # increasing non-overlapping
            (build([(0, 1), (2, 3), (4, 5)]), rising),
            # decreasing non-overlapping
            (build([(4, 5), (2, 3), (1, 2)]), falling),
            # unordered non-overlapping
            (build([(0, 1), (4, 5), (2, 3)]), unordered),
            # increasing overlapping
            (build([(0, 2), (0.5, 2.5), (1, 3)]), rising),
            # decreasing overlapping
            (build([(1, 3), (0.5, 2.5), (0, 2)]), falling),
            # unordered overlapping
            (build([(0.5, 2.5), (0, 2), (1, 3)]), unordered),
            # increasing overlapping shared endpoints
            (build([(1, 2), (1, 3), (2, 3)]), rising),
            # decreasing overlapping shared endpoints
            (build([(2, 3), (1, 3), (1, 2)]), falling),
            # stationary
            (build([(0, 1), (0, 1)]), (True, False, True, False)),
            # empty
            (IntervalIndex([], closed=closed), (True, True, True, True)),
        ]

        for idx, (inc, strict_inc, dec, strict_dec) in cases:
            assert bool(idx.is_monotonic) == inc
            assert bool(idx._is_strictly_monotonic_increasing) == strict_inc
            assert bool(idx.is_monotonic_decreasing) == dec
            assert bool(idx._is_strictly_monotonic_decreasing) == strict_dec

    @pytest.mark.skip(reason='not a valid repr as we use interval notation')
    def test_repr(self):
        """Compare ``repr`` of an integer and a datetime index to fixed text.

        The test is skipped (see the decorator's reason); the body is kept
        well-formed so it can run if the skip is ever lifted.
        """
        i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right')
        expected = ("IntervalIndex(left=[0, 1],"
                    "\n              right=[1, 2],"
                    "\n              closed='right',"
                    "\n              dtype='interval[int64]')")
        assert repr(i) == expected

        # from_tuples takes a single sequence of (left, right) tuples, so the
        # two intervals are wrapped in one list.
        i = IntervalIndex.from_tuples(
            [(Timestamp('20130101'), Timestamp('20130102')),
             (Timestamp('20130102'), Timestamp('20130103'))],
            closed='right')
        expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02'],"
                    "\n              right=['2013-01-02', '2013-01-03'],"
                    "\n              closed='right',"
                    "\n              dtype='interval[datetime64[ns]]')")
        assert repr(i) == expected

    @pytest.mark.skip(reason='not a valid repr as we use interval notation')
    def test_repr_max_seq_item_setting(self):
        """Delegate to the inherited test of the same name (skipped here)."""
        parent = super(TestIntervalIndex, self)
        parent.test_repr_max_seq_item_setting()

    @pytest.mark.skip(reason='not a valid repr as we use interval notation')
    def test_repr_roundtrip(self):
        """Delegate to the inherited test of the same name (skipped here)."""
        parent = super(TestIntervalIndex, self)
        parent.test_repr_roundtrip()

    # TODO: check this behavior is consistent with test_interval_new.py
    def test_get_item(self, closed):
        """Scalar positions return Intervals (or NA); slices return indexes."""
        index = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan),
                                          closed=closed)
        assert index[0] == Interval(0.0, 1.0, closed=closed)
        assert index[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(index[2])

        # (slice, expected left endpoints, expected right endpoints)
        windows = [
            (slice(0, 1), (0., ), (1., )),
            (slice(0, 2), (0., 1), (1., 2.)),
            (slice(1, 3), (1., np.nan), (2., np.nan)),
        ]
        for window, left, right in windows:
            result = index[window]
            expected = IntervalIndex.from_arrays(left, right, closed=closed)
            tm.assert_index_equal(result, expected)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_get_loc_value(self):
        """Look up scalar keys in adjacent, overlapping and gapped indexes."""
        index = self.index
        with pytest.raises(KeyError):
            index.get_loc(0)
        for key, position in [(0.5, 0), (1, 0), (1.5, 1), (2, 1)]:
            assert index.get_loc(key) == position
        for absent in (-1, 3):
            with pytest.raises(KeyError):
                index.get_loc(absent)

        overlapping = IntervalIndex.from_tuples([(0, 2), (1, 3)])
        assert overlapping.get_loc(0.5) == 0
        assert overlapping.get_loc(1) == 0
        # keys inside both intervals return both positions
        both = np.array([0, 1], dtype='int64')
        tm.assert_numpy_array_equal(overlapping.get_loc(1.5), both)
        tm.assert_numpy_array_equal(np.sort(overlapping.get_loc(2)), both)
        assert overlapping.get_loc(3) == 1
        with pytest.raises(KeyError):
            overlapping.get_loc(3.5)

        gapped = IntervalIndex.from_arrays([0, 2], [1, 3])
        with pytest.raises(KeyError):
            gapped.get_loc(1.5)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def slice_locs_cases(self, breaks):
        """Shared ``slice_locs`` assertions for increasing indexes.

        NOTE(review): ``breaks`` is accepted but never read; every index
        below is built from literal values.  Confirm whether the first two
        indexes were meant to be built from ``breaks``.
        """
        # TODO: same tests for more index types
        right_closed = IntervalIndex.from_breaks([0, 1, 2], closed='right')
        assert right_closed.slice_locs() == (0, 2)
        positional = [
            ((0, 1), (0, 1)),
            ((1, 1), (0, 1)),
            ((0, 2), (0, 2)),
            ((0.5, 1.5), (0, 2)),
            ((0, 0.5), (0, 1)),
        ]
        for bounds, expected in positional:
            assert right_closed.slice_locs(*bounds) == expected
        by_keyword = [
            ({'start': 1}, (0, 2)),
            ({'start': 1.2}, (1, 2)),
            ({'end': 1}, (0, 1)),
            ({'end': 1.1}, (0, 2)),
            ({'end': 1.0}, (0, 1)),
        ]
        for bound, expected in by_keyword:
            assert right_closed.slice_locs(**bound) == expected
        assert right_closed.slice_locs(-1, -1) == (0, 0)

        open_both = IntervalIndex.from_breaks([0, 1, 2], closed='neither')
        neither_cases = [
            ((0, 1), (0, 1)),
            ((0, 2), (0, 2)),
            ((0.5, 1.5), (0, 2)),
            ((1, 1), (1, 1)),
            ((1, 2), (1, 2)),
        ]
        for bounds, expected in neither_cases:
            assert open_both.slice_locs(*bounds) == expected

        closed_both = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)],
                                                closed='both')
        assert closed_both.slice_locs(1, 1) == (0, 1)
        assert closed_both.slice_locs(1, 2) == (0, 2)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_slice_locs_int64(self):
        """Run the shared ``slice_locs`` cases with integer breaks."""
        int_breaks = [0, 1, 2]
        self.slice_locs_cases(int_breaks)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_slice_locs_float64(self):
        """Run the shared ``slice_locs`` cases with float breaks."""
        float_breaks = [0.0, 1.0, 2.0]
        self.slice_locs_cases(float_breaks)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def slice_locs_decreasing_cases(self, tuples):
        """Shared ``slice_locs`` assertions for an index built from ``tuples``."""
        index = IntervalIndex.from_tuples(tuples)

        # ((start, end), expected slice bounds)
        expectations = [
            ((1.5, 0.5), (1, 3)),
            ((2, 0), (1, 3)),
            ((2, 1), (1, 3)),
            ((3, 1.1), (0, 3)),
            ((3, 3), (0, 2)),
            ((3.5, 3.3), (0, 1)),
            ((1, -3), (2, 3)),
        ]
        for (start, end), expected in expectations:
            assert index.slice_locs(start, end) == expected

        # out of range on both ends: the slice must be empty
        bounds = index.slice_locs(-1, -1)
        assert bounds[0] == bounds[1]

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_slice_locs_decreasing_int64(self):
        """Pass decreasing integer tuples to ``slice_locs_cases``.

        NOTE(review): this calls ``slice_locs_cases``, not
        ``slice_locs_decreasing_cases`` -- confirm which helper was intended.
        """
        decreasing = [(2, 4), (1, 3), (0, 2)]
        self.slice_locs_cases(decreasing)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_slice_locs_decreasing_float64(self):
        """Pass decreasing float tuples to ``slice_locs_cases``.

        NOTE(review): this calls ``slice_locs_cases``, not
        ``slice_locs_decreasing_cases`` -- confirm which helper was intended.
        """
        decreasing = [(2., 4.), (1., 3.), (0., 2.)]
        self.slice_locs_cases(decreasing)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_slice_locs_fails(self):
        """``slice_locs`` on an unordered index must raise ``KeyError``."""
        unordered = IntervalIndex.from_tuples([(1, 2), (0, 1), (2, 3)])
        with pytest.raises(KeyError):
            unordered.slice_locs(1, 2)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_get_loc_interval(self):
        """Look up Interval keys: three resolve to position 0, two raise."""
        index = self.index

        found_at_zero = (Interval(0, 1), Interval(0, 0.5),
                         Interval(0, 1, 'left'))
        for key in found_at_zero:
            assert index.get_loc(key) == 0

        for key in (Interval(2, 3), Interval(-1, 0, 'left')):
            with pytest.raises(KeyError):
                index.get_loc(key)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize('item', [3, Interval(1, 4)])
    def test_get_loc_length_one(self, item, closed):
        """A key inside the only interval of the index resolves to 0 (GH 20921)."""
        single = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        position = single.get_loc(item)
        assert position == 0

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_get_indexer(self):
        """``get_indexer`` with scalar targets and with interval targets."""
        def intp(values):
            return np.array(values, dtype='intp')

        points = [-1, 0, 0.5, 1, 1.5, 2, 3]
        left_closed = IntervalIndex.from_breaks([0, 1, 2], closed='left')

        # (index being queried, target, expected positions)
        checks = [
            (self.index, points, [-1, -1, 0, 0, 1, 1, -1]),
            (self.index, self.index, [0, 1]),
            (left_closed, points, [-1, 0, 0, 1, 1, -1, -1]),
            (self.index, left_closed[:1], [0]),
            (self.index, left_closed, [-1, 1]),
        ]
        for queried, target, positions in checks:
            actual = queried.get_indexer(target)
            tm.assert_numpy_array_equal(actual, intp(positions))

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_get_indexer_subintervals(self):
        """``get_indexer`` with interval targets narrower than the index's own.

        Every expected array is spelled with ``dtype='intp'``, the same
        spelling used by the other indexer tests in this class.
        """

        # TODO: is this right?
        # return indexers for wholly contained subintervals
        target = IntervalIndex.from_breaks(np.linspace(0, 2, 5))
        actual = self.index.get_indexer(target)
        expected = np.array([0, 0, 1, 1], dtype='intp')
        tm.assert_numpy_array_equal(actual, expected)

        target = IntervalIndex.from_breaks([0, 0.67, 1.33, 2])
        actual = self.index.get_indexer(target)
        expected = np.array([0, 0, 1, 1], dtype='intp')
        tm.assert_numpy_array_equal(actual, expected)

        # first and last target interval only
        actual = self.index.get_indexer(target[[0, -1]])
        expected = np.array([0, 1], dtype='intp')
        tm.assert_numpy_array_equal(actual, expected)

        target = IntervalIndex.from_breaks([0, 0.33, 0.67, 1], closed='left')
        actual = self.index.get_indexer(target)
        expected = np.array([0, 0, 0], dtype='intp')
        tm.assert_numpy_array_equal(actual, expected)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        'item',
        [[3], np.arange(1, 5), [Interval(1, 4)],
         interval_range(1, 4)])
    def test_get_indexer_length_one(self, item, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        result = index.get_indexer(item)
        expected = np.array([0] * len(item), dtype='intp')
        tm.assert_numpy_array_equal(result, expected)

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def test_contains(self):
        """``in`` accepts Interval keys; bare scalars are never members."""
        idx = IntervalIndex.from_arrays([0, 1], [1, 2])

        # Invalid: scalar keys, endpoints included
        for point in (0, 1, 2):
            assert point not in idx

        # Valid: Interval keys
        for present in (Interval(0, 1), Interval(0, 2), Interval(0, 0.5)):
            assert present in idx
        for absent in (Interval(3, 5), Interval(-1, 0, closed='left')):
            assert absent not in idx

    # To be removed, replaced by test_interval_new.py (see #16316, #16386)
    def testcontains(self):
        """``contains`` accepts both scalar points and Interval keys."""
        # can select values that are IN the range of a value
        idx = IntervalIndex.from_arrays([0, 1], [1, 2])

        accepted = [
            0.1, 0.5, 1,
            Interval(0, 1), Interval(0, 2),
            # these overlap completely
            Interval(0, 3), Interval(1, 3),
        ]
        for key in accepted:
            assert idx.contains(key)

        for key in (20, -20):
            assert not idx.contains(key)

    def test_dropna(self, closed):

        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)],
                                             closed=closed)

        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan],
                                       closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

    # TODO: check this behavior is consistent with test_interval_new.py
    def test_non_contiguous(self, closed):
        index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        target = [0.5, 1.5, 2.5]
        actual = index.get_indexer(target)
        expected = np.array([0, -1, 1], dtype='intp')
        tm.assert_numpy_array_equal(actual, expected)

        assert 1.5 not in index

    def test_union(self, closed):
        """Union with another index, with itself/a slice, and of empties."""
        index = self.create_index(closed=closed)
        shifted = IntervalIndex.from_breaks(range(5, 13), closed=closed)
        combined = IntervalIndex.from_breaks(range(13), closed=closed)

        # same result whichever operand comes first
        tm.assert_index_equal(index.union(shifted), combined)
        tm.assert_index_equal(shifted.union(index), combined)

        # union with itself or with its first element leaves it unchanged
        tm.assert_index_equal(index.union(index), index)
        tm.assert_index_equal(index.union(index[:1]), index)

        # GH 19101: empty result, same dtype
        empty_int = IntervalIndex(np.array([], dtype='int64'), closed=closed)
        tm.assert_index_equal(empty_int.union(empty_int), empty_int)

        # GH 19101: empty result, different dtypes
        empty_float = IntervalIndex(np.array([], dtype='float64'),
                                    closed=closed)
        tm.assert_index_equal(empty_int.union(empty_float), empty_int)

    def test_intersection(self, closed):
        """Intersection with another index, with itself, and empty results."""
        index = self.create_index(closed=closed)

        # same result whichever operand comes first
        shifted = IntervalIndex.from_breaks(range(5, 13), closed=closed)
        common = IntervalIndex.from_breaks(range(5, 11), closed=closed)
        tm.assert_index_equal(index.intersection(shifted), common)
        tm.assert_index_equal(shifted.intersection(index), common)

        tm.assert_index_equal(index.intersection(index), index)

        # GH 19101: empty result, first with the same dtype and then with a
        # different (float64) dtype on the other operand
        empty = IntervalIndex(np.array([], dtype='int64'), closed=closed)
        for breaks in (range(300, 314),
                       np.arange(300, 314, dtype='float64')):
            far_away = IntervalIndex.from_breaks(breaks, closed=closed)
            tm.assert_index_equal(index.intersection(far_away), empty)

    def test_difference(self, closed):
        """Difference against a leading slice, itself, and a float copy."""
        index = self.create_index(closed=closed)
        tm.assert_index_equal(index.difference(index[:1]), index[1:])

        empty = IntervalIndex(np.array([], dtype='int64'), closed=closed)

        # GH 19101: empty result, same dtype
        tm.assert_index_equal(index.difference(index), empty)

        # GH 19101: empty result, different dtypes
        float_left = IntervalIndex.from_arrays(
            index.left.astype('float64'), index.right, closed=closed)
        tm.assert_index_equal(index.difference(float_left), empty)

    def test_symmetric_difference(self, closed):
        """Symmetric difference of shifted slices, itself, and a float copy."""
        index = self.create_index(closed=closed)

        # dropping the first vs. the last element leaves exactly those two
        ends_only = IntervalIndex([index[0], index[-1]])
        tm.assert_index_equal(
            index[1:].symmetric_difference(index[:-1]), ends_only)

        empty = IntervalIndex(np.array([], dtype='int64'), closed=closed)

        # GH 19101: empty result, same dtype
        tm.assert_index_equal(index.symmetric_difference(index), empty)

        # GH 19101: empty result, different dtypes
        float_left = IntervalIndex.from_arrays(
            index.left.astype('float64'), index.right, closed=closed)
        tm.assert_index_equal(index.symmetric_difference(float_left), empty)

    @pytest.mark.parametrize(
        'op_name',
        ['union', 'intersection', 'difference', 'symmetric_difference'])
    def test_set_operation_errors(self, closed, op_name):
        """The set operation named ``op_name`` rejects unsuitable operands.

        Covers a non-IntervalIndex operand, an operand closed on a different
        side, and an operand with an incompatible dtype.
        """
        set_op = getattr(self.create_index(closed=closed), op_name)

        # non-IntervalIndex
        wrong_type_msg = ('the other index needs to be an IntervalIndex too, '
                          'but was type Int64Index')
        with tm.assert_raises_regex(TypeError, wrong_type_msg):
            set_op(Index([1, 2, 3]))

        # mixed closed
        wrong_closed_msg = ('can only do set operations between two '
                            'IntervalIndex objects that are closed on the '
                            'same side')
        all_sides = ('right', 'left', 'both', 'neither')
        for other_closed in [side for side in all_sides if side != closed]:
            mismatched = self.create_index(closed=other_closed)
            with tm.assert_raises_regex(ValueError, wrong_closed_msg):
                set_op(mismatched)

        # GH 19016: incompatible dtypes
        datetime_index = interval_range(Timestamp('20180101'), periods=9,
                                        closed=closed)
        wrong_dtype_msg = ('can only do {op} between two IntervalIndex '
                           'objects that have compatible dtypes'
                           ).format(op=op_name)
        with tm.assert_raises_regex(TypeError, wrong_dtype_msg):
            set_op(datetime_index)

    def test_isin(self, closed):
        """``isin`` against index and list candidates, for every closure."""
        index = self.create_index(closed=closed)
        size = len(index)

        # candidates holding only the first interval
        first_only = np.array([True] + [False] * (size - 1))
        for candidates in (index[:1], [index[0]]):
            tm.assert_numpy_array_equal(index.isin(candidates), first_only)

        # candidates expected to match all but the last interval
        other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
        all_but_last = np.array([True] * (size - 1) + [False])
        for candidates in (other, other.tolist()):
            tm.assert_numpy_array_equal(index.isin(candidates), all_but_last)

        # candidates match only when they are closed on the same side
        for other_closed in ('right', 'left', 'both', 'neither'):
            other = self.create_index(closed=other_closed)
            same_side = np.repeat(closed == other_closed, size)
            for candidates in (other, other.tolist()):
                tm.assert_numpy_array_equal(index.isin(candidates),
                                            same_side)

    def test_comparison(self):
        """Elementwise comparison of ``self.index`` with various operands.

        Operands are Interval scalars, the index itself, a left-closed index,
        the index's ``values`` and several invalid objects.
        """
        idx = self.index
        both_true = np.array([True, True])
        both_false = np.array([False, False])
        second_only = np.array([False, True])

        # Interval scalar on either side
        tm.assert_numpy_array_equal(Interval(0, 1) < idx, second_only)
        tm.assert_numpy_array_equal(Interval(0.5, 1.5) < idx, second_only)
        tm.assert_numpy_array_equal(idx > Interval(0.5, 1.5), second_only)

        # the index against itself
        tm.assert_numpy_array_equal(idx == idx, both_true)
        tm.assert_numpy_array_equal(idx <= idx, both_true)
        tm.assert_numpy_array_equal(idx >= idx, both_true)
        tm.assert_numpy_array_equal(idx < idx, both_false)
        tm.assert_numpy_array_equal(idx > idx, both_false)

        # against a left-closed index
        left_closed = IntervalIndex.from_breaks([0, 1, 2], 'left')
        tm.assert_numpy_array_equal(idx == left_closed, both_false)

        # against its own values, in both operand orders
        values = idx.values
        tm.assert_numpy_array_equal(idx == values, both_true)
        tm.assert_numpy_array_equal(values == idx, both_true)
        tm.assert_numpy_array_equal(idx <= values, both_true)
        tm.assert_numpy_array_equal(idx != values, both_false)
        tm.assert_numpy_array_equal(idx > values, both_false)
        tm.assert_numpy_array_equal(values > idx, both_false)

        # invalid comparisons: equality is False, ordering raises
        tm.assert_numpy_array_equal(idx == 0, both_false)
        tm.assert_numpy_array_equal(idx == idx.left, both_false)

        with tm.assert_raises_regex(TypeError, 'unorderable types'):
            idx > 0
        with tm.assert_raises_regex(TypeError, 'unorderable types'):
            idx <= 0
        with pytest.raises(TypeError):
            idx > np.arange(2)
        with pytest.raises(ValueError):
            idx > np.arange(3)

    def test_missing_values(self, closed):
        """NaN entries: equality across constructors, rejection, ``isna``."""
        intervals = [Interval(0, 1, closed=closed),
                     Interval(1, 2, closed=closed)]
        generic = Index([np.nan] + intervals)
        built_from_arrays = IntervalIndex.from_arrays(
            [np.nan, 0, 1], [np.nan, 1, 2], closed=closed)
        assert generic.equals(built_from_arrays)

        # NaN on the left side only is rejected
        with pytest.raises(ValueError):
            IntervalIndex.from_arrays(
                [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed)

        first_is_na = np.array([True, False, False])
        tm.assert_numpy_array_equal(isna(generic), first_is_na)

    def test_sort_values(self, closed):
        """``sort_values`` in both directions, without and with a NaN."""
        index = self.create_index(closed=closed)
        tm.assert_index_equal(index.sort_values(), index)
        tm.assert_index_equal(index.sort_values(ascending=False),
                              index[::-1])

        # with nan
        low, high = Interval(0, 1), Interval(1, 2)
        shuffled = IntervalIndex([high, np.nan, low])

        tm.assert_index_equal(shuffled.sort_values(),
                              IntervalIndex([low, high, np.nan]))
        tm.assert_index_equal(shuffled.sort_values(ascending=False),
                              IntervalIndex([np.nan, high, low]))

    @pytest.mark.parametrize('tz', [None, 'US/Eastern'])
    def test_datetime(self, tz):
        """``mid``, membership and ``get_indexer`` on a daily datetime index.

        All timestamps are created with the timezone ``tz``.
        """
        def stamp(text):
            return Timestamp(text, tz=tz)

        index = IntervalIndex.from_breaks(
            date_range(start=stamp('2000-01-01'), periods=10))

        # test mid
        expected_mid = date_range(start=stamp('2000-01-01T12:00'), periods=9)
        tm.assert_index_equal(index.mid, expected_mid)

        # __contains__ doesn't check individual points
        for text in ('2000-01-01', '2000-01-01T12', '2000-01-02'):
            assert stamp(text) not in index
        iv_true = Interval(stamp('2000-01-01T08'), stamp('2000-01-01T18'))
        iv_false = Interval(stamp('1999-12-31'), stamp('2000-01-01'))
        assert iv_true in index
        assert iv_false not in index

        # .contains does check individual points
        assert not index.contains(stamp('2000-01-01'))
        assert index.contains(stamp('2000-01-01T12'))
        assert index.contains(stamp('2000-01-02'))
        assert index.contains(iv_true)
        assert not index.contains(iv_false)

        # test get_indexer: (first target, step, expected positions)
        lookups = [
            ('1999-12-31T12:00', '12H', [-1, -1, 0, 0, 1, 1, 2]),
            ('2000-01-08T18:00', '6H', [7, 7, 8, 8, 8, 8, -1]),
        ]
        for first, step, positions in lookups:
            target = date_range(start=stamp(first), periods=7, freq=step)
            tm.assert_numpy_array_equal(index.get_indexer(target),
                                        np.array(positions, dtype='intp'))

    def test_append(self, closed):
        """``append`` with one index, a list of indexes, and a bad closure."""
        def build(left, right, side=closed):
            return IntervalIndex.from_arrays(left, right, closed=side)

        first = build([0, 1], [1, 2])
        second = build([1, 2], [2, 3])

        # a single index
        tm.assert_index_equal(first.append(second),
                              build([0, 1, 1, 2], [1, 2, 2, 3]))

        # a list of indexes
        tm.assert_index_equal(first.append([first, second]),
                              build([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3]))

        # appending an index closed on any other side raises
        msg = ('can only append two IntervalIndex objects that are closed '
               'on the same side')
        for other_closed in {'left', 'right', 'both', 'neither'} - {closed}:
            mismatched = build([0, 1], [1, 2], side=other_closed)
            with tm.assert_raises_regex(ValueError, msg):
                first.append(mismatched)

    def test_is_non_overlapping_monotonic(self, closed):
        # Should be True in all cases
        tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is True

        # Should be False in all cases (overlapping)
        tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False in all cases (non-monotonic)
        tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
        idx = IntervalIndex.from_tuples(tpls, closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
        assert idx.is_non_overlapping_monotonic is False

        # Should be False for closed='both', otherwise True (GH16560)
        if closed == 'both':
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is False
        else:
            idx = IntervalIndex.from_breaks(range(4), closed=closed)
            assert idx.is_non_overlapping_monotonic is True

    @pytest.mark.parametrize('tuples', [
        lzip(range(10), range(1, 11)),
        lzip(date_range('20170101', periods=10),
             date_range('20170101', periods=10)),
        lzip(timedelta_range('0 days', periods=10),
             timedelta_range('1 day', periods=10))
    ])
    def test_to_tuples(self, tuples):
        """``to_tuples`` gives back the tuples the index was built from.

        Regression test for GH 18756.
        """
        round_tripped = IntervalIndex.from_tuples(tuples).to_tuples()
        tm.assert_index_equal(round_tripped,
                              Index(com.asarray_tuplesafe(tuples)))

    @pytest.mark.parametrize('tuples', [
        lzip(range(10), range(1, 11)) + [np.nan],
        lzip(date_range('20170101', periods=10),
             date_range('20170101', periods=10)) + [np.nan],
        lzip(timedelta_range('0 days', periods=10),
             timedelta_range('1 day', periods=10)) + [np.nan]
    ])
    @pytest.mark.parametrize('na_tuple', [True, False])
    def test_to_tuples_na(self, tuples, na_tuple):
        """``to_tuples`` with a trailing NaN, for both ``na_tuple`` settings.

        Regression test for GH 18756.
        """
        result = IntervalIndex.from_tuples(tuples).to_tuples(
            na_tuple=na_tuple)

        # check the non-NA portion
        expected_head = Index(com.asarray_tuplesafe(tuples[:-1]))
        tm.assert_index_equal(result[:-1], expected_head)

        # check the NA portion
        last = result[-1]
        if not na_tuple:
            assert isna(last)
            return
        assert isinstance(last, tuple)
        assert len(last) == 2
        assert all(isna(part) for part in last)

    def test_nbytes(self):
        # GH 19209
        left = np.arange(0, 4, dtype='i8')
        right = np.arange(1, 5, dtype='i8')

        result = IntervalIndex.from_arrays(left, right).nbytes
        expected = 64  # 4 * 8 * 2
        assert result == expected

    def test_itemsize(self):
        """``itemsize`` of an int64 interval index is 16 (GH 19209)."""
        lefts = np.arange(0, 4, dtype='i8')
        rights = np.arange(1, 5, dtype='i8')
        index = IntervalIndex.from_arrays(lefts, rights)

        # 8 * 2
        assert index.itemsize == 16

    @pytest.mark.parametrize('new_closed',
                             ['left', 'right', 'both', 'neither'])
    def test_set_closed(self, name, closed, new_closed):
        # GH 21670
        index = interval_range(0, 5, closed=closed, name=name)
        result = index.set_closed(new_closed)
        expected = interval_range(0, 5, closed=new_closed, name=name)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('bad_closed', ['foo', 10, 'LEFT', True, False])
    def test_set_closed_errors(self, bad_closed):
        """``set_closed`` raises ValueError for ``bad_closed`` (GH 21670)."""
        target = interval_range(0, 5)
        expected_msg = "invalid option for 'closed': {closed}".format(
            closed=bad_closed)
        with tm.assert_raises_regex(ValueError, expected_msg):
            target.set_closed(bad_closed)
示例#45
0
 def test_early_truncation(self, start, end, freq, expected_endpoint):
     # index truncates early if freq causes end to be skipped
     result = interval_range(start=start, end=end, freq=freq)
     result_endpoint = result.right[-1]
     assert result_endpoint == expected_endpoint
示例#46
0
 def test_set_closed_errors(self, bad_closed):
     """``set_closed`` raises ValueError for ``bad_closed`` (GH 21670)."""
     target = interval_range(0, 5)
     expected_msg = "invalid option for 'closed': {closed}".format(
         closed=bad_closed)
     with tm.assert_raises_regex(ValueError, expected_msg):
         target.set_closed(bad_closed)
示例#47
0
 def test_subtype_integer_errors(self):
     """Casting a range with negative endpoints to uint64 raises ValueError."""
     # int64 -> uint64 fails with negative values
     unsigned = IntervalDtype('uint64')
     signed_index = interval_range(-10, 10)
     with pytest.raises(ValueError):
         signed_index.astype(unsigned)
示例#48
0
 def f():
     """Call ``interval_range`` without passing any argument."""
     interval_range()
示例#49
0
    def test_errors(self):
        """``interval_range`` raises the expected error for bad arguments.

        Each group below pairs one expected message with the keyword
        combinations that must trigger it.
        """
        ts_start, ts_end = Timestamp('20130101'), Timestamp('20130110')
        one_day, ten_days = Timedelta('1 day'), Timedelta('10 days')

        # not enough params, then too many params
        msg = ('Of the four parameters: start, end, periods, and freq, '
               'exactly three must be specified')
        wrong_count = [
            dict(start=0),
            dict(end=5),
            dict(periods=2),
            dict(),
            dict(start=0, end=5, periods=6, freq=1.5),
        ]
        for kwargs in wrong_count:
            with tm.assert_raises_regex(ValueError, msg):
                interval_range(**kwargs)

        # mixed units
        msg = 'start, end, freq need to be type compatible'
        mixed_units = [
            dict(start=0, end=ts_start, freq=2),
            dict(start=0, end=one_day, freq=2),
            dict(start=0, end=10, freq='D'),
            dict(start=ts_start, end=10, freq='D'),
            dict(start=ts_start, end=one_day, freq='D'),
            dict(start=ts_start, end=ts_end, freq=2),
            dict(start=one_day, end=10, freq='D'),
            dict(start=one_day, end=ts_end, freq='D'),
            dict(start=one_day, end=ten_days, freq=2),
        ]
        for kwargs in mixed_units:
            with tm.assert_raises_regex(TypeError, msg):
                interval_range(**kwargs)

        # invalid periods
        msg = 'periods must be a number, got foo'
        with tm.assert_raises_regex(TypeError, msg):
            interval_range(start=0, periods='foo')

        # invalid start
        msg = 'start must be numeric or datetime-like, got foo'
        with tm.assert_raises_regex(ValueError, msg):
            interval_range(start='foo', periods=10)

        # invalid end
        msg = r'end must be numeric or datetime-like, got \(0, 1\]'
        with tm.assert_raises_regex(ValueError, msg):
            interval_range(end=Interval(0, 1), periods=10)

        # invalid freq for datetime-like
        msg = 'freq must be numeric or convertible to DateOffset, got foo'
        bad_freq = [
            dict(start=0, end=10, freq='foo'),
            dict(start=ts_start, periods=10, freq='foo'),
            dict(end=one_day, periods=10, freq='foo'),
        ]
        for kwargs in bad_freq:
            with tm.assert_raises_regex(ValueError, msg):
                interval_range(**kwargs)

        # mixed tz
        eastern = Timestamp('2017-01-01', tz='US/Eastern')
        pacific = Timestamp('2017-01-07', tz='US/Pacific')
        msg = 'Start and end cannot both be tz-aware with different timezones'
        with tm.assert_raises_regex(TypeError, msg):
            interval_range(start=eastern, end=pacific)
示例#50
0
 def test_set_closed(self, name, closed, new_closed):
     # GH 21670
     index = interval_range(0, 5, closed=closed, name=name)
     result = index.set_closed(new_closed)
     expected = interval_range(0, 5, closed=new_closed, name=name)
     tm.assert_index_equal(result, expected)
示例#51
0
 def test_set_closed(self, name, closed, new_closed):
     # GH 21670
     index = interval_range(0, 5, closed=closed, name=name)
     result = index.set_closed(new_closed)
     expected = interval_range(0, 5, closed=new_closed, name=name)
     tm.assert_index_equal(result, expected)
def main(argv):
    """Plot the outputs of one simulation: latent, embedding, CIF and spikes.

    Command-line options select the simulation result number and which
    latent, trial and neuron to plot, plus the number of resamples for the
    KS test.  Plotly figures are written under ``figures/`` as PNG and HTML
    and then shown; the KS time-rescaling and ROC figures are saved as PNG
    through matplotlib.  Execution ends in a ``pdb`` breakpoint.

    NOTE(review): ``argv`` is never read -- ``parse_args()`` is called with
    no arguments, so the options come from ``sys.argv``.
    """
    cliParser = argparse.ArgumentParser()
    cliParser.add_argument("simResNumber",
                           type=int,
                           help="Simulation result number")
    # Every optional flag is an integer; only name, help and default differ.
    for optName, optHelp, optDefault in (
            ("--latentToPlot", "Latent to plot", 0),
            ("--trialToPlot", "Trial to plot", 0),
            ("--neuronToPlot", "Neuron to plot", 0),
            ("--nResamplesKSTest", "Number of resamples for KS test", 10)):
        cliParser.add_argument(optName,
                               help=optHelp,
                               type=int,
                               default=optDefault)
    cliArgs = cliParser.parse_args()

    simResNumber = cliArgs.simResNumber
    latentToPlot = cliArgs.latentToPlot
    trialToPlot = cliArgs.trialToPlot
    neuronToPlot = cliArgs.neuronToPlot
    nResamplesKSTest = cliArgs.nResamplesKSTest

    # The result's metadata file names the config the simulation started from.
    simResConfig = configparser.ConfigParser()
    simResConfig.read(
        "results/{:08d}_simulation_metaData.ini".format(simResNumber))
    simInitConfigFilename = \
        simResConfig["simulation_params"]["simInitConfigFilename"]

    simInitConfig = configparser.ConfigParser()
    simInitConfig.read(simInitConfigFilename)
    controlVars = simInitConfig["control_variables"]
    nLatents = int(controlVars["nLatents"])
    nNeurons = int(controlVars["nNeurons"])
    # The first and last characters of the stored value are dropped before
    # splitting on commas (presumably enclosing brackets -- TODO confirm).
    trialsLengths = [
        float(lengthStr)
        for lengthStr in controlVars["trialsLengths"][1:-1].split(",")
    ]
    dtCIF = float(controlVars["dtCIF"])
    nTrials = len(trialsLengths)
    T = torch.tensor(trialsLengths).max().item()
    embeddingParams = simInitConfig["embedding_params"]
    C, d = utils.svGPFA.configUtils.getLinearEmbeddingParams(
        CFilename=embeddingParams["C_filename"],
        dFilename=embeddingParams["d_filename"])

    # Output filename patterns; the remaining "{:s}" slot takes the extension.
    trialNeuronIDs = (simResNumber, trialToPlot, neuronToPlot)
    latentFigFilenamePattern = \
        "figures/{:08d}_simulation_latent_trial{:03d}_latent{:03d}.{{:s}}".format(
            simResNumber, trialToPlot, latentToPlot)
    embeddingFigFilenamePattern = \
        "figures/{:08d}_simulation_embedding_trial{:03d}_neuron{:03d}.{{:s}}".format(
            *trialNeuronIDs)
    cifFigFilenamePattern = \
        "figures/{:08d}_simulation_cif_trial{:03d}_neuron{:03d}.{{:s}}".format(
            *trialNeuronIDs)
    spikesTimesFigFilenamePattern = \
        "figures/{:08d}_simulation_spikesTimes_trial{:03d}_neuron{:03d}.{{:s}}".format(
            *trialNeuronIDs)
    spikesRatesFigFilenamePattern = \
        "figures/{:08d}_simulation_spikesRates_neuron{:03d}.{{:s}}".format(
            simResNumber, neuronToPlot)
    ksTestTimeRescalingFigFilenamePattern = \
        "figures/{:08d}_simulation_ksTestTimeRescaling_trial{:03d}_neuron{:03d}.{{:s}}".format(
            *trialNeuronIDs)
    rocFigFilenamePattern = \
        "figures/{:08d}_simulation_rocAnalysis_trial{:03d}_neuron{:03d}.{{:s}}".format(
            *trialNeuronIDs)

    with open("results/{:08d}_simRes.pickle".format(simResNumber),
              "rb") as simResFile:
        simRes = pickle.load(simResFile)
    times = simRes["times"]
    latentsSamples = simRes["latents"]
    latentsMeans = simRes["latentsMeans"]
    latentsSTDs = simRes["latentsSTDs"]
    cifValues = simRes["cifValues"]
    spikes = simRes["spikes"]

    pio.renderers.default = "browser"

    def saveAndShowPlotly(plotlyFig, filenamePattern):
        # Export the figure as PNG and HTML, then display it.
        plotlyFig.write_image(filenamePattern.format("png"))
        plotlyFig.write_html(filenamePattern.format("html"))
        plotlyFig.show()

    trialNeuronTitle = "Trial {:d}, Neuron {:d}".format(trialToPlot,
                                                        neuronToPlot)

    # --- latent of the selected trial ---
    fig = plot.svGPFA.plotUtilsPlotly.getSimulatedLatentPlot(
        times=times[trialToPlot],
        latentSamples=latentsSamples[trialToPlot][latentToPlot, :],
        latentMeans=latentsMeans[trialToPlot][latentToPlot, :],
        latentSTDs=latentsSTDs[trialToPlot][latentToPlot, :],
        title="Trial {:d}, Latent {:d}".format(trialToPlot, latentToPlot))
    saveAndShowPlotly(fig, latentFigFilenamePattern)

    # --- embedding of the selected trial/neuron ---
    # embeddingSamples[r], embeddingMeans[r], embeddingSTDs \in nNeurons x nSamples
    embeddingSamples = [
        torch.matmul(C, latentsSamples[trial]) + d
        for trial in range(nTrials)
    ]
    embeddingMeans = [
        torch.matmul(C, latentsMeans[trial]) + d for trial in range(nTrials)
    ]
    embeddingSTDs = [
        torch.matmul(C, latentsSTDs[trial]) for trial in range(nTrials)
    ]
    fig = plot.svGPFA.plotUtilsPlotly.getSimulatedEmbeddingPlot(
        times=times[trialToPlot],
        samples=embeddingSamples[trialToPlot][neuronToPlot, :],
        means=embeddingMeans[trialToPlot][neuronToPlot, :],
        stds=embeddingSTDs[trialToPlot][neuronToPlot, :],
        title=trialNeuronTitle)
    saveAndShowPlotly(fig, embeddingFigFilenamePattern)

    # --- CIF of the selected trial/neuron ---
    fig = plot.svGPFA.plotUtilsPlotly.getPlotCIF(
        times=times[trialToPlot],
        values=cifValues[trialToPlot][neuronToPlot],
        title=trialNeuronTitle)
    saveAndShowPlotly(fig, cifFigFilenamePattern)

    # --- spikes times of the selected trial ---
    fig = plot.svGPFA.plotUtilsPlotly.getSimulatedSpikesTimesPlotOneTrial(
        spikesTimes=spikes[trialToPlot],
        title="Trial {:d}".format(trialToPlot))
    saveAndShowPlotly(fig, spikesTimesFigFilenamePattern)

    # --- spikes rates ---
    spikesRates = utils.svGPFA.miscUtils.computeSpikeRates(
        trialsTimes=times, spikesTimes=spikes)
    fig = plot.svGPFA.plotUtilsPlotly.getPlotSpikeRatesForAllTrialsAndAllNeurons(
        spikesRates=spikesRates)
    saveAndShowPlotly(fig, spikesRatesFigFilenamePattern)

    # --- KS test with time rescaling (matplotlib figure) ---
    oneTrialCIFTimes = torch.arange(0, T, dtCIF)
    # The same time grid is repeated for every trial: nTrials x nTimes x 1.
    cifTimes = torch.unsqueeze(
        torch.ger(torch.ones(nTrials), oneTrialCIFTimes), dim=2)
    cifTimesKS = cifTimes[trialToPlot, :, 0]
    cifValuesKS = cifValues[trialToPlot][neuronToPlot]
    spikesTimesKS = spikes[trialToPlot][neuronToPlot]
    (diffECDFsX, diffECDFsY, estECDFx, estECDFy, simECDFx, simECDFy,
     cb) = stats.pointProcess.tests.KSTestTimeRescalingNumericalCorrection(
         spikesTimes=spikesTimesKS,
         cifTimes=cifTimesKS,
         cifValues=cifValuesKS,
         gamma=nResamplesKSTest)
    fig = plot.svGPFA.plotUtils.plotResKSTestTimeRescalingNumericalCorrection(
        diffECDFsX=diffECDFsX,
        diffECDFsY=diffECDFsY,
        estECDFx=estECDFx,
        estECDFy=estECDFy,
        simECDFx=simECDFx,
        simECDFy=simECDFy,
        cb=cb,
        title="Trial {:d}, Neuron {:d} ({:d} spikes)".format(
            trialToPlot, neuronToPlot, len(spikesTimesKS)))
    plt.savefig(fname=ksTestTimeRescalingFigFilenamePattern.format("png"))

    plt.figure()

    # --- ROC analysis (matplotlib figure) ---
    pk = cifValuesKS * dtCIF
    bins = pd.interval_range(start=0, end=T, periods=len(pk))
    # start binning spikes using pandas
    cutRes = pd.cut(spikesTimesKS, bins=bins, retbins=True)[0]
    Y = torch.from_numpy(cutRes.value_counts().values)
    fpr, tpr, _ = sklearn.metrics.roc_curve(Y, pk, pos_label=1)
    roc_auc = sklearn.metrics.auc(fpr, tpr)
    fig = plot.svGPFA.plotUtils.plotResROCAnalysis(fpr=fpr,
                                                   tpr=tpr,
                                                   auc=roc_auc,
                                                   title=trialNeuronTitle)
    plt.savefig(fname=rocFigFilenamePattern.format("png"))

    plt.show()

    # Drops into the debugger once all figures have been produced.
    pdb.set_trace()
示例#53
0
 def test_set_closed_errors(self, bad_closed):
     # GH 21670
     index = interval_range(0, 5)
     msg = "invalid option for 'closed': {closed}".format(closed=bad_closed)
     with tm.assert_raises_regex(ValueError, msg):
         index.set_closed(bad_closed)
示例#54
0
 def test_set_closed_errors(self, bad_closed):
     # GH 21670
     index = interval_range(0, 5)
     msg = f"invalid option for 'closed': {bad_closed}"
     with pytest.raises(ValueError, match=msg):
         index.set_closed(bad_closed)
示例#55
0
    def test_errors(self):
        """``interval_range`` must reject incomplete, mixed or invalid arguments.

        Each group below pairs one expected message with the calls that must
        raise it.  The message is matched as a regular expression against
        the raised exception.
        """
        # ``tm.assert_raises_regex`` is no longer available in pandas;
        # ``pytest.raises(..., match=...)`` performs the same regex search.
        # Imported locally so this block does not depend on a module-level import.
        import pytest

        # not enough params / too many params
        msg = ('Of the three parameters: start, end, and periods, '
               'exactly two must be specified')
        wrong_param_counts = [
            dict(start=0),
            dict(end=5),
            dict(periods=2),
            dict(),
            dict(start=0, end=5, periods=6),  # too many params
        ]
        for kwargs in wrong_param_counts:
            with pytest.raises(ValueError, match=msg):
                interval_range(**kwargs)

        # mixed units
        msg = 'start, end, freq need to be type compatible'
        mixed_units = [
            dict(start=0, end=Timestamp('20130101'), freq=2),
            dict(start=0, end=Timedelta('1 day'), freq=2),
            dict(start=0, end=10, freq='D'),
            dict(start=Timestamp('20130101'), end=10, freq='D'),
            dict(start=Timestamp('20130101'),
                 end=Timedelta('1 day'), freq='D'),
            dict(start=Timestamp('20130101'),
                 end=Timestamp('20130110'), freq=2),
            dict(start=Timedelta('1 day'), end=10, freq='D'),
            dict(start=Timedelta('1 day'),
                 end=Timestamp('20130110'), freq='D'),
            dict(start=Timedelta('1 day'),
                 end=Timedelta('10 days'), freq=2),
        ]
        for kwargs in mixed_units:
            with pytest.raises(TypeError, match=msg):
                interval_range(**kwargs)

        # invalid periods
        msg = 'periods must be a number, got foo'
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, periods='foo')

        # invalid start
        msg = 'start must be numeric or datetime-like, got foo'
        with pytest.raises(ValueError, match=msg):
            interval_range(start='foo', periods=10)

        # invalid end
        msg = r'end must be numeric or datetime-like, got \(0, 1\]'
        with pytest.raises(ValueError, match=msg):
            interval_range(end=Interval(0, 1), periods=10)

        # invalid freq for numeric, Timestamp and Timedelta endpoints
        msg = 'freq must be numeric or convertible to DateOffset, got foo'
        invalid_freq_endpoints = [
            dict(start=0, end=10),
            dict(start=Timestamp('20130101'), periods=10),
            dict(end=Timedelta('1 day'), periods=10),
        ]
        for kwargs in invalid_freq_endpoints:
            with pytest.raises(ValueError, match=msg):
                interval_range(freq='foo', **kwargs)
示例#56
0
    def generate(self):
        """Build a colour-coded plotly table figure from ``self.dataframe``.

        Columns whose name contains ``'std'`` are moved to the end.  Every
        non-object column is mapped through ``get_interval_index_of_value``
        (one interval list for the ``std`` columns, another for the rest) and
        the resulting index selects the cell colour; ``std`` columns use the
        reversed palette.  The first column (Class) is rendered in gray.

        Side effects: ``self.dataframe`` is reordered, and all of its columns
        except the first are rounded to ``self.precision``.

        Returns:
            The ``go.Figure`` holding the table.
        """
        columns = list(self.dataframe.columns)
        # put std to the end
        columns.sort(key=lambda x: 'std' in x)
        self.dataframe = self.dataframe[columns]

        closed_on = 'right'
        mid_periods = 15

        def build_interval_list(lower_bound, upper_bound):
            # [0, lower] + mid_periods equal-width bins + [upper, 1]
            low = pd.IntervalIndex.from_breaks([0, lower_bound], closed=closed_on)
            mid = pd.interval_range(lower_bound, upper_bound, periods=mid_periods, closed=closed_on)
            high = pd.IntervalIndex.from_breaks([upper_bound, 1], closed=closed_on)
            return low.append([mid, high])

        # create intervals for coloring
        interval_list_1 = build_interval_list(0.3, 0.9)  # all but std
        interval_list_2 = build_interval_list(0.05, 0.1)  # std

        # interval matrix, exclude first column (Class); explicit copy so the
        # column assignments below never write through to self.dataframe
        interval_dataframe = self.dataframe.iloc[:, 1:].copy()
        for column in self.dataframe.select_dtypes(exclude=['object']):
            interval_list = interval_list_2 if 'std' in column else interval_list_1
            interval_dataframe[column] = self.dataframe[column].map(
                lambda value: get_interval_index_of_value(interval_list, value))

        # colors
        color_low = Color('#e60000')  # red
        color_lower_bound = Color('#ffa31a')  # orange
        color_upper_bound = Color('#248f24')  # green
        color_mid = Color('#ffcc00')  # yellow
        color_high = Color('#006622')  # dark green

        first_half = mid_periods // 2 if mid_periods % 2 == 0 else mid_periods // 2 + 1
        gradient = list(color_lower_bound.range_to(color_mid, first_half))
        # +1 because the shared mid color is dropped again as a duplicate
        gradient.extend(color_mid.range_to(color_upper_bound, mid_periods // 2 + 1))

        # get unique hex values of colors, keeping first-occurrence order
        unique_colors = []
        for color in gradient:
            if color.hex not in unique_colors:
                unique_colors.append(color.hex)

        colors = [color_low.hex] + unique_colors + [color_high.hex]

        # reversed colors for low=good and high=bad
        colors_reversed = colors[::-1]

        # fill color matrix
        color_matrix = [['#808080' for _ in range(len(interval_dataframe))]]  # Class column, gray
        for column in interval_dataframe.columns:
            # Pick the palette per column WITHOUT rebinding ``colors``:
            # otherwise every column after the first std column would also
            # use the reversed palette.
            palette = colors_reversed if 'std' in column else colors
            color_matrix.append([palette[value] for value in interval_dataframe[column]])

        # round values, except first column (Class); ``values`` is
        # self.dataframe itself, so the rounding is applied in place
        values = self.dataframe
        values.iloc[:, 1:] = values.iloc[:, 1:].applymap(lambda x: round(x, self.precision))

        # create figure
        fig = go.Figure(
            data=[go.Table(
                header=dict(
                    values=values.columns,
                    line_color='white', fill_color='white',
                    align='center', font=dict(color='black', size=9)
                ),

                cells=dict(
                    values=values.T,
                    fill_color=color_matrix,
                    line_color=color_matrix,
                    align='center', font=dict(color='white', size=8)
                )
            )]
        )

        # set layout
        fig.update_layout(
            width=len(self.dataframe.columns) * 100
        )

        return fig
示例#57
0
    def test_construction_from_timestamp(self, closed):
        # combinations of start/end/periods without freq
        start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06')
        breaks = date_range(start=start, end=end)
        expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed)

        result = interval_range(start=start, end=end, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start, periods=5, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(end=end, periods=5, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        # combinations of start/end/periods with fixed freq
        freq = '2D'
        start, end = Timestamp('2017-01-01'), Timestamp('2017-01-07')
        breaks = date_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed)

        result = interval_range(start=start, end=end, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start, periods=3, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(end=end, periods=3, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        # output truncates early if freq causes end to be skipped.
        end = Timestamp('2017-01-08')
        result = interval_range(start=start, end=end, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        # combinations of start/end/periods with non-fixed freq
        freq = 'M'
        start, end = Timestamp('2017-01-01'), Timestamp('2017-12-31')
        breaks = date_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed)

        result = interval_range(start=start, end=end, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start, periods=11, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        result = interval_range(end=end, periods=11, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)

        # output truncates early if freq causes end to be skipped.
        end = Timestamp('2018-01-15')
        result = interval_range(start=start, end=end, freq=freq, name='foo',
                                closed=closed)
        tm.assert_index_equal(result, expected)
def create_data():
    """Build the pandas objects that make up the pickle/msgpack data.

    Returns a dict keyed by object family (``series``, ``frame``, ``panel``,
    ``index``, ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``,
    ``timestamp``, ``offsets``); each value maps a label to a sample object.
    Some entries are only added, or are built with different keywords,
    depending on ``_loose_version``.
    """
    data = {
        u'A': [0., 1., 2., 3., np.nan],
        u'B': [0, 1, 0, 1, 0],
        u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'],
        u'D': date_range('1/1/2009', periods=5),
        u'E': [0., 1, Timestamp('20100101'), u'foo', 2.],
    }

    scalars = {
        'timestamp': Timestamp('20130101'),
        'period': Period('2012', 'M'),
    }

    index = {
        'int': Index(np.arange(10)),
        'date': date_range('20130101', periods=10),
        'period': period_range('2013-01-01', freq='M', periods=10),
        'float': Index(np.arange(10, dtype=np.float64)),
        'uint': Index(np.arange(10, dtype=np.uint64)),
        'timedelta': timedelta_range('00:00:00', freq='30T', periods=10),
    }

    # Index types that only exist from a given version on.
    if _loose_version >= LooseVersion('0.18'):
        from pandas import RangeIndex
        index['range'] = RangeIndex(10)

    if _loose_version >= LooseVersion('0.21'):
        from pandas import interval_range
        index['interval'] = interval_range(0, periods=10)

    reg2_levels = [
        [u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'],
        [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two'],
    ]
    mi = {
        'reg2': MultiIndex.from_tuples(tuple(zip(*reg2_levels)),
                                       names=[u'first', u'second']),
    }

    series = {
        'float': Series(data[u'A']),
        'int': Series(data[u'B']),
        'mixed': Series(data[u'E']),
        'ts': Series(np.arange(10).astype(np.int64),
                     index=date_range('20130101', periods=10)),
        'mi': Series(np.arange(5).astype(np.float64),
                     index=MultiIndex.from_tuples(
                         tuple(zip(*[[1, 1, 2, 2, 2],
                                     [3, 4, 3, 4, 5]])),
                         names=[u'one', u'two'])),
        'dup': Series(np.arange(5).astype(np.float64),
                      index=[u'A', u'B', u'C', u'D', u'A']),
        'cat': Series(Categorical([u'foo', u'bar', u'baz'])),
        'dt': Series(date_range('20130101', periods=5)),
        'dt_tz': Series(date_range('20130101', periods=5,
                                   tz='US/Eastern')),
        'period': Series([Period('2000Q1')] * 5),
    }

    # Frame with a duplicated column label ('A' appears twice).
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list(u"ABCDA")
    frame = {
        'float': DataFrame({u'A': series[u'float'],
                            u'B': series[u'float'] + 1}),
        'int': DataFrame({u'A': series[u'int'],
                          u'B': series[u'int'] + 1}),
        'mixed': DataFrame({k: data[k]
                            for k in [u'A', u'B', u'C', u'D']}),
        'mi': DataFrame({u'A': np.arange(5).astype(np.float64),
                         u'B': np.arange(5).astype(np.int64)},
                        index=MultiIndex.from_tuples(
                            tuple(zip(*[[u'bar', u'bar', u'baz',
                                         u'baz', u'baz'],
                                        [u'one', u'two', u'one',
                                         u'two', u'three']])),
                            names=[u'first', u'second'])),
        'dup': DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=[u'A', u'B', u'A']),
        'cat_onecol': DataFrame({u'A': Categorical([u'foo', u'bar'])}),
        'cat_and_float': DataFrame({
            u'A': Categorical([u'foo', u'bar', u'baz']),
            u'B': np.arange(3).astype(np.int64)}),
        'mixed_dup': mixed_dup_df,
        'dt_mixed_tzs': DataFrame({
            u'A': Timestamp('20130102', tz='US/Eastern'),
            u'B': Timestamp('20130603', tz='CET')}, index=range(5)),
        'dt_mixed2_tzs': DataFrame({
            u'A': Timestamp('20130102', tz='US/Eastern'),
            u'B': Timestamp('20130603', tz='CET'),
            u'C': Timestamp('20130603', tz='UTC')}, index=range(5)),
    }

    # Warnings raised while building the Panels are recorded, not shown.
    with catch_warnings(record=True):
        mixed_dup_panel = Panel({u'ItemA': frame[u'float'],
                                 u'ItemB': frame[u'int']})
        mixed_dup_panel.items = [u'ItemA', u'ItemA']
        panel = {
            'float': Panel({u'ItemA': frame[u'float'],
                            u'ItemB': frame[u'float'] + 1}),
            'dup': Panel(
                np.arange(30).reshape(3, 5, 2).astype(np.float64),
                items=[u'A', u'B', u'A']),
            'mixed_dup': mixed_dup_panel,
        }

    cat = {
        'int8': Categorical(list('abcdefg')),
        'int16': Categorical(np.arange(1000)),
        'int32': Categorical(np.arange(10000)),
    }

    timestamp = {
        'normal': Timestamp('2011-01-01'),
        'nat': NaT,
        'tz': Timestamp('2011-01-01', tz='US/Eastern'),
    }

    # Versions before 0.19.2 are given ``offset=``; later ones ``freq=``.
    if _loose_version < LooseVersion('0.19.2'):
        freq_keyword = 'offset'
    else:
        freq_keyword = 'freq'
    timestamp['freq'] = Timestamp('2011-01-01', **{freq_keyword: 'D'})
    timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo',
                                  **{freq_keyword: 'M'})

    off = {
        'DateOffset': DateOffset(years=1),
        'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824),
        'BusinessDay': BusinessDay(offset=timedelta(seconds=9)),
        'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'),
        'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'),
        'SemiMonthBegin': SemiMonthBegin(day_of_month=9),
        'SemiMonthEnd': SemiMonthEnd(day_of_month=24),
        'MonthBegin': MonthBegin(1),
        'MonthEnd': MonthEnd(1),
        'QuarterBegin': QuarterBegin(1),
        'QuarterEnd': QuarterEnd(1),
        'Day': Day(1),
        'YearBegin': YearBegin(1),
        'YearEnd': YearEnd(1),
        'Week': Week(1),
        'Week_Tues': Week(2, normalize=False, weekday=1),
        'WeekOfMonth': WeekOfMonth(week=3, weekday=4),
        'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3),
        'FY5253': FY5253(n=2, weekday=6, startingMonth=7,
                         variation="last"),
        'Easter': Easter(),
        'Hour': Hour(1),
        'Minute': Minute(1),
    }

    return {
        'series': series,
        'frame': frame,
        'panel': panel,
        'index': index,
        'scalars': scalars,
        'mi': mi,
        'sp_series': {'float': _create_sp_series(),
                      'ts': _create_sp_tsseries()},
        'sp_frame': {'float': _create_sp_frame()},
        'cat': cat,
        'timestamp': timestamp,
        'offsets': off,
    }
示例#59
0
 def test_set_closed_errors(self, bad_closed):
     # GH 21670
     index = interval_range(0, 5)
     msg = "invalid option for 'closed': {closed}".format(closed=bad_closed)
     with pytest.raises(ValueError, match=msg):
         index.set_closed(bad_closed)
示例#60
0
class TestSeriesInterpolateData:
    def test_interpolate(self, datetime_series, string_series):
        """Linear and time-weighted interpolation restore a blanked-out run.

        ``string_series`` is requested but not used by the body; it is kept so
        the signature stays unchanged.
        """
        ts = Series(np.arange(len(datetime_series), dtype=float),
                    datetime_series.index)

        # The values are 0, 1, 2, ... so positional linear interpolation must
        # reproduce the removed interior entries exactly.
        ts_copy = ts.copy()
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        ts_copy[5:10] = np.nan

        linear_interp = ts_copy.interpolate(method="linear")
        tm.assert_series_equal(linear_interp, ts)

        # The values are the ordinals of the index dates, so the same check is
        # repeated with method="time".
        ord_ts = Series([d.toordinal() for d in datetime_series.index],
                        index=datetime_series.index).astype(float)

        ord_ts_copy = ord_ts.copy()
        ord_ts_copy[5:10] = np.nan

        time_interp = ord_ts_copy.interpolate(method="time")
        tm.assert_series_equal(time_interp, ord_ts)

    def test_interpolate_time_raises_for_non_timeseries(self):
        """``method="time"`` on a Series without a DatetimeIndex raises.

        The Series contains a null value and has a default integer index, so
        a ValueError is expected.
        """
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        non_ts = Series([0, 1, 2, np.nan])
        msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex"
        with pytest.raises(ValueError, match=msg):
            non_ts.interpolate(method="time")

    @td.skip_if_no_scipy
    def test_interpolate_pchip(self):
        """Smoke test: pchip interpolation on a refined index (GH5977)."""
        sorted_values = np.sort(np.random.uniform(size=100))
        ser = Series(sorted_values)

        # Add fractional labels around 49-51 so there are gaps to fill.
        extra_labels = Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
        new_index = ser.index.union(extra_labels).astype(float)
        refined = ser.reindex(new_index)
        interp_s = refined.interpolate(method="pchip")

        # No assertion on values: slicing the result must simply not raise.
        interp_s[49:51]

    @td.skip_if_no_scipy
    def test_interpolate_akima(self):
        """Akima interpolation at quarter-step labels between 1 and 3."""
        ser = Series([10, 11, 12, 13])

        # Reindex onto the original labels plus the quarter steps, then fill.
        quarter_steps = Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])
        new_index = ser.index.union(quarter_steps).astype(float)
        reindexed = ser.reindex(new_index)
        interp_s = reindexed.interpolate(method="akima")

        expected_index = Index(
            [1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])
        expected = Series(
            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
            index=expected_index,
        )
        tm.assert_series_equal(interp_s[1:3], expected)

    @td.skip_if_no_scipy
    def test_interpolate_piecewise_polynomial(self):
        """``piecewise_polynomial`` interpolation at quarter-step labels."""
        ser = Series([10, 11, 12, 13])

        # Labels to insert between the existing integer positions.
        inserted = Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])
        new_index = ser.index.union(inserted).astype(float)
        interp_s = ser.reindex(new_index).interpolate(
            method="piecewise_polynomial")

        expected_values = [
            11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00,
        ]
        expected_labels = [1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]
        expected = Series(expected_values, index=Index(expected_labels))
        tm.assert_series_equal(interp_s[1:3], expected)

    @td.skip_if_no_scipy
    def test_interpolate_from_derivatives(self):
        """``from_derivatives`` interpolation at quarter-step labels."""
        ser = Series([10, 11, 12, 13])

        # Widen the index with fractional labels and interpolate the gaps.
        fractional = Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])
        new_index = ser.index.union(fractional).astype(float)
        widened = ser.reindex(new_index)
        interp_s = widened.interpolate(method="from_derivatives")

        expected = Series(
            [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
            index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
        )
        tm.assert_series_equal(interp_s[1:3], expected)

    @pytest.mark.parametrize(
        "kwargs",
        [
            {},
            pytest.param(
                {"method": "polynomial", "order": 1},
                marks=td.skip_if_no_scipy,
            ),
        ],
    )
    def test_interpolate_corners(self, kwargs):
        """All-NaN and empty Series come back unchanged from interpolate."""
        all_nan = Series([np.nan, np.nan])
        tm.assert_series_equal(all_nan.interpolate(**kwargs), all_nan)

        # The empty object Series is first run through a default
        # interpolate(), then interpolated again with the parametrized kwargs.
        empty = Series([], dtype=object).interpolate()
        tm.assert_series_equal(empty.interpolate(**kwargs), empty)

    def test_interpolate_index_values(self):
        """``method="index"`` agrees with ``np.interp`` over the index values.

        Also checks that ``method="values"`` gives the same result.
        """
        sorted_labels = np.sort(np.random.rand(30))
        s = Series(np.nan, index=sorted_labels)
        # Populate every third position; the rest stay NaN.
        s[::3] = np.random.randn(10)

        vals = s.index.values.astype(float)
        result = s.interpolate(method="index")

        # Reference values for the missing positions, computed with numpy.
        bad = isna(s.copy().values)
        good = ~bad
        reference = np.interp(vals[bad], vals[good], s.values[good])
        expected = Series(reference, index=s.index[bad])

        tm.assert_series_equal(result[bad], expected)

        # 'values' is synonymous with 'index' for the method kwarg
        other_result = s.interpolate(method="values")

        tm.assert_series_equal(other_result, result)
        tm.assert_series_equal(other_result[bad], expected)

    def test_interpolate_non_ts(self):
        """``method="time"`` raises when the index is not a DatetimeIndex."""
        msg = (
            "time-weighted interpolation only works on Series or DataFrames "
            "with a DatetimeIndex")
        ser = Series([1, 3, np.nan, np.nan, np.nan, 11])
        with pytest.raises(ValueError, match=msg):
            ser.interpolate(method="time")

    @pytest.mark.parametrize(
        "kwargs",
        [
            {},
            pytest.param(
                {"method": "polynomial", "order": 1},
                marks=td.skip_if_no_scipy,
            ),
        ],
    )
    def test_nan_interpolate(self, kwargs):
        """A single interior NaN is filled, giving a float result."""
        ser = Series([0, 1, np.nan, 3])
        filled = ser.interpolate(**kwargs)
        tm.assert_series_equal(filled, Series([0.0, 1.0, 2.0, 3.0]))

    def test_nan_irregular_index(self):
        """Default interpolation on a Series with unevenly spaced labels."""
        labels = [1, 3, 5, 9]
        ser = Series([1, 2, np.nan, 4], index=labels)
        expected = Series([1.0, 2.0, 3.0, 4.0], index=labels)
        tm.assert_series_equal(ser.interpolate(), expected)

    def test_nan_str_index(self):
        """Default interpolation works on a string index; trailing NaN filled."""
        ser = Series([0, 1, 2, np.nan], index=list("abcd"))
        expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd"))
        filled = ser.interpolate()
        tm.assert_series_equal(filled, expected)

    @td.skip_if_no_scipy
    def test_interp_quad(self):
        """Quadratic interpolation fills the gap in 1, 4, NaN, 16 with 9."""
        labels = [1, 2, 3, 4]
        sq = Series([1, 4, np.nan, 16], index=labels)
        expected = Series([1.0, 4.0, 9.0, 16.0], index=labels)
        tm.assert_series_equal(sq.interpolate(method="quadratic"), expected)

    @td.skip_if_no_scipy
    def test_interp_scipy_basic(self):
        """slinear, nearest, zero, quadratic and cubic on one small Series.

        All but cubic are also run with ``downcast="infer"``.
        """
        s = Series([1, 3, np.nan, 12, np.nan, 25])

        # slinear: same expectation with and without downcasting
        slinear_expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0])
        for extra in ({}, {"downcast": "infer"}):
            result = s.interpolate(method="slinear", **extra)
            tm.assert_series_equal(result, slinear_expected)

        # nearest, then zero: both give the same integral values; the plain
        # call is compared as float, the downcast call against the int Series.
        step_expected = Series([1, 3, 3, 12, 12, 25])
        for step_method in ("nearest", "zero"):
            result = s.interpolate(method=step_method)
            tm.assert_series_equal(result, step_expected.astype("float"))

            result = s.interpolate(method=step_method, downcast="infer")
            tm.assert_series_equal(result, step_expected)

        # quadratic
        # GH #15662.
        quadratic_expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0])
        for extra in ({}, {"downcast": "infer"}):
            result = s.interpolate(method="quadratic", **extra)
            tm.assert_series_equal(result, quadratic_expected)

        # cubic
        cubic_expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0])
        tm.assert_series_equal(s.interpolate(method="cubic"), cubic_expected)

    def test_interp_limit(self):
        """``limit=2`` fills only the first two NaNs of a three-NaN run."""
        ser = Series([1, 3, np.nan, np.nan, np.nan, 11])
        filled = ser.interpolate(method="linear", limit=2)

        expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
        tm.assert_series_equal(filled, expected)

    @pytest.mark.parametrize("limit", [-1, 0])
    def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method,
                                                   limit):
        """A ``limit`` of zero or below is rejected (GH 9217)."""
        method, kwargs = nontemporal_method
        ser = pd.Series([1, 2, np.nan, 4])
        with pytest.raises(ValueError, match="Limit must be greater than 0"):
            ser.interpolate(limit=limit, method=method, **kwargs)

    def test_interpolate_invalid_float_limit(self, nontemporal_method):
        """A float ``limit`` (here 2.0) is rejected (GH 9217)."""
        method, kwargs = nontemporal_method
        ser = pd.Series([1, 2, np.nan, 4])
        with pytest.raises(ValueError, match="Limit must be an integer"):
            ser.interpolate(limit=2.0, method=method, **kwargs)

    @pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"])
    def test_interp_invalid_method(self, invalid_method):
        """An unknown ``method`` raises, with or without an invalid limit."""
        ser = Series([1, 3, np.nan, 12, np.nan, 25])
        msg = f"method must be one of.* Got '{invalid_method}' instead"

        # First without extra arguments. Then with an invalid limit (-1) as
        # well: the error message must still be the one about the method.
        for extra in ({}, {"limit": -1}):
            with pytest.raises(ValueError, match=msg):
                ser.interpolate(method=invalid_method, **extra)

    def test_interp_limit_forward(self):
        """``limit_direction="forward"`` is accepted in either letter case."""
        ser = Series([1, 3, np.nan, np.nan, np.nan, 11])
        expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])

        # 'forward' is the default; it is passed explicitly here, first in
        # lower case and then in upper case.
        for direction in ("forward", "FORWARD"):
            filled = ser.interpolate(
                method="linear", limit=2, limit_direction=direction)
            tm.assert_series_equal(filled, expected)

    def test_interp_unlimited(self):
        """With no ``limit`` given, whole NaN runs are filled (issue #16282)."""
        nan = np.nan
        ser = Series([nan, 1.0, 3.0, nan, nan, nan, 11.0, nan])

        # (limit_direction, expected values), checked in this order.
        cases = [
            ("both", [1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]),
            ("forward", [nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]),
            ("backward", [1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, nan]),
        ]
        for direction, values in cases:
            expected = Series(values)
            filled = ser.interpolate(method="linear",
                                     limit_direction=direction)
            tm.assert_series_equal(filled, expected)

    def test_interp_limit_bad_direction(self):
        """An unknown ``limit_direction`` raises, with or without ``limit``."""
        msg = (r"Invalid limit_direction: expecting one of \['forward', "
               r"'backward', 'both'\], got 'abc'")
        ser = Series([1, 3, np.nan, np.nan, np.nan, 11])

        # First with a limit, then without one: the error is raised either way.
        for extra in ({"limit": 2}, {}):
            with pytest.raises(ValueError, match=msg):
                ser.interpolate(method="linear", limit_direction="abc",
                                **extra)

    # limit_area introduced GH #16284
    def test_interp_limit_area(self):
        """``limit_area`` combined with ``limit`` and ``limit_direction``.

        Ends by checking that an unknown ``limit_area`` raises ValueError.
        """
        # These tests are for issue #9218 -- fill NaNs in both directions.
        nan = np.nan
        s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan])

        # (extra keyword arguments, expected values), checked in this order.
        cases = [
            ({"limit_area": "inside"},
             [nan, nan, 3.0, 4.0, 5.0, 6.0, 7.0, nan, nan]),
            ({"limit_area": "inside", "limit": 1},
             [nan, nan, 3.0, 4.0, nan, nan, 7.0, nan, nan]),
            ({"limit_area": "inside", "limit_direction": "both", "limit": 1},
             [nan, nan, 3.0, 4.0, nan, 6.0, 7.0, nan, nan]),
            ({"limit_area": "outside"},
             [nan, nan, 3.0, nan, nan, nan, 7.0, 7.0, 7.0]),
            ({"limit_area": "outside", "limit": 1},
             [nan, nan, 3.0, nan, nan, nan, 7.0, 7.0, nan]),
            ({"limit_area": "outside", "limit_direction": "both", "limit": 1},
             [nan, 3.0, 3.0, nan, nan, nan, 7.0, 7.0, nan]),
            ({"limit_area": "outside", "limit_direction": "backward"},
             [3.0, 3.0, 3.0, nan, nan, nan, 7.0, nan, nan]),
        ]
        for extra, values in cases:
            expected = Series(values)
            result = s.interpolate(method="linear", **extra)
            tm.assert_series_equal(result, expected)

        # An unrecognised limit_area value must raise.
        msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc"
        with pytest.raises(ValueError, match=msg):
            s.interpolate(method="linear", limit_area="abc")

    def test_interp_limit_direction(self):
        """``limit`` with ``limit_direction`` set to 'backward' or 'both'."""
        # These tests are for issue #9218 -- fill NaNs in both directions.
        nan = np.nan

        short = Series([1, 3, nan, nan, nan, 11])
        short_cases = [
            ({"limit": 2, "limit_direction": "backward"},
             [1.0, 3.0, nan, 7.0, 9.0, 11.0]),
            ({"limit": 1, "limit_direction": "both"},
             [1.0, 3.0, 5.0, nan, 9.0, 11.0]),
        ]
        for extra, values in short_cases:
            result = short.interpolate(method="linear", **extra)
            tm.assert_series_equal(result, Series(values))

        # Check that this works on a longer series of nans.
        longer = Series([1, 3, nan, nan, nan, 7, 9, nan, nan, 12, nan])
        longer_cases = [
            (2, [1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]),
            (1, [1.0, 3.0, 4.0, nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]),
        ]
        for limit, values in longer_cases:
            result = longer.interpolate(method="linear",
                                        limit=limit,
                                        limit_direction="both")
            tm.assert_series_equal(result, Series(values))

    def test_interp_limit_to_ends(self):
        """Leading/trailing NaNs are filled up to the ends (issue #10420)."""
        nan = np.nan
        ser = Series([nan, nan, 5, 7, 9, nan])

        # (limit_direction, expected values) with limit=2 throughout.
        cases = [
            ("backward", [5.0, 5.0, 5.0, 7.0, 9.0, nan]),
            ("both", [5.0, 5.0, 5.0, 7.0, 9.0, 9.0]),
        ]
        for direction, values in cases:
            filled = ser.interpolate(method="linear",
                                     limit=2,
                                     limit_direction=direction)
            tm.assert_series_equal(filled, Series(values))

    def test_interp_limit_before_ends(self):
        """``limit=1`` stops one step short of the ends (issue #11115)."""
        nan = np.nan
        ser = Series([nan, nan, 5, 7, nan, nan])

        # (limit_direction, expected values) with limit=1 throughout.
        cases = [
            ("forward", [nan, nan, 5.0, 7.0, 7.0, nan]),
            ("backward", [nan, 5.0, 5.0, 7.0, nan, nan]),
            ("both", [nan, 5.0, 5.0, 7.0, 7.0, nan]),
        ]
        for direction, values in cases:
            filled = ser.interpolate(method="linear",
                                     limit=1,
                                     limit_direction=direction)
            tm.assert_series_equal(filled, Series(values))

    @td.skip_if_no_scipy
    def test_interp_all_good(self):
        """A Series without NaNs is returned unchanged by interpolate."""
        ser = Series([1, 2, 3])

        # scipy-backed method first ...
        via_scipy = ser.interpolate(method="polynomial", order=1)
        tm.assert_series_equal(via_scipy, ser)

        # ... then the default, non-scipy path
        via_default = ser.interpolate()
        tm.assert_series_equal(via_default, ser)

    @pytest.mark.parametrize(
        "check_scipy",
        [False, pytest.param(True, marks=td.skip_if_no_scipy)])
    def test_interp_multiIndex(self, check_scipy):
        """Default interpolation works on a MultiIndex; polynomial raises.

        The polynomial check only runs when ``check_scipy`` is true.
        """
        idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
        ser = Series([1, 2, np.nan], index=idx)

        expected = ser.copy()
        expected.loc[2] = 2
        tm.assert_series_equal(ser.interpolate(), expected)

        if not check_scipy:
            return

        msg = "Only `method=linear` interpolation is supported on MultiIndexes"
        with pytest.raises(ValueError, match=msg):
            ser.interpolate(method="polynomial", order=1)

    @td.skip_if_no_scipy
    def test_interp_nonmono_raise(self):
        """krogh interpolation raises when the index is not monotonic."""
        msg = "krogh interpolation requires that the index be monotonic"
        unordered = Series([1, np.nan, 3], index=[0, 2, 1])
        with pytest.raises(ValueError, match=msg):
            unordered.interpolate(method="krogh")

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("method", ["nearest", "pad"])
    def test_interp_datetime64(self, method, tz_naive_fixture):
        """'nearest' and 'pad' fill a gap on a datetime index."""
        input_index = date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
        ser = Series([1, np.nan, 3], index=input_index)
        filled = ser.interpolate(method=method)

        expected_index = date_range("1/1/2000", periods=3,
                                    tz=tz_naive_fixture)
        expected = Series([1.0, 1.0, 3.0], index=expected_index)
        tm.assert_series_equal(filled, expected)

    def test_interp_pad_datetime64tz_values(self):
        """``method="pad"`` fills a NaT in tz-aware datetime values (GH#27628)."""
        dti = pd.date_range("2015-04-05", periods=3, tz="US/Central")

        # Input: the middle timestamp is missing.
        with_gap = pd.Series(dti)
        with_gap[1] = pd.NaT

        # Expected: the middle timestamp repeats the first one.
        expected = pd.Series(dti)
        expected[1] = expected[0]

        padded = with_gap.interpolate(method="pad")
        tm.assert_series_equal(padded, expected)

    def test_interp_limit_no_nans(self):
        """``limit`` on a Series without NaNs leaves it unchanged (GH 7173)."""
        ser = pd.Series([1.0, 2.0, 3.0])
        tm.assert_series_equal(ser.interpolate(limit=1), ser)

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("method", ["polynomial", "spline"])
    def test_no_order(self, method):
        """polynomial/spline without ``order`` raise (GH-10633, GH-24014)."""
        msg = "You must specify the order of the spline or polynomial"
        ser = Series([0, 1, np.nan, 3])
        with pytest.raises(ValueError, match=msg):
            ser.interpolate(method=method)

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan])
    def test_interpolate_spline_invalid_order(self, order):
        """Spline interpolation rejects each parametrized ``order`` value."""
        msg = "order needs to be specified and greater than 0"
        ser = Series([0, 1, np.nan, 3])
        with pytest.raises(ValueError, match=msg):
            ser.interpolate(method="spline", order=order)

    @td.skip_if_no_scipy
    def test_spline(self):
        """An order-1 spline fills two gaps in the sequence 1..7."""
        ser = Series([1, 2, np.nan, 4, 5, np.nan, 7])
        filled = ser.interpolate(method="spline", order=1)
        tm.assert_series_equal(
            filled, Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]))

    @td.skip_if_no_scipy
    def test_spline_extrapolate(self):
        """The ``ext`` keyword changes how the trailing NaN is filled."""
        ser = Series([1, 2, 3, 4, np.nan, 6, np.nan])

        # (ext, expected values): ext=3 is checked first, then ext=0.
        cases = [
            (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0]),
            (0, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]),
        ]
        for ext, values in cases:
            filled = ser.interpolate(method="spline", order=1, ext=ext)
            tm.assert_series_equal(filled, Series(values))

    @td.skip_if_no_scipy
    def test_spline_smooth(self):
        """Passing ``s=0`` changes the cubic-spline fill at position 5."""
        ser = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
        with_s_zero = ser.interpolate(method="spline", order=3, s=0)[5]
        with_default_s = ser.interpolate(method="spline", order=3)[5]
        assert with_s_zero != with_default_s

    @td.skip_if_no_scipy
    def test_spline_interpolation(self):
        """Order-1 spline on squares with up to three random gaps."""
        squares = Series(np.arange(10)**2)
        gap_positions = np.random.randint(0, 9, 3)
        squares[gap_positions] = np.nan

        # NOTE(review): both sides come from the same call on the same data,
        # so this only verifies that the call succeeds and is repeatable.
        first = squares.interpolate(method="spline", order=1)
        second = squares.interpolate(method="spline", order=1)
        tm.assert_series_equal(first, second)

    def test_interp_timedelta64(self):
        """``method="time"`` on a timedelta index (GH 6424)."""
        # (index offsets, expected values): uniform spacing first, then a
        # non-uniform spacing where the fill is weighted by the index.
        cases = [
            ([1, 2, 3], [1.0, 2.0, 3.0]),
            ([1, 2, 4], [1.0, 1.666667, 3.0]),
        ]
        for offsets, values in cases:
            ser = Series([1, np.nan, 3], index=pd.to_timedelta(offsets))
            filled = ser.interpolate(method="time")
            expected = Series(values, index=pd.to_timedelta(offsets))
            tm.assert_series_equal(filled, expected)

    def test_series_interpolate_method_values(self):
        """On a daily index, ``method="values"`` matches the default (GH#1646)."""
        rng = date_range("1/1/2000", "1/20/2000", freq="D")
        random_values = np.random.randn(len(rng))
        ts = Series(random_values, index=rng)

        # Knock out every other observation.
        ts[::2] = np.nan

        by_values = ts.interpolate(method="values")
        by_default = ts.interpolate()
        tm.assert_series_equal(by_values, by_default)

    def test_series_interpolate_intraday(self):
        """Time interpolation gives the same values at day and hour scale.

        Reference: #1698.
        """

        def time_interpolated(freq, shift):
            # Series on a 4-point index with step `freq`, reindexed to also
            # include each point moved by `shift`, then filled by time.
            index = pd.date_range("1/1/2012", periods=4, freq=freq)
            ts = pd.Series([0, 12, 24, 36], index)
            new_index = index.append(index + shift).sort_values()
            return ts.reindex(new_index).interpolate(method="time")

        exp = time_interpolated("12D", pd.DateOffset(days=1))
        result = time_interpolated("12H", pd.DateOffset(hours=1))

        tm.assert_numpy_array_equal(result.values, exp.values)

    @pytest.mark.parametrize(
        "ind",
        [
            ["a", "b", "c", "d"],
            pd.period_range(start="2019-01-01", periods=4),
            pd.interval_range(start=0, end=4),
        ],
    )
    def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
        """Only 'linear' works on string, period and interval indexes.

        Any other method from the fixture must raise ValueError (gh 21662).
        """
        frame = pd.DataFrame([0, 1, np.nan, 3], index=ind)
        column = frame[0]

        method, kwargs = interp_methods_ind
        if method == "pchip":
            pytest.importorskip("scipy")

        if method != "linear":
            expected_error = (
                "Index column must be numeric or datetime type when "
                f"using {method} method other than linear. "
                "Try setting a numeric or datetime index column before "
                "interpolating.")
            with pytest.raises(ValueError, match=expected_error):
                column.interpolate(method=method, **kwargs)
            return

        filled = column.interpolate(**kwargs)
        expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
        tm.assert_series_equal(filled, expected)

    def test_interpolate_timedelta_index(self, interp_methods_ind):
        """
        Interpolate a column indexed by a ``timedelta_range`` (gh 21662).

        Only 'linear' and 'pchip' are asserted; any other method supplied by
        the fixture is skipped.
        """
        ind = pd.timedelta_range(start=1, periods=4)
        frame = pd.DataFrame([0, 1, np.nan, 3], index=ind)

        method, kwargs = interp_methods_ind
        if method == "pchip":
            pytest.importorskip("scipy")

        if method not in {"linear", "pchip"}:
            pytest.skip(
                "This interpolation method is not supported for Timedelta Index yet."
            )

        filled = frame[0].interpolate(method=method, **kwargs)
        expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
        tm.assert_series_equal(filled, expected)

    @pytest.mark.parametrize(
        "ascending, expected_values",
        [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])],
    )
    def test_interpolate_unsorted_index(self, ascending, expected_values):
        """``method="index"`` after sorting in either direction (GH 21037)."""
        labels = [10, 9, 3, 2, 1]
        ts = pd.Series(data=[10, 9, np.nan, 2, 1], index=labels)

        ordered = ts.sort_index(ascending=ascending)
        filled = ordered.interpolate(method="index")

        # Values equal their labels, so the gap at label 3 is filled with 3.
        expected = pd.Series(data=expected_values,
                             index=expected_values,
                             dtype=float)
        tm.assert_series_equal(filled, expected)