def test_float_subtype(self, start, end, freq): # Has float subtype if any of start/end/freq are float, even if all # resulting endpoints can safely be upcast to integers # defined from start/end/freq index = interval_range(start=start, end=end, freq=freq) result = index.dtype.subtype expected = 'int64' if is_integer(start + end + freq) else 'float64' assert result == expected # defined from start/periods/freq index = interval_range(start=start, periods=5, freq=freq) result = index.dtype.subtype expected = 'int64' if is_integer(start + freq) else 'float64' assert result == expected # defined from end/periods/freq index = interval_range(end=end, periods=5, freq=freq) result = index.dtype.subtype expected = 'int64' if is_integer(end + freq) else 'float64' assert result == expected # GH 20976: linspace behavior defined from start/end/periods index = interval_range(start=start, end=end, periods=5) result = index.dtype.subtype expected = 'int64' if is_integer(start + end) else 'float64' assert result == expected
def test_constructor_timestamp(self, closed, name, freq, periods, tz): start, end = Timestamp('20180101', tz=tz), Timestamp('20181231', tz=tz) breaks = date_range(start=start, end=end, freq=freq) expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) # defined from start/end/freq result = interval_range( start=start, end=end, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # defined from start/periods/freq result = interval_range( start=start, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # defined from end/periods/freq result = interval_range( end=end, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # GH 20976: linspace behavior defined from start/end/periods if not breaks.freq.isAnchored() and tz is None: # matches expected only for non-anchored offsets and tz naive # (anchored/DST transitions cause unequal spacing in expected) result = interval_range(start=start, end=end, periods=periods, name=name, closed=closed) tm.assert_index_equal(result, expected)
def test_interval_array_equal_message(self): a = pd.interval_range(0, periods=4).values b = pd.interval_range(1, periods=4).values msg = textwrap.dedent("""\ IntervalArray.left are different IntervalArray.left values are different \\(100.0 %\\) \\[left\\]: Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""") with tm.assert_raises_regex(AssertionError, msg): tm.assert_interval_array_equal(a, b)
def test_interval_array_equal_periods_mismatch(): kwargs = dict(start=0) arr1 = interval_range(periods=5, **kwargs).values arr2 = interval_range(periods=6, **kwargs).values msg = """\ IntervalArray.left are different IntervalArray.left length are different \\[left\\]: 5, Int64Index\\(\\[0, 1, 2, 3, 4\\], dtype='int64'\\) \\[right\\]: 6, Int64Index\\(\\[0, 1, 2, 3, 4, 5\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): assert_interval_array_equal(arr1, arr2)
def test_interval_array_equal_end_mismatch(): kwargs = dict(start=0, periods=5) arr1 = interval_range(end=10, **kwargs).values arr2 = interval_range(end=20, **kwargs).values msg = """\ IntervalArray.left are different IntervalArray.left values are different \\(80.0 %\\) \\[left\\]: Int64Index\\(\\[0, 2, 4, 6, 8\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[0, 4, 8, 12, 16\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): assert_interval_array_equal(arr1, arr2)
def test_interval_array_equal_start_mismatch(): kwargs = dict(periods=4) arr1 = interval_range(start=0, **kwargs).values arr2 = interval_range(start=1, **kwargs).values msg = """\ IntervalArray.left are different IntervalArray.left values are different \\(100.0 %\\) \\[left\\]: Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): assert_interval_array_equal(arr1, arr2)
def test_interval_array_equal_closed_mismatch(): kwargs = dict(start=0, periods=5) arr1 = interval_range(closed="left", **kwargs).values arr2 = interval_range(closed="right", **kwargs).values msg = """\ IntervalArray are different Attribute "closed" are different \\[left\\]: left \\[right\\]: right""" with pytest.raises(AssertionError, match=msg): assert_interval_array_equal(arr1, arr2)
def test_set_incompatible_types(self, closed, op_name, sort): index = monotonic_index(0, 11, closed=closed) set_op = getattr(index, op_name) # TODO: standardize return type of non-union setops type(self vs other) # non-IntervalIndex if op_name == 'difference': expected = index else: expected = getattr(index.astype('O'), op_name)(Index([1, 2, 3])) result = set_op(Index([1, 2, 3]), sort=sort) tm.assert_index_equal(result, expected) # mixed closed msg = ('can only do set operations between two IntervalIndex objects ' 'that are closed on the same side') for other_closed in {'right', 'left', 'both', 'neither'} - {closed}: other = monotonic_index(0, 11, closed=other_closed) with pytest.raises(ValueError, match=msg): set_op(other, sort=sort) # GH 19016: incompatible dtypes other = interval_range(Timestamp('20180101'), periods=9, closed=closed) msg = ('can only do {op} between two IntervalIndex objects that have ' 'compatible dtypes').format(op=op_name) with pytest.raises(TypeError, match=msg): set_op(other, sort=sort)
def test_subtype_integer_errors(self): # float64 -> uint64 fails with negative values index = interval_range(-10.0, 10.0) dtype = IntervalDtype('uint64') with pytest.raises(ValueError): index.astype(dtype) # float64 -> integer-like fails with non-integer valued floats index = interval_range(0.0, 10.0, freq=0.25) dtype = IntervalDtype('int64') with pytest.raises(ValueError): index.astype(dtype) dtype = IntervalDtype('uint64') with pytest.raises(ValueError): index.astype(dtype)
def test_cython_agg_empty_buckets_nanops(observed): # GH-18869 can't call nanops on empty groups, so hardcode expected # for these df = pd.DataFrame([11, 12, 13], columns=['a']) grps = range(0, 25, 5) # add / sum result = df.groupby(pd.cut(df['a'], grps), observed=observed)._cython_agg_general('add') intervals = pd.interval_range(0, 20, freq=5) expected = pd.DataFrame( {"a": [0, 0, 36, 0]}, index=pd.CategoricalIndex(intervals, name='a', ordered=True)) if observed: expected = expected[expected.a != 0] tm.assert_frame_equal(result, expected) # prod result = df.groupby(pd.cut(df['a'], grps), observed=observed)._cython_agg_general('prod') expected = pd.DataFrame( {"a": [1, 1, 1716, 1]}, index=pd.CategoricalIndex(intervals, name='a', ordered=True)) if observed: expected = expected[expected.a != 1] tm.assert_frame_equal(result, expected)
def _fully_loaded_dataframe_example(): from distutils.version import LooseVersion index = pd.MultiIndex.from_arrays([ pd.date_range('2000-01-01', periods=5).repeat(2), np.tile(np.array(['foo', 'bar'], dtype=object), 5) ]) c1 = pd.date_range('2000-01-01', periods=10) data = { 0: c1, 1: c1.tz_localize('utc'), 2: c1.tz_localize('US/Eastern'), 3: c1[::2].tz_localize('utc').repeat(2).astype('category'), 4: ['foo', 'bar'] * 5, 5: pd.Series(['foo', 'bar'] * 5).astype('category').values, 6: [True, False] * 5, 7: np.random.randn(10), 8: np.random.randint(0, 100, size=10), 9: pd.period_range('2013', periods=10, freq='M') } if LooseVersion(pd.__version__) >= '0.21': # There is an issue with pickling IntervalIndex in pandas 0.20.x data[10] = pd.interval_range(start=1, freq=1, periods=10) return pd.DataFrame(data, index=index)
def test_subtype_datetimelike(self): # datetime -> timedelta raises dtype = IntervalDtype('timedelta64[ns]') msg = 'Cannot convert .* to .*; subtypes are incompatible' index = interval_range(Timestamp('2018-01-01'), periods=10) with pytest.raises(TypeError, match=msg): index.astype(dtype) index = interval_range(Timestamp('2018-01-01', tz='CET'), periods=10) with pytest.raises(TypeError, match=msg): index.astype(dtype) # timedelta -> datetime raises dtype = IntervalDtype('datetime64[ns]') index = interval_range(Timedelta('0 days'), periods=10) with pytest.raises(TypeError, match=msg): index.astype(dtype)
def test_no_invalid_float_truncation(self, start, end, freq): # GH 21161 if freq is None: breaks = [0.5, 1.5, 2.5, 3.5, 4.5] else: breaks = [0.5, 2.0, 3.5, 5.0, 6.5] expected = IntervalIndex.from_breaks(breaks) result = interval_range(start=start, end=end, periods=4, freq=freq) tm.assert_index_equal(result, expected)
def test_constructor_coverage(self): # float value for periods expected = interval_range(start=0, periods=10) result = interval_range(start=0, periods=10.5) tm.assert_index_equal(result, expected) # equivalent timestamp-like start/end start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') expected = interval_range(start=start, end=end) result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime()) tm.assert_index_equal(result, expected) result = interval_range(start=start.asm8, end=end.asm8) tm.assert_index_equal(result, expected) # equivalent freq with timestamp equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), DateOffset(days=1)] for freq in equiv_freq: result = interval_range(start=start, end=end, freq=freq) tm.assert_index_equal(result, expected) # equivalent timedelta-like start/end start, end = Timedelta(days=1), Timedelta(days=10) expected = interval_range(start=start, end=end) result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta()) tm.assert_index_equal(result, expected) result = interval_range(start=start.asm8, end=end.asm8) tm.assert_index_equal(result, expected) # equivalent freq with timedelta equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] for freq in equiv_freq: result = interval_range(start=start, end=end, freq=freq) tm.assert_index_equal(result, expected)
def test_subtype_integer(self, subtype): index = interval_range(0.0, 10.0) dtype = IntervalDtype(subtype) result = index.astype(dtype) expected = IntervalIndex.from_arrays(index.left.astype(subtype), index.right.astype(subtype), closed=index.closed) tm.assert_index_equal(result, expected) # raises with NA msg = 'Cannot convert NA to integer' with pytest.raises(ValueError, match=msg): index.insert(0, np.nan).astype(dtype)
def test_constructor_numeric(self, closed, name, freq, periods): start, end = 0, 100 breaks = np.arange(101, step=freq) expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) # defined from start/end/freq result = interval_range( start=start, end=end, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # defined from start/periods/freq result = interval_range( start=start, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # defined from end/periods/freq result = interval_range( end=end, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # GH 20976: linspace behavior defined from start/end/periods result = interval_range( start=start, end=end, periods=periods, name=name, closed=closed) tm.assert_index_equal(result, expected)
def test_constructor_timedelta(self, closed, name, freq, periods): start, end = Timedelta('0 days'), Timedelta('100 days') breaks = timedelta_range(start=start, end=end, freq=freq) expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) # defined from start/end/freq result = interval_range( start=start, end=end, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # defined from start/periods/freq result = interval_range( start=start, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # defined from end/periods/freq result = interval_range( end=end, periods=periods, freq=freq, name=name, closed=closed) tm.assert_index_equal(result, expected) # GH 20976: linspace behavior defined from start/end/periods result = interval_range( start=start, end=end, periods=periods, name=name, closed=closed) tm.assert_index_equal(result, expected)
def test_set_operation_errors(self, closed, op_name): index = self.create_index(closed=closed) set_op = getattr(index, op_name) # non-IntervalIndex msg = ('can only do set operations between two IntervalIndex objects ' 'that are closed on the same side') with tm.assert_raises_regex(ValueError, msg): set_op(Index([1, 2, 3])) # mixed closed for other_closed in {'right', 'left', 'both', 'neither'} - {closed}: other = self.create_index(closed=other_closed) with tm.assert_raises_regex(ValueError, msg): set_op(other) # GH 19016: incompatible dtypes other = interval_range(Timestamp('20180101'), periods=9, closed=closed) msg = ('can only do {op} between two IntervalIndex objects that have ' 'compatible dtypes').format(op=op_name) with tm.assert_raises_regex(TypeError, msg): set_op(other)
def test_construction_from_timedelta(self, closed): # combinations of start/end/periods without freq start, end = Timedelta('1 day'), Timedelta('6 days') breaks = timedelta_range(start=start, end=end) expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) result = interval_range(start=start, end=end, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(start=start, periods=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(end=end, periods=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) # combinations of start/end/periods with fixed freq freq = '2D' start, end = Timedelta('1 day'), Timedelta('7 days') breaks = timedelta_range(start=start, end=end, freq=freq) expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) result = interval_range(start=start, end=end, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(start=start, periods=3, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(end=end, periods=3, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) # output truncates early if freq causes end to be skipped. end = Timedelta('7 days 1 hour') result = interval_range(start=start, end=end, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected)
def test_construction_from_numeric(self, closed): # combinations of start/end/periods without freq expected = IntervalIndex.from_breaks( np.arange(0, 6), name='foo', closed=closed) result = interval_range(start=0, end=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(start=0, periods=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(end=5, periods=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) # combinations of start/end/periods with freq expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], name='foo', closed=closed) result = interval_range(start=0, end=6, freq=2, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(start=0, periods=3, freq=2, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(end=6, periods=3, freq=2, name='foo', closed=closed) tm.assert_index_equal(result, expected) # output truncates early if freq causes end to be skipped. expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], name='foo', closed=closed) result = interval_range(start=0, end=4, freq=1.5, name='foo', closed=closed) tm.assert_index_equal(result, expected)
class TestIntervalIndex(Base): _holder = IntervalIndex def setup_method(self, method): self.index = IntervalIndex.from_arrays([0, 1], [1, 2]) self.index_with_nan = IntervalIndex.from_tuples([(0, 1), np.nan, (1, 2)]) self.indices = dict(intervalIndex=tm.makeIntervalIndex(10)) def create_index(self, closed="right"): return IntervalIndex.from_breaks(range(11), closed=closed) def create_index_with_nan(self, closed="right"): mask = [True, False] + [True] * 8 return IntervalIndex.from_arrays( np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed, ) def test_properties(self, closed): index = self.create_index(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) tm.assert_index_equal(index.left, Index(np.arange(10))) tm.assert_index_equal(index.right, Index(np.arange(1, 11))) tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) assert index.closed == closed ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) # with nans index = self.create_index_with_nan(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) expected_right = expected_left + 1 expected_mid = expected_left + 0.5 tm.assert_index_equal(index.left, expected_left) tm.assert_index_equal(index.right, expected_right) tm.assert_index_equal(index.mid, expected_mid) assert index.closed == closed ivs = [ Interval(l, r, closed) if notna(l) else np.nan for l, r in zip(expected_left, expected_right) ] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) @pytest.mark.parametrize( "breaks", [ [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]), pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]), ], ) def test_length(self, closed, breaks): # GH 18789 index = IntervalIndex.from_breaks(breaks, closed=closed) result = index.length expected = Index(iv.length for iv in index) tm.assert_index_equal(result, expected) # with NA index = index.insert(1, np.nan) result = index.length expected = Index(iv.length if notna(iv) else iv for iv in index) tm.assert_index_equal(result, expected) def test_with_nans(self, closed): index = self.create_index(closed=closed) assert index.hasnans is False result = index.isna() expected = np.repeat(False, len(index)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.repeat(True, len(index)) tm.assert_numpy_array_equal(result, expected) index = self.create_index_with_nan(closed=closed) assert index.hasnans is True result = index.isna() expected = np.array([False, True] + [False] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.array([True, False] + [True] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) def test_copy(self, closed): expected = self.create_index(closed=closed) result = expected.copy() assert result.equals(expected) result = expected.copy(deep=True) assert result.equals(expected) assert result.left is not expected.left def test_ensure_copied_data(self, closed): # exercise the copy flag in the constructor # not copying index = self.create_index(closed=closed) result = IntervalIndex(index, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="same") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="same") # by-definition make a copy result = IntervalIndex(index._ndarray_values, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="copy") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="copy") def test_equals(self, closed): expected = IntervalIndex.from_breaks(np.arange(5), closed=closed) assert expected.equals(expected) assert expected.equals(expected.copy()) assert not expected.equals(expected.astype(object)) assert not expected.equals(np.array(expected)) assert not expected.equals(list(expected)) assert not expected.equals([1, 2]) assert not expected.equals(np.array([1, 2])) assert not expected.equals(pd.date_range("20130101", periods=2)) expected_name1 = IntervalIndex.from_breaks(np.arange(5), closed=closed, name="foo") expected_name2 = IntervalIndex.from_breaks(np.arange(5), closed=closed, name="bar") assert expected.equals(expected_name1) assert expected_name1.equals(expected_name2) for other_closed in {"left", "right", "both", "neither"} - {closed}: expected_other_closed = IntervalIndex.from_breaks( np.arange(5), closed=other_closed) assert not expected.equals(expected_other_closed) @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series]) def test_where(self, closed, klass): idx = self.create_index(closed=closed) cond = [True] * len(idx) expected = idx result = expected.where(klass(cond)) tm.assert_index_equal(result, expected) cond = [False] + [True] * len(idx[1:]) expected = IntervalIndex([np.nan] + idx[1:].tolist()) result = idx.where(klass(cond)) tm.assert_index_equal(result, expected) def test_delete(self, closed): expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) result = self.create_index(closed=closed).delete(0) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "data", [ interval_range(0, periods=10, closed="neither"), interval_range(1.7, periods=8, freq=2.5, closed="both"), interval_range(Timestamp("20170101"), periods=12, closed="left"), interval_range(Timedelta("1 day"), periods=6, closed="right"), ], ) def test_insert(self, data): item = data[0] idx_item = IntervalIndex([item]) # start expected = idx_item.append(data) result = data.insert(0, item) tm.assert_index_equal(result, expected) # end expected = data.append(idx_item) result = data.insert(len(data), item) tm.assert_index_equal(result, expected) # mid expected = data[:3].append(idx_item).append(data[3:]) result = data.insert(3, item) tm.assert_index_equal(result, expected) # invalid type msg = "can only insert Interval objects and NA into an IntervalIndex" with pytest.raises(ValueError, match=msg): data.insert(1, "foo") # invalid closed msg = "inserted item must be closed on the same side as the index" for closed in {"left", "right", "both", "neither"} - {item.closed}: with pytest.raises(ValueError, match=msg): bad_item = Interval(item.left, item.right, closed=closed) data.insert(1, bad_item) # GH 18295 (test missing) na_idx = IntervalIndex([np.nan], closed=data.closed) for na in (np.nan, pd.NaT, None): expected = data[:1].append(na_idx).append(data[1:]) result = data.insert(1, na) tm.assert_index_equal(result, expected) def test_take(self, closed): index = self.create_index(closed=closed) result = index.take(range(10)) tm.assert_index_equal(result, index) result = index.take([0, 0, 1]) expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed) tm.assert_index_equal(result, expected) def test_is_unique_interval(self, closed): """ Interval specific tests for is_unique in addition to base class tests """ # unique overlapping - distinct endpoints idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) assert idx.is_unique is True # unique overlapping - shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_unique is True # unique nested idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) assert idx.is_unique is True def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing non-overlapping idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered overlapping idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # stationary idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False # empty idx = IntervalIndex([], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True @pytest.mark.skip(reason="not a valid repr as we use interval notation") def test_repr(self): i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed="right") expected = ("IntervalIndex(left=[0, 1]," "\n right=[1, 2]," "\n closed='right'," "\n dtype='interval[int64]')") assert repr(i) == expected i = IntervalIndex.from_tuples( (Timestamp("20130101"), Timestamp("20130102")), (Timestamp("20130102"), Timestamp("20130103")), closed="right", ) expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02']," "\n right=['2013-01-02', '2013-01-03']," "\n closed='right'," "\n dtype='interval[datetime64[ns]]')") assert repr(i) == expected @pytest.mark.skip(reason="not a valid repr as we use interval notation") def test_repr_max_seq_item_setting(self): super().test_repr_max_seq_item_setting() @pytest.mark.skip(reason="not a valid repr as we use interval notation") def test_repr_roundtrip(self): super().test_repr_roundtrip() def test_frame_repr(self): # https://github.com/pandas-dev/pandas/pull/24134/files df = pd.DataFrame({"A": [1, 2, 3, 4]}, index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])) result = repr(df) expected = " A\n(0, 1] 1\n(1, 2] 2\n(2, 3] 3\n(3, 4] 4" assert result == expected @pytest.mark.parametrize( "constructor,expected", [ ( pd.Series, ("(0.0, 1.0] a\n" "NaN b\n" "(2.0, 3.0] c\n" "dtype: object"), ), ( pd.DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c"), ), ], ) def test_repr_missing(self, constructor, expected): # GH 25984 index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) obj = constructor(list("abc"), index=index) result = repr(obj) assert result == expected def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) assert i[1] == Interval(1.0, 2.0, closed=closed) assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed) tm.assert_index_equal(result, expected) result = i[0:2] expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed) tm.assert_index_equal(result, expected) result = i[1:3] expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan), closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6]) def test_get_loc_length_one_scalar(self, scalar, closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) if scalar in index[0]: result = index.get_loc(scalar) assert result == 0 else: with pytest.raises(KeyError, match=str(scalar)): index.get_loc(scalar) @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"]) @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)]) def test_get_loc_length_one_interval(self, left, right, closed, other_closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) interval = Interval(left, right, closed=other_closed) if interval == index[0]: result = index.get_loc(interval) assert result == 0 else: with pytest.raises( KeyError, match=re.escape( "Interval({left}, {right}, closed='{other_closed}')". format(left=left, right=right, other_closed=other_closed)), ): index.get_loc(interval) # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_get_loc_datetimelike_nonoverlapping(self, breaks): # GH 20636 # nonoverlapping = IntervalIndex method and no i8 conversion index = IntervalIndex.from_breaks(breaks) value = index[0].mid result = index.get_loc(value) expected = 0 assert result == expected interval = Interval(index[0].left, index[0].right) result = index.get_loc(interval) expected = 0 assert result == expected @pytest.mark.parametrize( "arrays", [ (date_range("20180101", periods=4), date_range("20180103", periods=4)), ( date_range("20180101", periods=4, tz="US/Eastern"), date_range("20180103", periods=4, tz="US/Eastern"), ), ( timedelta_range("0 days", periods=4), timedelta_range("2 days", periods=4), ), ], ids=lambda x: str(x[0].dtype), ) def test_get_loc_datetimelike_overlapping(self, arrays): # GH 20636 index = IntervalIndex.from_arrays(*arrays) value = index[0].mid + Timedelta("12 hours") result = index.get_loc(value) expected = slice(0, 2, None) assert result == expected interval = Interval(index[0].left, index[0].right) result = index.get_loc(interval) expected = 0 assert result == expected @pytest.mark.parametrize( "values", [ date_range("2018-01-04", periods=4, freq="-1D"), date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"), timedelta_range("3 days", periods=4, freq="-1D"), np.arange(3.0, -1.0, -1.0), np.arange(3, -1, -1), ], ids=lambda x: str(x.dtype), ) def test_get_loc_decreasing(self, values): # GH 25860 index = IntervalIndex.from_arrays(values[1:], values[:-1]) result = index.get_loc(index[0]) expected = 0 assert result == expected @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)]) def test_get_indexer_length_one(self, item, closed): # GH 17284 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_indexer(item) expected = np.array([0] * len(item), dtype="intp") tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize("size", [1, 5]) def test_get_indexer_length_one_interval(self, size, closed): # GH 17284 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_indexer([Interval(0, 5, closed)] * size) expected = np.array([0] * size, dtype="intp") tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_maybe_convert_i8(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) # intervalindex result = index._maybe_convert_i8(index) expected = IntervalIndex.from_breaks(breaks.asi8) tm.assert_index_equal(result, expected) # interval interval = Interval(breaks[0], breaks[1]) result = index._maybe_convert_i8(interval) expected = Interval(breaks[0].value, breaks[1].value) assert result == expected # datetimelike index result = index._maybe_convert_i8(breaks) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) # datetimelike scalar result = index._maybe_convert_i8(breaks[0]) expected = breaks[0].value assert result == expected # list-like of datetimelike scalars result = index._maybe_convert_i8(list(breaks)) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5) ], ) def test_maybe_convert_i8_nat(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) to_convert = breaks._constructor([pd.NaT] * 3) expected = pd.Float64Index([np.nan] * 3) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) to_convert = to_convert.insert(0, breaks[0]) expected = expected.insert(0, float(breaks[0].value)) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")], ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_numeric(self, breaks, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks) key = make_key(breaks) # no conversion occurs for numeric result = index._maybe_convert_i8(key) assert result is key @pytest.mark.parametrize( "breaks1, breaks2", permutations( [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], 2, ), ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks1) key = make_key(breaks2) msg = ("Cannot index an IntervalIndex of subtype {dtype1} with " "values of dtype {dtype2}") msg = re.escape(msg.format(dtype1=breaks1.dtype, dtype2=breaks2.dtype)) with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) def test_contains_method(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) expected = np.array([False, False], dtype="bool") actual = i.contains(0) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(3) tm.assert_numpy_array_equal(actual, expected) expected = np.array([True, False], dtype="bool") actual = i.contains(0.5) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(1) tm.assert_numpy_array_equal(actual, expected) # __contains__ not implemented for "interval in interval", follow # that for the contains method for now with pytest.raises(NotImplementedError, match="contains not implemented for two"): i.contains(Interval(0, 1)) def test_dropna(self, closed): expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) def test_non_contiguous(self, closed): index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) target = [0.5, 1.5, 2.5] actual = index.get_indexer(target) expected = np.array([0, -1, 1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) assert 1.5 not in index def test_isin(self, closed): index = self.create_index(closed=closed) expected = np.array([True] + [False] * (len(index) - 1)) result = index.isin(index[:1]) tm.assert_numpy_array_equal(result, expected) result = index.isin([index[0]]) tm.assert_numpy_array_equal(result, expected) other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) expected = np.array([True] * (len(index) - 1) + [False]) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) for other_closed in {"right", "left", "both", "neither"}: other = self.create_index(closed=other_closed) expected = np.repeat(closed == other_closed, len(index)) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) def test_comparison(self): actual = Interval(0, 1) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = Interval(0.5, 1.5) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > Interval(0.5, 1.5) tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index expected = np.array([True, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index <= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index >= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index < self.index expected = np.array([False, False]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index.values == self.index tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index <= self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index != self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index > self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index.values > self.index tm.assert_numpy_array_equal(actual, np.array([False, False])) # invalid comparisons actual = self.index == 0 tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) with pytest.raises(TypeError, match="unorderable types"): self.index > 0 with pytest.raises(TypeError, match="unorderable types"): self.index <= 0 msg = r"unorderable types: Interval\(\) > int\(\)" with pytest.raises(TypeError, match=msg): self.index > np.arange(2) msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) def test_missing_values(self, closed): idx = Index([ np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed) ]) idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) assert idx.equals(idx2) msg = ("missing values must be missing in the same location both left" " and right sides") with pytest.raises(ValueError, match=msg): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]), closed=closed) tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) def test_sort_values(self, closed): index = self.create_index(closed=closed) result = index.sort_values() tm.assert_index_equal(result, index) result = index.sort_values(ascending=False) tm.assert_index_equal(result, index[::-1]) # with nan index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) result = index.sort_values() expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) tm.assert_index_equal(result, expected) result = index.sort_values(ascending=False) expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_datetime(self, tz): start = Timestamp("2000-01-01", tz=tz) dates = date_range(start=start, periods=10) index = IntervalIndex.from_breaks(dates) # test mid start = Timestamp("2000-01-01T12:00", tz=tz) expected = date_range(start=start, periods=9) tm.assert_index_equal(index.mid, expected) # __contains__ doesn't check individual points assert Timestamp("2000-01-01", tz=tz) not in index assert Timestamp("2000-01-01T12", tz=tz) not in index assert Timestamp("2000-01-02", tz=tz) not in index iv_true = Interval(Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)) iv_false = Interval(Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)) assert iv_true in index assert iv_false not in index # .contains does check individual points assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() assert index.contains(Timestamp("2000-01-02", tz=tz)).any() # test get_indexer start = Timestamp("1999-12-31T12:00", tz=tz) target = date_range(start=start, periods=7, freq="12H") actual = index.get_indexer(target) expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") tm.assert_numpy_array_equal(actual, expected) start = Timestamp("2000-01-08T18:00", tz=tz) target = date_range(start=start, periods=7, freq="6H") actual = index.get_indexer(target) expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) def test_append(self, closed): index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) result = index1.append(index2) expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) result = index1.append([index1, index2]) expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) msg = ("can only append two IntervalIndex objects that are closed " "on the same side") for other_closed in {"left", "right", "both", "neither"} - {closed}: index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2], closed=other_closed) with pytest.raises(ValueError, match=msg): index1.append(index_other_closed) def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', otherwise True (GH16560) if closed == "both": idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is False else: idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True @pytest.mark.parametrize( "start, shift, na_value", [ (0, 1, np.nan), (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), ], ) def test_is_overlapping(self, start, shift, na_value, closed): # GH 23309 # see test_interval_tree.py for extensive tests; interface tests here # non-overlapping tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # non-overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # overlapping tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # common endpoints tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping expected = closed == "both" assert result is expected # common endpoints with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping assert result is expected @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))), list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )), list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )), ], ) def test_to_tuples(self, tuples): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))) + [np.nan], list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )) + [np.nan], list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )) + [np.nan], ], ) @pytest.mark.parametrize("na_tuple", [True, False]) def test_to_tuples_na(self, tuples, na_tuple): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples(na_tuple=na_tuple) # check the non-NA portion expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) result_notna = result[:-1] tm.assert_index_equal(result_notna, expected_notna) # check the NA portion result_na = result[-1] if na_tuple: assert isinstance(result_na, tuple) assert len(result_na) == 2 assert all(isna(x) for x in result_na) else: assert isna(result_na) def test_nbytes(self): # GH 19209 left = np.arange(0, 4, dtype="i8") right = np.arange(1, 5, dtype="i8") result = IntervalIndex.from_arrays(left, right).nbytes expected = 64 # 4 * 8 * 2 assert result == expected def test_itemsize(self): # GH 19209 left = np.arange(0, 4, dtype="i8") right = np.arange(1, 5, dtype="i8") expected = 16 # 8 * 2 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = IntervalIndex.from_arrays(left, right).itemsize assert result == expected @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) def test_set_closed(self, name, closed, new_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) result = index.set_closed(new_closed) expected = interval_range(0, 5, closed=new_closed, name=name) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) def test_is_all_dates(self): # GH 23576 year_2017 = pd.Interval(pd.Timestamp("2017-01-01 00:00:00"), pd.Timestamp("2018-01-01 00:00:00")) year_2017_index = pd.IntervalIndex([year_2017]) assert not year_2017_index.is_all_dates
def test_linspace_dst_transition(self, start, mid, end): # GH 20976: linspace behavior defined from start/end/periods # accounts for the hour gained/lost during DST transition result = interval_range(start=start, end=end, periods=2) expected = IntervalIndex.from_breaks([start, mid, end]) tm.assert_index_equal(result, expected)
def f(): interval_range(0, Timestamp('20130101'), freq=2)
[["foo", 100], ["bar", 200]], index=pd.Float64Index([1.23, 2.34]), ) st._arrow_table(df) "## Int64Index" df = pd.DataFrame( [["foo", 100], ["bar", 200]], index=[1, 2], ) st._arrow_table(df) "## IntervalIndex" df = pd.DataFrame( [["foo", 100], ["bar", 200]], index=pd.interval_range(start=0, end=2), ) st._arrow_table(df) "## MultiIndex" df = pd.DataFrame( [["foo", 100], ["bar", 200]], index=[["a", "b"], [1, 2]], ) st._arrow_table(df) "## PeriodIndex" df = pd.DataFrame( [["foo", 100], ["bar", 200]], index=pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3]), )
def main(argv): parser = argparse.ArgumentParser() parser.add_argument("estResNumber", help="estimation result number", type=int) parser.add_argument("--latentToPlot", help="trial to plot", type=int, default=0) parser.add_argument("--neuronToPlot", help="neuron to plot", type=int, default=0) parser.add_argument("--trialToPlot", help="trial to plot", type=int, default=0) parser.add_argument("--dtCIF", help="neuron to plot", type=float, default=1.0) parser.add_argument("--ksTestGamma", help="gamma value for KS test", type=int, default=10) parser.add_argument("--nTestPoints", help="number of test points where to plot latents", type=int, default=2000) parser.add_argument("--location", help="location to analyze", type=int, default=0) parser.add_argument("--trials_indices", help="trials indices to analyze", default="[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]") parser.add_argument("--from_time", help="starting spike analysis time", type=float, default=750.0) parser.add_argument("--to_time", help="ending spike analysis time", type=float, default=2500.0) parser.add_argument("--min_nSpikes_perNeuron_perTrial", help="min number of spikes per neuron per trial", type=int, default=1) parser.add_argument("--data_filename", help="data filename", default="~/dev/research/gatsby-swc/datasets/george20040123_hnlds.mat") args = parser.parse_args() estResNumber = args.estResNumber latentToPlot = args.latentToPlot neuronToPlot = args.neuronToPlot trialToPlot = args.trialToPlot dtCIF = args.dtCIF ksTestGamma = args.ksTestGamma nTestPoints = args.nTestPoints location = args.location trials_indices = [int(str) for str in args.trials_indices[1:-1].split(",")] from_time = args.from_time to_time = args.to_time min_nSpikes_perNeuron_perTrial = args.min_nSpikes_perNeuron_perTrial data_filename = args.data_filename estimResMetaDataFilename = "results/{:08d}_estimation_metaData.ini".format(estResNumber) modelSaveFilename = "results/{:08d}_estimatedModel.pickle".format(estResNumber) lowerBoundHistVsIterNoFigFilenamePattern = "figures/{:08d}_lowerBoundHistVSIterNo.{{:s}}".format(estResNumber) lowerBoundHistVsElapsedTimeFigFilenamePattern = "figures/{:08d}_lowerBoundHistVsElapsedTime.{{:s}}".format(estResNumber) latentsFigFilenamePattern = "figures/{:08d}_estimatedLatent_latent{:03d}.{{:s}}".format(estResNumber, latentToPlot) embeddingsFigFilenamePattern = "figures/{:08d}_estimatedEmbedding_neuron{:d}.{{:s}}".format(estResNumber, neuronToPlot) embeddingParamsFigFilenamePattern = "figures/{:08d}_estimatedEmbeddingParams.{{:s}}".format(estResNumber) CIFFigFilenamePattern = "figures/{:08d}_CIF_trial{:03d}_neuron{:03d}.{{:s}}".format(estResNumber, trialToPlot, neuronToPlot) ksTestTimeRescalingNumericalCorrectionFigFilename = "figures/{:08d}_ksTestTimeRescaling_numericalCorrection_trial{:03d}_neuron{:03d}.png".format(estResNumber, trialToPlot, neuronToPlot) rocFigFilename = "figures/{:08d}_rocAnalysis_trial{:03d}_neuron{:03d}.png".format(estResNumber, trialToPlot, neuronToPlot) kernelsParamsFigFilenamePattern = "figures/{:08d}_estimatedKernelsParams.{{:s}}".format(estResNumber) spikes_times, neurons_indices = \ shenoyUtils.getSpikesTimes(data_filename=data_filename, trials_indices=trials_indices, location=location, from_time=from_time, to_time=to_time, min_nSpikes_perNeuron_perTrial= min_nSpikes_perNeuron_perTrial) estimResConfig = configparser.ConfigParser() estimResConfig.read(estimResMetaDataFilename) nLatents = int(estimResConfig["data_params"]["nLatents"]) from_time = float(estimResConfig["data_params"]["from_time"]) to_time = float(estimResConfig["data_params"]["to_time"]) trials = [float(str) for str in estimResConfig["data_params"]["trials_indices"][1:-1].split(",")] nTrials = len(trials) trial_times = torch.arange(from_time, to_time, dtCIF) trial_times_numpy = trial_times.detach().numpy() with open(modelSaveFilename, "rb") as f: estResults = pickle.load(f) lowerBoundHist = estResults["lowerBoundHist"] elapsedTimeHist = estResults["elapsedTimeHist"] model = estResults["model"] neurons_indices = estResults["neurons_indices"] neuronToPlot_index = torch.nonzero(torch.tensor(neurons_indices)==neuronToPlot) neurons_indices_str = "".join(str(i)+" " for i in neurons_indices) if len(neuronToPlot_index)==0: raise ValueError("Neuron {:d} is not valid. Valid neurons are ".format(neuronToPlot) + neurons_indices_str) # plot lower bound history fig = plot.svGPFA.plotUtilsPlotly.getPlotLowerBoundHist(lowerBoundHist=lowerBoundHist) fig.write_image(lowerBoundHistVsIterNoFigFilenamePattern.format("png")) fig.write_html(lowerBoundHistVsIterNoFigFilenamePattern.format("html")) fig = plot.svGPFA.plotUtilsPlotly.getPlotLowerBoundHist(elapsedTimeHist=elapsedTimeHist, lowerBoundHist=lowerBoundHist) fig.write_image(lowerBoundHistVsElapsedTimeFigFilenamePattern.format("png")) fig.write_html(lowerBoundHistVsElapsedTimeFigFilenamePattern.format("html")) # plot estimated latent across trials testMuK, testVarK = model.predictLatents(times=trial_times) indPointsLocs = model.getIndPointsLocs() fig = plot.svGPFA.plotUtilsPlotly.getPlotLatentAcrossTrials(times=trial_times, latentsMeans=testMuK, latentsSTDs=torch.sqrt(testVarK), indPointsLocs=indPointsLocs, latentToPlot=latentToPlot, xlabel="Time (msec)") fig.write_image(latentsFigFilenamePattern.format("png")) fig.write_html(latentsFigFilenamePattern.format("html")) # plot embedding embeddingMeans, embeddingVars = model.predictEmbedding(times=trial_times) embeddingMeans = embeddingMeans.detach().numpy() embeddingVars = embeddingVars.detach().numpy() title = "Neuron {:d}".format(neuronToPlot) fig = plot.svGPFA.plotUtilsPlotly.getPlotEmbeddingAcrossTrials(times=trial_times_numpy, embeddingsMeans=embeddingMeans[:,:,neuronToPlot], embeddingsSTDs=np.sqrt(embeddingVars[:,:,neuronToPlot]), title=title) fig.write_image(embeddingsFigFilenamePattern.format("png")) fig.write_html(embeddingsFigFilenamePattern.format("html")) # calculate expected CIF values (for KS test and CIF plots) with torch.no_grad(): ePosCIFValues = model.computeExpectedPosteriorCIFs(times=trial_times) spikesTimesKS = spikes_times[trialToPlot][neuronToPlot] cifValuesKS = ePosCIFValues[trialToPlot][neuronToPlot] title = "Trial {:d}, Neuron {:d} ({:d} spikes)".format(trialToPlot, neuronToPlot, len(spikesTimesKS)) # CIF fig = plot.svGPFA.plotUtilsPlotly.getPlotCIF(times=trial_times, values=ePosCIFValues[trialToPlot][neuronToPlot], title=title) fig.write_image(CIFFigFilenamePattern.format("png")) fig.write_html(CIFFigFilenamePattern.format("html")) # plot KS test time rescaling (numerical correction) diffECDFsX, diffECDFsY, estECDFx, estECDFy, simECDFx, simECDFy, cb = stats.pointProcess.tests.KSTestTimeRescalingNumericalCorrection(spikesTimes=spikesTimesKS, cifTimes=trial_times, cifValues=cifValuesKS, gamma=ksTestGamma) plot.svGPFA.plotUtils.plotResKSTestTimeRescalingNumericalCorrection(diffECDFsX=diffECDFsX, diffECDFsY=diffECDFsY, estECDFx=estECDFx, estECDFy=estECDFy, simECDFx=simECDFx, simECDFy=simECDFy, cb=cb, figFilename=ksTestTimeRescalingNumericalCorrectionFigFilename, title=title) plt.close("all") # ROC predictive analysis pk = cifValuesKS*dtCIF bins = pd.interval_range(start=int(min(trial_times)), end=int(max(trial_times)), periods=len(pk)) cutRes, _ = pd.cut(spikesTimesKS, bins=bins, retbins=True) Y = torch.from_numpy(cutRes.value_counts().values) fpr, tpr, thresholds = sklearn.metrics.roc_curve(Y, pk, pos_label=1) roc_auc = sklearn.metrics.auc(fpr, tpr) plot.svGPFA.plotUtils.plotResROCAnalysis(fpr=fpr, tpr=tpr, auc=roc_auc, title=title, figFilename=rocFigFilename) plt.close("all") # plot embedding parameters estimatedC, estimatedD = model.getSVEmbeddingParams() fig = plot.svGPFA.plotUtilsPlotly.getPlotEmbeddingParams(C=estimatedC.numpy(), d=estimatedD.numpy()) fig.write_image(embeddingParamsFigFilenamePattern.format("png")) fig.write_html(embeddingParamsFigFilenamePattern.format("html")) # plot kernel parameters kernelsParams = model.getKernelsParams() kernelsTypes = [type(kernel).__name__ for kernel in model.getKernels()] fig = plot.svGPFA.plotUtilsPlotly.getPlotKernelsParams( kernelsTypes=kernelsTypes, kernelsParams=kernelsParams) fig.write_image(kernelsParamsFigFilenamePattern.format("png")) fig.write_html(kernelsParamsFigFilenamePattern.format("html"))
def test_interval_array_equal(kwargs): arr = interval_range(**kwargs).values assert_interval_array_equal(arr, arr)
def testDataSerialize(self): for type_, compress in itertools.product( (None, ) + tuple(dataserializer.SerialType.__members__.values()), (None, ) + tuple(dataserializer.CompressType.__members__.values())): array = np.random.rand(1000, 100) assert_array_equal( array, dataserializer.loads( dataserializer.dumps(array, serial_type=type_, compress=compress))) array = np.random.rand(1000, 100) assert_array_equal( array, dataserializer.load( BytesIO( dataserializer.dumps(array, serial_type=type_, compress=compress)))) array = np.random.rand(1000, 100).T # test non c-contiguous assert_array_equal( array, dataserializer.loads( dataserializer.dumps(array, serial_type=type_, compress=compress))) array = np.float64(0.2345) assert_array_equal( array, dataserializer.loads( dataserializer.dumps(array, serial_type=type_, compress=compress))) # test non-serializable object if pyarrow: non_serial = type('non_serial', (object, ), dict(nbytes=10)) with self.assertRaises(SerializationFailed): dataserializer.dumps(non_serial()) # test structured arrays. rec_dtype = np.dtype([('a', 'int64'), ('b', 'double'), ('c', '<U8')]) array = np.ones((100, ), dtype=rec_dtype) array_loaded = dataserializer.loads(dataserializer.dumps(array)) self.assertEqual(array.dtype, array_loaded.dtype) assert_array_equal(array, array_loaded) fn = os.path.join(tempfile.gettempdir(), f'test_dump_file_{id(self)}.bin') try: array = np.random.rand(1000, 100).T # test non c-contiguous with open(fn, 'wb') as dump_file: dataserializer.dump(array, dump_file) with open(fn, 'rb') as dump_file: assert_array_equal(array, dataserializer.load(dump_file)) with open(fn, 'wb') as dump_file: dataserializer.dump(array, dump_file, compress=dataserializer.CompressType.LZ4) with open(fn, 'rb') as dump_file: assert_array_equal(array, dataserializer.load(dump_file)) with open(fn, 'wb') as dump_file: dataserializer.dump(array, dump_file, compress=dataserializer.CompressType.GZIP) with open(fn, 'rb') as dump_file: assert_array_equal(array, dataserializer.load(dump_file)) finally: if os.path.exists(fn): os.unlink(fn) # test sparse if sps: mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr')) des_mat = dataserializer.loads(dataserializer.dumps(mat)) self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0) des_mat = dataserializer.loads( dataserializer.dumps(mat, compress=dataserializer.CompressType.LZ4)) self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0) des_mat = dataserializer.loads( dataserializer.dumps( mat, compress=dataserializer.CompressType.GZIP)) self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0) vector = sparse.SparseVector(sps.csr_matrix(np.random.rand(2)), shape=(2, )) des_vector = dataserializer.loads(dataserializer.dumps(vector)) self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0) des_vector = dataserializer.loads( dataserializer.dumps(vector, compress=dataserializer.CompressType.LZ4)) self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0) des_vector = dataserializer.loads( dataserializer.dumps( vector, compress=dataserializer.CompressType.GZIP)) self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0) # test groupby df1 = pd.DataFrame({ 'a': [3, 4, 5, 3, 5, 4, 1, 2, 3], 'b': [1, 3, 4, 5, 6, 5, 4, 4, 4], 'c': list('aabaaddce') }) grouped = wrapped_groupby(df1, 'b') restored = dataserializer.loads(dataserializer.dumps(grouped)) assert_groupby_equal(grouped, restored.groupby_obj) grouped = wrapped_groupby(df1, 'b').c restored = dataserializer.loads(dataserializer.dumps(grouped)) assert_groupby_equal(grouped, restored.groupby_obj) grouped = wrapped_groupby(df1, 'b') getattr(grouped, 'indices') restored = dataserializer.loads(dataserializer.dumps(grouped)) assert_groupby_equal(grouped, restored.groupby_obj) grouped = wrapped_groupby(df1.b, lambda x: x % 2) restored = dataserializer.loads(dataserializer.dumps(grouped)) assert_groupby_equal(grouped, restored.groupby_obj) grouped = wrapped_groupby(df1.b, lambda x: x % 2) getattr(grouped, 'indices') restored = dataserializer.loads(dataserializer.dumps(grouped)) assert_groupby_equal(grouped, restored.groupby_obj) # test categorical s = np.random.RandomState(0).random(10) cat = pd.cut(s, [0.3, 0.5, 0.8]) self.assertIsInstance(cat, pd.Categorical) des_cat = dataserializer.loads(dataserializer.dumps(cat)) self.assertEqual(len(cat), len(des_cat)) for c, dc in zip(cat, des_cat): np.testing.assert_equal(c, dc) # test IntervalIndex s = pd.interval_range(10, 100, 3) dest_s = dataserializer.loads((dataserializer.dumps(s))) pd.testing.assert_index_equal(s, dest_s) # test complex s = complex(10 + 5j) dest_s = dataserializer.loads((dataserializer.dumps(s))) self.assertIs(type(s), type(dest_s)) self.assertEqual(s, dest_s) s = np.complex64(10 + 5j) dest_s = dataserializer.loads((dataserializer.dumps(s))) self.assertIs(type(s), type(dest_s)) self.assertEqual(s, dest_s) # test ArrowArray df = pd.DataFrame({ 'a': ['s1', 's2', 's3'], 'b': [['s1', 's2'], ['s3'], ['s4', 's5']] }) df['a'] = df['a'].astype(ArrowStringDtype()) df['b'] = df['b'].astype(ArrowListDtype(str)) dest_df = dataserializer.loads(dataserializer.dumps(df)) self.assertIs(type(df), type(dest_df)) pd.testing.assert_frame_equal(df, dest_df) # test DataFrame with SparseDtype s = pd.Series([1, 2, np.nan, np.nan, 3]).astype(pd.SparseDtype(np.dtype(np.float64), np.nan)) dest_s = dataserializer.loads((dataserializer.dumps(s))) pd.testing.assert_series_equal(s, dest_s) df = pd.DataFrame({'s': s}) dest_df = dataserializer.loads((dataserializer.dumps(df))) pd.testing.assert_frame_equal(df, dest_df)
def test_construction(self): result = interval_range(0, 5, name='foo', closed='both') expected = IntervalIndex.from_breaks(np.arange(0, 5), name='foo', closed='both') tm.assert_index_equal(result, expected)
percentage_half_more = 100 - percentile_rank_half_less ## 7. Finding Percentiles with pandas ## wnba = pd.read_csv('wnba.csv') age_upper_quartile = wnba["Age"].describe()[6] age_middle_quartile = wnba["Age"].describe()[5] age_95th_percentile = wnba["Age"].describe(percentiles=[.95])[5] question1 = True question2 = False question3 = True ## 8. Grouped Frequency Distribution Tables ## wnba = pd.read_csv('wnba.csv') grouped_freq_table = wnba["PTS"].value_counts( bins=10, normalize=True).sort_index(ascending=False) * 100 ## 10. Readability for Grouped Frequency Tables ## wnba = pd.read_csv('wnba.csv') interval = pd.interval_range(start=0, end=600, freq=60) gr_freq_table_10 = pd.Series([0 for _ in range(10)], index=interval) for row in wnba["PTS"]: for i in interval: if row in i: gr_freq_table_10.loc[i] += 1 break
class TestIntervalIndex: index = IntervalIndex.from_arrays([0, 1], [1, 2]) def create_index(self, closed="right"): return IntervalIndex.from_breaks(range(11), closed=closed) def create_index_with_nan(self, closed="right"): mask = [True, False] + [True] * 8 return IntervalIndex.from_arrays( np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed, ) def test_properties(self, closed): index = self.create_index(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) tm.assert_index_equal(index.left, Index(np.arange(10))) tm.assert_index_equal(index.right, Index(np.arange(1, 11))) tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) assert index.closed == closed ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) # with nans index = self.create_index_with_nan(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) expected_right = expected_left + 1 expected_mid = expected_left + 0.5 tm.assert_index_equal(index.left, expected_left) tm.assert_index_equal(index.right, expected_right) tm.assert_index_equal(index.mid, expected_mid) assert index.closed == closed ivs = [ Interval(l, r, closed) if notna(l) else np.nan for l, r in zip(expected_left, expected_right) ] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) @pytest.mark.parametrize( "breaks", [ [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]), pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]), ], ) def test_length(self, closed, breaks): # GH 18789 index = IntervalIndex.from_breaks(breaks, closed=closed) result = index.length expected = Index(iv.length for iv in index) tm.assert_index_equal(result, expected) # with NA index = index.insert(1, np.nan) result = index.length expected = Index(iv.length if notna(iv) else iv for iv in index) tm.assert_index_equal(result, expected) def test_with_nans(self, closed): index = self.create_index(closed=closed) assert index.hasnans is False result = index.isna() expected = np.zeros(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.ones(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) index = self.create_index_with_nan(closed=closed) assert index.hasnans is True result = index.isna() expected = np.array([False, True] + [False] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.array([True, False] + [True] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) def test_copy(self, closed): expected = self.create_index(closed=closed) result = expected.copy() assert result.equals(expected) result = expected.copy(deep=True) assert result.equals(expected) assert result.left is not expected.left def test_ensure_copied_data(self, closed): # exercise the copy flag in the constructor # not copying index = self.create_index(closed=closed) result = IntervalIndex(index, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="same") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="same") # by-definition make a copy result = IntervalIndex(np.array(index), copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="copy") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="copy") def test_delete(self, closed): expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) result = self.create_index(closed=closed).delete(0) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "data", [ interval_range(0, periods=10, closed="neither"), interval_range(1.7, periods=8, freq=2.5, closed="both"), interval_range(Timestamp("20170101"), periods=12, closed="left"), interval_range(Timedelta("1 day"), periods=6, closed="right"), ], ) def test_insert(self, data): item = data[0] idx_item = IntervalIndex([item]) # start expected = idx_item.append(data) result = data.insert(0, item) tm.assert_index_equal(result, expected) # end expected = data.append(idx_item) result = data.insert(len(data), item) tm.assert_index_equal(result, expected) # mid expected = data[:3].append(idx_item).append(data[3:]) result = data.insert(3, item) tm.assert_index_equal(result, expected) # invalid type msg = "can only insert Interval objects and NA into an IntervalIndex" with pytest.raises(ValueError, match=msg): data.insert(1, "foo") # invalid closed msg = "inserted item must be closed on the same side as the index" for closed in {"left", "right", "both", "neither"} - {item.closed}: with pytest.raises(ValueError, match=msg): bad_item = Interval(item.left, item.right, closed=closed) data.insert(1, bad_item) # GH 18295 (test missing) na_idx = IntervalIndex([np.nan], closed=data.closed) for na in (np.nan, pd.NaT, None): expected = data[:1].append(na_idx).append(data[1:]) result = data.insert(1, na) tm.assert_index_equal(result, expected) def test_is_unique_interval(self, closed): """ Interval specific tests for is_unique in addition to base class tests """ # unique overlapping - distinct endpoints idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) assert idx.is_unique is True # unique overlapping - shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_unique is True # unique nested idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) assert idx.is_unique is True def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing non-overlapping idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered overlapping idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # stationary idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False # empty idx = IntervalIndex([], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) assert i[1] == Interval(1.0, 2.0, closed=closed) assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed) tm.assert_index_equal(result, expected) result = i[0:2] expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed) tm.assert_index_equal(result, expected) result = i[1:3] expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan), closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_maybe_convert_i8(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) # intervalindex result = index._maybe_convert_i8(index) expected = IntervalIndex.from_breaks(breaks.asi8) tm.assert_index_equal(result, expected) # interval interval = Interval(breaks[0], breaks[1]) result = index._maybe_convert_i8(interval) expected = Interval(breaks[0].value, breaks[1].value) assert result == expected # datetimelike index result = index._maybe_convert_i8(breaks) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) # datetimelike scalar result = index._maybe_convert_i8(breaks[0]) expected = breaks[0].value assert result == expected # list-like of datetimelike scalars result = index._maybe_convert_i8(list(breaks)) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5) ], ) def test_maybe_convert_i8_nat(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) to_convert = breaks._constructor([pd.NaT] * 3) expected = pd.Float64Index([np.nan] * 3) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) to_convert = to_convert.insert(0, breaks[0]) expected = expected.insert(0, float(breaks[0].value)) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")], ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_numeric(self, breaks, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks) key = make_key(breaks) # no conversion occurs for numeric result = index._maybe_convert_i8(key) assert result is key @pytest.mark.parametrize( "breaks1, breaks2", permutations( [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], 2, ), ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks1) key = make_key(breaks2) msg = ( f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with " f"values of dtype {breaks2.dtype}") msg = re.escape(msg) with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) def test_contains_method(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) expected = np.array([False, False], dtype="bool") actual = i.contains(0) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(3) tm.assert_numpy_array_equal(actual, expected) expected = np.array([True, False], dtype="bool") actual = i.contains(0.5) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(1) tm.assert_numpy_array_equal(actual, expected) # __contains__ not implemented for "interval in interval", follow # that for the contains method for now with pytest.raises(NotImplementedError, match="contains not implemented for two"): i.contains(Interval(0, 1)) def test_contains_dunder(self): index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right") # __contains__ requires perfect matches to intervals. assert 0 not in index assert 1 not in index assert 2 not in index assert Interval(0, 1, closed="right") in index assert Interval(0, 2, closed="right") not in index assert Interval(0, 0.5, closed="right") not in index assert Interval(3, 5, closed="right") not in index assert Interval(-1, 0, closed="left") not in index assert Interval(0, 1, closed="left") not in index assert Interval(0, 1, closed="both") not in index def test_dropna(self, closed): expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) def test_non_contiguous(self, closed): index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) target = [0.5, 1.5, 2.5] actual = index.get_indexer(target) expected = np.array([0, -1, 1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) assert 1.5 not in index def test_isin(self, closed): index = self.create_index(closed=closed) expected = np.array([True] + [False] * (len(index) - 1)) result = index.isin(index[:1]) tm.assert_numpy_array_equal(result, expected) result = index.isin([index[0]]) tm.assert_numpy_array_equal(result, expected) other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) expected = np.array([True] * (len(index) - 1) + [False]) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) for other_closed in {"right", "left", "both", "neither"}: other = self.create_index(closed=other_closed) expected = np.repeat(closed == other_closed, len(index)) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) def test_comparison(self): actual = Interval(0, 1) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = Interval(0.5, 1.5) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > Interval(0.5, 1.5) tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index expected = np.array([True, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index <= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index >= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index < self.index expected = np.array([False, False]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index.values == self.index tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index <= self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index != self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index > self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index.values > self.index tm.assert_numpy_array_equal(actual, np.array([False, False])) # invalid comparisons actual = self.index == 0 tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) msg = ("not supported between instances of 'int' and " "'pandas._libs.interval.Interval'") with pytest.raises(TypeError, match=msg): self.index > 0 with pytest.raises(TypeError, match=msg): self.index <= 0 with pytest.raises(TypeError, match=msg): self.index > np.arange(2) msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) def test_missing_values(self, closed): idx = Index([ np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed) ]) idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) assert idx.equals(idx2) msg = ("missing values must be missing in the same location both left " "and right sides") with pytest.raises(ValueError, match=msg): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]), closed=closed) tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) def test_sort_values(self, closed): index = self.create_index(closed=closed) result = index.sort_values() tm.assert_index_equal(result, index) result = index.sort_values(ascending=False) tm.assert_index_equal(result, index[::-1]) # with nan index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) result = index.sort_values() expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) tm.assert_index_equal(result, expected) result = index.sort_values(ascending=False) expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_datetime(self, tz): start = Timestamp("2000-01-01", tz=tz) dates = date_range(start=start, periods=10) index = IntervalIndex.from_breaks(dates) # test mid start = Timestamp("2000-01-01T12:00", tz=tz) expected = date_range(start=start, periods=9) tm.assert_index_equal(index.mid, expected) # __contains__ doesn't check individual points assert Timestamp("2000-01-01", tz=tz) not in index assert Timestamp("2000-01-01T12", tz=tz) not in index assert Timestamp("2000-01-02", tz=tz) not in index iv_true = Interval(Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)) iv_false = Interval(Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)) assert iv_true in index assert iv_false not in index # .contains does check individual points assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() assert index.contains(Timestamp("2000-01-02", tz=tz)).any() # test get_indexer start = Timestamp("1999-12-31T12:00", tz=tz) target = date_range(start=start, periods=7, freq="12H") actual = index.get_indexer(target) expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") tm.assert_numpy_array_equal(actual, expected) start = Timestamp("2000-01-08T18:00", tz=tz) target = date_range(start=start, periods=7, freq="6H") actual = index.get_indexer(target) expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) def test_append(self, closed): index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) result = index1.append(index2) expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) result = index1.append([index1, index2]) expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) msg = "Intervals must all be closed on the same side" for other_closed in {"left", "right", "both", "neither"} - {closed}: index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2], closed=other_closed) with pytest.raises(ValueError, match=msg): index1.append(index_other_closed) def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', otherwise True (GH16560) if closed == "both": idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is False else: idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True @pytest.mark.parametrize( "start, shift, na_value", [ (0, 1, np.nan), (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), ], ) def test_is_overlapping(self, start, shift, na_value, closed): # GH 23309 # see test_interval_tree.py for extensive tests; interface tests here # non-overlapping tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # non-overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # overlapping tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # common endpoints tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping expected = closed == "both" assert result is expected # common endpoints with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping assert result is expected @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))), list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )), list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )), ], ) def test_to_tuples(self, tuples): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))) + [np.nan], list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )) + [np.nan], list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )) + [np.nan], ], ) @pytest.mark.parametrize("na_tuple", [True, False]) def test_to_tuples_na(self, tuples, na_tuple): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples(na_tuple=na_tuple) # check the non-NA portion expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) result_notna = result[:-1] tm.assert_index_equal(result_notna, expected_notna) # check the NA portion result_na = result[-1] if na_tuple: assert isinstance(result_na, tuple) assert len(result_na) == 2 assert all(isna(x) for x in result_na) else: assert isna(result_na) def test_nbytes(self): # GH 19209 left = np.arange(0, 4, dtype="i8") right = np.arange(1, 5, dtype="i8") result = IntervalIndex.from_arrays(left, right).nbytes expected = 64 # 4 * 8 * 2 assert result == expected @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) def test_set_closed(self, name, closed, new_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) result = index.set_closed(new_closed) expected = interval_range(0, 5, closed=new_closed, name=name) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = f"invalid option for 'closed': {bad_closed}" with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) def test_is_all_dates(self): # GH 23576 year_2017 = pd.Interval(pd.Timestamp("2017-01-01 00:00:00"), pd.Timestamp("2018-01-01 00:00:00")) year_2017_index = pd.IntervalIndex([year_2017]) assert not year_2017_index.is_all_dates @pytest.mark.parametrize("key", [[5], (2, 3)]) def test_get_value_non_scalar_errors(self, key): # GH 31117 idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)]) s = pd.Series(range(len(idx)), index=idx) msg = str(key) with pytest.raises(InvalidIndexError, match=msg): with tm.assert_produces_warning(FutureWarning): idx.get_value(s, key)
def f(): interval_range(periods=2)
(the prefix "uni-" means "only one"). When a distribution has more than two modes, we say that the distribution is multimodal (the prefix "multi-" means many). We can also have cases when there is no mode at all. conside the no of bedroom in houses:- [1, 1, 2, 2, 3, 3, 4, 4] Each unique value occurs twice in the distribution above, so there's no value (or values) that occurs more often than others. For this reason, this distribution doesn't have a mode. Contextually, we could say that there's no typical house on the market with respect to the number of bedrooms. The workaround is to organize the continuous variable in a grouped frequency table, and select for the mode the midpoint of the class interval (the bin) with the highest frequency. This method has its limitations, but it generally gives reasonable answers ''' # frequency table intervals = pd.interval_range(start=0, end=800000, freq=100000) gr_freq_table = pd.Series([0, 0, 0, 0, 0, 0, 0, 0], index=intervals) for value in houses['SalePrice']: for interval in intervals: if value in interval: gr_freq_table.loc[interval] += 1 break print(gr_freq_table) mode = 150000 mean = houses['SalePrice'].mean() median = houses['SalePrice'].median() ''' When we plot a histogram or a kernel density plot to visualize the shape of a distribution, the mode will always be the peak of the distribution
labels = model_config_dict["mappings"]["labels"] print(labels) if body_cam is None: raise RuntimeError("Error initializing body camera") decimate = 20 max_dist = 4000.0 height = 400.0 width = 640.0 cx = width / decimate / 2 cy = height / decimate / 2 fx = 1.4 # values found by measuring known sized objects at known distances fy = 2.05 x_bins = pd.interval_range(start=-2000, end=2000, periods=40) y_bins = pd.interval_range(start=0, end=800, periods=8) while True: # main loop until 'q' is pressed nnet_packets, data_packets = body_cam.get_available_nnet_and_data_packets() for nnet_packet in nnet_packets: detections = list(nnet_packet.getDetectedObjects()) for detection in detections: if detection.label == 5: # we're looking for a bottle... print('Bottle is ' + '{:.2f}'.format(detection.depth_z) + 'm away.') for packet in data_packets:
def f(): interval_range(0, 10, freq=Timedelta('1day'))
def test_subtype_integer_errors(self): # int64 -> uint64 fails with negative values index = interval_range(-10, 10) dtype = IntervalDtype('uint64') with pytest.raises(ValueError): index.astype(dtype)
def create_data(): """ create the pickle data """ data = { "A": [0.0, 1.0, 2.0, 3.0, np.nan], "B": [0, 1, 0, 1, 0], "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], "D": date_range("1/1/2009", periods=5), "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], } scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M")) index = dict( int=Index(np.arange(10)), date=date_range("20130101", periods=10), period=period_range("2013-01-01", freq="M", periods=10), float=Index(np.arange(10, dtype=np.float64)), uint=Index(np.arange(10, dtype=np.uint64)), timedelta=timedelta_range("00:00:00", freq="30T", periods=10), ) index["range"] = RangeIndex(10) if _loose_version >= LooseVersion("0.21"): from pandas import interval_range index["interval"] = interval_range(0, periods=10) mi = dict( reg2=MultiIndex.from_tuples( tuple( zip( *[ ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["one", "two", "one", "two", "one", "two", "one", "two"], ] ) ), names=["first", "second"], ) ) series = dict( float=Series(data["A"]), int=Series(data["B"]), mixed=Series(data["E"]), ts=Series( np.arange(10).astype(np.int64), index=date_range("20130101", periods=10) ), mi=Series( np.arange(5).astype(np.float64), index=MultiIndex.from_tuples( tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"] ), ), dup=Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]), cat=Series(Categorical(["foo", "bar", "baz"])), dt=Series(date_range("20130101", periods=5)), dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")), period=Series([Period("2000Q1")] * 5), ) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list("ABCDA") frame = dict( float=DataFrame({"A": series["float"], "B": series["float"] + 1}), int=DataFrame({"A": series["int"], "B": series["int"] + 1}), mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}), mi=DataFrame( {"A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64)}, index=MultiIndex.from_tuples( tuple( zip( *[ ["bar", "bar", "baz", "baz", "baz"], ["one", "two", "one", "two", "three"], ] ) ), names=["first", "second"], ), ), dup=DataFrame( np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"] ), cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}), cat_and_float=DataFrame( { "A": Categorical(["foo", "bar", "baz"]), "B": np.arange(3).astype(np.int64), } ), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), }, index=range(5), ), dt_mixed2_tzs=DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), "C": Timestamp("20130603", tz="UTC"), }, index=range(5), ), ) cat = dict( int8=Categorical(list("abcdefg")), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000)), ) timestamp = dict( normal=Timestamp("2011-01-01"), nat=NaT, tz=Timestamp("2011-01-01", tz="US/Eastern"), ) timestamp["freq"] = Timestamp("2011-01-01", freq="D") timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M") off = { "DateOffset": DateOffset(years=1), "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824), "BusinessDay": BusinessDay(offset=timedelta(seconds=9)), "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"), "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"), "SemiMonthBegin": SemiMonthBegin(day_of_month=9), "SemiMonthEnd": SemiMonthEnd(day_of_month=24), "MonthBegin": MonthBegin(1), "MonthEnd": MonthEnd(1), "QuarterBegin": QuarterBegin(1), "QuarterEnd": QuarterEnd(1), "Day": Day(1), "YearBegin": YearBegin(1), "YearEnd": YearEnd(1), "Week": Week(1), "Week_Tues": Week(2, normalize=False, weekday=1), "WeekOfMonth": WeekOfMonth(week=3, weekday=4), "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3), "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"), "Easter": Easter(), "Hour": Hour(1), "Minute": Minute(1), } return dict( series=series, frame=frame, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off, )
this_dir = os.path.dirname(os.path.realpath(__file__)) config = toml.load(os.path.join(this_dir, 'config.toml')) u.set_full_paths(config, this_dir) csv_loc = config['file_locations']['data'] df: pd.DataFrame = pd.read_csv(csv_loc) # type: ignore df.drop('should_delete', axis=1, inplace=True) df = df.convert_dtypes() df.date = pd.to_datetime(df.date) pd.set_option('display.max_rows', df.shape[0] + 1) pd.set_option('display.max_columns', 175) date_bins = pd.interval_range(start=pd.Timestamp('2020-07-01'), periods=10, freq='MS') df['date_range'] = pd.cut(df.date, date_bins) df['muscle_weight'] = df.morning_weight * df.muscle_mass_percentage / 100 df['body_fat_weight'] = df.morning_weight * df.body_fat_percentage / 100 df['other_weight'] = df.morning_weight - df.muscle_weight - df.body_fat_weight weights = Enumerable(df.drop( 'morning_weight', axis=1).columns).where(lambda c: re.match('.*_weight', c, re.I)).to_list() # exps = Enumerable(df.columns).where(lambda c: re.match('.*_exp',c,re.I)).to_list() mdf: pd.DataFrame = pd.melt(df, id_vars=['date'], value_vars=weights, value_name='lbs', var_name='weight_category') # type: ignore mdf.dropna(inplace=True)
c c.categories c.value_counts() pd.cut(np.random.randn(100), bins=c.categories).value_counts() #%% #%% Generating ranges of intervals """ If we need intervals on a regular frequency, we can use the interval_range() function to create an IntervalIndex using various combinations of start, end, and periods. The default frequency for interval_range is a 1 for numeric intervals, and calendar day for datetime-like intervals: """ pd.interval_range(start=0, end=5) pd.interval_range(start=pd.Timestamp('2020-05-04'), periods=4) pd.interval_range(end=pd.Timedelta('3 days'), periods=3) """ Specifying start, end, and periods will generate a range of evenly spaced intervals from start to end inclusively, with periods number of elements in the resulting IntervalIndex: """ pd.interval_range(start=0, end=6, periods=4) pd.interval_range(pd.Timestamp('2018-01-01'), pd.Timestamp('2018-02-28'), periods=3) """ Additionally, the closed parameter can be used to specify which side(s) the intervals are closed on.
tm.assert_frame_equal(result, expected, check_exact=True) def test_func_duplicates_raises(): # GH28426 msg = "Function names" df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) with pytest.raises(SpecificationError, match=msg): df.groupby("A").agg(["min", "min"]) @pytest.mark.parametrize( "index", [ pd.CategoricalIndex(list("abc")), pd.interval_range(0, 3), pd.period_range("2020", periods=3, freq="D"), pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), ], ) def test_agg_index_has_complex_internals(index): # GH 31223 df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) result = df.groupby("group").agg({"value": Series.nunique}) expected = DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group") tm.assert_frame_equal(result, expected) def test_agg_split_block(): # https://github.com/pandas-dev/pandas/issues/31522 df = pd.DataFrame(
def recording(row_begining,col_begining,df): s1=pd.DataFrame(df.iloc[row_begining:row_begining+51,[col_begining,col_begining+1]]) s1.columns=['score','number'] s2=pd.DataFrame(df.iloc[row_begining:row_begining+50,[col_begining+7,col_begining+8]]) s2.columns=['score','number'] return pd.concat([s1,s2],axis=0,ignore_index=True) AllScores={} subjects=['Chinese','English','MathA','MathB','Chemistry','Physics','Biology','History','Geography','Civics'] read_excel_rows=np.arange(3,3+55*len(subjects),55) for name_num in range(len(file_name)): read_score=pd.read_excel(file_name[name_num]) Scores_temp=pd.DataFrame(index=pd.IntervalIndex(pd.interval_range(0.,101.,101),closed='left')[::-1]) for i in range(len(subjects)): Scores_temp[subjects[i]]=recording(read_excel_rows[i],0,read_score).number.values Scores_temp=Scores_temp.astype(int) AllScores[file_num[name_num]]=Scores_temp del read_excel_rows,read_score,file_name,Scores_temp,recording '---------------------------- Important DataFrames ----------------------------' AllCum={} for key in AllScores.keys(): summation=AllScores[key].sum(axis=0) AllCum[key]=(AllScores[key]/summation).cumsum(axis=0)
def binning2index(binning): if binning is None: return pandas.CategoricalIndex(["all"]) elif isinstance(binning, IntegerBinning): if (binning.loc_underflow == BinLocation.nonexistent and binning.loc_overflow == BinLocation.nonexistent): return pandas.RangeIndex(binning.min, binning.max + 1) else: return binning2index(binning.toCategoryBinning()) elif isinstance(binning, RegularBinning): if binning.overflow is None or ( binning.overflow.loc_underflow == BinLocation.nonexistent and binning.overflow.loc_overflow == BinLocation.nonexistent and binning.overflow.loc_nanflow == BinLocation.nonexistent): return pandas.interval_range( binning.interval.low, binning.interval.high, binning.num, closed=("left" if binning.interval.low_inclusive else "right"), ) elif (binning.overflow is None or binning.overflow.loc_nanflow == BinLocation.nonexistent): return binning2index(binning.toEdgesBinning()) else: return binning2index(binning.toCategoryBinning()) elif isinstance(binning, HexagonalBinning): raise NotImplementedError elif isinstance(binning, EdgesBinning): if (binning.overflow is None or binning.overflow.loc_nanflow == BinLocation.nonexistent): if binning.overflow is None or (binning.overflow.loc_underflow == BinLocation.nonexistent and binning.overflow.loc_overflow == BinLocation.nonexistent): return pandas.IntervalIndex.from_breaks( binning.edges, closed=("left" if binning.low_inclusive else "right")) elif (binning.overflow is not None and binning.overflow.loc_underflow.value <= BinLocation.nonexistent.value and binning.overflow.loc_overflow.value >= BinLocation.nonexistent.value): edges = numpy.empty(binning._binshape()[0] + 1, dtype=numpy.float64) shift = int( binning.overflow.loc_underflow != BinLocation.nonexistent) edges[shift:shift + len(binning.edges)] = binning.edges if binning.overflow.loc_underflow != BinLocation.nonexistent: edges[0] = -numpy.inf if binning.overflow.loc_overflow != BinLocation.nonexistent: edges[-1] = numpy.inf return pandas.IntervalIndex.from_breaks( edges, closed=("left" if binning.low_inclusive else "right")) else: return binning2index(binning.toIrregularBinning()) else: return binning2index(binning.toCategoryBinning()) elif isinstance(binning, IrregularBinning): if ((binning.overflow is None or binning.overflow.loc_nanflow == BinLocation.nonexistent) and len(binning.intervals) != 0 and binning.intervals[0].low_inclusive != binning.intervals[0].high_inclusive and all( x.low_inclusive == binning.intervals[0].low_inclusive and x.high_inclusive == binning.intervals[0].high_inclusive for x in binning.intervals)): left = numpy.empty(binning._binshape(), dtype=numpy.float64) right = numpy.empty(binning._binshape(), dtype=numpy.float64) flows = ([] if binning.overflow is None else [ (binning.overflow.loc_underflow, -numpy.inf), (binning.overflow.loc_overflow, numpy.inf), ]) low = numpy.inf high = -numpy.inf for interval in binning.intervals: if interval.low <= low: low = interval.low if interval.high >= high: high = interval.high i = 0 for loc, val in BinLocation._belows(flows): if val == -numpy.inf: left[i], right[i] = val, low if val == numpy.inf: left[i], right[i] = high, val i += 1 for interval in binning.intervals: left[i] = interval.low right[i] = interval.high i += 1 for loc, val in BinLocation._aboves(flows): if val == -numpy.inf: left[i], right[i] = val, low if val == numpy.inf: left[i], right[i] = high, val i += 1 return pandas.IntervalIndex.from_arrays( left, right, closed=("left" if binning.intervals[0].low_inclusive else "right"), ) else: return binning2index(binning.toCategoryBinning()) elif isinstance(binning, CategoryBinning): categories = [] flows = [(binning.loc_overflow, )] for (loc, ) in BinLocation._belows(flows): categories.append("(other)") categories.extend(binning.categories) for (loc, ) in BinLocation._aboves(flows): categories.append("(other)") return pandas.CategoricalIndex(categories) elif isinstance(binning, SparseRegularBinning): return binning2index(binning.toIrregularBinning()) elif isinstance(binning, FractionBinning): return binning2index(binning.toCategoryBinning()) elif isinstance(binning, PredicateBinning): return binning2index(binning.toCategoryBinning()) elif isinstance(binning, VariationBinning): return binning2index(binning.toCategoryBinning()) else: raise AssertionError(type(binning))
def _get(self, ep, symbol, start_date, end_date, retry, retry_wait, freq='6H'): dates = [None] if start_date: if not end_date: end_date = pd.Timestamp.utcnow() dates = pd.interval_range(API._timestamp(start_date), API._timestamp(end_date), freq=freq).tolist() if len(dates) == 0: dates.append( pd.Interval(left=API._timestamp(start_date), right=API._timestamp(end_date))) elif dates[-1].right < API._timestamp(end_date): dates.append( pd.Interval(dates[-1].right, API._timestamp(end_date))) @request_retry(self.ID, retry, retry_wait) def helper(start, start_date, end_date): if start_date and end_date: endpoint = f'/api/v1/{ep}?symbol={symbol}&count={API_MAX}&reverse=false&start={start}&startTime={start_date}&endTime={end_date}' else: endpoint = f'/api/v1/{ep}?symbol={symbol}&reverse=true' header = {} if self.key_id and self.key_secret: header = self._generate_signature("GET", endpoint) header['Accept'] = 'application/json' return requests.get('{}{}'.format(self.api, endpoint), headers=header) for interval in dates: start = 0 if interval is not None: end = interval.right end -= pd.Timedelta(nanoseconds=1) start_date = str(interval.left).replace(" ", "T") + "Z" end_date = str(end).replace(" ", "T") + "Z" while True: r = helper(start, start_date, end_date) if r.status_code in {502, 504}: LOG.warning("%s: %d for URL %s - %s", self.ID, r.status_code, r.url, r.text) sleep(retry_wait) continue elif r.status_code == 429: sleep(API_REFRESH) continue elif r.status_code != 200: self._handle_error(r, LOG) else: sleep(RATE_LIMIT_SLEEP) limit = int(r.headers['X-RateLimit-Remaining']) data = r.json() yield data if len(data) != API_MAX: break if limit < 1: sleep(API_REFRESH) start += len(data)
class TestIntervalIndex(Base): _holder = IntervalIndex def setup_method(self, method): self.index = IntervalIndex.from_arrays([0, 1], [1, 2]) self.index_with_nan = IntervalIndex.from_tuples([(0, 1), np.nan, (1, 2)]) self.indices = dict(intervalIndex=tm.makeIntervalIndex(10)) def create_index(self, closed='right'): return IntervalIndex.from_breaks(range(11), closed=closed) def create_index_with_nan(self, closed='right'): mask = [True, False] + [True] * 8 return IntervalIndex.from_arrays(np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed) def test_properties(self, closed): index = self.create_index(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) tm.assert_index_equal(index.left, Index(np.arange(10))) tm.assert_index_equal(index.right, Index(np.arange(1, 11))) tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) assert index.closed == closed ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) # with nans index = self.create_index_with_nan(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) expected_right = expected_left + 1 expected_mid = expected_left + 0.5 tm.assert_index_equal(index.left, expected_left) tm.assert_index_equal(index.right, expected_right) tm.assert_index_equal(index.mid, expected_mid) assert index.closed == closed ivs = [ Interval(l, r, closed) if notna(l) else np.nan for l, r in zip(expected_left, expected_right) ] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) @pytest.mark.parametrize( 'breaks', [[1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], pd.to_datetime(['20170101', '20170202', '20170303', '20170404']), pd.to_timedelta(['1ns', '2ms', '3s', '4M', '5H', '6D'])]) def test_length(self, closed, breaks): # GH 18789 index = IntervalIndex.from_breaks(breaks, closed=closed) result = index.length expected = Index(iv.length for iv in index) tm.assert_index_equal(result, expected) # with NA index = index.insert(1, np.nan) result = index.length expected = Index(iv.length if notna(iv) else iv for iv in index) tm.assert_index_equal(result, expected) def test_with_nans(self, closed): index = self.create_index(closed=closed) assert not index.hasnans result = index.isna() expected = np.repeat(False, len(index)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.repeat(True, len(index)) tm.assert_numpy_array_equal(result, expected) index = self.create_index_with_nan(closed=closed) assert index.hasnans result = index.isna() expected = np.array([False, True] + [False] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.array([True, False] + [True] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) def test_copy(self, closed): expected = self.create_index(closed=closed) result = expected.copy() assert result.equals(expected) result = expected.copy(deep=True) assert result.equals(expected) assert result.left is not expected.left def test_ensure_copied_data(self, closed): # exercise the copy flag in the constructor # not copying index = self.create_index(closed=closed) result = IntervalIndex(index, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same='same') tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same='same') # by-definition make a copy result = IntervalIndex(index._ndarray_values, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same='copy') tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same='copy') def test_equals(self, closed): expected = IntervalIndex.from_breaks(np.arange(5), closed=closed) assert expected.equals(expected) assert expected.equals(expected.copy()) assert not expected.equals(expected.astype(object)) assert not expected.equals(np.array(expected)) assert not expected.equals(list(expected)) assert not expected.equals([1, 2]) assert not expected.equals(np.array([1, 2])) assert not expected.equals(pd.date_range('20130101', periods=2)) expected_name1 = IntervalIndex.from_breaks(np.arange(5), closed=closed, name='foo') expected_name2 = IntervalIndex.from_breaks(np.arange(5), closed=closed, name='bar') assert expected.equals(expected_name1) assert expected_name1.equals(expected_name2) for other_closed in {'left', 'right', 'both', 'neither'} - {closed}: expected_other_closed = IntervalIndex.from_breaks( np.arange(5), closed=other_closed) assert not expected.equals(expected_other_closed) @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series]) def test_where(self, closed, klass): idx = self.create_index(closed=closed) cond = [True] * len(idx) expected = idx result = expected.where(klass(cond)) tm.assert_index_equal(result, expected) cond = [False] + [True] * len(idx[1:]) expected = IntervalIndex([np.nan] + idx[1:].tolist()) result = idx.where(klass(cond)) tm.assert_index_equal(result, expected) def test_delete(self, closed): expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) result = self.create_index(closed=closed).delete(0) tm.assert_index_equal(result, expected) @pytest.mark.parametrize('data', [ interval_range(0, periods=10, closed='neither'), interval_range(1.7, periods=8, freq=2.5, closed='both'), interval_range(Timestamp('20170101'), periods=12, closed='left'), interval_range(Timedelta('1 day'), periods=6, closed='right') ]) def test_insert(self, data): item = data[0] idx_item = IntervalIndex([item]) # start expected = idx_item.append(data) result = data.insert(0, item) tm.assert_index_equal(result, expected) # end expected = data.append(idx_item) result = data.insert(len(data), item) tm.assert_index_equal(result, expected) # mid expected = data[:3].append(idx_item).append(data[3:]) result = data.insert(3, item) tm.assert_index_equal(result, expected) # invalid type msg = 'can only insert Interval objects and NA into an IntervalIndex' with tm.assert_raises_regex(ValueError, msg): data.insert(1, 'foo') # invalid closed msg = 'inserted item must be closed on the same side as the index' for closed in {'left', 'right', 'both', 'neither'} - {item.closed}: with tm.assert_raises_regex(ValueError, msg): bad_item = Interval(item.left, item.right, closed=closed) data.insert(1, bad_item) # GH 18295 (test missing) na_idx = IntervalIndex([np.nan], closed=data.closed) for na in (np.nan, pd.NaT, None): expected = data[:1].append(na_idx).append(data[1:]) result = data.insert(1, na) tm.assert_index_equal(result, expected) def test_take(self, closed): index = self.create_index(closed=closed) result = index.take(range(10)) tm.assert_index_equal(result, index) result = index.take([0, 0, 1]) expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed) tm.assert_index_equal(result, expected) def test_unique(self, closed): # unique non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_unique # unique overlapping - distinct endpoints idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) assert idx.is_unique # unique overlapping - shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_unique # unique nested idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) assert idx.is_unique # duplicate idx = IntervalIndex.from_tuples([(0, 1), (0, 1), (2, 3)], closed=closed) assert not idx.is_unique # empty idx = IntervalIndex([], closed=closed) assert idx.is_unique def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_monotonic assert idx._is_strictly_monotonic_increasing assert not idx.is_monotonic_decreasing assert not idx._is_strictly_monotonic_decreasing # decreasing non-overlapping idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) assert not idx.is_monotonic assert not idx._is_strictly_monotonic_increasing assert idx.is_monotonic_decreasing assert idx._is_strictly_monotonic_decreasing # unordered non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) assert not idx.is_monotonic assert not idx._is_strictly_monotonic_increasing assert not idx.is_monotonic_decreasing assert not idx._is_strictly_monotonic_decreasing # increasing overlapping idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) assert idx.is_monotonic assert idx._is_strictly_monotonic_increasing assert not idx.is_monotonic_decreasing assert not idx._is_strictly_monotonic_decreasing # decreasing overlapping idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) assert not idx.is_monotonic assert not idx._is_strictly_monotonic_increasing assert idx.is_monotonic_decreasing assert idx._is_strictly_monotonic_decreasing # unordered overlapping idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) assert not idx.is_monotonic assert not idx._is_strictly_monotonic_increasing assert not idx.is_monotonic_decreasing assert not idx._is_strictly_monotonic_decreasing # increasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_monotonic assert idx._is_strictly_monotonic_increasing assert not idx.is_monotonic_decreasing assert not idx._is_strictly_monotonic_decreasing # decreasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed) assert not idx.is_monotonic assert not idx._is_strictly_monotonic_increasing assert idx.is_monotonic_decreasing assert idx._is_strictly_monotonic_decreasing # stationary idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) assert idx.is_monotonic assert not idx._is_strictly_monotonic_increasing assert idx.is_monotonic_decreasing assert not idx._is_strictly_monotonic_decreasing # empty idx = IntervalIndex([], closed=closed) assert idx.is_monotonic assert idx._is_strictly_monotonic_increasing assert idx.is_monotonic_decreasing assert idx._is_strictly_monotonic_decreasing @pytest.mark.skip(reason='not a valid repr as we use interval notation') def test_repr(self): i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right') expected = ("IntervalIndex(left=[0, 1]," "\n right=[1, 2]," "\n closed='right'," "\n dtype='interval[int64]')") assert repr(i) == expected i = IntervalIndex.from_tuples( (Timestamp('20130101'), Timestamp('20130102')), (Timestamp('20130102'), Timestamp('20130103')), closed='right') expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02']," "\n right=['2013-01-02', '2013-01-03']," "\n closed='right'," "\n dtype='interval[datetime64[ns]]')") assert repr(i) == expected @pytest.mark.skip(reason='not a valid repr as we use interval notation') def test_repr_max_seq_item_setting(self): super(TestIntervalIndex, self).test_repr_max_seq_item_setting() @pytest.mark.skip(reason='not a valid repr as we use interval notation') def test_repr_roundtrip(self): super(TestIntervalIndex, self).test_repr_roundtrip() # TODO: check this behavior is consistent with test_interval_new.py def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) assert i[1] == Interval(1.0, 2.0, closed=closed) assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0., ), (1., ), closed=closed) tm.assert_index_equal(result, expected) result = i[0:2] expected = IntervalIndex.from_arrays((0., 1), (1., 2.), closed=closed) tm.assert_index_equal(result, expected) result = i[1:3] expected = IntervalIndex.from_arrays((1., np.nan), (2., np.nan), closed=closed) tm.assert_index_equal(result, expected) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_loc_value(self): pytest.raises(KeyError, self.index.get_loc, 0) assert self.index.get_loc(0.5) == 0 assert self.index.get_loc(1) == 0 assert self.index.get_loc(1.5) == 1 assert self.index.get_loc(2) == 1 pytest.raises(KeyError, self.index.get_loc, -1) pytest.raises(KeyError, self.index.get_loc, 3) idx = IntervalIndex.from_tuples([(0, 2), (1, 3)]) assert idx.get_loc(0.5) == 0 assert idx.get_loc(1) == 0 tm.assert_numpy_array_equal(idx.get_loc(1.5), np.array([0, 1], dtype='int64')) tm.assert_numpy_array_equal(np.sort(idx.get_loc(2)), np.array([0, 1], dtype='int64')) assert idx.get_loc(3) == 1 pytest.raises(KeyError, idx.get_loc, 3.5) idx = IntervalIndex.from_arrays([0, 2], [1, 3]) pytest.raises(KeyError, idx.get_loc, 1.5) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def slice_locs_cases(self, breaks): # TODO: same tests for more index types index = IntervalIndex.from_breaks([0, 1, 2], closed='right') assert index.slice_locs() == (0, 2) assert index.slice_locs(0, 1) == (0, 1) assert index.slice_locs(1, 1) == (0, 1) assert index.slice_locs(0, 2) == (0, 2) assert index.slice_locs(0.5, 1.5) == (0, 2) assert index.slice_locs(0, 0.5) == (0, 1) assert index.slice_locs(start=1) == (0, 2) assert index.slice_locs(start=1.2) == (1, 2) assert index.slice_locs(end=1) == (0, 1) assert index.slice_locs(end=1.1) == (0, 2) assert index.slice_locs(end=1.0) == (0, 1) assert index.slice_locs(-1, -1) == (0, 0) index = IntervalIndex.from_breaks([0, 1, 2], closed='neither') assert index.slice_locs(0, 1) == (0, 1) assert index.slice_locs(0, 2) == (0, 2) assert index.slice_locs(0.5, 1.5) == (0, 2) assert index.slice_locs(1, 1) == (1, 1) assert index.slice_locs(1, 2) == (1, 2) index = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed='both') assert index.slice_locs(1, 1) == (0, 1) assert index.slice_locs(1, 2) == (0, 2) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_int64(self): self.slice_locs_cases([0, 1, 2]) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_float64(self): self.slice_locs_cases([0.0, 1.0, 2.0]) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def slice_locs_decreasing_cases(self, tuples): index = IntervalIndex.from_tuples(tuples) assert index.slice_locs(1.5, 0.5) == (1, 3) assert index.slice_locs(2, 0) == (1, 3) assert index.slice_locs(2, 1) == (1, 3) assert index.slice_locs(3, 1.1) == (0, 3) assert index.slice_locs(3, 3) == (0, 2) assert index.slice_locs(3.5, 3.3) == (0, 1) assert index.slice_locs(1, -3) == (2, 3) slice_locs = index.slice_locs(-1, -1) assert slice_locs[0] == slice_locs[1] # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_decreasing_int64(self): self.slice_locs_cases([(2, 4), (1, 3), (0, 2)]) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_decreasing_float64(self): self.slice_locs_cases([(2., 4.), (1., 3.), (0., 2.)]) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_fails(self): index = IntervalIndex.from_tuples([(1, 2), (0, 1), (2, 3)]) with pytest.raises(KeyError): index.slice_locs(1, 2) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_loc_interval(self): assert self.index.get_loc(Interval(0, 1)) == 0 assert self.index.get_loc(Interval(0, 0.5)) == 0 assert self.index.get_loc(Interval(0, 1, 'left')) == 0 pytest.raises(KeyError, self.index.get_loc, Interval(2, 3)) pytest.raises(KeyError, self.index.get_loc, Interval(-1, 0, 'left')) # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize('item', [3, Interval(1, 4)]) def test_get_loc_length_one(self, item, closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_loc(item) assert result == 0 # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_indexer(self): actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(self.index) expected = np.array([0, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) index = IntervalIndex.from_breaks([0, 1, 2], closed='left') actual = index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) expected = np.array([-1, 0, 0, 1, 1, -1, -1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(index[:1]) expected = np.array([0], dtype='intp') tm.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(index) expected = np.array([-1, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_indexer_subintervals(self): # TODO: is this right? # return indexers for wholly contained subintervals target = IntervalIndex.from_breaks(np.linspace(0, 2, 5)) actual = self.index.get_indexer(target) expected = np.array([0, 0, 1, 1], dtype='p') tm.assert_numpy_array_equal(actual, expected) target = IntervalIndex.from_breaks([0, 0.67, 1.33, 2]) actual = self.index.get_indexer(target) expected = np.array([0, 0, 1, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(target[[0, -1]]) expected = np.array([0, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) target = IntervalIndex.from_breaks([0, 0.33, 0.67, 1], closed='left') actual = self.index.get_indexer(target) expected = np.array([0, 0, 0], dtype='intp') tm.assert_numpy_array_equal(actual, expected) # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize( 'item', [[3], np.arange(1, 5), [Interval(1, 4)], interval_range(1, 4)]) def test_get_indexer_length_one(self, item, closed): # GH 17284 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_indexer(item) expected = np.array([0] * len(item), dtype='intp') tm.assert_numpy_array_equal(result, expected) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_contains(self): # Only endpoints are valid. i = IntervalIndex.from_arrays([0, 1], [1, 2]) # Invalid assert 0 not in i assert 1 not in i assert 2 not in i # Valid assert Interval(0, 1) in i assert Interval(0, 2) in i assert Interval(0, 0.5) in i assert Interval(3, 5) not in i assert Interval(-1, 0, closed='left') not in i # To be removed, replaced by test_interval_new.py (see #16316, #16386) def testcontains(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) assert i.contains(0.1) assert i.contains(0.5) assert i.contains(1) assert i.contains(Interval(0, 1)) assert i.contains(Interval(0, 2)) # these overlaps completely assert i.contains(Interval(0, 3)) assert i.contains(Interval(1, 3)) assert not i.contains(20) assert not i.contains(-20) def test_dropna(self, closed): expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) # TODO: check this behavior is consistent with test_interval_new.py def test_non_contiguous(self, closed): index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) target = [0.5, 1.5, 2.5] actual = index.get_indexer(target) expected = np.array([0, -1, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) assert 1.5 not in index def test_union(self, closed): index = self.create_index(closed=closed) other = IntervalIndex.from_breaks(range(5, 13), closed=closed) expected = IntervalIndex.from_breaks(range(13), closed=closed) result = index.union(other) tm.assert_index_equal(result, expected) result = other.union(index) tm.assert_index_equal(result, expected) tm.assert_index_equal(index.union(index), index) tm.assert_index_equal(index.union(index[:1]), index) # GH 19101: empty result, same dtype index = IntervalIndex(np.array([], dtype='int64'), closed=closed) result = index.union(index) tm.assert_index_equal(result, index) # GH 19101: empty result, different dtypes other = IntervalIndex(np.array([], dtype='float64'), closed=closed) result = index.union(other) tm.assert_index_equal(result, index) def test_intersection(self, closed): index = self.create_index(closed=closed) other = IntervalIndex.from_breaks(range(5, 13), closed=closed) expected = IntervalIndex.from_breaks(range(5, 11), closed=closed) result = index.intersection(other) tm.assert_index_equal(result, expected) result = other.intersection(index) tm.assert_index_equal(result, expected) tm.assert_index_equal(index.intersection(index), index) # GH 19101: empty result, same dtype other = IntervalIndex.from_breaks(range(300, 314), closed=closed) expected = IntervalIndex(np.array([], dtype='int64'), closed=closed) result = index.intersection(other) tm.assert_index_equal(result, expected) # GH 19101: empty result, different dtypes breaks = np.arange(300, 314, dtype='float64') other = IntervalIndex.from_breaks(breaks, closed=closed) result = index.intersection(other) tm.assert_index_equal(result, expected) def test_difference(self, closed): index = self.create_index(closed=closed) tm.assert_index_equal(index.difference(index[:1]), index[1:]) # GH 19101: empty result, same dtype result = index.difference(index) expected = IntervalIndex(np.array([], dtype='int64'), closed=closed) tm.assert_index_equal(result, expected) # GH 19101: empty result, different dtypes other = IntervalIndex.from_arrays(index.left.astype('float64'), index.right, closed=closed) result = index.difference(other) tm.assert_index_equal(result, expected) def test_symmetric_difference(self, closed): index = self.create_index(closed=closed) result = index[1:].symmetric_difference(index[:-1]) expected = IntervalIndex([index[0], index[-1]]) tm.assert_index_equal(result, expected) # GH 19101: empty result, same dtype result = index.symmetric_difference(index) expected = IntervalIndex(np.array([], dtype='int64'), closed=closed) tm.assert_index_equal(result, expected) # GH 19101: empty result, different dtypes other = IntervalIndex.from_arrays(index.left.astype('float64'), index.right, closed=closed) result = index.symmetric_difference(other) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( 'op_name', ['union', 'intersection', 'difference', 'symmetric_difference']) def test_set_operation_errors(self, closed, op_name): index = self.create_index(closed=closed) set_op = getattr(index, op_name) # non-IntervalIndex msg = ('the other index needs to be an IntervalIndex too, but ' 'was type Int64Index') with tm.assert_raises_regex(TypeError, msg): set_op(Index([1, 2, 3])) # mixed closed msg = ('can only do set operations between two IntervalIndex objects ' 'that are closed on the same side') for other_closed in {'right', 'left', 'both', 'neither'} - {closed}: other = self.create_index(closed=other_closed) with tm.assert_raises_regex(ValueError, msg): set_op(other) # GH 19016: incompatible dtypes other = interval_range(Timestamp('20180101'), periods=9, closed=closed) msg = ('can only do {op} between two IntervalIndex objects that have ' 'compatible dtypes').format(op=op_name) with tm.assert_raises_regex(TypeError, msg): set_op(other) def test_isin(self, closed): index = self.create_index(closed=closed) expected = np.array([True] + [False] * (len(index) - 1)) result = index.isin(index[:1]) tm.assert_numpy_array_equal(result, expected) result = index.isin([index[0]]) tm.assert_numpy_array_equal(result, expected) other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) expected = np.array([True] * (len(index) - 1) + [False]) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) for other_closed in {'right', 'left', 'both', 'neither'}: other = self.create_index(closed=other_closed) expected = np.repeat(closed == other_closed, len(index)) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) def test_comparison(self): actual = Interval(0, 1) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = Interval(0.5, 1.5) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > Interval(0.5, 1.5) tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index expected = np.array([True, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index <= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index >= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index < self.index expected = np.array([False, False]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index == IntervalIndex.from_breaks([0, 1, 2], 'left') tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index.values == self.index tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index <= self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index != self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index > self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index.values > self.index tm.assert_numpy_array_equal(actual, np.array([False, False])) # invalid comparisons actual = self.index == 0 tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) with tm.assert_raises_regex(TypeError, 'unorderable types'): self.index > 0 with tm.assert_raises_regex(TypeError, 'unorderable types'): self.index <= 0 with pytest.raises(TypeError): self.index > np.arange(2) with pytest.raises(ValueError): self.index > np.arange(3) def test_missing_values(self, closed): idx = Index([ np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed) ]) idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) assert idx.equals(idx2) with pytest.raises(ValueError): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]), closed=closed) tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) def test_sort_values(self, closed): index = self.create_index(closed=closed) result = index.sort_values() tm.assert_index_equal(result, index) result = index.sort_values(ascending=False) tm.assert_index_equal(result, index[::-1]) # with nan index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) result = index.sort_values() expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) tm.assert_index_equal(result, expected) result = index.sort_values(ascending=False) expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) tm.assert_index_equal(result, expected) @pytest.mark.parametrize('tz', [None, 'US/Eastern']) def test_datetime(self, tz): start = Timestamp('2000-01-01', tz=tz) dates = date_range(start=start, periods=10) index = IntervalIndex.from_breaks(dates) # test mid start = Timestamp('2000-01-01T12:00', tz=tz) expected = date_range(start=start, periods=9) tm.assert_index_equal(index.mid, expected) # __contains__ doesn't check individual points assert Timestamp('2000-01-01', tz=tz) not in index assert Timestamp('2000-01-01T12', tz=tz) not in index assert Timestamp('2000-01-02', tz=tz) not in index iv_true = Interval(Timestamp('2000-01-01T08', tz=tz), Timestamp('2000-01-01T18', tz=tz)) iv_false = Interval(Timestamp('1999-12-31', tz=tz), Timestamp('2000-01-01', tz=tz)) assert iv_true in index assert iv_false not in index # .contains does check individual points assert not index.contains(Timestamp('2000-01-01', tz=tz)) assert index.contains(Timestamp('2000-01-01T12', tz=tz)) assert index.contains(Timestamp('2000-01-02', tz=tz)) assert index.contains(iv_true) assert not index.contains(iv_false) # test get_indexer start = Timestamp('1999-12-31T12:00', tz=tz) target = date_range(start=start, periods=7, freq='12H') actual = index.get_indexer(target) expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype='intp') tm.assert_numpy_array_equal(actual, expected) start = Timestamp('2000-01-08T18:00', tz=tz) target = date_range(start=start, periods=7, freq='6H') actual = index.get_indexer(target) expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) def test_append(self, closed): index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) result = index1.append(index2) expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) result = index1.append([index1, index2]) expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) msg = ('can only append two IntervalIndex objects that are closed ' 'on the same side') for other_closed in {'left', 'right', 'both', 'neither'} - {closed}: index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2], closed=other_closed) with tm.assert_raises_regex(ValueError, msg): index1.append(index_other_closed) def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', otherwise True (GH16560) if closed == 'both': idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is False else: idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True @pytest.mark.parametrize('tuples', [ lzip(range(10), range(1, 11)), lzip(date_range('20170101', periods=10), date_range('20170101', periods=10)), lzip(timedelta_range('0 days', periods=10), timedelta_range('1 day', periods=10)) ]) def test_to_tuples(self, tuples): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected) @pytest.mark.parametrize('tuples', [ lzip(range(10), range(1, 11)) + [np.nan], lzip(date_range('20170101', periods=10), date_range('20170101', periods=10)) + [np.nan], lzip(timedelta_range('0 days', periods=10), timedelta_range('1 day', periods=10)) + [np.nan] ]) @pytest.mark.parametrize('na_tuple', [True, False]) def test_to_tuples_na(self, tuples, na_tuple): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples(na_tuple=na_tuple) # check the non-NA portion expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) result_notna = result[:-1] tm.assert_index_equal(result_notna, expected_notna) # check the NA portion result_na = result[-1] if na_tuple: assert isinstance(result_na, tuple) assert len(result_na) == 2 assert all(isna(x) for x in result_na) else: assert isna(result_na) def test_nbytes(self): # GH 19209 left = np.arange(0, 4, dtype='i8') right = np.arange(1, 5, dtype='i8') result = IntervalIndex.from_arrays(left, right).nbytes expected = 64 # 4 * 8 * 2 assert result == expected def test_itemsize(self): # GH 19209 left = np.arange(0, 4, dtype='i8') right = np.arange(1, 5, dtype='i8') result = IntervalIndex.from_arrays(left, right).itemsize expected = 16 # 8 * 2 assert result == expected @pytest.mark.parametrize('new_closed', ['left', 'right', 'both', 'neither']) def test_set_closed(self, name, closed, new_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) result = index.set_closed(new_closed) expected = interval_range(0, 5, closed=new_closed, name=name) tm.assert_index_equal(result, expected) @pytest.mark.parametrize('bad_closed', ['foo', 10, 'LEFT', True, False]) def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) with tm.assert_raises_regex(ValueError, msg): index.set_closed(bad_closed)
def test_early_truncation(self, start, end, freq, expected_endpoint): # index truncates early if freq causes end to be skipped result = interval_range(start=start, end=end, freq=freq) result_endpoint = result.right[-1] assert result_endpoint == expected_endpoint
def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) with tm.assert_raises_regex(ValueError, msg): index.set_closed(bad_closed)
def f(): interval_range()
def test_errors(self): # not enough params msg = ('Of the four parameters: start, end, periods, and freq, ' 'exactly three must be specified') with tm.assert_raises_regex(ValueError, msg): interval_range(start=0) with tm.assert_raises_regex(ValueError, msg): interval_range(end=5) with tm.assert_raises_regex(ValueError, msg): interval_range(periods=2) with tm.assert_raises_regex(ValueError, msg): interval_range() # too many params with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=5, periods=6, freq=1.5) # mixed units msg = 'start, end, freq need to be type compatible' with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, end=Timestamp('20130101'), freq=2) with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, end=Timedelta('1 day'), freq=2) with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, end=10, freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timestamp('20130101'), end=10, freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timestamp('20130101'), end=Timedelta('1 day'), freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timestamp('20130101'), end=Timestamp('20130110'), freq=2) with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timedelta('1 day'), end=10, freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timedelta('1 day'), end=Timestamp('20130110'), freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timedelta('1 day'), end=Timedelta('10 days'), freq=2) # invalid periods msg = 'periods must be a number, got foo' with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, periods='foo') # invalid start msg = 'start must be numeric or datetime-like, got foo' with tm.assert_raises_regex(ValueError, msg): interval_range(start='foo', periods=10) # invalid end msg = r'end must be numeric or datetime-like, got \(0, 1\]' with tm.assert_raises_regex(ValueError, msg): interval_range(end=Interval(0, 1), periods=10) # invalid freq for datetime-like msg = 'freq must be numeric or convertible to DateOffset, got foo' with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=10, freq='foo') with tm.assert_raises_regex(ValueError, msg): interval_range(start=Timestamp('20130101'), periods=10, freq='foo') with tm.assert_raises_regex(ValueError, msg): interval_range(end=Timedelta('1 day'), periods=10, freq='foo') # mixed tz start = Timestamp('2017-01-01', tz='US/Eastern') end = Timestamp('2017-01-07', tz='US/Pacific') msg = 'Start and end cannot both be tz-aware with different timezones' with tm.assert_raises_regex(TypeError, msg): interval_range(start=start, end=end)
def test_set_closed(self, name, closed, new_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) result = index.set_closed(new_closed) expected = interval_range(0, 5, closed=new_closed, name=name) tm.assert_index_equal(result, expected)
def main(argv): parser = argparse.ArgumentParser() parser.add_argument("simResNumber", help="Simulation result number", type=int) parser.add_argument("--latentToPlot", help="Latent to plot", type=int, default=0) parser.add_argument("--trialToPlot", help="Trial to plot", type=int, default=0) parser.add_argument("--neuronToPlot", help="Neuron to plot", type=int, default=0) parser.add_argument("--nResamplesKSTest", help="Number of resamples for KS test", type=int, default=10) args = parser.parse_args() simResNumber = args.simResNumber latentToPlot = args.latentToPlot trialToPlot = args.trialToPlot neuronToPlot = args.neuronToPlot nResamplesKSTest = args.nResamplesKSTest simResConfigFilename = "results/{:08d}_simulation_metaData.ini".format( simResNumber) simResConfig = configparser.ConfigParser() simResConfig.read(simResConfigFilename) simInitConfigFilename = simResConfig["simulation_params"][ "simInitConfigFilename"] simInitConfig = configparser.ConfigParser() simInitConfig.read(simInitConfigFilename) nLatents = int(simInitConfig["control_variables"]["nLatents"]) nNeurons = int(simInitConfig["control_variables"]["nNeurons"]) trialsLengths = [ float(str) for str in simInitConfig["control_variables"] ["trialsLengths"][1:-1].split(",") ] dtCIF = float(simInitConfig["control_variables"]["dtCIF"]) nTrials = len(trialsLengths) T = torch.tensor(trialsLengths).max().item() CFilename = simInitConfig["embedding_params"]["C_filename"] dFilename = simInitConfig["embedding_params"]["d_filename"] C, d = utils.svGPFA.configUtils.getLinearEmbeddingParams( CFilename=CFilename, dFilename=dFilename) simResFilename = "results/{:08d}_simRes.pickle".format(simResNumber) latentFigFilenamePattern = \ "figures/{:08d}_simulation_latent_trial{:03d}_latent{:03d}.{{:s}}".format(simResNumber, trialToPlot, latentToPlot) embeddingFigFilenamePattern = \ "figures/{:08d}_simulation_embedding_trial{:03d}_neuron{:03d}.{{:s}}".format(simResNumber, trialToPlot, neuronToPlot) cifFigFilenamePattern = \ "figures/{:08d}_simulation_cif_trial{:03d}_neuron{:03d}.{{:s}}".format(simResNumber, trialToPlot, neuronToPlot) spikesTimesFigFilenamePattern = \ "figures/{:08d}_simulation_spikesTimes_trial{:03d}_neuron{:03d}.{{:s}}".format(simResNumber, trialToPlot, neuronToPlot) spikesRatesFigFilenamePattern = \ "figures/{:08d}_simulation_spikesRates_neuron{:03d}.{{:s}}".format(simResNumber, neuronToPlot) ksTestTimeRescalingFigFilenamePattern = \ "figures/{:08d}_simulation_ksTestTimeRescaling_trial{:03d}_neuron{:03d}.{{:s}}".format(simResNumber, trialToPlot, neuronToPlot) rocFigFilenamePattern = \ "figures/{:08d}_simulation_rocAnalysis_trial{:03d}_neuron{:03d}.{{:s}}".format(simResNumber, trialToPlot, neuronToPlot) with open(simResFilename, "rb") as f: simRes = pickle.load(f) times = simRes["times"] latentsSamples = simRes["latents"] latentsMeans = simRes["latentsMeans"] latentsSTDs = simRes["latentsSTDs"] cifValues = simRes["cifValues"] spikes = simRes["spikes"] pio.renderers.default = "browser" timesLatentToPlot = times[trialToPlot] latentSamplesToPlot = latentsSamples[trialToPlot][latentToPlot, :] latentMeansToPlot = latentsMeans[trialToPlot][latentToPlot, :] latentSTDsToPlot = latentsSTDs[trialToPlot][latentToPlot, :] title = "Trial {:d}, Latent {:d}".format(trialToPlot, latentToPlot) fig = plot.svGPFA.plotUtilsPlotly.getSimulatedLatentPlot( times=timesLatentToPlot, latentSamples=latentSamplesToPlot, latentMeans=latentMeansToPlot, latentSTDs=latentSTDsToPlot, title=title) fig.write_image(latentFigFilenamePattern.format("png")) fig.write_html(latentFigFilenamePattern.format("html")) fig.show() # embeddingSamples[r], embeddingMeans[r], embeddingSTDs \in nNeurons x nSamples embeddingSamples = [ torch.matmul(C, latentsSamples[r]) + d for r in range(nTrials) ] embeddingMeans = [ torch.matmul(C, latentsMeans[r]) + d for r in range(nTrials) ] embeddingSTDs = [torch.matmul(C, latentsSTDs[r]) for r in range(nTrials)] timesEmbeddingToPlot = times[trialToPlot] embeddingSamplesToPlot = embeddingSamples[trialToPlot][neuronToPlot, :] embeddingMeansToPlot = embeddingMeans[trialToPlot][neuronToPlot, :] embeddingSTDsToPlot = embeddingSTDs[trialToPlot][neuronToPlot, :] title = "Trial {:d}, Neuron {:d}".format(trialToPlot, neuronToPlot) fig = plot.svGPFA.plotUtilsPlotly.getSimulatedEmbeddingPlot( times=timesEmbeddingToPlot, samples=embeddingSamplesToPlot, means=embeddingMeansToPlot, stds=embeddingSTDsToPlot, title=title) fig.write_image(embeddingFigFilenamePattern.format("png")) fig.write_html(embeddingFigFilenamePattern.format("html")) fig.show() timesCIFToPlot = times[trialToPlot] valuesCIFToPlot = cifValues[trialToPlot][neuronToPlot] title = "Trial {:d}, Neuron {:d}".format(trialToPlot, neuronToPlot) fig = plot.svGPFA.plotUtilsPlotly.getPlotCIF(times=timesCIFToPlot, values=valuesCIFToPlot, title=title) fig.write_image(cifFigFilenamePattern.format("png")) fig.write_html(cifFigFilenamePattern.format("html")) fig.show() spikesToPlot = spikes[trialToPlot] title = "Trial {:d}".format(trialToPlot) fig = plot.svGPFA.plotUtilsPlotly.getSimulatedSpikesTimesPlotOneTrial( spikesTimes=spikesToPlot, title=title) fig.write_image(spikesTimesFigFilenamePattern.format("png")) fig.write_html(spikesTimesFigFilenamePattern.format("html")) fig.show() spikesRates = utils.svGPFA.miscUtils.computeSpikeRates(trialsTimes=times, spikesTimes=spikes) fig = plot.svGPFA.plotUtilsPlotly.getPlotSpikeRatesForAllTrialsAndAllNeurons( spikesRates=spikesRates) fig.write_image(spikesRatesFigFilenamePattern.format("png")) fig.write_html(spikesRatesFigFilenamePattern.format("html")) fig.show() oneTrialCIFTimes = torch.arange(0, T, dtCIF) cifTimes = torch.unsqueeze(torch.ger(torch.ones(nTrials), oneTrialCIFTimes), dim=2) cifTimesKS = cifTimes[trialToPlot, :, 0] cifValuesKS = cifValues[trialToPlot][neuronToPlot] spikesTimesKS = spikes[trialToPlot][neuronToPlot] diffECDFsX, diffECDFsY, estECDFx, estECDFy, simECDFx, simECDFy, cb = stats.pointProcess.tests.KSTestTimeRescalingNumericalCorrection( spikesTimes=spikesTimesKS, cifTimes=cifTimesKS, cifValues=cifValuesKS, gamma=nResamplesKSTest) title = "Trial {:d}, Neuron {:d} ({:d} spikes)".format( trialToPlot, neuronToPlot, len(spikesTimesKS)) fig = plot.svGPFA.plotUtils.plotResKSTestTimeRescalingNumericalCorrection( diffECDFsX=diffECDFsX, diffECDFsY=diffECDFsY, estECDFx=estECDFx, estECDFy=estECDFy, simECDFx=simECDFx, simECDFy=simECDFy, cb=cb, title=title) plt.savefig(fname=ksTestTimeRescalingFigFilenamePattern.format("png")) plt.figure() pk = cifValuesKS * dtCIF bins = pd.interval_range(start=0, end=T, periods=len(pk)) # start binning spikes using pandas cutRes, _ = pd.cut(spikesTimesKS, bins=bins, retbins=True) Y = torch.from_numpy(cutRes.value_counts().values) fpr, tpr, thresholds = sklearn.metrics.roc_curve(Y, pk, pos_label=1) roc_auc = sklearn.metrics.auc(fpr, tpr) title = "Trial {:d}, Neuron {:d}".format(trialToPlot, neuronToPlot) fig = plot.svGPFA.plotUtils.plotResROCAnalysis(fpr=fpr, tpr=tpr, auc=roc_auc, title=title) plt.savefig(fname=rocFigFilenamePattern.format("png")) plt.show() pdb.set_trace()
def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = f"invalid option for 'closed': {bad_closed}" with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed)
def test_errors(self): # not enough params msg = ('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): interval_range(start=0) with tm.assert_raises_regex(ValueError, msg): interval_range(end=5) with tm.assert_raises_regex(ValueError, msg): interval_range(periods=2) with tm.assert_raises_regex(ValueError, msg): interval_range() # too many params with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=5, periods=6) # mixed units msg = 'start, end, freq need to be type compatible' with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, end=Timestamp('20130101'), freq=2) with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, end=Timedelta('1 day'), freq=2) with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, end=10, freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timestamp('20130101'), end=10, freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timestamp('20130101'), end=Timedelta('1 day'), freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timestamp('20130101'), end=Timestamp('20130110'), freq=2) with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timedelta('1 day'), end=10, freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timedelta('1 day'), end=Timestamp('20130110'), freq='D') with tm.assert_raises_regex(TypeError, msg): interval_range(start=Timedelta('1 day'), end=Timedelta('10 days'), freq=2) # invalid periods msg = 'periods must be a number, got foo' with tm.assert_raises_regex(TypeError, msg): interval_range(start=0, periods='foo') # invalid start msg = 'start must be numeric or datetime-like, got foo' with tm.assert_raises_regex(ValueError, msg): interval_range(start='foo', periods=10) # invalid end msg = r'end must be numeric or datetime-like, got \(0, 1\]' with tm.assert_raises_regex(ValueError, msg): interval_range(end=Interval(0, 1), periods=10) # invalid freq for datetime-like msg = 'freq must be numeric or convertible to DateOffset, got foo' with tm.assert_raises_regex(ValueError, msg): interval_range(start=0, end=10, freq='foo') with tm.assert_raises_regex(ValueError, msg): interval_range(start=Timestamp('20130101'), periods=10, freq='foo') with tm.assert_raises_regex(ValueError, msg): interval_range(end=Timedelta('1 day'), periods=10, freq='foo')
def generate(self): columns = list(self.dataframe.columns) # put std to the end columns.sort(key=lambda x: 'std' in x) self.dataframe = self.dataframe[columns] # create intervals for coloring # ---- interval 1 for all but std closed_on = 'right' lower_bound = 0.3 upper_bound = 0.9 mid_periods = 15 intervals1 = pd.IntervalIndex.from_breaks([0, lower_bound], closed=closed_on) intervals2 = pd.interval_range(lower_bound, upper_bound, periods=mid_periods, closed=closed_on) intervals3 = pd.IntervalIndex.from_breaks([upper_bound, 1], closed=closed_on) interval_list_1 = intervals1.append([intervals2, intervals3]) # ---- interval 2 for std closed_on = 'right' lower_bound = 0.05 upper_bound = 0.1 mid_periods = 15 intervals1 = pd.IntervalIndex.from_breaks([0, lower_bound], closed=closed_on) intervals2 = pd.interval_range(lower_bound, upper_bound, periods=mid_periods, closed=closed_on) intervals3 = pd.IntervalIndex.from_breaks([upper_bound, 1], closed=closed_on) interval_list_2 = intervals1.append([intervals2, intervals3]) # interval matrix, exclude first column (Class) interval_dataframe = self.dataframe.iloc[:, 1:] for column in self.dataframe.select_dtypes(exclude=['object']): if 'std' in column: interval_list = interval_list_2 else: interval_list = interval_list_1 interval_dataframe[column] = self.dataframe[column].map( lambda value: get_interval_index_of_value(interval_list, value)) # colors color_low = Color('#e60000') # red color_lower_bound = Color('#ffa31a') # orange color_upper_bound = Color('#248f24') # green color_mid = Color('#ffcc00') # yellow color_high = Color('#006622') # dark green colors = list(color_lower_bound.range_to(color_mid, (mid_periods // 2) if mid_periods % 2 == 0 else ( mid_periods // 2 + 1))) colors.extend( list(color_mid.range_to(color_upper_bound, mid_periods // 2 + 1))) # +1 because first one has to be deleted # get unique hex values of colors unique_colors = [] for color in colors: if color.hex in unique_colors: continue else: unique_colors.append(color.hex) colors = [color_low.hex] colors.extend(unique_colors) colors.append(color_high.hex) # fill color matrix color_matrix = [['#808080' for _ in range(len(interval_dataframe))]] # Class column, gray # reversed colors for low=good and high=bad colors_reversed = colors.copy() colors_reversed.reverse() # fill color matrix for column in interval_dataframe.columns: colors = colors_reversed if 'std' in column else colors color_matrix.append([colors[value] for value in interval_dataframe[column]]) # round values, except first column (Class) values = self.dataframe values.iloc[:, 1:] = values.iloc[:, 1:].applymap(lambda x: round(x, self.precision)) # create figure fig = go.Figure( data=[go.Table( header=dict( values=values.columns, line_color='white', fill_color='white', align='center', font=dict(color='black', size=9) ), cells=dict( values=values.T, fill_color=color_matrix, line_color=color_matrix, align='center', font=dict(color='white', size=8) ) )] ) # set layout fig.update_layout( width=len(self.dataframe.columns) * 100 ) return fig
def test_construction_from_timestamp(self, closed): # combinations of start/end/periods without freq start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06') breaks = date_range(start=start, end=end) expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) result = interval_range(start=start, end=end, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(start=start, periods=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(end=end, periods=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) # combinations of start/end/periods with fixed freq freq = '2D' start, end = Timestamp('2017-01-01'), Timestamp('2017-01-07') breaks = date_range(start=start, end=end, freq=freq) expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) result = interval_range(start=start, end=end, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(start=start, periods=3, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(end=end, periods=3, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) # output truncates early if freq causes end to be skipped. end = Timestamp('2017-01-08') result = interval_range(start=start, end=end, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) # combinations of start/end/periods with non-fixed freq freq = 'M' start, end = Timestamp('2017-01-01'), Timestamp('2017-12-31') breaks = date_range(start=start, end=end, freq=freq) expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) result = interval_range(start=start, end=end, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(start=start, periods=11, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) result = interval_range(end=end, periods=11, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected) # output truncates early if freq causes end to be skipped. end = Timestamp('2018-01-15') result = interval_range(start=start, end=end, freq=freq, name='foo', closed=closed) tm.assert_index_equal(result, expected)
def create_data(): """ create the pickle/msgpack data """ data = { u'A': [0., 1., 2., 3., np.nan], u'B': [0, 1, 0, 1, 0], u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], u'D': date_range('1/1/2009', periods=5), u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10), float=Index(np.arange(10, dtype=np.float64)), uint=Index(np.arange(10, dtype=np.uint64)), timedelta=timedelta_range('00:00:00', freq='30T', periods=10)) if _loose_version >= LooseVersion('0.18'): from pandas import RangeIndex index['range'] = RangeIndex(10) if _loose_version >= LooseVersion('0.21'): from pandas import interval_range index['interval'] = interval_range(0, periods=10) mi = dict(reg2=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'], [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']])), names=[u'first', u'second'])) series = dict(float=Series(data[u'A']), int=Series(data[u'B']), mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples( tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), index=[u'A', u'B', u'C', u'D', u'A']), cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list(u"ABCDA") frame = dict(float=DataFrame({u'A': series[u'float'], u'B': series[u'float'] + 1}), int=DataFrame({u'A': series[u'int'], u'B': series[u'int'] + 1}), mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}), mi=DataFrame({u'A': np.arange(5).astype(np.float64), u'B': np.arange(5).astype(np.int64)}, index=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'], [u'one', u'two', u'one', u'two', u'three']])), names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=[u'A', u'B', u'A']), cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), cat_and_float=DataFrame({ u'A': Categorical([u'foo', u'bar', u'baz']), u'B': np.arange(3).astype(np.int64)}), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET')}, index=range(5)), dt_mixed2_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET'), u'C': Timestamp('20130603', tz='UTC')}, index=range(5)) ) with catch_warnings(record=True): mixed_dup_panel = Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'int']}) mixed_dup_panel.items = [u'ItemA', u'ItemA'] panel = dict(float=Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'float'] + 1}), dup=Panel( np.arange(30).reshape(3, 5, 2).astype(np.float64), items=[u'A', u'B', u'A']), mixed_dup=mixed_dup_panel) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, tz=Timestamp('2011-01-01', tz='US/Eastern')) if _loose_version < LooseVersion('0.19.2'): timestamp['freq'] = Timestamp('2011-01-01', offset='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', offset='M') else: timestamp['freq'] = Timestamp('2011-01-01', freq='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M') off = {'DateOffset': DateOffset(years=1), 'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824), 'BusinessDay': BusinessDay(offset=timedelta(seconds=9)), 'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'), 'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'), 'SemiMonthBegin': SemiMonthBegin(day_of_month=9), 'SemiMonthEnd': SemiMonthEnd(day_of_month=24), 'MonthBegin': MonthBegin(1), 'MonthEnd': MonthEnd(1), 'QuarterBegin': QuarterBegin(1), 'QuarterEnd': QuarterEnd(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'YearEnd': YearEnd(1), 'Week': Week(1), 'Week_Tues': Week(2, normalize=False, weekday=1), 'WeekOfMonth': WeekOfMonth(week=3, weekday=4), 'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3), 'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"), 'Easter': Easter(), 'Hour': Hour(1), 'Minute': Minute(1)} return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off)
def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed)
class TestSeriesInterpolateData: def test_interpolate(self, datetime_series, string_series): ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index) ts_copy = ts.copy() ts_copy[5:10] = np.NaN linear_interp = ts_copy.interpolate(method="linear") tm.assert_series_equal(linear_interp, ts) ord_ts = Series([d.toordinal() for d in datetime_series.index], index=datetime_series.index).astype(float) ord_ts_copy = ord_ts.copy() ord_ts_copy[5:10] = np.NaN time_interp = ord_ts_copy.interpolate(method="time") tm.assert_series_equal(time_interp, ord_ts) def test_interpolate_time_raises_for_non_timeseries(self): # When method='time' is used on a non-TimeSeries that contains a null # value, a ValueError should be raised. non_ts = Series([0, 1, 2, np.NaN]) msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex" with pytest.raises(ValueError, match=msg): non_ts.interpolate(method="time") @td.skip_if_no_scipy def test_interpolate_pchip(self): ser = Series(np.sort(np.random.uniform(size=100))) # interpolate at new_index new_index = ser.index.union( Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])).astype(float) interp_s = ser.reindex(new_index).interpolate(method="pchip") # does not blow up, GH5977 interp_s[49:51] @td.skip_if_no_scipy def test_interpolate_akima(self): ser = Series([10, 11, 12, 13]) expected = Series( [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), ) # interpolate at new_index new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(float) interp_s = ser.reindex(new_index).interpolate(method="akima") tm.assert_series_equal(interp_s[1:3], expected) @td.skip_if_no_scipy def test_interpolate_piecewise_polynomial(self): ser = Series([10, 11, 12, 13]) expected = Series( [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), ) # interpolate at new_index new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(float) interp_s = ser.reindex(new_index).interpolate( method="piecewise_polynomial") tm.assert_series_equal(interp_s[1:3], expected) @td.skip_if_no_scipy def test_interpolate_from_derivatives(self): ser = Series([10, 11, 12, 13]) expected = Series( [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), ) # interpolate at new_index new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(float) interp_s = ser.reindex(new_index).interpolate( method="from_derivatives") tm.assert_series_equal(interp_s[1:3], expected) @pytest.mark.parametrize( "kwargs", [ {}, pytest.param({ "method": "polynomial", "order": 1 }, marks=td.skip_if_no_scipy), ], ) def test_interpolate_corners(self, kwargs): s = Series([np.nan, np.nan]) tm.assert_series_equal(s.interpolate(**kwargs), s) s = Series([], dtype=object).interpolate() tm.assert_series_equal(s.interpolate(**kwargs), s) def test_interpolate_index_values(self): s = Series(np.nan, index=np.sort(np.random.rand(30))) s[::3] = np.random.randn(10) vals = s.index.values.astype(float) result = s.interpolate(method="index") expected = s.copy() bad = isna(expected.values) good = ~bad expected = Series(np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad]) tm.assert_series_equal(result[bad], expected) # 'values' is synonymous with 'index' for the method kwarg other_result = s.interpolate(method="values") tm.assert_series_equal(other_result, result) tm.assert_series_equal(other_result[bad], expected) def test_interpolate_non_ts(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) msg = ( "time-weighted interpolation only works on Series or DataFrames " "with a DatetimeIndex") with pytest.raises(ValueError, match=msg): s.interpolate(method="time") @pytest.mark.parametrize( "kwargs", [ {}, pytest.param({ "method": "polynomial", "order": 1 }, marks=td.skip_if_no_scipy), ], ) def test_nan_interpolate(self, kwargs): s = Series([0, 1, np.nan, 3]) result = s.interpolate(**kwargs) expected = Series([0.0, 1.0, 2.0, 3.0]) tm.assert_series_equal(result, expected) def test_nan_irregular_index(self): s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) result = s.interpolate() expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9]) tm.assert_series_equal(result, expected) def test_nan_str_index(self): s = Series([0, 1, 2, np.nan], index=list("abcd")) result = s.interpolate() expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd")) tm.assert_series_equal(result, expected) @td.skip_if_no_scipy def test_interp_quad(self): sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4]) result = sq.interpolate(method="quadratic") expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4]) tm.assert_series_equal(result, expected) @td.skip_if_no_scipy def test_interp_scipy_basic(self): s = Series([1, 3, np.nan, 12, np.nan, 25]) # slinear expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0]) result = s.interpolate(method="slinear") tm.assert_series_equal(result, expected) result = s.interpolate(method="slinear", downcast="infer") tm.assert_series_equal(result, expected) # nearest expected = Series([1, 3, 3, 12, 12, 25]) result = s.interpolate(method="nearest") tm.assert_series_equal(result, expected.astype("float")) result = s.interpolate(method="nearest", downcast="infer") tm.assert_series_equal(result, expected) # zero expected = Series([1, 3, 3, 12, 12, 25]) result = s.interpolate(method="zero") tm.assert_series_equal(result, expected.astype("float")) result = s.interpolate(method="zero", downcast="infer") tm.assert_series_equal(result, expected) # quadratic # GH #15662. expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0]) result = s.interpolate(method="quadratic") tm.assert_series_equal(result, expected) result = s.interpolate(method="quadratic", downcast="infer") tm.assert_series_equal(result, expected) # cubic expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0]) result = s.interpolate(method="cubic") tm.assert_series_equal(result, expected) def test_interp_limit(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) result = s.interpolate(method="linear", limit=2) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("limit", [-1, 0]) def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit): # GH 9217: make sure limit is greater than zero. s = pd.Series([1, 2, np.nan, 4]) method, kwargs = nontemporal_method with pytest.raises(ValueError, match="Limit must be greater than 0"): s.interpolate(limit=limit, method=method, **kwargs) def test_interpolate_invalid_float_limit(self, nontemporal_method): # GH 9217: make sure limit is an integer. s = pd.Series([1, 2, np.nan, 4]) method, kwargs = nontemporal_method limit = 2.0 with pytest.raises(ValueError, match="Limit must be an integer"): s.interpolate(limit=limit, method=method, **kwargs) @pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"]) def test_interp_invalid_method(self, invalid_method): s = Series([1, 3, np.nan, 12, np.nan, 25]) msg = f"method must be one of.* Got '{invalid_method}' instead" with pytest.raises(ValueError, match=msg): s.interpolate(method=invalid_method) # When an invalid method and invalid limit (such as -1) are # provided, the error message reflects the invalid method. with pytest.raises(ValueError, match=msg): s.interpolate(method=invalid_method, limit=-1) def test_interp_limit_forward(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) # Provide 'forward' (the default) explicitly here. expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) result = s.interpolate(method="linear", limit=2, limit_direction="forward") tm.assert_series_equal(result, expected) result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD") tm.assert_series_equal(result, expected) def test_interp_unlimited(self): # these test are for issue #16282 default Limit=None is unlimited s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan]) expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) result = s.interpolate(method="linear", limit_direction="both") tm.assert_series_equal(result, expected) expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) result = s.interpolate(method="linear", limit_direction="forward") tm.assert_series_equal(result, expected) expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan]) result = s.interpolate(method="linear", limit_direction="backward") tm.assert_series_equal(result, expected) def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) msg = (r"Invalid limit_direction: expecting one of \['forward', " r"'backward', 'both'\], got 'abc'") with pytest.raises(ValueError, match=msg): s.interpolate(method="linear", limit=2, limit_direction="abc") # raises an error even if no limit is specified. with pytest.raises(ValueError, match=msg): s.interpolate(method="linear", limit_direction="abc") # limit_area introduced GH #16284 def test_interp_limit_area(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series( [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) expected = Series( [np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan]) result = s.interpolate(method="linear", limit_area="inside") tm.assert_series_equal(result, expected) expected = Series( [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan]) result = s.interpolate(method="linear", limit_area="inside", limit=1) tm.assert_series_equal(result, expected) expected = Series( [np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan]) result = s.interpolate(method="linear", limit_area="inside", limit_direction="both", limit=1) tm.assert_series_equal(result, expected) expected = Series( [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) result = s.interpolate(method="linear", limit_area="outside") tm.assert_series_equal(result, expected) expected = Series( [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]) result = s.interpolate(method="linear", limit_area="outside", limit=1) tm.assert_series_equal(result, expected) expected = Series( [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]) result = s.interpolate(method="linear", limit_area="outside", limit_direction="both", limit=1) tm.assert_series_equal(result, expected) expected = Series( [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) result = s.interpolate(method="linear", limit_area="outside", limit_direction="backward") tm.assert_series_equal(result, expected) # raises an error even if limit type is wrong. msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc" with pytest.raises(ValueError, match=msg): s.interpolate(method="linear", limit_area="abc") def test_interp_limit_direction(self): # These tests are for issue #9218 -- fill NaNs in both directions. s = Series([1, 3, np.nan, np.nan, np.nan, 11]) expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0]) result = s.interpolate(method="linear", limit=2, limit_direction="backward") tm.assert_series_equal(result, expected) expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0]) result = s.interpolate(method="linear", limit=1, limit_direction="both") tm.assert_series_equal(result, expected) # Check that this works on a longer series of nans. s = Series( [1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan]) expected = Series( [1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]) result = s.interpolate(method="linear", limit=2, limit_direction="both") tm.assert_series_equal(result, expected) expected = Series( [1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]) result = s.interpolate(method="linear", limit=1, limit_direction="both") tm.assert_series_equal(result, expected) def test_interp_limit_to_ends(self): # These test are for issue #10420 -- flow back to beginning. s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan]) result = s.interpolate(method="linear", limit=2, limit_direction="backward") tm.assert_series_equal(result, expected) expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0]) result = s.interpolate(method="linear", limit=2, limit_direction="both") tm.assert_series_equal(result, expected) def test_interp_limit_before_ends(self): # These test are for issue #11115 -- limit ends properly. s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan]) expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan]) result = s.interpolate(method="linear", limit=1, limit_direction="forward") tm.assert_series_equal(result, expected) expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan]) result = s.interpolate(method="linear", limit=1, limit_direction="backward") tm.assert_series_equal(result, expected) expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan]) result = s.interpolate(method="linear", limit=1, limit_direction="both") tm.assert_series_equal(result, expected) @td.skip_if_no_scipy def test_interp_all_good(self): s = Series([1, 2, 3]) result = s.interpolate(method="polynomial", order=1) tm.assert_series_equal(result, s) # non-scipy result = s.interpolate() tm.assert_series_equal(result, s) @pytest.mark.parametrize( "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)]) def test_interp_multiIndex(self, check_scipy): idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")]) s = Series([1, 2, np.nan], index=idx) expected = s.copy() expected.loc[2] = 2 result = s.interpolate() tm.assert_series_equal(result, expected) msg = "Only `method=linear` interpolation is supported on MultiIndexes" if check_scipy: with pytest.raises(ValueError, match=msg): s.interpolate(method="polynomial", order=1) @td.skip_if_no_scipy def test_interp_nonmono_raise(self): s = Series([1, np.nan, 3], index=[0, 2, 1]) msg = "krogh interpolation requires that the index be monotonic" with pytest.raises(ValueError, match=msg): s.interpolate(method="krogh") @td.skip_if_no_scipy @pytest.mark.parametrize("method", ["nearest", "pad"]) def test_interp_datetime64(self, method, tz_naive_fixture): df = Series([1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)) result = df.interpolate(method=method) expected = Series( [1.0, 1.0, 3.0], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture), ) tm.assert_series_equal(result, expected) def test_interp_pad_datetime64tz_values(self): # GH#27628 missing.interpolate_2d should handle datetimetz values dti = pd.date_range("2015-04-05", periods=3, tz="US/Central") ser = pd.Series(dti) ser[1] = pd.NaT result = ser.interpolate(method="pad") expected = pd.Series(dti) expected[1] = expected[0] tm.assert_series_equal(result, expected) def test_interp_limit_no_nans(self): # GH 7173 s = pd.Series([1.0, 2.0, 3.0]) result = s.interpolate(limit=1) expected = s tm.assert_series_equal(result, expected) @td.skip_if_no_scipy @pytest.mark.parametrize("method", ["polynomial", "spline"]) def test_no_order(self, method): # see GH-10633, GH-24014 s = Series([0, 1, np.nan, 3]) msg = "You must specify the order of the spline or polynomial" with pytest.raises(ValueError, match=msg): s.interpolate(method=method) @td.skip_if_no_scipy @pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan]) def test_interpolate_spline_invalid_order(self, order): s = Series([0, 1, np.nan, 3]) msg = "order needs to be specified and greater than 0" with pytest.raises(ValueError, match=msg): s.interpolate(method="spline", order=order) @td.skip_if_no_scipy def test_spline(self): s = Series([1, 2, np.nan, 4, 5, np.nan, 7]) result = s.interpolate(method="spline", order=1) expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) tm.assert_series_equal(result, expected) @td.skip_if_no_scipy def test_spline_extrapolate(self): s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) result3 = s.interpolate(method="spline", order=1, ext=3) expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0]) tm.assert_series_equal(result3, expected3) result1 = s.interpolate(method="spline", order=1, ext=0) expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) tm.assert_series_equal(result1, expected1) @td.skip_if_no_scipy def test_spline_smooth(self): s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7]) assert (s.interpolate(method="spline", order=3, s=0)[5] != s.interpolate(method="spline", order=3)[5]) @td.skip_if_no_scipy def test_spline_interpolation(self): s = Series(np.arange(10)**2) s[np.random.randint(0, 9, 3)] = np.nan result1 = s.interpolate(method="spline", order=1) expected1 = s.interpolate(method="spline", order=1) tm.assert_series_equal(result1, expected1) def test_interp_timedelta64(self): # GH 6424 df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3])) result = df.interpolate(method="time") expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3])) tm.assert_series_equal(result, expected) # test for non uniform spacing df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4])) result = df.interpolate(method="time") expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4])) tm.assert_series_equal(result, expected) def test_series_interpolate_method_values(self): # GH#1646 rng = date_range("1/1/2000", "1/20/2000", freq="D") ts = Series(np.random.randn(len(rng)), index=rng) ts[::2] = np.nan result = ts.interpolate(method="values") exp = ts.interpolate() tm.assert_series_equal(result, exp) def test_series_interpolate_intraday(self): # #1698 index = pd.date_range("1/1/2012", periods=4, freq="12D") ts = pd.Series([0, 12, 24, 36], index) new_index = index.append(index + pd.DateOffset(days=1)).sort_values() exp = ts.reindex(new_index).interpolate(method="time") index = pd.date_range("1/1/2012", periods=4, freq="12H") ts = pd.Series([0, 12, 24, 36], index) new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() result = ts.reindex(new_index).interpolate(method="time") tm.assert_numpy_array_equal(result.values, exp.values) @pytest.mark.parametrize( "ind", [ ["a", "b", "c", "d"], pd.period_range(start="2019-01-01", periods=4), pd.interval_range(start=0, end=4), ], ) def test_interp_non_timedelta_index(self, interp_methods_ind, ind): # gh 21662 df = pd.DataFrame([0, 1, np.nan, 3], index=ind) method, kwargs = interp_methods_ind if method == "pchip": pytest.importorskip("scipy") if method == "linear": result = df[0].interpolate(**kwargs) expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) tm.assert_series_equal(result, expected) else: expected_error = ( "Index column must be numeric or datetime type when " f"using {method} method other than linear. " "Try setting a numeric or datetime index column before " "interpolating.") with pytest.raises(ValueError, match=expected_error): df[0].interpolate(method=method, **kwargs) def test_interpolate_timedelta_index(self, interp_methods_ind): """ Tests for non numerical index types - object, period, timedelta Note that all methods except time, index, nearest and values are tested here. """ # gh 21662 ind = pd.timedelta_range(start=1, periods=4) df = pd.DataFrame([0, 1, np.nan, 3], index=ind) method, kwargs = interp_methods_ind if method == "pchip": pytest.importorskip("scipy") if method in {"linear", "pchip"}: result = df[0].interpolate(method=method, **kwargs) expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) tm.assert_series_equal(result, expected) else: pytest.skip( "This interpolation method is not supported for Timedelta Index yet." ) @pytest.mark.parametrize( "ascending, expected_values", [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])], ) def test_interpolate_unsorted_index(self, ascending, expected_values): # GH 21037 ts = pd.Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1]) result = ts.sort_index(ascending=ascending).interpolate(method="index") expected = pd.Series(data=expected_values, index=expected_values, dtype=float) tm.assert_series_equal(result, expected)