class IndexAppend(object):
    """Benchmark ``Index.append`` with a long list of range/int/object indexes."""

    goal_time = 0.2

    def setup(self):
        """Build one base index per flavour plus the lists to be appended."""
        N = 10000
        chunk = 100

        # Base indexes covering [0, 100).
        self.range_idx = RangeIndex(0, chunk)
        self.int_idx = self.range_idx.astype(int)
        self.obj_idx = self.int_idx.astype(str)

        # One consecutive 100-element chunk per i in 1..N-1, in each flavour.
        self.range_idxs = [
            RangeIndex(lower, lower + chunk)
            for lower in range(chunk, N * chunk, chunk)
        ]
        self.int_idxs = [ranged.astype(int) for ranged in self.range_idxs]
        self.object_idxs = [as_int.astype(str) for as_int in self.int_idxs]

    def time_append_range_list(self):
        """Time appending the list of RangeIndex chunks."""
        self.range_idx.append(self.range_idxs)

    def time_append_int_list(self):
        """Time appending the list of integer-cast chunks."""
        self.int_idx.append(self.int_idxs)

    def time_append_obj_list(self):
        """Time appending the list of string-cast chunks."""
        self.obj_idx.append(self.object_idxs)
def test_copy(self): i = RangeIndex(5, name='Foo') i_copy = i.copy() assert i_copy is not i assert i_copy.identical(i) assert i_copy._range == range(0, 5, 1) assert i_copy.name == 'Foo'
def test_constructor_range(self):
    """Check construction of a RangeIndex from builtin ``range`` objects."""
    # Passing a range straight to the constructor is expected to fail.
    with self.assertRaises(TypeError):
        RangeIndex(range(1, 5, 2))

    # (input range, expected start/stop/step) pairs for ``from_range``.
    from_range_cases = [
        (range(1, 5, 2), (1, 5, 2)),
        (range(5, 6), (5, 6, 1)),
        # an invalid range
        (range(5, 1), (0, 0, 1)),
        (range(5), (0, 5, 1)),
    ]
    for source, bounds in from_range_cases:
        result = RangeIndex.from_range(source)
        expected = RangeIndex(*bounds)
        self.assertTrue(result.equals(expected))

    # The generic Index constructor accepts a range as well.
    result = Index(range(1, 5, 2))
    expected = RangeIndex(1, 5, 2)
    self.assertTrue(result.equals(expected))

    with self.assertRaises(TypeError):
        Index(range(1, 5, 2), dtype='float64')
def test_constructor_range(self):
    """Check construction of a RangeIndex from builtin ``range`` objects."""
    # Passing a range straight to the constructor is expected to fail.
    with pytest.raises(TypeError):
        RangeIndex(range(1, 5, 2))

    # (input range, expected start/stop/step) pairs for ``from_range``.
    from_range_cases = [
        (range(1, 5, 2), (1, 5, 2)),
        (range(5, 6), (5, 6, 1)),
        # an invalid range
        (range(5, 1), (0, 0, 1)),
        (range(5), (0, 5, 1)),
    ]
    for source, bounds in from_range_cases:
        result = RangeIndex.from_range(source)
        expected = RangeIndex(*bounds)
        tm.assert_index_equal(result, expected, exact=True)

    # The generic Index constructor accepts a range as well.
    result = Index(range(1, 5, 2))
    expected = RangeIndex(1, 5, 2)
    tm.assert_index_equal(result, expected, exact=True)

    with pytest.raises(TypeError):
        Index(range(1, 5, 2), dtype='float64')
def test_copy(self):
    """A copied RangeIndex is a new object that is identical to its source."""
    source = RangeIndex(5, name='Foo')
    clone = source.copy()

    # Distinct object, yet identical to the source.
    self.assertTrue(clone is not source)
    self.assertTrue(clone.identical(source))

    # The private bounds and the name are carried over.
    self.assertEqual(clone._start, 0)
    self.assertEqual(clone._stop, 5)
    self.assertEqual(clone._step, 1)
    self.assertEqual(clone.name, 'Foo')
def test_nbytes(self): # memory savings vs int index i = RangeIndex(0, 1000) self.assertTrue(i.nbytes < i.astype(int).nbytes / 10) # constant memory usage i2 = RangeIndex(0, 10) self.assertEqual(i.nbytes, i2.nbytes)
def test_copy(self):
    """A copied RangeIndex is a new object that is identical to its source."""
    source = RangeIndex(5, name='Foo')
    clone = source.copy()

    # Distinct object, yet identical to the source.
    assert clone is not source
    assert clone.identical(source)

    # The private bounds and the name are carried over.
    assert clone._start == 0
    assert clone._stop == 5
    assert clone._step == 1
    assert clone.name == 'Foo'
def test_nbytes(self): # memory savings vs int index i = RangeIndex(0, 1000) assert i.nbytes < i.astype(int).nbytes / 10 # constant memory usage i2 = RangeIndex(0, 10) assert i.nbytes == i2.nbytes
def test_view(self):
    """Views of an empty named RangeIndex keep its name and its values."""
    base = RangeIndex(0, name='Foo')

    # A plain view keeps the name.
    plain_view = base.view()
    assert plain_view.name == 'Foo'

    # A view with the 'i8' dtype string is compared against the raw values.
    int_view = base.view('i8')
    tm.assert_numpy_array_equal(base.values, int_view)

    # A view as RangeIndex compares equal to the original index.
    range_view = base.view(RangeIndex)
    tm.assert_index_equal(base, range_view)
def test_view(self):
    """Run the inherited view checks, then RangeIndex-specific ones."""
    super(TestRangeIndex, self).test_view()

    base = RangeIndex(0, name='Foo')

    # A plain view keeps the name.
    plain_view = base.view()
    self.assertEqual(plain_view.name, 'Foo')

    # A view with the 'i8' dtype string is compared against the raw values.
    int_view = base.view('i8')
    tm.assert_numpy_array_equal(base.values, int_view)

    # A view as RangeIndex compares equal to the original index.
    range_view = base.view(RangeIndex)
    tm.assert_index_equal(base, range_view)
def test_view(self, indices):
    """Run the inherited view checks, then RangeIndex-specific ones."""
    super(TestRangeIndex, self).test_view(indices)

    base = RangeIndex(0, name='Foo')

    # A plain view keeps the name.
    plain_view = base.view()
    assert plain_view.name == 'Foo'

    # A view with the 'i8' dtype string is compared against the raw values.
    int_view = base.view('i8')
    tm.assert_numpy_array_equal(base.values, int_view)

    # A view as RangeIndex compares equal to the original index.
    range_view = base.view(RangeIndex)
    tm.assert_index_equal(base, range_view)
def test_constructor_corner(self):
    """Corner cases of the RangeIndex constructor (dtype and bad arguments)."""
    object_values = np.array([1, 2, 3, 4], dtype=object)
    index = RangeIndex(1, 5)

    self.assertEqual(index.values.dtype, np.int64)
    self.assertTrue(index.equals(object_values))

    # non-int raise Exception
    for bad_args in (('1', '10', '1'), (1.1, 10.2, 1.3)):
        with self.assertRaises(TypeError):
            RangeIndex(*bad_args)

    # invalid passed type
    with self.assertRaises(TypeError):
        RangeIndex(1, 5, dtype='float64')
def test_constructor_same(self):
    """Constructing a RangeIndex from a RangeIndex passes it through."""
    # pass thru w and w/o copy
    index = RangeIndex(1, 5, 2)

    uncopied = RangeIndex(index, copy=False)
    assert uncopied.identical(index)

    # Explicit copy and the default both yield an exactly equal RangeIndex.
    for ctor_kwargs in ({'copy': True}, {}):
        result = RangeIndex(index, **ctor_kwargs)
        tm.assert_index_equal(result, index, exact=True)

    with pytest.raises(TypeError):
        RangeIndex(index, dtype='float64')
def test_delete(self):
    """Deleting an end element matches the int-cast slice; bad loc raises."""
    idx = RangeIndex(5, name='Foo')

    # (location to delete, slice selecting the surviving elements)
    cases = [
        (0, slice(1, None)),
        (-1, slice(None, -1)),
    ]
    for loc, survivors in cases:
        expected = idx[survivors].astype(int)
        result = idx.delete(loc)
        self.assertTrue(result.equals(expected))
        self.assertEqual(result.name, expected.name)

    with tm.assertRaises((IndexError, ValueError)):
        # either depending on numpy version
        result = idx.delete(len(idx))
def setup(self):
    """Build one base index per flavour plus the lists to be appended."""
    N = 10000
    chunk = 100

    # Base indexes covering [0, 100).
    self.range_idx = RangeIndex(0, chunk)
    self.int_idx = self.range_idx.astype(int)
    self.obj_idx = self.int_idx.astype(str)

    # One consecutive 100-element chunk per i in 1..N-1, in each flavour.
    self.range_idxs = [
        RangeIndex(lower, lower + chunk)
        for lower in range(chunk, N * chunk, chunk)
    ]
    self.int_idxs = [ranged.astype(int) for ranged in self.range_idxs]
    self.object_idxs = [as_int.astype(str) for as_int in self.int_idxs]
def test_delete(self):
    """Deleting an end element matches the int-cast slice; bad loc raises."""
    idx = RangeIndex(5, name='Foo')

    # (location to delete, slice selecting the surviving elements)
    cases = [
        (0, slice(1, None)),
        (-1, slice(None, -1)),
    ]
    for loc, survivors in cases:
        expected = idx[survivors].astype(int)
        result = idx.delete(loc)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    with pytest.raises((IndexError, ValueError)):
        # either depending on numpy version
        result = idx.delete(len(idx))
def test_constructor_name(self): # GH12288 orig = RangeIndex(10) orig.name = 'original' copy = RangeIndex(orig) copy.name = 'copy' self.assertTrue(orig.name, 'original') self.assertTrue(copy.name, 'copy') new = Index(copy) self.assertTrue(new.name, 'copy') new.name = 'new' self.assertTrue(orig.name, 'original') self.assertTrue(new.name, 'copy') self.assertTrue(new.name, 'new')
def test_constructor_name(self): # GH12288 orig = RangeIndex(10) orig.name = 'original' copy = RangeIndex(orig) copy.name = 'copy' assert orig.name == 'original' assert copy.name == 'copy' new = Index(copy) assert new.name == 'copy' new.name = 'new' assert orig.name == 'original' assert copy.name == 'copy' assert new.name == 'new'
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
    """
    Write a DataFrame to the parquet format.

    Parameters
    ----------
    df : DataFrame
    path : string
        File path
    engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
        Parquet library to use. If 'auto', then the option
        'io.parquet.engine' is used. If that is also 'auto', then the
        first library to be installed is used.
    compression : str, optional, default 'snappy'
        compression method, includes {'gzip', 'snappy', 'brotli'}
    kwargs
        Additional keyword arguments passed to the engine

    Returns
    -------
    Whatever the selected engine's ``write`` returns.

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame, if its index is not the default
        unnamed 0..len(df)-1 integer index, or if the column names are
        not strings.
    """
    impl = get_engine(engine)

    if not isinstance(df, DataFrame):
        raise ValueError("to_parquet only support IO with DataFrames")

    valid_types = {'string', 'unicode'}

    # validate index
    # --------------

    # validate that we have only a default index
    # raise on anything else as we don't serialize the index

    if not isinstance(df.index, Int64Index):
        # NOTE: the trailing space after ".reset_index()" keeps the
        # concatenated message readable.
        raise ValueError("parquet does not support serializing {} "
                         "for the index; you can .reset_index() "
                         "to make the index into column(s)".format(
                             type(df.index)))

    if not df.index.equals(RangeIndex.from_range(range(len(df)))):
        raise ValueError("parquet does not support serializing a "
                         "non-default index for the index; you "
                         "can .reset_index() to make the index "
                         "into column(s)")

    if df.index.name is not None:
        raise ValueError("parquet does not serialize index meta-data on a "
                         "default index")

    # validate columns
    # ----------------

    # must have value column names (strings only)
    if df.columns.inferred_type not in valid_types:
        raise ValueError("parquet must have string column names")

    return impl.write(df, path, compression=compression, **kwargs)
class Range:
    """Benchmark ``min``/``max`` on increasing and decreasing RangeIndex."""

    def setup(self):
        """Create one increasing and one decreasing index with |step| 3."""
        upper = 10**7
        self.idx_inc = RangeIndex(start=0, stop=upper, step=3)
        self.idx_dec = RangeIndex(start=upper, stop=-1, step=-3)

    def time_max(self):
        """Time ``max`` on the increasing index."""
        self.idx_inc.max()

    def time_max_trivial(self):
        """Time ``max`` on the decreasing index."""
        self.idx_dec.max()

    def time_min(self):
        """Time ``min`` on the decreasing index."""
        self.idx_dec.min()

    def time_min_trivial(self):
        """Time ``min`` on the increasing index."""
        self.idx_inc.min()
def to_feather(df, path):
    """
    Write a DataFrame to the feather-format

    Parameters
    ----------
    df : DataFrame
    path : string
        File path

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame, if its index is not the default
        unnamed 0..len(df)-1 integer index, or if the column names are
        not strings.
    """
    path = _stringify_path(path)
    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    feather = _try_import()
    valid_types = {'string', 'unicode'}

    # validate index
    # --------------

    # validate that we have only a default index
    # raise on anything else as we don't serialize the index

    if not isinstance(df.index, Int64Index):
        # NOTE: the trailing space after ".reset_index()" keeps the
        # concatenated message readable.
        raise ValueError("feather does not support serializing {} "
                         "for the index; you can .reset_index() "
                         "to make the index into column(s)".format(
                             type(df.index)))

    if not df.index.equals(RangeIndex.from_range(range(len(df)))):
        raise ValueError("feather does not support serializing a "
                         "non-default index for the index; you "
                         "can .reset_index() to make the index "
                         "into column(s)")

    if df.index.name is not None:
        raise ValueError("feather does not serialize index meta-data on a "
                         "default index")

    # validate columns
    # ----------------

    # must have value column names (strings only)
    if df.columns.inferred_type not in valid_types:
        raise ValueError("feather must have string column names")

    feather.write_dataframe(df, path)
def test_intersection(self):
    """Intersections of ``self.index`` and of small RangeIndexes."""
    # intersect with Int64Index
    other = Index(np.arange(1, 6))
    result = self.index.intersection(other)
    common = np.intersect1d(self.index.values, other.values)
    expected = Index(np.sort(common))
    tm.assert_index_equal(result, expected)

    # same operands, swapped
    result = other.intersection(self.index)
    common = np.asarray(np.intersect1d(self.index.values, other.values))
    expected = Index(np.sort(common))
    tm.assert_index_equal(result, expected)

    # intersect with increasing RangeIndex, then with decreasing RangeIndex
    for other in (RangeIndex(1, 6), RangeIndex(5, 0, -1)):
        result = self.index.intersection(other)
        common = np.intersect1d(self.index.values, other.values)
        expected = Index(np.sort(common))
        tm.assert_index_equal(result, expected)

    index = RangeIndex(5)

    # intersect of non-overlapping indices
    for other in (RangeIndex(5, 10, 1), RangeIndex(-1, -5, -1)):
        result = index.intersection(other)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

    # intersection of empty indices
    other = RangeIndex(0, 0, 1)
    expected = RangeIndex(0, 0, 1)
    tm.assert_index_equal(index.intersection(other), expected)
    tm.assert_index_equal(other.intersection(index), expected)

    # intersection of non-overlapping values based on start value and gcd
    index = RangeIndex(1, 10, 2)
    other = RangeIndex(0, 10, 4)
    result = index.intersection(other)
    expected = RangeIndex(0, 0, 1)
    tm.assert_index_equal(result, expected)
def test_max_min(self, start, stop, step):
    """RangeIndex max/min agree with the materialized integer index."""
    # GH17607
    idx = RangeIndex(start, stop, step)

    expected_max = idx._int64index.max()
    assert idx.max() == expected_max

    expected_min = idx._int64index.min()
    assert idx.min() == expected_min

    # empty
    reversed_idx = RangeIndex(start, stop, -step)
    assert isna(reversed_idx.max())
    assert isna(reversed_idx.min())
def test_constructor_same(self):
    """Constructing a RangeIndex from a RangeIndex passes it through."""
    # pass thru w and w/o copy
    index = RangeIndex(1, 5, 2)

    uncopied = RangeIndex(index, copy=False)
    self.assertTrue(uncopied.identical(index))

    # Explicit copy and the default both yield an equal index.
    for ctor_kwargs in ({'copy': True}, {}):
        result = RangeIndex(index, **ctor_kwargs)
        self.assertTrue(result.equals(index))

    with self.assertRaises(TypeError):
        RangeIndex(index, dtype='float64')
def test_max_min_range(self, start, stop, step):
    """RangeIndex max/min agree with the materialized integer index."""
    # GH#17607
    idx = RangeIndex(start, stop, step)

    expected_max = idx._int64index.max()
    assert idx.max() == expected_max
    # skipna should be irrelevant since RangeIndex should never have NAs
    assert idx.max(skipna=False) == expected_max

    expected_min = idx._int64index.min()
    assert idx.min() == expected_min
    # skipna should be irrelevant since RangeIndex should never have NAs
    assert idx.min(skipna=False) == expected_min

    # empty
    reversed_idx = RangeIndex(start, stop, -step)
    assert isna(reversed_idx.max())
    assert isna(reversed_idx.min())
def create_data():
    """
    Create the pickle data.

    Returns a dict of named fixture groups: ``series``, ``frame``, ``index``,
    ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``, ``timestamp``
    and ``offsets``. Each group maps a short label to a pandas object.
    """
    # Raw column data shared by several of the Series/DataFrame fixtures.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M"))

    # One index per flavour, ten elements each.
    index = dict(
        int=Index(np.arange(10)),
        date=date_range("20130101", periods=10),
        period=period_range("2013-01-01", freq="M", periods=10),
        float=Index(np.arange(10, dtype=np.float64)),
        uint=Index(np.arange(10, dtype=np.uint64)),
        timedelta=timedelta_range("00:00:00", freq="30T", periods=10),
    )

    index["range"] = RangeIndex(10)

    # interval_range is only imported and used for versions >= 0.21.
    if _loose_version >= LooseVersion("0.21"):
        from pandas import interval_range

        index["interval"] = interval_range(0, periods=10)

    mi = dict(
        reg2=MultiIndex.from_tuples(
            tuple(
                zip(
                    *[
                        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                        ["one", "two", "one", "two", "one", "two", "one", "two"],
                    ]
                )
            ),
            names=["first", "second"],
        )
    )

    series = dict(
        float=Series(data["A"]),
        int=Series(data["B"]),
        mixed=Series(data["E"]),
        ts=Series(
            np.arange(10).astype(np.int64), index=date_range("20130101", periods=10)
        ),
        mi=Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(
                tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"]
            ),
        ),
        dup=Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]),
        cat=Series(Categorical(["foo", "bar", "baz"])),
        dt=Series(date_range("20130101", periods=5)),
        dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")),
        period=Series([Period("2000Q1")] * 5),
    )

    # Frame with mixed dtypes whose column label "A" appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = dict(
        float=DataFrame({"A": series["float"], "B": series["float"] + 1}),
        int=DataFrame({"A": series["int"], "B": series["int"] + 1}),
        mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        mi=DataFrame(
            {"A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64)},
            index=MultiIndex.from_tuples(
                tuple(
                    zip(
                        *[
                            ["bar", "bar", "baz", "baz", "baz"],
                            ["one", "two", "one", "two", "three"],
                        ]
                    )
                ),
                names=["first", "second"],
            ),
        ),
        dup=DataFrame(
            np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"]
        ),
        cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}),
        cat_and_float=DataFrame(
            {
                "A": Categorical(["foo", "bar", "baz"]),
                "B": np.arange(3).astype(np.int64),
            }
        ),
        mixed_dup=mixed_dup_df,
        dt_mixed_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        dt_mixed2_tzs=DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    )

    # Categoricals with 7, 1000 and 10000 categories, labelled by code width.
    cat = dict(
        int8=Categorical(list("abcdefg")),
        int16=Categorical(np.arange(1000)),
        int32=Categorical(np.arange(10000)),
    )

    timestamp = dict(
        normal=Timestamp("2011-01-01"),
        nat=NaT,
        tz=Timestamp("2011-01-01", tz="US/Eastern"),
    )

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance of each date offset, keyed by a descriptive label.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return dict(
        series=series,
        frame=frame,
        index=index,
        scalars=scalars,
        mi=mi,
        sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()),
        sp_frame=dict(float=_create_sp_frame()),
        cat=cat,
        timestamp=timestamp,
        offsets=off,
    )
def test_from_records_sequencelike(self):
    """
    Round-trip a mixed-dtype frame through ``DataFrame.from_records``.

    The frame is rebuilt from tuples, from lists, and from two record
    arrays, then compared against the original.
    """
    df = DataFrame(
        {
            "A": np.array(np.random.randn(6), dtype=np.float64),
            "A1": np.array(np.random.randn(6), dtype=np.float64),
            "B": np.array(np.arange(6), dtype=np.int64),
            "C": ["foo"] * 6,
            "D": np.array([True, False] * 3, dtype=bool),
            "E": np.array(np.random.randn(6), dtype=np.float32),
            "E1": np.array(np.random.randn(6), dtype=np.float32),
            "F": np.array(np.arange(6), dtype=np.int32),
        }
    )

    # this is actually tricky to create the recordlike arrays and
    # have the dtypes be intact
    blocks = df._to_dict_of_blocks()
    tuples = []
    columns = []
    dtypes = []
    # Collect column names and (name, dtype string) pairs block by block.
    for dtype, b in blocks.items():
        columns.extend(b.columns)
        dtypes.extend([(c, np.dtype(dtype).descr[0][1]) for c in b.columns])
    # Build one tuple per row, concatenating the row values of every block
    # in the same block order used for ``columns`` above.
    for i in range(len(df.index)):
        tup = []
        for _, b in blocks.items():
            tup.extend(b.iloc[i].values)
        tuples.append(tuple(tup))

    recarray = np.array(tuples, dtype=dtypes).view(np.recarray)
    recarray2 = df.to_records()
    lists = [list(x) for x in tuples]

    # tuples (lose the dtype info)
    result = DataFrame.from_records(tuples, columns=columns).reindex(
        columns=df.columns
    )

    # created recarray and with to_records recarray (have dtype info)
    result2 = DataFrame.from_records(recarray, columns=columns).reindex(
        columns=df.columns
    )
    result3 = DataFrame.from_records(recarray2, columns=columns).reindex(
        columns=df.columns
    )

    # list of lists (no dtype info)
    result4 = DataFrame.from_records(lists, columns=columns).reindex(
        columns=df.columns
    )

    # dtype is only checked for the inputs that carried dtype info
    tm.assert_frame_equal(result, df, check_dtype=False)
    tm.assert_frame_equal(result2, df)
    tm.assert_frame_equal(result3, df)
    tm.assert_frame_equal(result4, df, check_dtype=False)

    # tuples is in the order of the columns
    result = DataFrame.from_records(tuples)
    tm.assert_index_equal(result.columns, RangeIndex(8))

    # test exclude parameter & we are casting the results here (as we don't
    # have dtype info to recover)
    columns_to_test = [columns.index("C"), columns.index("E1")]

    # exclude every positional column except the two under test
    exclude = list(set(range(8)) - set(columns_to_test))
    result = DataFrame.from_records(tuples, exclude=exclude)
    result.columns = [columns[i] for i in sorted(columns_to_test)]
    tm.assert_series_equal(result["C"], df["C"])
    tm.assert_series_equal(result["E1"], df["E1"].astype("float64"))
def create_data():
    """
    Create the pickle data.

    Returns a dict of named fixture groups: ``series``, ``frame``, ``index``,
    ``scalars``, ``mi``, ``sp_series``, ``sp_frame``, ``cat``, ``timestamp``
    and ``offsets``. Each group maps a short label to a pandas object.
    """
    # Raw column data shared by several of the Series/DataFrame fixtures.
    data = {
        "A": [0.0, 1.0, 2.0, 3.0, np.nan],
        "B": [0, 1, 0, 1, 0],
        "C": ["foo1", "foo2", "foo3", "foo4", "foo5"],
        "D": date_range("1/1/2009", periods=5),
        "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0],
    }

    scalars = {
        "timestamp": Timestamp("20130101"),
        "period": Period("2012", "M")
    }

    # One index per flavour, ten elements each.
    index = {
        "int": Index(np.arange(10)),
        "date": date_range("20130101", periods=10),
        "period": period_range("2013-01-01", freq="M", periods=10),
        "float": Index(np.arange(10, dtype=np.float64)),
        "uint": Index(np.arange(10, dtype=np.uint64)),
        "timedelta": timedelta_range("00:00:00", freq="30T", periods=10),
    }

    index["range"] = RangeIndex(10)

    index["interval"] = interval_range(0, periods=10)

    mi = {
        "reg2": MultiIndex.from_tuples(
            tuple(
                zip(*[
                    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
                    ["one", "two", "one", "two", "one", "two", "one", "two"],
                ])),
            names=["first", "second"],
        )
    }

    series = {
        "float": Series(data["A"]),
        "int": Series(data["B"]),
        "mixed": Series(data["E"]),
        "ts": Series(np.arange(10).astype(np.int64),
                     index=date_range("20130101", periods=10)),
        "mi": Series(
            np.arange(5).astype(np.float64),
            index=MultiIndex.from_tuples(tuple(
                zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])),
                names=["one", "two"]),
        ),
        "dup": Series(np.arange(5).astype(np.float64),
                      index=["A", "B", "C", "D", "A"]),
        "cat": Series(Categorical(["foo", "bar", "baz"])),
        "dt": Series(date_range("20130101", periods=5)),
        "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")),
        "period": Series([Period("2000Q1")] * 5),
    }

    # Frame with mixed dtypes whose column label "A" appears twice.
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")
    frame = {
        "float": DataFrame({
            "A": series["float"],
            "B": series["float"] + 1
        }),
        "int": DataFrame({
            "A": series["int"],
            "B": series["int"] + 1
        }),
        "mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}),
        "mi": DataFrame(
            {
                "A": np.arange(5).astype(np.float64),
                "B": np.arange(5).astype(np.int64)
            },
            index=MultiIndex.from_tuples(
                tuple(
                    zip(*[
                        ["bar", "bar", "baz", "baz", "baz"],
                        ["one", "two", "one", "two", "three"],
                    ])),
                names=["first", "second"],
            ),
        ),
        "dup": DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=["A", "B", "A"]),
        "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}),
        "cat_and_float": DataFrame({
            "A": Categorical(["foo", "bar", "baz"]),
            "B": np.arange(3).astype(np.int64),
        }),
        "mixed_dup": mixed_dup_df,
        "dt_mixed_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
            },
            index=range(5),
        ),
        "dt_mixed2_tzs": DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="CET"),
                "C": Timestamp("20130603", tz="UTC"),
            },
            index=range(5),
        ),
    }

    # Categoricals with 7, 1000 and 10000 categories, labelled by code width.
    cat = {
        "int8": Categorical(list("abcdefg")),
        "int16": Categorical(np.arange(1000)),
        "int32": Categorical(np.arange(10000)),
    }

    timestamp = {
        "normal": Timestamp("2011-01-01"),
        "nat": NaT,
        "tz": Timestamp("2011-01-01", tz="US/Eastern"),
    }

    timestamp["freq"] = Timestamp("2011-01-01", freq="D")
    timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M")

    # One instance of each date offset, keyed by a descriptive label.
    off = {
        "DateOffset": DateOffset(years=1),
        "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824),
        "BusinessDay": BusinessDay(offset=timedelta(seconds=9)),
        "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"),
        "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"),
        "SemiMonthBegin": SemiMonthBegin(day_of_month=9),
        "SemiMonthEnd": SemiMonthEnd(day_of_month=24),
        "MonthBegin": MonthBegin(1),
        "MonthEnd": MonthEnd(1),
        "QuarterBegin": QuarterBegin(1),
        "QuarterEnd": QuarterEnd(1),
        "Day": Day(1),
        "YearBegin": YearBegin(1),
        "YearEnd": YearEnd(1),
        "Week": Week(1),
        "Week_Tues": Week(2, normalize=False, weekday=1),
        "WeekOfMonth": WeekOfMonth(week=3, weekday=4),
        "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3),
        "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"),
        "Easter": Easter(),
        "Hour": Hour(1),
        "Minute": Minute(1),
    }

    return {
        "series": series,
        "frame": frame,
        "index": index,
        "scalars": scalars,
        "mi": mi,
        "sp_series": {
            "float": _create_sp_series(),
            "ts": _create_sp_tsseries()
        },
        "sp_frame": {
            "float": _create_sp_frame()
        },
        "cat": cat,
        "timestamp": timestamp,
        "offsets": off,
    }
def test_intersection(self, sort):
    """
    Intersections of ``self.index`` and of small RangeIndexes.

    ``sort`` is forwarded unchanged to every ``intersection`` call.
    """
    # intersect with Int64Index
    other = Index(np.arange(1, 6))
    result = self.index.intersection(other, sort=sort)
    expected = Index(np.sort(np.intersect1d(self.index.values, other.values)))
    tm.assert_index_equal(result, expected)

    # same operands, swapped
    result = other.intersection(self.index, sort=sort)
    expected = Index(
        np.sort(np.asarray(np.intersect1d(self.index.values, other.values)))
    )
    tm.assert_index_equal(result, expected)

    # intersect with increasing RangeIndex
    other = RangeIndex(1, 6)
    result = self.index.intersection(other, sort=sort)
    expected = Index(np.sort(np.intersect1d(self.index.values, other.values)))
    tm.assert_index_equal(result, expected)

    # intersect with decreasing RangeIndex
    other = RangeIndex(5, 0, -1)
    result = self.index.intersection(other, sort=sort)
    expected = Index(np.sort(np.intersect1d(self.index.values, other.values)))
    tm.assert_index_equal(result, expected)

    # reversed (GH 17296)
    result = other.intersection(self.index, sort=sort)
    tm.assert_index_equal(result, expected)

    # GH 17296: intersect two decreasing RangeIndexes
    first = RangeIndex(10, -2, -2)
    other = RangeIndex(5, -4, -1)
    # the integer-cast intersection serves as the reference result
    expected = first.astype(int).intersection(other.astype(int), sort=sort)
    result = first.intersection(other, sort=sort).astype(int)
    tm.assert_index_equal(result, expected)

    # reversed
    result = other.intersection(first, sort=sort).astype(int)
    tm.assert_index_equal(result, expected)

    index = RangeIndex(5)

    # intersect of non-overlapping indices
    other = RangeIndex(5, 10, 1)
    result = index.intersection(other, sort=sort)
    expected = RangeIndex(0, 0, 1)
    tm.assert_index_equal(result, expected)

    other = RangeIndex(-1, -5, -1)
    result = index.intersection(other, sort=sort)
    expected = RangeIndex(0, 0, 1)
    tm.assert_index_equal(result, expected)

    # intersection of empty indices
    other = RangeIndex(0, 0, 1)
    result = index.intersection(other, sort=sort)
    expected = RangeIndex(0, 0, 1)
    tm.assert_index_equal(result, expected)

    result = other.intersection(index, sort=sort)
    tm.assert_index_equal(result, expected)

    # intersection of non-overlapping values based on start value and gcd
    index = RangeIndex(1, 10, 2)
    other = RangeIndex(0, 10, 4)
    result = index.intersection(other, sort=sort)
    expected = RangeIndex(0, 0, 1)
    tm.assert_index_equal(result, expected)
def test_constructor_invalid_args_wrong_type(self, args):
    """RangeIndex(args) must raise TypeError naming the type and the value."""
    expected_message = f"Wrong type {type(args)} for value {args}"
    raises_wrong_type = pytest.raises(TypeError, match=expected_message)
    with raises_wrong_type:
        RangeIndex(args)
def test_numeric_compat2(self):
    """
    Arithmetic on a RangeIndex: result values and result index type.

    Every comparison uses ``exact=True``.
    """
    # validate that we are handling the RangeIndex overrides to numeric ops
    # and returning RangeIndex where possible

    idx = RangeIndex(0, 10, 2)

    result = idx * 2
    expected = RangeIndex(0, 20, 4)
    tm.assert_index_equal(result, expected, exact=True)

    result = idx + 2
    expected = RangeIndex(2, 12, 2)
    tm.assert_index_equal(result, expected, exact=True)

    result = idx - 2
    expected = RangeIndex(-2, 8, 2)
    tm.assert_index_equal(result, expected, exact=True)

    # true division is compared against a float64-cast index
    result = idx / 2
    expected = RangeIndex(0, 5, 1).astype("float64")
    tm.assert_index_equal(result, expected, exact=True)

    result = idx / 4
    expected = RangeIndex(0, 10, 2) / 4
    tm.assert_index_equal(result, expected, exact=True)

    result = idx // 1
    expected = idx
    tm.assert_index_equal(result, expected, exact=True)

    # __mul__
    result = idx * idx
    expected = Index(idx.values * idx.values)
    tm.assert_index_equal(result, expected, exact=True)

    # __pow__
    idx = RangeIndex(0, 1000, 2)
    result = idx ** 2
    expected = idx._int64index ** 2
    tm.assert_index_equal(Index(result.values), expected, exact=True)

    # __floordiv__
    # each case is (index, divisor, expected result of index // divisor)
    cases_exact = [
        (RangeIndex(0, 1000, 2), 2, RangeIndex(0, 500, 1)),
        (RangeIndex(-99, -201, -3), -3, RangeIndex(33, 67, 1)),
        (RangeIndex(0, 1000, 1), 2, RangeIndex(0, 1000, 1)._int64index // 2),
        (
            RangeIndex(0, 100, 1),
            2.0,
            RangeIndex(0, 100, 1)._int64index // 2.0,
        ),
        (RangeIndex(0), 50, RangeIndex(0)),
        (RangeIndex(2, 4, 2), 3, RangeIndex(0, 1, 1)),
        (RangeIndex(-5, -10, -6), 4, RangeIndex(-2, -1, 1)),
        (RangeIndex(-100, -200, 3), 2, RangeIndex(0)),
    ]
    for idx, div, expected in cases_exact:
        tm.assert_index_equal(idx // div, expected, exact=True)
def makeRangeIndex(k=10, name=None, **kwargs):
    """
    Build a RangeIndex running from 0 up to (not including) ``k``, step 1.

    ``name`` and any extra keyword arguments are forwarded to ``RangeIndex``.
    """
    start, step = 0, 1
    index = RangeIndex(start, k, step, name=name, **kwargs)
    return index
def ichimoku(high, low, close, tenkan=None, kijun=None, senkou=None, offset=None, **kwargs):
    """Indicator: Ichimoku Kinkō Hyō (Ichimoku)

    Parameters:
        high, low, close: price series; each is passed through
            ``verify_series`` before use.
        tenkan: window for the Tenkan-sen midprice. Falsy or non-positive
            values fall back to 9.
        kijun: window for the Kijun-sen midprice and the number of periods
            the spans and the Chikou span are shifted. Falsy or
            non-positive values fall back to 26.
        senkou: window for the Span B midprice. Falsy or non-positive
            values fall back to 52.
        offset: extra shift applied to all five lines, normalised by
            ``get_offset``.
        **kwargs: ``fillna`` (fill value) and/or ``fill_method`` (fill
            method name) applied to Span A, Span B and the Chikou span.

    Returns:
        A tuple ``(ichimokudf, spandf)``. ``ichimokudf`` holds the five
        lines aligned on the input index. ``spandf`` holds the last
        ``kijun`` unshifted Span A / Span B values re-indexed onto
        ``kijun`` periods following the last index entry of ``close``.
    """
    high = verify_series(high)
    low = verify_series(low)
    close = verify_series(close)
    tenkan = int(tenkan) if tenkan and tenkan > 0 else 9
    kijun = int(kijun) if kijun and kijun > 0 else 26
    senkou = int(senkou) if senkou and senkou > 0 else 52
    offset = get_offset(offset)

    # Calculate Result
    tenkan_sen = midprice(high=high, low=low, length=tenkan)
    kijun_sen = midprice(high=high, low=low, length=kijun)
    span_a = 0.5 * (tenkan_sen + kijun_sen)
    span_b = midprice(high=high, low=low, length=senkou)

    # Copy Span A and B values before their shift
    _span_a = span_a[-kijun:].copy()
    _span_b = span_b[-kijun:].copy()

    span_a = span_a.shift(kijun)
    span_b = span_b.shift(kijun)
    chikou_span = close.shift(-kijun)

    # Offset
    if offset != 0:
        tenkan_sen = tenkan_sen.shift(offset)
        kijun_sen = kijun_sen.shift(offset)
        span_a = span_a.shift(offset)
        span_b = span_b.shift(offset)
        chikou_span = chikou_span.shift(offset)

    # Handle fills
    if 'fillna' in kwargs:
        span_a.fillna(kwargs['fillna'], inplace=True)
        span_b.fillna(kwargs['fillna'], inplace=True)
        chikou_span.fillna(kwargs['fillna'], inplace=True)
    if 'fill_method' in kwargs:
        span_a.fillna(method=kwargs['fill_method'], inplace=True)
        span_b.fillna(method=kwargs['fill_method'], inplace=True)
        chikou_span.fillna(method=kwargs['fill_method'], inplace=True)

    # Name and Categorize it
    span_a.name = f"ISA_{tenkan}"
    span_b.name = f"ISB_{kijun}"
    tenkan_sen.name = f"ITS_{tenkan}"
    kijun_sen.name = f"IKS_{kijun}"
    chikou_span.name = f"ICS_{kijun}"

    chikou_span.category = kijun_sen.category = tenkan_sen.category = 'trend'
    # Use the category *string* of the Chikou span; assigning the Series
    # itself would make ``category`` a Series instead of a label.
    span_b.category = span_a.category = chikou_span.category

    # Prepare Ichimoku DataFrame
    data = {
        span_a.name: span_a,
        span_b.name: span_b,
        tenkan_sen.name: tenkan_sen,
        kijun_sen.name: kijun_sen,
        chikou_span.name: chikou_span,
    }
    ichimokudf = DataFrame(data)
    ichimokudf.name = f"ICHIMOKU_{tenkan}_{kijun}_{senkou}"
    ichimokudf.category = 'overlap'

    # Prepare Span DataFrame
    last = close.index[-1]
    if close.index.dtype == 'int64':
        # Integer index: continue counting for ``kijun`` more positions.
        ext_index = RangeIndex(start=last + 1, stop=last + kijun + 1)
        spandf = DataFrame(index=ext_index, columns=[span_a.name, span_b.name])
        _span_a.index = _span_b.index = ext_index
    else:
        # Otherwise build ``kijun`` business-day dates after the last entry.
        df_freq = close.index.value_counts().mode()[0]
        tdelta = Timedelta(df_freq, unit='d')
        new_dt = date_range(start=last + tdelta, periods=kijun, freq='B')
        spandf = DataFrame(index=new_dt, columns=[span_a.name, span_b.name])
        _span_a.index = _span_b.index = new_dt

    spandf[span_a.name] = _span_a
    spandf[span_b.name] = _span_b
    spandf.name = f"ICHISPAN_{tenkan}_{kijun}"
    spandf.category = 'overlap'

    return ichimokudf, spandf
def test_slice_integer(self):
    """Check float-valued slices against integer-like indexes.

    For each index, label-based ``.loc`` slicing with float bounds is
    compared (through ``self.check``) with an equivalent positional slice,
    while plain ``s[...]`` with float bounds must raise ``TypeError``.
    """
    # same as above, but for Integer based indexes
    # these coerce to a like integer
    # oob indicates if we are out of bounds
    # of positional indexing
    for index, oob in [
        (Int64Index(range(5)), False),
        (RangeIndex(5), False),
        (Int64Index(range(5)) + 10, True),
    ]:
        # s is an in-range index
        s = Series(range(5), index=index)

        # getitem
        for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
            for idxr in [lambda x: x.loc]:
                result = idxr(s)[l]

                # these are all label indexing
                # except getitem which is positional
                # empty
                if oob:
                    indexer = slice(0, 0)
                else:
                    indexer = slice(3, 5)
                self.check(result, s, indexer, False)

            # positional indexing
            # the message embeds the concrete index class name and
            # whichever float bound (3.0 or 4.0) is reported
            msg = (
                "cannot do slice indexing "
                fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[l]

        # getitem out-of-bounds
        for l in [slice(-6, 6), slice(-6.0, 6.0)]:
            for idxr in [lambda x: x.loc]:
                result = idxr(s)[l]

                # these are all label indexing
                # except getitem which is positional
                # empty
                if oob:
                    indexer = slice(0, 0)
                else:
                    indexer = slice(-6, 6)
                self.check(result, s, indexer, False)

        # positional indexing
        # only the float-bounded slice is exercised here, once per index
        msg = (
            "cannot do slice indexing "
            fr"on {type(index).__name__} with these indexers \[-6\.0\] of "
            "type float"
        )
        with pytest.raises(TypeError, match=msg):
            s[slice(-6.0, 6.0)]

        # getitem odd floats
        # each case pairs a fractional label slice with the positional
        # slice expected for an in-bounds index
        for l, res1 in [
            (slice(2.5, 4), slice(3, 5)),
            (slice(2, 3.5), slice(2, 4)),
            (slice(2.5, 3.5), slice(3, 4)),
        ]:
            for idxr in [lambda x: x.loc]:
                result = idxr(s)[l]
                if oob:
                    res = slice(0, 0)
                else:
                    res = res1
                self.check(result, s, res, False)

            # positional indexing
            msg = (
                "cannot do slice indexing "
                fr"on {type(index).__name__} with these indexers \[(2|3)\.5\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[l]

        # setitem
        for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
            for idxr in [lambda x: x.loc]:
                # work on a copy so the series used for the raising checks
                # below is left untouched
                sc = s.copy()
                idxr(sc)[l] = 0
                result = idxr(sc)[l].values.ravel()
                assert (result == 0).all()

            # positional indexing
            msg = (
                "cannot do slice indexing "
                fr"on {type(index).__name__} with these indexers \[(3|4)\.0\] of "
                "type float"
            )
            with pytest.raises(TypeError, match=msg):
                s[l] = 0
def test_slice_keep_name(self): idx = RangeIndex(1, 2, name="asdf") assert idx.name == idx[1:].name
def test_take_preserve_name(self): index = RangeIndex(1, 5, name="foo") taken = index.take([3, 0, 1]) assert index.name == taken.name
def create_index(self):
    """Return the fixture index: a RangeIndex from 0 up to 20 in steps of 2."""
    fixture = RangeIndex(start=0, stop=20, step=2)
    return fixture
def test_get_indexer_pad(self): target = RangeIndex(10) indexer = self.index.get_indexer(target, method='pad') expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected)
def test_get_indexer_decreasing(self, stop): # GH 28678 index = RangeIndex(7, stop, -3) result = index.get_indexer(range(9)) expected = np.array([-1, 2, -1, -1, 1, -1, -1, 0, -1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected)
def setup(self): self.idx_inc = RangeIndex(start=0, stop=10**7, step=3) self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3)
class TestRangeIndex(Numeric):
    """Tests for ``RangeIndex`` construction, set operations, joins and slicing.

    NOTE(review): ``self.index`` is used throughout but never assigned in
    this class; presumably ``setup_indices`` (from a base class) exposes the
    ``self.indices['index']`` entry under that name - confirm.
    """

    _holder = RangeIndex
    _compat_props = ['shape', 'ndim', 'size']

    def setup_method(self, method):
        """Register an increasing and a decreasing fixture index."""
        self.indices = dict(index=RangeIndex(0, 20, 2, name='foo'),
                            index_dec=RangeIndex(18, -1, -2, name='bar'))
        self.setup_indices()

    def create_index(self):
        """Return a small default RangeIndex."""
        return RangeIndex(5)

    def test_can_hold_identifiers(self):
        idx = self.create_index()
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is False

    def test_too_many_names(self):
        with pytest.raises(ValueError, match="^Length"):
            self.index.names = ["roger", "harold"]

    @pytest.mark.parametrize('name', [None, 'foo'])
    @pytest.mark.parametrize('args, kwargs, start, stop, step', [
        ((5, ), dict(), 0, 5, 1),
        ((1, 5), dict(), 1, 5, 1),
        ((1, 5, 2), dict(), 1, 5, 2),
        ((0, ), dict(), 0, 0, 1),
        ((0, 0), dict(), 0, 0, 1),
        (tuple(), dict(start=0), 0, 0, 1),
        (tuple(), dict(stop=0), 0, 0, 1)])
    def test_constructor(self, args, kwargs, start, stop, step, name):
        """Positional and keyword construction yield the expected bounds."""
        result = RangeIndex(*args, name=name, **kwargs)
        expected = Index(np.arange(start, stop, step, dtype=np.int64),
                         name=name)
        assert isinstance(result, RangeIndex)
        assert result._start == start
        assert result._stop == stop
        assert result._step == step
        assert result.name is name
        tm.assert_index_equal(result, expected)

    def test_constructor_invalid_args(self):
        msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers"
        with pytest.raises(TypeError, match=msg):
            RangeIndex()

        with pytest.raises(TypeError, match=msg):
            RangeIndex(name='Foo')

        # invalid args
        for i in [
            Index(['a', 'b']), Series(['a', 'b']), np.array(['a', 'b']),
            [], 'foo', datetime(2000, 1, 1, 0, 0), np.arange(0, 10),
            np.array([1]), [1]
        ]:
            with pytest.raises(TypeError):
                RangeIndex(i)

        # we don't allow on a bare Index
        msg = (r'Index\(\.\.\.\) must be called with a collection of some '
               r'kind, 0 was passed')
        with pytest.raises(TypeError, match=msg):
            Index(0, 1000)

    def test_constructor_same(self):
        # pass thru w and w/o copy
        index = RangeIndex(1, 5, 2)
        result = RangeIndex(index, copy=False)
        assert result.identical(index)

        result = RangeIndex(index, copy=True)
        tm.assert_index_equal(result, index, exact=True)

        result = RangeIndex(index)
        tm.assert_index_equal(result, index, exact=True)

        with pytest.raises(TypeError):
            RangeIndex(index, dtype='float64')

    def test_constructor_range(self):
        with pytest.raises(TypeError):
            RangeIndex(range(1, 5, 2))

        result = RangeIndex.from_range(range(1, 5, 2))
        expected = RangeIndex(1, 5, 2)
        tm.assert_index_equal(result, expected, exact=True)

        result = RangeIndex.from_range(range(5, 6))
        expected = RangeIndex(5, 6, 1)
        tm.assert_index_equal(result, expected, exact=True)

        # an invalid range
        result = RangeIndex.from_range(range(5, 1))
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected, exact=True)

        result = RangeIndex.from_range(range(5))
        expected = RangeIndex(0, 5, 1)
        tm.assert_index_equal(result, expected, exact=True)

        result = Index(range(1, 5, 2))
        expected = RangeIndex(1, 5, 2)
        tm.assert_index_equal(result, expected, exact=True)

        with pytest.raises(TypeError):
            Index(range(1, 5, 2), dtype='float64')

    def test_constructor_name(self):
        # GH12288
        orig = RangeIndex(10)
        orig.name = 'original'

        copy = RangeIndex(orig)
        copy.name = 'copy'

        assert orig.name == 'original'
        assert copy.name == 'copy'

        new = Index(copy)
        assert new.name == 'copy'

        # renaming one object must not leak into the others
        new.name = 'new'
        assert orig.name == 'original'
        assert copy.name == 'copy'
        assert new.name == 'new'

    def test_constructor_corner(self):
        arr = np.array([1, 2, 3, 4], dtype=object)
        index = RangeIndex(1, 5)
        assert index.values.dtype == np.int64
        tm.assert_index_equal(index, Index(arr))

        # non-int raise Exception
        with pytest.raises(TypeError):
            RangeIndex('1', '10', '1')
        with pytest.raises(TypeError):
            RangeIndex(1.1, 10.2, 1.3)

        # invalid passed type
        with pytest.raises(TypeError):
            RangeIndex(1, 5, dtype='float64')

    @pytest.mark.parametrize('index, start, stop, step', [
        (RangeIndex(5), 0, 5, 1),
        (RangeIndex(0, 5), 0, 5, 1),
        (RangeIndex(5, step=2), 0, 5, 2),
        (RangeIndex(1, 5, 2), 1, 5, 2)])
    def test_start_stop_step_attrs(self, index, start, stop, step):
        # GH 25710
        assert index.start == start
        assert index.stop == stop
        assert index.step == step

    def test_copy(self):
        i = RangeIndex(5, name='Foo')
        i_copy = i.copy()
        assert i_copy is not i
        assert i_copy.identical(i)
        assert i_copy._start == 0
        assert i_copy._stop == 5
        assert i_copy._step == 1
        assert i_copy.name == 'Foo'

    def test_repr(self):
        """``repr`` is exact and evaluates back to an equal index."""
        i = RangeIndex(5, name='Foo')
        result = repr(i)
        expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')"
        assert result == expected

        result = eval(result)
        tm.assert_index_equal(result, i, exact=True)

        i = RangeIndex(5, 0, -1)
        result = repr(i)
        expected = "RangeIndex(start=5, stop=0, step=-1)"
        assert result == expected

        result = eval(result)
        tm.assert_index_equal(result, i, exact=True)

    def test_insert(self):
        idx = RangeIndex(5, name='Foo')
        result = idx[1:4]

        # test 0th element
        tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]))

        # GH 18295 (test missing)
        expected = Float64Index([0, np.nan, 1, 2, 3, 4])
        for na in (np.nan, pd.NaT, None):
            result = RangeIndex(5).insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_delete(self):
        idx = RangeIndex(5, name='Foo')
        expected = idx[1:].astype(int)
        result = idx.delete(0)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

        expected = idx[:-1].astype(int)
        result = idx.delete(-1)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

        with pytest.raises((IndexError, ValueError)):
            # either depending on numpy version
            result = idx.delete(len(idx))

    def test_view(self):
        i = RangeIndex(0, name='Foo')
        i_view = i.view()
        assert i_view.name == 'Foo'

        i_view = i.view('i8')
        tm.assert_numpy_array_equal(i.values, i_view)

        i_view = i.view(RangeIndex)
        tm.assert_index_equal(i, i_view)

    def test_dtype(self):
        assert self.index.dtype == np.int64

    def test_is_monotonic(self):
        assert self.index.is_monotonic is True
        assert self.index.is_monotonic_increasing is True
        assert self.index.is_monotonic_decreasing is False
        assert self.index._is_strictly_monotonic_increasing is True
        assert self.index._is_strictly_monotonic_decreasing is False

        index = RangeIndex(4, 0, -1)
        assert index.is_monotonic is False
        assert index._is_strictly_monotonic_increasing is False
        assert index.is_monotonic_decreasing is True
        assert index._is_strictly_monotonic_decreasing is True

        # single-element and empty ranges count as monotonic both ways
        index = RangeIndex(1, 2)
        assert index.is_monotonic is True
        assert index.is_monotonic_increasing is True
        assert index.is_monotonic_decreasing is True
        assert index._is_strictly_monotonic_increasing is True
        assert index._is_strictly_monotonic_decreasing is True

        index = RangeIndex(2, 1)
        assert index.is_monotonic is True
        assert index.is_monotonic_increasing is True
        assert index.is_monotonic_decreasing is True
        assert index._is_strictly_monotonic_increasing is True
        assert index._is_strictly_monotonic_decreasing is True

        index = RangeIndex(1, 1)
        assert index.is_monotonic is True
        assert index.is_monotonic_increasing is True
        assert index.is_monotonic_decreasing is True
        assert index._is_strictly_monotonic_increasing is True
        assert index._is_strictly_monotonic_decreasing is True

    def test_equals_range(self):
        """Ranges with different bounds but the same elements compare equal."""
        equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)),
                       (RangeIndex(0), RangeIndex(1, -1, 3)),
                       (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)),
                       (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2))]
        for left, right in equiv_pairs:
            assert left.equals(right)
            assert right.equals(left)

    def test_logical_compat(self):
        idx = self.create_index()
        assert idx.all() == idx.values.all()
        assert idx.any() == idx.values.any()

    def test_identical(self):
        i = Index(self.index.copy())
        assert i.identical(self.index)

        # we don't allow object dtype for RangeIndex
        if isinstance(self.index, RangeIndex):
            return

        same_values_different_type = Index(i, dtype=object)
        assert not i.identical(same_values_different_type)

        i = self.index.copy(dtype=object)
        i = i.rename('foo')
        same_values = Index(i, dtype=object)
        assert same_values.identical(self.index.copy(dtype=object))

        assert not i.identical(self.index)
        assert Index(same_values, name='foo', dtype=object).identical(i)

        assert not self.index.copy(dtype=object).identical(
            self.index.copy(dtype='int64'))

    def test_get_indexer(self):
        target = RangeIndex(10)
        indexer = self.index.get_indexer(target)
        expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1],
                            dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected)

    def test_get_indexer_pad(self):
        target = RangeIndex(10)
        indexer = self.index.get_indexer(target, method='pad')
        expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected)

    def test_get_indexer_backfill(self):
        target = RangeIndex(10)
        indexer = self.index.get_indexer(target, method='backfill')
        expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected)

    def test_join_outer(self):
        # join with Int64Index
        other = Int64Index(np.arange(25, 14, -1))

        res, lidx, ridx = self.index.join(other, how='outer',
                                          return_indexers=True)
        noidx_res = self.index.join(other, how='outer')
        tm.assert_index_equal(res, noidx_res)

        eres = Int64Index([
            0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25
        ])
        elidx = np.array(
            [0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1,
             -1],
            dtype=np.intp)
        eridx = np.array(
            [-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
             0],
            dtype=np.intp)

        assert isinstance(res, Int64Index)
        assert not isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

        # join with RangeIndex
        other = RangeIndex(25, 14, -1)

        res, lidx, ridx = self.index.join(other, how='outer',
                                          return_indexers=True)
        noidx_res = self.index.join(other, how='outer')
        tm.assert_index_equal(res, noidx_res)

        assert isinstance(res, Int64Index)
        assert not isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_inner(self):
        # Join with non-RangeIndex
        other = Int64Index(np.arange(25, 14, -1))

        res, lidx, ridx = self.index.join(other, how='inner',
                                          return_indexers=True)

        # no guarantee of sortedness, so sort for comparison purposes
        ind = res.argsort()
        res = res.take(ind)
        lidx = lidx.take(ind)
        ridx = ridx.take(ind)

        eres = Int64Index([16, 18])
        elidx = np.array([8, 9], dtype=np.intp)
        eridx = np.array([9, 7], dtype=np.intp)

        assert isinstance(res, Int64Index)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

        # Join two RangeIndex
        other = RangeIndex(25, 14, -1)

        res, lidx, ridx = self.index.join(other, how='inner',
                                          return_indexers=True)

        assert isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_left(self):
        # Join with Int64Index
        other = Int64Index(np.arange(25, 14, -1))

        res, lidx, ridx = self.index.join(other, how='left',
                                          return_indexers=True)
        eres = self.index
        eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7],
                         dtype=np.intp)

        assert isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        assert lidx is None
        tm.assert_numpy_array_equal(ridx, eridx)

        # Join with RangeIndex
        # Same labels as the Int64Index above, so the expectations carry
        # over; this half previously rebuilt an Int64Index and never
        # exercised the RangeIndex/RangeIndex left join.
        other = RangeIndex(25, 14, -1)

        res, lidx, ridx = self.index.join(other, how='left',
                                          return_indexers=True)

        assert isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        assert lidx is None
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_right(self):
        # Join with Int64Index
        other = Int64Index(np.arange(25, 14, -1))

        res, lidx, ridx = self.index.join(other, how='right',
                                          return_indexers=True)
        eres = other
        elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1],
                         dtype=np.intp)

        assert isinstance(other, Int64Index)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        assert ridx is None

        # Join with RangeIndex
        other = RangeIndex(25, 14, -1)

        res, lidx, ridx = self.index.join(other, how='right',
                                          return_indexers=True)
        eres = other

        assert isinstance(other, RangeIndex)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        assert ridx is None

    def test_join_non_int_index(self):
        other = Index([3, 6, 7, 8, 10], dtype=object)

        outer = self.index.join(other, how='outer')
        outer2 = other.join(self.index, how='outer')
        expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18])
        tm.assert_index_equal(outer, outer2)
        tm.assert_index_equal(outer, expected)

        inner = self.index.join(other, how='inner')
        inner2 = other.join(self.index, how='inner')
        expected = Index([6, 8, 10])
        tm.assert_index_equal(inner, inner2)
        tm.assert_index_equal(inner, expected)

        left = self.index.join(other, how='left')
        tm.assert_index_equal(left, self.index.astype(object))

        left2 = other.join(self.index, how='left')
        tm.assert_index_equal(left2, other)

        right = self.index.join(other, how='right')
        tm.assert_index_equal(right, other)

        right2 = other.join(self.index, how='right')
        tm.assert_index_equal(right2, self.index.astype(object))

    def test_join_non_unique(self):
        other = Index([4, 4, 3, 3])

        res, lidx, ridx = self.index.join(other, return_indexers=True)

        eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18])
        elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp)
        eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1],
                         dtype=np.intp)

        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_self(self):
        kinds = 'outer', 'inner', 'left', 'right'
        for kind in kinds:
            joined = self.index.join(self.index, how=kind)
            assert self.index is joined

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection(self, sort):
        # intersect with Int64Index
        other = Index(np.arange(1, 6))
        result = self.index.intersection(other, sort=sort)
        expected = Index(
            np.sort(np.intersect1d(self.index.values, other.values)))
        tm.assert_index_equal(result, expected)

        result = other.intersection(self.index, sort=sort)
        expected = Index(
            np.sort(np.asarray(np.intersect1d(self.index.values,
                                              other.values))))
        tm.assert_index_equal(result, expected)

        # intersect with increasing RangeIndex
        other = RangeIndex(1, 6)
        result = self.index.intersection(other, sort=sort)
        expected = Index(
            np.sort(np.intersect1d(self.index.values, other.values)))
        tm.assert_index_equal(result, expected)

        # intersect with decreasing RangeIndex
        other = RangeIndex(5, 0, -1)
        result = self.index.intersection(other, sort=sort)
        expected = Index(
            np.sort(np.intersect1d(self.index.values, other.values)))
        tm.assert_index_equal(result, expected)

        # reversed (GH 17296)
        result = other.intersection(self.index, sort=sort)
        tm.assert_index_equal(result, expected)

        # GH 17296: intersect two decreasing RangeIndexes
        first = RangeIndex(10, -2, -2)
        other = RangeIndex(5, -4, -1)
        expected = first.astype(int).intersection(other.astype(int),
                                                  sort=sort)
        result = first.intersection(other, sort=sort).astype(int)
        tm.assert_index_equal(result, expected)

        # reversed
        result = other.intersection(first, sort=sort).astype(int)
        tm.assert_index_equal(result, expected)

        index = RangeIndex(5)

        # intersect of non-overlapping indices
        other = RangeIndex(5, 10, 1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        other = RangeIndex(-1, -5, -1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        # intersection of empty indices
        other = RangeIndex(0, 0, 1)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

        result = other.intersection(index, sort=sort)
        tm.assert_index_equal(result, expected)

        # intersection of non-overlapping values based on start value and gcd
        index = RangeIndex(1, 10, 2)
        other = RangeIndex(0, 10, 4)
        result = index.intersection(other, sort=sort)
        expected = RangeIndex(0, 0, 1)
        tm.assert_index_equal(result, expected)

    def test_union_noncomparable(self):
        from datetime import datetime, timedelta

        # corner case, non-Int64Index
        now = datetime.now()
        other = Index([now + timedelta(i) for i in range(4)], dtype=object)
        result = self.index.union(other)
        expected = Index(np.concatenate((self.index, other)))
        tm.assert_index_equal(result, expected)

        result = other.union(self.index)
        expected = Index(np.concatenate((other, self.index)))
        tm.assert_index_equal(result, expected)

    def test_union(self):
        """Union is checked in both directions against a table of cases."""
        RI = RangeIndex
        I64 = Int64Index
        # each case is (left, right, expected union)
        cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
                 (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
                 (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
                 (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
                 (RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
                 (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
                 (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
                 (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
                 (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
                 (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
                 (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
                 (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
                 (RI(0), RI(0), RI(0)),
                 (RI(0, -10, -2), RI(0), RI(0, -10, -2)),
                 (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
                 (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
                 (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
                 (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
                 (RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
                 (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
                 (RI(0, 10, 1), I64([]), RI(0, 10, 1)),
                 (RI(0), I64([1, 5, 6]), I64([1, 5, 6]))]
        for idx1, idx2, expected in cases:
            res1 = idx1.union(idx2)
            res2 = idx2.union(idx1)
            res3 = idx1._int64index.union(idx2)
            tm.assert_index_equal(res1, expected, exact=True)
            tm.assert_index_equal(res2, expected, exact=True)
            tm.assert_index_equal(res3, expected)

    def test_nbytes(self):
        # memory savings vs int index
        i = RangeIndex(0, 1000)
        assert i.nbytes < i._int64index.nbytes / 10

        # constant memory usage
        i2 = RangeIndex(0, 10)
        assert i.nbytes == i2.nbytes

    def test_cant_or_shouldnt_cast(self):
        # can't
        with pytest.raises(TypeError):
            RangeIndex('foo', 'bar', 'baz')

        # shouldn't
        with pytest.raises(TypeError):
            RangeIndex('0', '1', '2')

    def test_view_Index(self):
        self.index.view(Index)

    def test_prevent_casting(self):
        result = self.index.astype('O')
        assert result.dtype == np.object_

    def test_take_preserve_name(self):
        index = RangeIndex(1, 5, name='foo')
        taken = index.take([3, 0, 1])
        assert index.name == taken.name

    def test_take_fill_value(self):
        # GH 12631
        idx = pd.RangeIndex(1, 4, name='xxx')
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.Int64Index([2, 1, 3], name='xxx')
        tm.assert_index_equal(result, expected)

        # fill_value
        msg = "Unable to fill values because RangeIndex cannot contain NA"
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -1]), fill_value=True)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False,
                          fill_value=True)
        expected = pd.Int64Index([2, 1, 3], name='xxx')
        tm.assert_index_equal(result, expected)

        msg = "Unable to fill values because RangeIndex cannot contain NA"
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        with pytest.raises(IndexError):
            idx.take(np.array([1, -5]))

    def test_print_unicode_columns(self):
        df = pd.DataFrame({
            u("\u05d0"): [1, 2, 3],
            "\u05d1": [4, 5, 6],
            "c": [7, 8, 9]
        })
        repr(df.columns)  # should not raise UnicodeDecodeError

    def test_repr_roundtrip(self):
        tm.assert_index_equal(eval(repr(self.index)), self.index)

    def test_slice_keep_name(self):
        idx = RangeIndex(1, 2, name='asdf')
        assert idx.name == idx[1:].name

    def test_explicit_conversions(self):
        # GH 8608
        # add/sub are overridden explicitly for Float/Int Index
        idx = RangeIndex(5)

        # float conversions
        arr = np.arange(5, dtype='int64') * 3.2
        expected = Float64Index(arr)
        fidx = idx * 3.2
        tm.assert_index_equal(fidx, expected)
        fidx = 3.2 * idx
        tm.assert_index_equal(fidx, expected)

        # interops with numpy arrays
        expected = Float64Index(arr)
        a = np.zeros(5, dtype='float64')
        result = fidx - a
        tm.assert_index_equal(result, expected)

        expected = Float64Index(-arr)
        a = np.zeros(5, dtype='float64')
        result = a - fidx
        tm.assert_index_equal(result, expected)

    def test_has_duplicates(self):
        """Every non-empty fixture index is unique."""
        # Iterate the index objects themselves: the emptiness check used to
        # be applied to the dict key (a non-empty string), so it never
        # skipped anything.
        for idx in self.indices.values():
            if not len(idx):
                continue
            assert idx.is_unique
            assert not idx.has_duplicates

    def test_extended_gcd(self):
        result = self.index._extended_gcd(6, 10)
        assert result[0] == result[1] * 6 + result[2] * 10
        assert 2 == result[0]

        result = self.index._extended_gcd(10, 6)
        assert 2 == result[1] * 10 + result[2] * 6
        assert 2 == result[0]

    def test_min_fitting_element(self):
        result = RangeIndex(0, 20, 2)._min_fitting_element(1)
        assert 2 == result

        result = RangeIndex(1, 6)._min_fitting_element(1)
        assert 1 == result

        result = RangeIndex(18, -2, -2)._min_fitting_element(1)
        assert 2 == result

        result = RangeIndex(5, 0, -1)._min_fitting_element(1)
        assert 1 == result

        big_num = 500000000000000000000000

        result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num)
        assert big_num == result

    def test_max_fitting_element(self):
        result = RangeIndex(0, 20, 2)._max_fitting_element(17)
        assert 16 == result

        result = RangeIndex(1, 6)._max_fitting_element(4)
        assert 4 == result

        result = RangeIndex(18, -2, -2)._max_fitting_element(17)
        assert 16 == result

        result = RangeIndex(5, 0, -1)._max_fitting_element(4)
        assert 4 == result

        big_num = 500000000000000000000000

        result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num)
        assert big_num == result

    def test_pickle_compat_construction(self):
        # RangeIndex() is a valid constructor
        pass

    def test_slice_specialised(self):
        # scalar indexing
        res = self.index[1]
        expected = 2
        assert res == expected

        res = self.index[-1]
        expected = 18
        assert res == expected

        # slicing
        # slice value completion
        index = self.index[:]
        expected = self.index
        tm.assert_index_equal(index, expected)

        # positive slice values
        index = self.index[7:10:2]
        expected = Index(np.array([14, 18]), name='foo')
        tm.assert_index_equal(index, expected)

        # negative slice values
        index = self.index[-1:-5:-2]
        expected = Index(np.array([18, 14]), name='foo')
        tm.assert_index_equal(index, expected)

        # stop overshoot
        index = self.index[2:100:4]
        expected = Index(np.array([4, 12]), name='foo')
        tm.assert_index_equal(index, expected)

        # reverse
        index = self.index[::-1]
        expected = Index(self.index.values[::-1], name='foo')
        tm.assert_index_equal(index, expected)

        index = self.index[-8::-1]
        expected = Index(np.array([4, 2, 0]), name='foo')
        tm.assert_index_equal(index, expected)

        index = self.index[-40::-1]
        expected = Index(np.array([], dtype=np.int64), name='foo')
        tm.assert_index_equal(index, expected)

        index = self.index[40::-1]
        expected = Index(self.index.values[40::-1], name='foo')
        tm.assert_index_equal(index, expected)

        index = self.index[10::-1]
        expected = Index(self.index.values[::-1], name='foo')
        tm.assert_index_equal(index, expected)

    def test_len_specialised(self):
        # make sure that our len is the same as
        # np.arange calc
        for step in np.arange(1, 6, 1):
            arr = np.arange(0, 5, step)
            i = RangeIndex(0, 5, step)
            assert len(i) == len(arr)

            i = RangeIndex(5, 0, step)
            assert len(i) == 0

        for step in np.arange(-6, -1, 1):
            arr = np.arange(5, 0, step)
            i = RangeIndex(5, 0, step)
            assert len(i) == len(arr)

            i = RangeIndex(0, 5, step)
            assert len(i) == 0

    def test_append(self):
        # GH16212
        RI = RangeIndex
        I64 = Int64Index
        F64 = Float64Index
        OI = Index
        # each case is (indexes to concatenate, expected result)
        cases = [([RI(1, 12, 5)], RI(1, 12, 5)),
                 ([RI(0, 6, 4)], RI(0, 6, 4)),
                 ([RI(1, 3), RI(3, 7)], RI(1, 7)),
                 ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
                 ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
                 ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
                 ([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
                 ([RI(-4, -8), RI(3, -4)], RI(0, 0)),
                 ([RI(-4, -8), RI(3, 5)], RI(3, 5)),
                 ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
                 ([RI(-2, ), RI(3, 5)], RI(3, 5)),
                 ([RI(2, ), RI(2)], I64([0, 1, 0, 1])),
                 ([RI(2, ), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
                 ([RI(2, ), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
                 ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
                 ([RI(3, ), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
                 ([RI(3, ), F64([-1, 3.1, 15.])],
                  F64([0, 1, 2, -1, 3.1, 15.])),
                 ([RI(3, ), OI(['a', None, 14])],
                  OI([0, 1, 2, 'a', None, 14])),
                 ([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))]

        for indices, expected in cases:
            result = indices[0].append(indices[1:])
            tm.assert_index_equal(result, expected, exact=True)

            if len(indices) == 2:
                # Append single item rather than list
                result2 = indices[0].append(indices[1])
                tm.assert_index_equal(result2, expected, exact=True)
def test_constructor_additional_invalid_args(self, args):
    """A non-scalar positional argument is rejected with a TypeError naming its type."""
    expected_message = f"Value needs to be a scalar value, was type {type(args).__name__}"
    rejects = pytest.raises(TypeError, match=expected_message)
    with rejects:
        RangeIndex(args)
def setup_method(self, method):
    """Store an increasing and a decreasing named RangeIndex, then call ``setup_indices``."""
    increasing = RangeIndex(0, 20, 2, name='foo')
    decreasing = RangeIndex(18, -1, -2, name='bar')
    self.indices = {'index': increasing, 'index_dec': decreasing}
    self.setup_indices()
def test_constructor_range(self):
    """``from_range`` and ``Index(range)`` build the matching RangeIndex."""
    from_range_cases = [
        (range(1, 5, 2), RangeIndex(1, 5, 2)),
        (range(5, 6), RangeIndex(5, 6, 1)),
        # an invalid range
        (range(5, 1), RangeIndex(0, 0, 1)),
        (range(5), RangeIndex(0, 5, 1)),
    ]
    for source, wanted in from_range_cases:
        built = RangeIndex.from_range(source)
        tm.assert_index_equal(built, wanted, exact=True)

    # the generic Index constructor must also pick RangeIndex for a range
    built = Index(range(1, 5, 2))
    tm.assert_index_equal(built, RangeIndex(1, 5, 2), exact=True)

    # from_range does not take a ``copy`` keyword
    msg = (
        r"(RangeIndex.)?from_range\(\) got an unexpected keyword argument( 'copy')?"
    )
    with pytest.raises(TypeError, match=msg):
        RangeIndex.from_range(range(10), copy=True)
def create_index(self):
    """Return a five-element RangeIndex starting at 0."""
    default_index = RangeIndex(5)
    return default_index
def _get_index_loc(self, key, base_index=None): """ Get the location of a specific key in an index Parameters ---------- key : label The key for which to find the location if the underlying index is a DateIndex or a location if the underlying index is a RangeIndex or an Int64Index. base_index : pd.Index, optional Optionally the base index to search. If None, the model's index is searched. Returns ------- loc : int The location of the key index : pd.Index The index including the key; this is a copy of the original index unless the index had to be expanded to accommodate `key`. index_was_expanded : bool Whether or not the index was expanded to accommodate `key`. Notes ----- If `key` is past the end of of the given index, and the index is either an Int64Index or a date index, this function extends the index up to and including key, and then returns the location in the new index. """ if base_index is None: base_index = self._index index = base_index date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex)) int_index = isinstance(base_index, Int64Index) range_index = isinstance(base_index, RangeIndex) index_class = type(base_index) nobs = len(index) # Special handling for RangeIndex if range_index and isinstance(key, (int, np.integer)): # Negative indices (that lie in the Index) if key < 0 and -key <= nobs: key = nobs + key # Out-of-sample (note that we include key itself in the new index) elif key > nobs - 1: # See gh5835. Remove the except after pandas 0.25 required. 
try: base_index_start = base_index.start base_index_step = base_index.step except AttributeError: base_index_start = base_index._start base_index_step = base_index._step stop = base_index_start + (key + 1) * base_index_step index = RangeIndex(start=base_index_start, stop=stop, step=base_index_step) # Special handling for Int64Index if (not range_index and int_index and not date_index and isinstance(key, (int, np.integer))): # Negative indices (that lie in the Index) if key < 0 and -key <= nobs: key = nobs + key # Out-of-sample (note that we include key itself in the new index) elif key > base_index[-1]: index = Int64Index(np.arange(base_index[0], int(key + 1))) # Special handling for date indexes if date_index: # Use index type to choose creation function if index_class is DatetimeIndex: index_fn = date_range else: index_fn = period_range # Integer key (i.e. already given a location) if isinstance(key, (int, np.integer)): # Negative indices (that lie in the Index) if key < 0 and -key < nobs: key = index[nobs + key] # Out-of-sample (note that we include key itself in the new # index) elif key > len(base_index) - 1: index = index_fn(start=base_index[0], periods=int(key + 1), freq=base_index.freq) key = index[-1] else: key = index[key] # Other key types (i.e. 
string date or some datetime-like object) else: # Covert the key to the appropriate date-like object if index_class is PeriodIndex: date_key = Period(key, freq=base_index.freq) else: date_key = Timestamp(key) # Out-of-sample if date_key > base_index[-1]: # First create an index that may not always include `key` index = index_fn(start=base_index[0], end=date_key, freq=base_index.freq) # Now make sure we include `key` if not index[-1] == date_key: index = index_fn(start=base_index[0], periods=len(index) + 1, freq=base_index.freq) # Get the location if date_index: # (note that get_loc will throw a KeyError if key is invalid) loc = index.get_loc(key) elif int_index or range_index: # For Int64Index and RangeIndex, key is assumed to be the location # and not an index value (this assumption is required to support # RangeIndex) try: index[key] # We want to raise a KeyError in this case, to keep the exception # consistent across index types. # - Attempting to index with an out-of-bound location (e.g. # index[10] on an index of length 9) will raise an IndexError # (as of Pandas 0.22) # - Attemtping to index with a type that cannot be cast to integer # (e.g. a non-numeric string) will raise a ValueError if the # index is RangeIndex (otherwise will raise an IndexError) # (as of Pandas 0.22) except (IndexError, ValueError) as e: raise KeyError(str(e)) loc = key else: loc = index.get_loc(key) # Check if we now have a modified index index_was_expanded = index is not base_index # Return the index through the end of the loc / slice if isinstance(loc, slice): end = loc.stop else: end = loc return loc, index[:end + 1], index_was_expanded
def test_get_indexer(self):
    """Exact-match ``get_indexer`` lookup of ``RangeIndex(10)``.

    Every target value missing from the index under test must be reported
    as ``-1``; present values are reported by position.
    """
    idx = self.create_index()
    lookup = RangeIndex(10)
    positions = idx.get_indexer(lookup)
    # NOTE(review): the expectation presumes ``create_index()`` holds the
    # even numbers 0, 2, 4, ... in ascending order -- confirm against the
    # enclosing test class.
    wanted = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
    tm.assert_numpy_array_equal(positions, wanted)
def test_take_preserve_name(self): index = RangeIndex(1, 5, name='foo') taken = index.take([3, 0, 1]) self.assertEqual(index.name, taken.name)
def test_intersection(self, sort): # intersect with Int64Index index = RangeIndex(start=0, stop=20, step=2) other = Index(np.arange(1, 6)) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) result = other.intersection(index, sort=sort) expected = Index( np.sort(np.asarray(np.intersect1d(index.values, other.values)))) tm.assert_index_equal(result, expected) # intersect with increasing RangeIndex other = RangeIndex(1, 6) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) # intersect with decreasing RangeIndex other = RangeIndex(5, 0, -1) result = index.intersection(other, sort=sort) expected = Index(np.sort(np.intersect1d(index.values, other.values))) tm.assert_index_equal(result, expected) # reversed (GH 17296) result = other.intersection(index, sort=sort) tm.assert_index_equal(result, expected) # GH 17296: intersect two decreasing RangeIndexes first = RangeIndex(10, -2, -2) other = RangeIndex(5, -4, -1) expected = first.astype(int).intersection(other.astype(int), sort=sort) result = first.intersection(other, sort=sort).astype(int) tm.assert_index_equal(result, expected) # reversed result = other.intersection(first, sort=sort).astype(int) tm.assert_index_equal(result, expected) index = RangeIndex(5) # intersect of non-overlapping indices other = RangeIndex(5, 10, 1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) other = RangeIndex(-1, -5, -1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) # intersection of empty indices other = RangeIndex(0, 0, 1) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected) result = other.intersection(index, sort=sort) 
tm.assert_index_equal(result, expected) # intersection of non-overlapping values based on start value and gcd index = RangeIndex(1, 10, 2) other = RangeIndex(0, 10, 4) result = index.intersection(other, sort=sort) expected = RangeIndex(0, 0, 1) tm.assert_index_equal(result, expected)
def setup(self): idx_large_fast = RangeIndex(100000) idx_small_slow = date_range(start="1/1/2012", periods=1) self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow]) self.idx_non_object = RangeIndex(1)
def decode(obj):
    """
    Decoder for deserializing numpy data types.

    Rebuilds the Python / NumPy / pandas object described by a decoded map,
    dispatching on the value stored under its ``'typ'`` key.

    Parameters
    ----------
    obj : dict-like
        The decoded map. It must support ``.get``; the remaining keys that
        are read depend on the ``'typ'`` value.

    Returns
    -------
    object
        The reconstructed object, or ``obj`` itself when it carries no
        ``'typ'`` entry or the entry is not one of the handled kinds.

    Notes
    -----
    Several branches resolve the class to instantiate through
    ``globals()[obj['klass']]``. The class name comes straight from the
    payload, so this function should only be fed trusted data.
    """
    typ = obj.get('typ')
    if typ is None:
        return obj
    elif typ == 'timestamp':
        # the frequency is read from 'offset' when there is no 'freq' key
        freq = obj['freq'] if 'freq' in obj else obj['offset']
        return Timestamp(obj['value'], tz=obj['tz'], freq=freq)
    elif typ == 'nat':
        return NaT
    elif typ == 'period':
        return Period(ordinal=obj['ordinal'], freq=obj['freq'])
    elif typ == 'index':
        dtype = dtype_for(obj['dtype'])
        data = unconvert(obj['data'], dtype, obj.get('compress'))
        return Index(data, dtype=dtype, name=obj['name'])
    elif typ == 'range_index':
        return RangeIndex(obj['start'], obj['stop'], obj['step'],
                          name=obj['name'])
    elif typ == 'multi_index':
        dtype = dtype_for(obj['dtype'])
        data = unconvert(obj['data'], dtype, obj.get('compress'))
        data = [tuple(x) for x in data]
        return MultiIndex.from_tuples(data, names=obj['names'])
    elif typ == 'period_index':
        data = unconvert(obj['data'], np.int64, obj.get('compress'))
        return PeriodIndex(PeriodArray(data, obj['freq']), name=obj['name'])
    elif typ == 'datetime_index':
        data = unconvert(obj['data'], np.int64, obj.get('compress'))
        result = DatetimeIndex(data, name=obj['name'], freq=obj['freq'])
        tz = obj['tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result
    elif typ in ('interval_index', 'interval_array'):
        # NOTE(security): class name is taken from the payload
        return globals()[obj['klass']].from_arrays(obj['left'],
                                                   obj['right'],
                                                   obj['closed'],
                                                   name=obj['name'])
    elif typ == 'category':
        # NOTE(security): class name is taken from the payload
        from_codes = globals()[obj['klass']].from_codes
        return from_codes(codes=obj['codes'],
                          categories=obj['categories'],
                          ordered=obj['ordered'])
    elif typ == 'interval':
        return Interval(obj['left'], obj['right'], obj['closed'])
    elif typ == 'series':
        dtype = dtype_for(obj['dtype'])
        pd_dtype = pandas_dtype(dtype)
        index = obj['index']
        # .get() so a payload without a 'compress' entry is read as
        # uncompressed, exactly like the index branches above
        values = unconvert(obj['data'], dtype, obj.get('compress'))
        return Series(values, index=index, dtype=pd_dtype, name=obj['name'])
    elif typ == 'block_manager':
        axes = obj['axes']

        def create_block(b):
            values = _safe_reshape(
                unconvert(b['values'], dtype_for(b['dtype']),
                          b.get('compress')),
                b['shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if 'locs' in b:
                placement = b['locs']
            else:
                placement = axes[0].get_indexer(b['items'])

            if is_datetime64tz_dtype(b['dtype']):
                assert isinstance(values, np.ndarray), type(values)
                assert values.dtype == 'M8[ns]', values.dtype
                values = DatetimeArray(values, dtype=b['dtype'])

            return make_block(values=values,
                              klass=getattr(internals, b['klass']),
                              placement=placement,
                              dtype=b['dtype'])

        blocks = [create_block(b) for b in obj['blocks']]
        # NOTE(security): class name is taken from the payload
        return globals()[obj['klass']](BlockManager(blocks, axes))
    elif typ == 'datetime':
        return parse(obj['data'])
    elif typ == 'datetime64':
        return np.datetime64(parse(obj['data']))
    elif typ == 'date':
        return parse(obj['data']).date()
    elif typ == 'timedelta':
        return timedelta(*obj['data'])
    elif typ == 'timedelta64':
        return np.timedelta64(int(obj['data']))
    # NOTE: 'sparse_series', 'sparse_dataframe' and 'sparse_panel' payloads
    # are not decoded here; they fall through and are returned unchanged.
    elif typ == 'block_index':
        # NOTE(security): class name is taken from the payload
        return globals()[obj['klass']](obj['length'], obj['blocs'],
                                       obj['blengths'])
    elif typ == 'int_index':
        # NOTE(security): class name is taken from the payload
        return globals()[obj['klass']](obj['length'], obj['indices'])
    elif typ == 'ndarray':
        # np.sctypeDict is the supported spelling; np.typeDict was only a
        # deprecated alias of it and is gone from newer NumPy releases
        return unconvert(obj['data'], np.sctypeDict[obj['dtype']],
                         obj.get('compress')).reshape(obj['shape'])
    elif typ == 'np_scalar':
        if obj.get('sub_typ') == 'np_complex':
            return c2f(obj['real'], obj['imag'], obj['dtype'])
        else:
            dtype = dtype_for(obj['dtype'])
            try:
                return dtype(obj['data'])
            except (ValueError, TypeError):
                return dtype.type(obj['data'])
    elif typ == 'np_complex':
        # Build from the two parts instead of gluing them into one string:
        # real + '+' + imag + 'j' is malformed when imag is negative
        # (e.g. '1.0+-2.0j').
        return complex(float(obj['real']), float(obj['imag']))

    # unrecognised 'typ': hand the map back untouched
    return obj
def test_constructor(self):
    """Check the supported ``RangeIndex`` constructor call forms.

    Verifies the resulting ``start`` / ``stop`` / ``step`` / ``name`` and
    that each index equals the matching ``np.arange`` wrapped in an Index.
    The public ``start`` / ``stop`` / ``step`` attributes are used; the
    underscore-prefixed spellings are deprecated (GH 26581).
    """
    index = RangeIndex(5)
    expected = np.arange(5, dtype=np.int64)
    assert isinstance(index, RangeIndex)
    assert index.start == 0
    assert index.stop == 5
    assert index.step == 1
    assert index.name is None
    tm.assert_index_equal(Index(expected), index)

    index = RangeIndex(1, 5)
    expected = np.arange(1, 5, dtype=np.int64)
    assert isinstance(index, RangeIndex)
    assert index.start == 1
    tm.assert_index_equal(Index(expected), index)

    index = RangeIndex(1, 5, 2)
    expected = np.arange(1, 5, 2, dtype=np.int64)
    assert isinstance(index, RangeIndex)
    assert index.step == 2
    tm.assert_index_equal(Index(expected), index)

    # every spelling of "empty" normalises to start=0, stop=0, step=1
    for index in [
        RangeIndex(0),
        RangeIndex(start=0),
        RangeIndex(stop=0),
        RangeIndex(0, 0),
    ]:
        expected = np.empty(0, dtype=np.int64)
        assert isinstance(index, RangeIndex)
        assert index.start == 0
        assert index.stop == 0
        assert index.step == 1
        tm.assert_index_equal(Index(expected), index)

    for index in [
        RangeIndex(0, name='Foo'),
        RangeIndex(start=0, name='Foo'),
        RangeIndex(stop=0, name='Foo'),
        RangeIndex(0, 0, name='Foo'),
    ]:
        assert isinstance(index, RangeIndex)
        assert index.name == 'Foo'

    # we don't allow on a bare Index
    with pytest.raises(TypeError):
        Index(0, 1000)
def _get_index_loc(self, key, base_index=None): """ Get the location of a specific key in an index Parameters ---------- key : label The key for which to find the location if the underlying index is a DateIndex or a location if the underlying index is a RangeIndex or an Int64Index. base_index : pd.Index, optional Optionally the base index to search. If None, the model's index is searched. Returns ------- loc : int The location of the key index : pd.Index The index including the key; this is a copy of the original index unless the index had to be expanded to accomodate `key`. index_was_expanded : bool Whether or not the index was expanded to accomodate `key`. Notes ----- If `key` is past the end of of the given index, and the index is either an Int64Index or a date index, this function extends the index up to and including key, and then returns the location in the new index. """ if base_index is None: base_index = self._index index = base_index date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex)) int_index = isinstance(base_index, Int64Index) range_index = isinstance(base_index, RangeIndex) index_class = type(base_index) nobs = len(index) # Special handling for RangeIndex if range_index and isinstance(key, (int, long, np.integer)): # Negative indices (that lie in the Index) if key < 0 and -key <= nobs: key = nobs + key # Out-of-sample (note that we include key itself in the new index) elif key > nobs - 1: stop = base_index._start + (key + 1) * base_index._step index = RangeIndex(start=base_index._start, stop=stop, step=base_index._step) # Special handling for Int64Index if (not range_index and int_index and not date_index and isinstance(key, (int, long, np.integer))): # Negative indices (that lie in the Index) if key < 0 and -key <= nobs: key = nobs + key # Out-of-sample (note that we include key itself in the new index) elif key > base_index[-1]: index = Int64Index(np.arange(base_index[0], int(key + 1))) # Special handling for date indexes if 
date_index: # Integer key (i.e. already given a location) if isinstance(key, (int, long, np.integer)): # Negative indices (that lie in the Index) if key < 0 and -key < nobs: key = index[nobs + key] # Out-of-sample (note that we include key itself in the new # index) elif key > len(base_index) - 1: index = index_class(start=base_index[0], periods=int(key + 1), freq=base_index.freq) key = index[-1] else: key = index[key] # Other key types (i.e. string date or some datetime-like object) else: # Covert the key to the appropriate date-like object if index_class is PeriodIndex: date_key = Period(key, freq=base_index.freq) else: date_key = Timestamp(key) # Out-of-sample if date_key > base_index[-1]: # First create an index that may not always include `key` index = index_class(start=base_index[0], end=date_key, freq=base_index.freq) # Now make sure we include `key` if not index[-1] == date_key: index = index_class(start=base_index[0], periods=len(index) + 1, freq=base_index.freq) # Get the location if date_index: # (note that get_loc will throw a KeyError if key is invalid) loc = index.get_loc(key) elif int_index or range_index: # For Int64Index and RangeIndex, key is assumed to be the location # and not an index value (this assumption is required to support # RangeIndex) try: index[key] # We want to raise a KeyError in this case, to keep the exception # consistent across index types. # - Attempting to index with an out-of-bound location (e.g. # index[10] on an index of length 9) will raise an IndexError # (as of Pandas 0.22) # - Attemtping to index with a type that cannot be cast to integer # (e.g. 
a non-numeric string) will raise a ValueError if the # index is RangeIndex (otherwise will raise an IndexError) # (as of Pandas 0.22) except (IndexError, ValueError) as e: raise KeyError(str(e)) loc = key else: loc = index.get_loc(key) # Check if we now have a modified index index_was_expanded = index is not base_index # Return the index through the end of the loc / slice if isinstance(loc, slice): end = loc.stop else: end = loc return loc, index[:end + 1], index_was_expanded
class TestRangeIndex(Numeric):
    """RangeIndex-specific tests, built on the shared ``Numeric`` test base.

    ``create_index`` supplies the default index under test, the even numbers
    ``RangeIndex(0, 20, 2)``; the ``indices`` fixture additionally provides
    one increasing and one decreasing named RangeIndex.
    """

    _holder = RangeIndex
    _compat_props = ["shape", "ndim", "size"]

    @pytest.fixture(
        params=[
            RangeIndex(start=0, stop=20, step=2, name="foo"),
            RangeIndex(start=18, stop=-1, step=-2, name="bar"),
        ],
        ids=["index_inc", "index_dec"],
    )
    def indices(self, request):
        return request.param

    def create_index(self):
        return RangeIndex(start=0, stop=20, step=2)

    def test_can_hold_identifiers(self):
        idx = self.create_index()
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is False

    def test_too_many_names(self):
        index = self.create_index()
        with pytest.raises(ValueError, match="^Length"):
            index.names = ["roger", "harold"]

    @pytest.mark.parametrize(
        "index, start, stop, step",
        [
            (RangeIndex(5), 0, 5, 1),
            (RangeIndex(0, 5), 0, 5, 1),
            (RangeIndex(5, step=2), 0, 5, 2),
            (RangeIndex(1, 5, 2), 1, 5, 2),
        ],
    )
    def test_start_stop_step_attrs(self, index, start, stop, step):
        # GH 25710
        assert index.start == start
        assert index.stop == stop
        assert index.step == step

    @pytest.mark.parametrize("attr_name", ["_start", "_stop", "_step"])
    def test_deprecated_start_stop_step_attrs(self, attr_name):
        # GH 26581
        idx = self.create_index()
        with tm.assert_produces_warning(FutureWarning):
            getattr(idx, attr_name)

    def test_copy(self):
        i = RangeIndex(5, name="Foo")
        i_copy = i.copy()
        assert i_copy is not i
        assert i_copy.identical(i)
        assert i_copy._range == range(0, 5, 1)
        assert i_copy.name == "Foo"

    def test_repr(self):
        i = RangeIndex(5, name="Foo")
        result = repr(i)
        expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')"
        assert result == expected

        # the repr must evaluate back to an identical index
        result = eval(result)
        tm.assert_index_equal(result, i, exact=True)

        i = RangeIndex(5, 0, -1)
        result = repr(i)
        expected = "RangeIndex(start=5, stop=0, step=-1)"
        assert result == expected

        result = eval(result)
        tm.assert_index_equal(result, i, exact=True)

    def test_insert(self):
        idx = RangeIndex(5, name="Foo")
        result = idx[1:4]

        # test 0th element
        tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]))

        # GH 18295 (test missing)
        expected = Float64Index([0, np.nan, 1, 2, 3, 4])
        for na in (np.nan, pd.NaT, None):
            result = RangeIndex(5).insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_delete(self):
        idx = RangeIndex(5, name="Foo")
        expected = idx[1:].astype(int)
        result = idx.delete(0)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

        expected = idx[:-1].astype(int)
        result = idx.delete(-1)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

        with pytest.raises((IndexError, ValueError)):
            # either depending on numpy version
            result = idx.delete(len(idx))

    def test_view(self):
        i = RangeIndex(0, name="Foo")
        i_view = i.view()
        assert i_view.name == "Foo"

        i_view = i.view("i8")
        tm.assert_numpy_array_equal(i.values, i_view)

        i_view = i.view(RangeIndex)
        tm.assert_index_equal(i, i_view)

    def test_dtype(self):
        index = self.create_index()
        assert index.dtype == np.int64

    def test_cached_data(self):
        # GH 26565, GH26617
        # Calling RangeIndex._data caches an int64 array of the same length at
        # self._cached_data. This test checks whether _cached_data has been set
        idx = RangeIndex(0, 100, 10)

        assert idx._cached_data is None

        repr(idx)
        assert idx._cached_data is None

        str(idx)
        assert idx._cached_data is None

        idx.get_loc(20)
        assert idx._cached_data is None

        # bare membership tests: evaluated only for their (absent) side effect
        90 in idx
        assert idx._cached_data is None

        91 in idx
        assert idx._cached_data is None

        idx.all()
        assert idx._cached_data is None

        idx.any()
        assert idx._cached_data is None

        df = pd.DataFrame({"a": range(10)}, index=idx)

        df.loc[50]
        assert idx._cached_data is None

        with pytest.raises(KeyError, match="51"):
            df.loc[51]
        assert idx._cached_data is None

        df.loc[10:50]
        assert idx._cached_data is None

        df.iloc[5:10]
        assert idx._cached_data is None

        # actually calling idx._data
        assert isinstance(idx._data, np.ndarray)
        assert isinstance(idx._cached_data, np.ndarray)

    def test_is_monotonic(self):
        index = RangeIndex(0, 20, 2)
        assert index.is_monotonic is True
        assert index.is_monotonic_increasing is True
        assert index.is_monotonic_decreasing is False
        assert index._is_strictly_monotonic_increasing is True
        assert index._is_strictly_monotonic_decreasing is False

        index = RangeIndex(4, 0, -1)
        assert index.is_monotonic is False
        assert index._is_strictly_monotonic_increasing is False
        assert index.is_monotonic_decreasing is True
        assert index._is_strictly_monotonic_decreasing is True

        index = RangeIndex(1, 2)
        assert index.is_monotonic is True
        assert index.is_monotonic_increasing is True
        assert index.is_monotonic_decreasing is True
        assert index._is_strictly_monotonic_increasing is True
        assert index._is_strictly_monotonic_decreasing is True

        index = RangeIndex(2, 1)
        assert index.is_monotonic is True
        assert index.is_monotonic_increasing is True
        assert index.is_monotonic_decreasing is True
        assert index._is_strictly_monotonic_increasing is True
        assert index._is_strictly_monotonic_decreasing is True

        index = RangeIndex(1, 1)
        assert index.is_monotonic is True
        assert index.is_monotonic_increasing is True
        assert index.is_monotonic_decreasing is True
        assert index._is_strictly_monotonic_increasing is True
        assert index._is_strictly_monotonic_decreasing is True

    def test_equals_range(self):
        # pairs written differently that describe the same sequence of values
        equiv_pairs = [
            (RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)),
            (RangeIndex(0), RangeIndex(1, -1, 3)),
            (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)),
            (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2)),
        ]
        for left, right in equiv_pairs:
            assert left.equals(right)
            assert right.equals(left)

    def test_logical_compat(self):
        idx = self.create_index()
        assert idx.all() == idx.values.all()
        assert idx.any() == idx.values.any()

    def test_identical(self):
        index = self.create_index()
        i = Index(index.copy())
        assert i.identical(index)

        # we don't allow object dtype for RangeIndex
        if isinstance(index, RangeIndex):
            return

        same_values_different_type = Index(i, dtype=object)
        assert not i.identical(same_values_different_type)

        i = index.copy(dtype=object)
        i = i.rename("foo")
        same_values = Index(i, dtype=object)
        assert same_values.identical(index.copy(dtype=object))

        assert not i.identical(index)
        assert Index(same_values, name="foo", dtype=object).identical(i)

        assert not index.copy(dtype=object).identical(index.copy(dtype="int64"))

    def test_get_indexer(self):
        index = self.create_index()
        target = RangeIndex(10)
        indexer = index.get_indexer(target)
        expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected)

    def test_get_indexer_pad(self):
        index = self.create_index()
        target = RangeIndex(10)
        indexer = index.get_indexer(target, method="pad")
        expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected)

    def test_get_indexer_backfill(self):
        index = self.create_index()
        target = RangeIndex(10)
        indexer = index.get_indexer(target, method="backfill")
        expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected)

    def test_get_indexer_limit(self):
        # GH 28631
        idx = RangeIndex(4)
        target = RangeIndex(6)
        result = idx.get_indexer(target, method="pad", limit=1)
        expected = np.array([0, 1, 2, 3, 3, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("stop", [0, -1, -2])
    def test_get_indexer_decreasing(self, stop):
        # GH 28678
        index = RangeIndex(7, stop, -3)
        result = index.get_indexer(range(9))
        expected = np.array([-1, 2, -1, -1, 1, -1, -1, 0, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_join_outer(self):
        # join with Int64Index
        index = self.create_index()
        other = Int64Index(np.arange(25, 14, -1))

        res, lidx, ridx = index.join(other, how="outer", return_indexers=True)
        noidx_res = index.join(other, how="outer")
        tm.assert_index_equal(res, noidx_res)

        eres = Int64Index(
            [0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
        )
        elidx = np.array(
            [0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1],
            dtype=np.intp,
        )
        eridx = np.array(
            [-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
            dtype=np.intp,
        )

        assert isinstance(res, Int64Index)
        assert not isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

        # join with RangeIndex
        other = RangeIndex(25, 14, -1)

        res, lidx, ridx = index.join(other, how="outer", return_indexers=True)
        noidx_res = index.join(other, how="outer")
        tm.assert_index_equal(res, noidx_res)

        assert isinstance(res, Int64Index)
        assert not isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_inner(self):
        # Join with non-RangeIndex
        index = self.create_index()
        other = Int64Index(np.arange(25, 14, -1))

        res, lidx, ridx = index.join(other, how="inner", return_indexers=True)

        # no guarantee of sortedness, so sort for comparison purposes
        ind = res.argsort()
        res = res.take(ind)
        lidx = lidx.take(ind)
        ridx = ridx.take(ind)

        eres = Int64Index([16, 18])
        elidx = np.array([8, 9], dtype=np.intp)
        eridx = np.array([9, 7], dtype=np.intp)

        assert isinstance(res, Int64Index)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

        # Join two RangeIndex
        other = RangeIndex(25, 14, -1)

        res, lidx, ridx = index.join(other, how="inner", return_indexers=True)

        assert isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_left(self):
        # Join with Int64Index
        index = self.create_index()
        other = Int64Index(np.arange(25, 14, -1))

        res, lidx, ridx = index.join(other, how="left", return_indexers=True)
        eres = index
        eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp)

        assert isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        assert lidx is None
        tm.assert_numpy_array_equal(ridx, eridx)

        # Join with RangeIndex: same values as above, but held in a
        # RangeIndex so that this half really exercises the
        # RangeIndex/RangeIndex path instead of repeating the first half
        other = RangeIndex(25, 14, -1)

        res, lidx, ridx = index.join(other, how="left", return_indexers=True)

        assert isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        assert lidx is None
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_right(self):
        # Join with Int64Index
        index = self.create_index()
        other = Int64Index(np.arange(25, 14, -1))

        res, lidx, ridx = index.join(other, how="right", return_indexers=True)
        eres = other
        elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], dtype=np.intp)

        # check the type of the join result, not of the input
        assert isinstance(res, Int64Index)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        assert ridx is None

        # Join with RangeIndex
        other = RangeIndex(25, 14, -1)

        res, lidx, ridx = index.join(other, how="right", return_indexers=True)
        eres = other

        # check the type of the join result, not of the input
        assert isinstance(res, RangeIndex)
        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        assert ridx is None

    def test_join_non_int_index(self):
        index = self.create_index()
        other = Index([3, 6, 7, 8, 10], dtype=object)

        outer = index.join(other, how="outer")
        outer2 = other.join(index, how="outer")
        expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18])
        tm.assert_index_equal(outer, outer2)
        tm.assert_index_equal(outer, expected)

        inner = index.join(other, how="inner")
        inner2 = other.join(index, how="inner")
        expected = Index([6, 8, 10])
        tm.assert_index_equal(inner, inner2)
        tm.assert_index_equal(inner, expected)

        left = index.join(other, how="left")
        tm.assert_index_equal(left, index.astype(object))

        left2 = other.join(index, how="left")
        tm.assert_index_equal(left2, other)

        right = index.join(other, how="right")
        tm.assert_index_equal(right, other)

        right2 = other.join(index, how="right")
        tm.assert_index_equal(right2, index.astype(object))

    def test_join_non_unique(self):
        index = self.create_index()
        other = Index([4, 4, 3, 3])

        res, lidx, ridx = index.join(other, return_indexers=True)

        eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18])
        elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp)
        eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.intp)

        tm.assert_index_equal(res, eres)
        tm.assert_numpy_array_equal(lidx, elidx)
        tm.assert_numpy_array_equal(ridx, eridx)

    def test_join_self(self, join_type):
        index = self.create_index()
        joined = index.join(index, how=join_type)
        assert index is joined

    def test_nbytes(self):
        # memory savings vs int index
        i = RangeIndex(0, 1000)
        assert i.nbytes < i._int64index.nbytes / 10

        # constant memory usage
        i2 = RangeIndex(0, 10)
        assert i.nbytes == i2.nbytes

    def test_cant_or_shouldnt_cast(self):
        # can't
        with pytest.raises(TypeError):
            RangeIndex("foo", "bar", "baz")

        # shouldn't
        with pytest.raises(TypeError):
            RangeIndex("0", "1", "2")

    def test_view_index(self):
        # smoke test: only checks that the call does not raise
        index = self.create_index()
        index.view(Index)

    def test_prevent_casting(self):
        index = self.create_index()
        result = index.astype("O")
        assert result.dtype == np.object_

    def test_take_preserve_name(self):
        index = RangeIndex(1, 5, name="foo")
        taken = index.take([3, 0, 1])
        assert index.name == taken.name

    def test_take_fill_value(self):
        # GH 12631
        idx = pd.RangeIndex(1, 4, name="xxx")
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.Int64Index([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)

        # fill_value
        msg = "Unable to fill values because RangeIndex cannot contain NA"
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -1]), fill_value=True)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = pd.Int64Index([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)

        msg = "Unable to fill values because RangeIndex cannot contain NA"
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        with pytest.raises(IndexError):
            idx.take(np.array([1, -5]))

    def test_print_unicode_columns(self):
        df = pd.DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]})
        repr(df.columns)  # should not raise UnicodeDecodeError

    def test_repr_roundtrip(self):
        index = self.create_index()
        tm.assert_index_equal(eval(repr(index)), index)

    def test_slice_keep_name(self):
        idx = RangeIndex(1, 2, name="asdf")
        assert idx.name == idx[1:].name

    def test_explicit_conversions(self):
        # GH 8608
        # add/sub are overridden explicitly for Float/Int Index
        idx = RangeIndex(5)

        # float conversions
        arr = np.arange(5, dtype="int64") * 3.2
        expected = Float64Index(arr)
        fidx = idx * 3.2
        tm.assert_index_equal(fidx, expected)
        fidx = 3.2 * idx
        tm.assert_index_equal(fidx, expected)

        # interops with numpy arrays
        expected = Float64Index(arr)
        a = np.zeros(5, dtype="float64")
        result = fidx - a
        tm.assert_index_equal(result, expected)

        expected = Float64Index(-arr)
        a = np.zeros(5, dtype="float64")
        result = a - fidx
        tm.assert_index_equal(result, expected)

    def test_has_duplicates(self, indices):
        assert indices.is_unique
        assert not indices.has_duplicates

    def test_extended_gcd(self):
        index = self.create_index()
        result = index._extended_gcd(6, 10)
        # result is (gcd, s, t) with gcd == s * a + t * b
        assert result[0] == result[1] * 6 + result[2] * 10
        assert 2 == result[0]

        result = index._extended_gcd(10, 6)
        assert 2 == result[1] * 10 + result[2] * 6
        assert 2 == result[0]

    def test_min_fitting_element(self):
        result = RangeIndex(0, 20, 2)._min_fitting_element(1)
        assert 2 == result

        result = RangeIndex(1, 6)._min_fitting_element(1)
        assert 1 == result

        result = RangeIndex(18, -2, -2)._min_fitting_element(1)
        assert 2 == result

        result = RangeIndex(5, 0, -1)._min_fitting_element(1)
        assert 1 == result

        big_num = 500000000000000000000000

        result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num)
        assert big_num == result

    def test_max_fitting_element(self):
        result = RangeIndex(0, 20, 2)._max_fitting_element(17)
        assert 16 == result

        result = RangeIndex(1, 6)._max_fitting_element(4)
        assert 4 == result

        result = RangeIndex(18, -2, -2)._max_fitting_element(17)
        assert 16 == result

        result = RangeIndex(5, 0, -1)._max_fitting_element(4)
        assert 4 == result

        big_num = 500000000000000000000000

        result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num)
        assert big_num == result

    def test_pickle_compat_construction(self):
        # RangeIndex() is a valid constructor
        pass

    def test_slice_specialised(self):
        index = self.create_index()
        index.name = "foo"

        # scalar indexing
        res = index[1]
        expected = 2
        assert res == expected

        res = index[-1]
        expected = 18
        assert res == expected

        # slicing
        # slice value completion
        index_slice = index[:]
        expected = index
        tm.assert_index_equal(index_slice, expected)

        # positive slice values
        index_slice = index[7:10:2]
        expected = Index(np.array([14, 18]), name="foo")
        tm.assert_index_equal(index_slice, expected)

        # negative slice values
        index_slice = index[-1:-5:-2]
        expected = Index(np.array([18, 14]), name="foo")
        tm.assert_index_equal(index_slice, expected)

        # stop overshoot
        index_slice = index[2:100:4]
        expected = Index(np.array([4, 12]), name="foo")
        tm.assert_index_equal(index_slice, expected)

        # reverse
        index_slice = index[::-1]
        expected = Index(index.values[::-1], name="foo")
        tm.assert_index_equal(index_slice, expected)

        index_slice = index[-8::-1]
        expected = Index(np.array([4, 2, 0]), name="foo")
        tm.assert_index_equal(index_slice, expected)

        index_slice = index[-40::-1]
        expected = Index(np.array([], dtype=np.int64), name="foo")
        tm.assert_index_equal(index_slice, expected)

        index_slice = index[40::-1]
        expected = Index(index.values[40::-1], name="foo")
        tm.assert_index_equal(index_slice, expected)

        index_slice = index[10::-1]
        expected = Index(index.values[::-1], name="foo")
        tm.assert_index_equal(index_slice, expected)

    @pytest.mark.parametrize("step", set(range(-5, 6)) - {0})
    def test_len_specialised(self, step):
        # make sure that our len is the same as np.arange calc
        start, stop = (0, 5) if step > 0 else (5, 0)

        arr = np.arange(start, stop, step)
        index = RangeIndex(start, stop, step)
        assert len(index) == len(arr)

        # swapped bounds walk away from stop, so the range is empty
        index = RangeIndex(stop, start, step)
        assert len(index) == 0

    @pytest.fixture(
        params=[
            ([RI(1, 12, 5)], RI(1, 12, 5)),
            ([RI(0, 6, 4)], RI(0, 6, 4)),
            ([RI(1, 3), RI(3, 7)], RI(1, 7)),
            ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
            ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
            ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
            ([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
            ([RI(-4, -8), RI(3, -4)], RI(0, 0)),
            ([RI(-4, -8), RI(3, 5)], RI(3, 5)),
            ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
            ([RI(-2), RI(3, 5)], RI(3, 5)),
            ([RI(2), RI(2)], I64([0, 1, 0, 1])),
            ([RI(2), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
            ([RI(2), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
            ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
            ([RI(3), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
            ([RI(3), F64([-1, 3.1, 15.0])], F64([0, 1, 2, -1, 3.1, 15.0])),
            ([RI(3), OI(["a", None, 14])], OI([0, 1, 2, "a", None, 14])),
            ([RI(3, 1), OI(["a", None, 14])], OI(["a", None, 14])),
        ]
    )
    def appends(self, request):
        """Inputs and expected outputs for RangeIndex.append test"""
        return request.param

    def test_append(self, appends):
        # GH16212
        indices, expected = appends

        result = indices[0].append(indices[1:])
        tm.assert_index_equal(result, expected, exact=True)

        if len(indices) == 2:
            # Append single item rather than list
            result2 = indices[0].append(indices[1])
            tm.assert_index_equal(result2, expected, exact=True)

    def test_engineless_lookup(self):
        # GH 16685
        # Standard lookup on RangeIndex should not require the engine to be
        # created
        idx = RangeIndex(2, 10, 3)

        assert idx.get_loc(5) == 1
        tm.assert_numpy_array_equal(
            idx.get_indexer([2, 8]), ensure_platform_int(np.array([0, 2]))
        )
        with pytest.raises(KeyError, match="3"):
            idx.get_loc(3)

        assert "_engine" not in idx._cache

        # The engine is still required for lookup of a different dtype scalar:
        with pytest.raises(KeyError, match="'a'"):
            assert idx.get_loc("a") == -1

        assert "_engine" in idx._cache
def test_get_indexer_backfill(self): index = self.create_index() target = RangeIndex(10) indexer = index.get_indexer(target, method="backfill") expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) tm.assert_numpy_array_equal(indexer, expected)
def test_take_preserve_name(self): index = RangeIndex(1, 5, name='foo') taken = index.take([3, 0, 1]) assert index.name == taken.name
def test_constructor_range_object(self): result = RangeIndex(range(1, 5, 2)) expected = RangeIndex(1, 5, 2) tm.assert_index_equal(result, expected, exact=True)
class Range:
    """Exercise ``max``, ``min`` and ``get_loc`` on two large RangeIndex objects.

    ``setup`` builds one ascending and one descending index; each
    ``time_*`` method performs a single call and discards the result.
    NOTE(review): the ``setup``/``time_*`` naming looks like an asv
    benchmark class -- confirm against the benchmark configuration.
    """

    def setup(self):
        """Create the ascending and descending step-3 indexes."""
        bound = 10**7
        self.idx_inc = RangeIndex(start=0, stop=bound, step=3)
        self.idx_dec = RangeIndex(start=bound, stop=-1, step=-3)

    def time_max(self):
        """``max`` of the ascending index (its last element)."""
        self.idx_inc.max()

    def time_max_trivial(self):
        """``max`` of the descending index (its first element)."""
        self.idx_dec.max()

    def time_min(self):
        """``min`` of the descending index (its last element)."""
        self.idx_dec.min()

    def time_min_trivial(self):
        """``min`` of the ascending index (its first element)."""
        self.idx_inc.min()

    def time_get_loc_inc(self):
        """Look up a label that is present in the ascending index."""
        self.idx_inc.get_loc(900000)

    def time_get_loc_dec(self):
        """Look up a label that is present in the descending index."""
        self.idx_dec.get_loc(100000)
class TestRangeIndexSetOps:
    """Set-operation tests (intersection and union) for RangeIndex."""

    @pytest.mark.parametrize("sort", [None, False])
    def test_intersection(self, sort):
        """Intersect a RangeIndex with integer and range operands.

        Covers an integer Index, increasing and decreasing RangeIndex
        operands (in both call orders), non-overlapping operands and
        empty operands, for each ``sort`` value.
        """

        def sorted_overlap(left, right):
            # Reference answer computed with numpy on the raw values.
            return Index(np.sort(np.intersect1d(left.values, right.values)))

        evens = RangeIndex(start=0, stop=20, step=2)

        # intersect with Int64Index
        ints = Index(np.arange(1, 6))
        got = evens.intersection(ints, sort=sort)
        tm.assert_index_equal(got, sorted_overlap(evens, ints))

        got = ints.intersection(evens, sort=sort)
        tm.assert_index_equal(got, sorted_overlap(evens, ints))

        # intersect with increasing RangeIndex
        rising = RangeIndex(1, 6)
        got = evens.intersection(rising, sort=sort)
        tm.assert_index_equal(got, sorted_overlap(evens, rising))

        # intersect with decreasing RangeIndex
        falling = RangeIndex(5, 0, -1)
        got = evens.intersection(falling, sort=sort)
        want = sorted_overlap(evens, falling)
        tm.assert_index_equal(got, want)

        # reversed (GH 17296)
        got = falling.intersection(evens, sort=sort)
        tm.assert_index_equal(got, want)

        # GH 17296: intersect two decreasing RangeIndexes
        first = RangeIndex(10, -2, -2)
        second = RangeIndex(5, -4, -1)
        want = first.astype(int).intersection(second.astype(int), sort=sort)
        got = first.intersection(second, sort=sort).astype(int)
        tm.assert_index_equal(got, want)

        # reversed
        got = second.intersection(first, sort=sort).astype(int)
        tm.assert_index_equal(got, want)

        base = RangeIndex(5)

        # intersect of non-overlapping indices
        for disjoint in (RangeIndex(5, 10, 1), RangeIndex(-1, -5, -1)):
            got = base.intersection(disjoint, sort=sort)
            tm.assert_index_equal(got, RangeIndex(0, 0, 1))

        # intersection of empty indices
        empty = RangeIndex(0, 0, 1)
        got = base.intersection(empty, sort=sort)
        want = RangeIndex(0, 0, 1)
        tm.assert_index_equal(got, want)

        got = empty.intersection(base, sort=sort)
        tm.assert_index_equal(got, want)

        # intersection of non-overlapping values based on start value and gcd
        odds = RangeIndex(1, 10, 2)
        fours = RangeIndex(0, 10, 4)
        got = odds.intersection(fours, sort=sort)
        tm.assert_index_equal(got, RangeIndex(0, 0, 1))

    @pytest.mark.parametrize("sort", [False, None])
    def test_union_noncomparable(self, sort):
        """Union of a RangeIndex with an object Index of datetimes.

        In both call orders the result must equal the plain concatenation
        of the two operands.
        """
        # corner case, non-Int64Index
        evens = RangeIndex(start=0, stop=20, step=2)
        stamps = Index(
            [datetime.now() + timedelta(offset) for offset in range(4)],
            dtype=object,
        )
        for lhs, rhs in ((evens, stamps), (stamps, evens)):
            combined = lhs.union(rhs, sort=sort)
            tm.assert_index_equal(combined, Index(np.concatenate((lhs, rhs))))

    @pytest.fixture(
        params=[
            # Each entry: (idx1, idx2, expected sort=None, expected sort=False)
            (
                RangeIndex(0, 10, 1),
                RangeIndex(0, 10, 1),
                RangeIndex(0, 10, 1),
                RangeIndex(0, 10, 1),
            ),
            (
                RangeIndex(0, 10, 1),
                RangeIndex(5, 20, 1),
                RangeIndex(0, 20, 1),
                Int64Index(range(20)),
            ),
            (
                RangeIndex(0, 10, 1),
                RangeIndex(10, 20, 1),
                RangeIndex(0, 20, 1),
                Int64Index(range(20)),
            ),
            (
                RangeIndex(0, -10, -1),
                RangeIndex(0, -10, -1),
                RangeIndex(0, -10, -1),
                RangeIndex(0, -10, -1),
            ),
            (
                RangeIndex(0, -10, -1),
                RangeIndex(-10, -20, -1),
                RangeIndex(-19, 1, 1),
                Int64Index(range(0, -20, -1)),
            ),
            (
                RangeIndex(0, 10, 2),
                RangeIndex(1, 10, 2),
                RangeIndex(0, 10, 1),
                Int64Index(list(range(0, 10, 2)) + list(range(1, 10, 2))),
            ),
            (
                RangeIndex(0, 11, 2),
                RangeIndex(1, 12, 2),
                RangeIndex(0, 12, 1),
                Int64Index(list(range(0, 11, 2)) + list(range(1, 12, 2))),
            ),
            (
                RangeIndex(0, 21, 4),
                RangeIndex(-2, 24, 4),
                RangeIndex(-2, 24, 2),
                Int64Index(list(range(0, 21, 4)) + list(range(-2, 24, 4))),
            ),
            (
                RangeIndex(0, -20, -2),
                RangeIndex(-1, -21, -2),
                RangeIndex(-19, 1, 1),
                Int64Index(list(range(0, -20, -2)) + list(range(-1, -21, -2))),
            ),
            (
                RangeIndex(0, 100, 5),
                RangeIndex(0, 100, 20),
                RangeIndex(0, 100, 5),
                Int64Index(range(0, 100, 5)),
            ),
            (
                RangeIndex(0, -100, -5),
                RangeIndex(5, -100, -20),
                RangeIndex(-95, 10, 5),
                Int64Index(list(range(0, -100, -5)) + [5]),
            ),
            (
                RangeIndex(0, -11, -1),
                RangeIndex(1, -12, -4),
                RangeIndex(-11, 2, 1),
                Int64Index(list(range(0, -11, -1)) + [1, -11]),
            ),
            (RangeIndex(0), RangeIndex(0), RangeIndex(0), RangeIndex(0)),
            (
                RangeIndex(0, -10, -2),
                RangeIndex(0),
                RangeIndex(0, -10, -2),
                RangeIndex(0, -10, -2),
            ),
            (
                RangeIndex(0, 100, 2),
                RangeIndex(100, 150, 200),
                RangeIndex(0, 102, 2),
                Int64Index(range(0, 102, 2)),
            ),
            (
                RangeIndex(0, -100, -2),
                RangeIndex(-100, 50, 102),
                RangeIndex(-100, 4, 2),
                Int64Index(list(range(0, -100, -2)) + [-100, 2]),
            ),
            (
                RangeIndex(0, -100, -1),
                RangeIndex(0, -50, -3),
                RangeIndex(-99, 1, 1),
                Int64Index(list(range(0, -100, -1))),
            ),
            (
                RangeIndex(0, 1, 1),
                RangeIndex(5, 6, 10),
                RangeIndex(0, 6, 5),
                Int64Index([0, 5]),
            ),
            (
                RangeIndex(0, 10, 5),
                RangeIndex(-5, -6, -20),
                RangeIndex(-5, 10, 5),
                Int64Index([0, 5, -5]),
            ),
            (
                RangeIndex(0, 3, 1),
                RangeIndex(4, 5, 1),
                Int64Index([0, 1, 2, 4]),
                Int64Index([0, 1, 2, 4]),
            ),
            (
                RangeIndex(0, 10, 1),
                Int64Index([]),
                RangeIndex(0, 10, 1),
                RangeIndex(0, 10, 1),
            ),
            (
                RangeIndex(0),
                Int64Index([1, 5, 6]),
                Int64Index([1, 5, 6]),
                Int64Index([1, 5, 6]),
            ),
        ]
    )
    def unions(self, request):
        """Supply one union case: two operands plus sorted/unsorted expectations."""
        return request.param

    def test_union_sorted(self, unions):
        """Check ``union`` results for ``sort=None`` and ``sort=False``.

        The sorted expectation must also hold with the operands swapped
        and when the left operand is first converted via ``_int64index``.
        """
        left, right, want_sorted, want_unsorted = unions

        tm.assert_index_equal(
            left.union(right, sort=None), want_sorted, exact=True
        )
        tm.assert_index_equal(
            left.union(right, sort=False), want_unsorted, exact=True
        )

        swapped = right.union(left, sort=None)
        via_int64 = left._int64index.union(right, sort=None)
        tm.assert_index_equal(swapped, want_sorted, exact=True)
        # No exact=True here: only the values are compared for this path.
        tm.assert_index_equal(via_int64, want_sorted)