def test_cached_data(self):
    """Check that ``_cached_data`` stays unset until ``_data`` is read.

    GH 26565: reading ``RangeIndex._data`` caches an int64 array of the
    same length as the index on ``self._cached_data``.  None of the cheaper
    operations exercised below may trigger that.
    """
    idx = RangeIndex(0, 100, 10)
    assert idx._cached_data is None

    # Operations performed directly on the index.
    for index_op in (repr, str, lambda rng: rng.get_loc(20)):
        index_op(idx)
        assert idx._cached_data is None

    df = pd.DataFrame({'a': range(10)}, index=idx)

    def missing_label(frame):
        # 51 is not one of the index labels, so the lookup has to raise.
        with pytest.raises(KeyError):
            frame.loc[51]

    # Label-based and positional access through a frame built on the index.
    frame_ops = (
        lambda frame: frame.loc[50],
        missing_label,
        lambda frame: frame.loc[10:50],
        lambda frame: frame.iloc[5:10],
    )
    for frame_op in frame_ops:
        frame_op(df)
        assert idx._cached_data is None

    # Reading ``_data`` is the step that populates the cached array.
    data = idx._data
    assert isinstance(data, np.ndarray)
    assert isinstance(idx._cached_data, np.ndarray)
def test_cache(self):
    """Check that ``_cache`` stays empty until ``_data`` is read.

    GH 26565, GH26617, GH35432: reading ``RangeIndex._data`` creates an
    int64 array of the same length as the index and stores it in ``_cache``
    under the ``"_data"`` key.
    """
    idx = RangeIndex(0, 100, 10)
    assert idx._cache == {}

    def iterate(rng):
        for _ in rng:
            pass

    # Operations performed directly on the index.
    index_ops = (
        repr,
        str,
        lambda rng: rng.get_loc(20),
        lambda rng: 90 in rng,  # True
        lambda rng: 91 in rng,  # False
        lambda rng: rng.all(),
        lambda rng: rng.any(),
        iterate,
    )
    for index_op in index_ops:
        index_op(idx)
        assert idx._cache == {}

    df = pd.DataFrame({"a": range(10)}, index=idx)

    def missing_label(frame):
        # 51 is not one of the index labels, so the lookup has to raise.
        with pytest.raises(KeyError, match="51"):
            frame.loc[51]

    # Label-based and positional access through a frame built on the index.
    frame_ops = (
        lambda frame: frame.loc[50],
        missing_label,
        lambda frame: frame.loc[10:50],
        lambda frame: frame.iloc[5:10],
    )
    for frame_op in frame_ops:
        frame_op(df)
        assert idx._cache == {}

    # idx._cache should contain a _data entry after a call to idx._data
    idx._data
    assert isinstance(idx._data, np.ndarray)
    first_read = idx._data
    second_read = idx._data
    assert first_read is second_read  # the cached value is reused
    assert len(idx._cache) == 4
    expected = np.arange(0, 100, 10, dtype="int64")
    tm.assert_numpy_array_equal(idx._cache["_data"], expected)
def test_cached_data(self):
    """Check that ``_cached_data`` stays unset until ``_data`` is read.

    GH 26565, GH26617: reading ``RangeIndex._data`` caches an int64 array
    of the same length as the index on ``self._cached_data``.
    """
    idx = RangeIndex(0, 100, 10)
    assert idx._cached_data is None

    def contains_with_warning(value):
        # ``contains`` is expected to emit a FutureWarning when called.
        def run(rng):
            with tm.assert_produces_warning(FutureWarning):
                rng.contains(value)
        return run

    # Operations performed directly on the index.
    index_ops = (
        repr,
        str,
        lambda rng: rng.get_loc(20),
        lambda rng: 90 in rng,
        lambda rng: 91 in rng,
        contains_with_warning(90),
        contains_with_warning(91),
        lambda rng: rng.all(),
        lambda rng: rng.any(),
    )
    for index_op in index_ops:
        index_op(idx)
        assert idx._cached_data is None

    df = pd.DataFrame({'a': range(10)}, index=idx)

    def missing_label(frame):
        # 51 is not one of the index labels, so the lookup has to raise.
        with pytest.raises(KeyError):
            frame.loc[51]

    # Label-based and positional access through a frame built on the index.
    frame_ops = (
        lambda frame: frame.loc[50],
        missing_label,
        lambda frame: frame.loc[10:50],
        lambda frame: frame.iloc[5:10],
    )
    for frame_op in frame_ops:
        frame_op(df)
        assert idx._cached_data is None

    # Reading ``_data`` is the step that populates the cached array.
    data = idx._data
    assert isinstance(data, np.ndarray)
    assert isinstance(idx._cached_data, np.ndarray)
def test_cached_data(self):
    """Check that ``_cached_data`` stays unset until ``_data`` is read.

    GH 26565, GH26617: reading ``RangeIndex._data`` caches an int64 array
    of the same length as the index on ``self._cached_data``.
    """
    idx = RangeIndex(0, 100, 10)
    assert idx._cached_data is None

    # Operations performed directly on the index.
    index_ops = (
        repr,
        str,
        lambda rng: rng.get_loc(20),
        lambda rng: 90 in rng,
        lambda rng: 91 in rng,
        lambda rng: rng.contains(90),
        lambda rng: rng.contains(91),
        lambda rng: rng.all(),
        lambda rng: rng.any(),
    )
    for index_op in index_ops:
        index_op(idx)
        assert idx._cached_data is None

    df = pd.DataFrame({'a': range(10)}, index=idx)

    def missing_label(frame):
        # 51 is not one of the index labels, so the lookup has to raise.
        with pytest.raises(KeyError):
            frame.loc[51]

    # Label-based and positional access through a frame built on the index.
    frame_ops = (
        lambda frame: frame.loc[50],
        missing_label,
        lambda frame: frame.loc[10:50],
        lambda frame: frame.iloc[5:10],
    )
    for frame_op in frame_ops:
        frame_op(df)
        assert idx._cached_data is None

    # Reading ``_data`` is the step that populates the cached array.
    data = idx._data
    assert isinstance(data, np.ndarray)
    assert isinstance(idx._cached_data, np.ndarray)
def test_cached_data(self):
    """Check that ``_cached_data`` stays unset until ``_data`` is read.

    GH 26565, GH26617: reading ``RangeIndex._data`` caches an int64 array
    of the same length as the index on ``self._cached_data``.

    Two steps (``idx.format()`` and ``str(df)``) are followed by a check
    that ``idx._cache`` is an empty dict instead of a ``_cached_data``
    check.
    NOTE(review): confirm whether mixing the two attributes is intended.
    """
    idx = RangeIndex(0, 100, 10)

    def data_not_cached():
        assert idx._cached_data is None

    def cache_is_empty():
        assert idx._cache == {}

    data_not_cached()

    # Operations performed directly on the index, each paired with the
    # check that has to hold right after it.
    index_steps = (
        (repr, data_not_cached),
        (str, data_not_cached),
        (lambda rng: rng.get_loc(20), data_not_cached),
        (lambda rng: 90 in rng, data_not_cached),
        (lambda rng: 91 in rng, data_not_cached),
        (lambda rng: rng.all(), data_not_cached),
        (lambda rng: rng.any(), data_not_cached),
        (lambda rng: rng.format(), cache_is_empty),
    )
    for index_op, check in index_steps:
        index_op(idx)
        check()

    df = pd.DataFrame({"a": range(10)}, index=idx)

    def missing_label(frame):
        # 51 is not one of the index labels, so the lookup has to raise.
        with pytest.raises(KeyError, match="51"):
            frame.loc[51]

    # Rendering plus label-based and positional access through a frame
    # built on the index.
    frame_steps = (
        (str, cache_is_empty),
        (lambda frame: frame.loc[50], data_not_cached),
        (missing_label, data_not_cached),
        (lambda frame: frame.loc[10:50], data_not_cached),
        (lambda frame: frame.iloc[5:10], data_not_cached),
    )
    for frame_op, check in frame_steps:
        frame_op(df)
        check()

    # Reading ``_data`` is the step that populates the cached array.
    data = idx._data
    assert isinstance(data, np.ndarray)
    assert isinstance(idx._cached_data, np.ndarray)
def test_engineless_lookup(self):
    """Standard lookups on a RangeIndex must not create the engine.

    GH 16685: ``get_loc`` / ``get_indexer`` with integer keys are checked
    to leave ``"_engine"`` out of ``idx._cache``.  A lookup with a scalar of
    a different dtype is then checked to put ``"_engine"`` into the cache.
    """
    idx = RangeIndex(2, 10, 3)

    assert idx.get_loc(5) == 1
    tm.assert_numpy_array_equal(idx.get_indexer([2, 8]),
                                ensure_platform_int(np.array([0, 2])))
    with pytest.raises(KeyError):
        idx.get_loc(3)

    assert '_engine' not in idx._cache

    # The engine is still required for lookup of a different dtype scalar.
    # The call is expected to raise, so its return value is not compared
    # with anything: a comparison inside this block could never execute.
    with pytest.raises(KeyError):
        idx.get_loc('a')

    assert '_engine' in idx._cache
def test_engineless_lookup(self):
    """No lookup exercised here may create the RangeIndex engine.

    GH 16685: integer lookups are checked to leave ``"_engine"`` out of
    ``idx._cache``, and so is a lookup with a scalar of a different type.
    """
    idx = RangeIndex(2, 10, 3)

    position = idx.get_loc(5)
    assert position == 1

    positions = idx.get_indexer([2, 8])
    expected_positions = ensure_platform_int(np.array([0, 2]))
    tm.assert_numpy_array_equal(positions, expected_positions)

    # 3 is not one of the index values.
    with pytest.raises(KeyError, match="3"):
        idx.get_loc(3)
    engine_created = "_engine" in idx._cache
    assert not engine_created

    # Different types of scalars can be excluded immediately, no need to
    # use the _engine
    with pytest.raises(KeyError, match="'a'"):
        idx.get_loc("a")
    engine_created = "_engine" in idx._cache
    assert not engine_created
class Range:
    """Timing cases for ``max``, ``min`` and ``get_loc`` on RangeIndex."""

    def setup(self):
        """Build one increasing and one decreasing RangeIndex."""
        upper = 10**7
        self.idx_inc = RangeIndex(start=0, stop=upper, step=3)
        self.idx_dec = RangeIndex(start=upper, stop=-1, step=-3)

    def time_max(self):
        """Time ``max`` on the increasing index."""
        self.idx_inc.max()

    def time_max_trivial(self):
        """Time ``max`` on the decreasing index."""
        self.idx_dec.max()

    def time_min(self):
        """Time ``min`` on the decreasing index."""
        self.idx_dec.min()

    def time_min_trivial(self):
        """Time ``min`` on the increasing index."""
        self.idx_inc.min()

    def time_get_loc_inc(self):
        """Time ``get_loc`` on the increasing index."""
        self.idx_inc.get_loc(900000)

    def time_get_loc_dec(self):
        """Time ``get_loc`` on the decreasing index."""
        self.idx_dec.get_loc(100000)
def _get_index_loc(self, key, base_index=None):
    """
    Get the location of a specific key in an index

    Parameters
    ----------
    key : label
        The key for which to find the location if the underlying index is
        a DateIndex or a location if the underlying index is a RangeIndex
        or an Int64Index.
    base_index : pd.Index, optional
        Optionally the base index to search. If None, the model's index is
        searched.

    Returns
    -------
    loc : int or slice
        The location of the key
    index : pd.Index
        The index through the end of `loc`; it is taken from the original
        index unless the index had to be expanded to accommodate `key`.
    index_was_expanded : bool
        Whether or not the index was expanded to accommodate `key`.

    Raises
    ------
    KeyError
        If, for an Int64Index or RangeIndex, indexing with `key` raises an
        IndexError or a ValueError. The original exception is chained as
        the cause.

    Notes
    -----
    If `key` is past the end of the given index, and the index is a
    RangeIndex, an Int64Index or a date index, this function extends the
    index up to and including key, and then returns the location in the new
    index.
    """
    if base_index is None:
        base_index = self._index

    index = base_index
    date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
    int_index = isinstance(base_index, Int64Index)
    range_index = isinstance(base_index, RangeIndex)
    index_class = type(base_index)
    nobs = len(index)

    # Special handling for RangeIndex
    if range_index and isinstance(key, (int, np.integer)):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > nobs - 1:
            # See gh5835. Remove the except after pandas 0.25 required.
            try:
                base_index_start = base_index.start
                base_index_step = base_index.step
            except AttributeError:
                base_index_start = base_index._start
                base_index_step = base_index._step
            stop = base_index_start + (key + 1) * base_index_step
            index = RangeIndex(start=base_index_start,
                               stop=stop,
                               step=base_index_step)

    # Special handling for Int64Index
    if (not range_index and int_index and not date_index and
            isinstance(key, (int, np.integer))):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > base_index[-1]:
            index = Int64Index(np.arange(base_index[0], int(key + 1)))

    # Special handling for date indexes
    if date_index:
        # Use index type to choose creation function
        if index_class is DatetimeIndex:
            index_fn = date_range
        else:
            index_fn = period_range
        # Integer key (i.e. already given a location)
        if isinstance(key, (int, np.integer)):
            # Negative indices (that lie in the Index)
            if key < 0 and -key < nobs:
                key = index[nobs + key]
            # Out-of-sample (note that we include key itself in the new
            # index)
            elif key > len(base_index) - 1:
                index = index_fn(start=base_index[0],
                                 periods=int(key + 1),
                                 freq=base_index.freq)
                key = index[-1]
            else:
                key = index[key]
        # Other key types (i.e. string date or some datetime-like object)
        else:
            # Convert the key to the appropriate date-like object
            if index_class is PeriodIndex:
                date_key = Period(key, freq=base_index.freq)
            else:
                # No ``freq`` is passed: the keyword is deprecated on
                # Timestamp, and `date_key` is only compared with index
                # values and used as a range end point below.
                date_key = Timestamp(key)

            # Out-of-sample
            if date_key > base_index[-1]:
                # First create an index that may not always include `key`
                index = index_fn(start=base_index[0], end=date_key,
                                 freq=base_index.freq)

                # Now make sure we include `key`
                if not index[-1] == date_key:
                    index = index_fn(start=base_index[0],
                                     periods=len(index) + 1,
                                     freq=base_index.freq)

                # To avoid possible inconsistencies with `get_loc` below,
                # set the key directly equal to the last index location
                key = index[-1]

    # Get the location
    if date_index:
        # (note that get_loc will throw a KeyError if key is invalid)
        loc = index.get_loc(key)
    elif int_index or range_index:
        # For Int64Index and RangeIndex, key is assumed to be the location
        # and not an index value (this assumption is required to support
        # RangeIndex)
        try:
            index[key]
        # We want to raise a KeyError in this case, to keep the exception
        # consistent across index types.
        # - Attempting to index with an out-of-bound location (e.g.
        #   index[10] on an index of length 9) will raise an IndexError
        #   (as of Pandas 0.22)
        # - Attempting to index with a type that cannot be cast to integer
        #   (e.g. a non-numeric string) will raise a ValueError if the
        #   index is RangeIndex (otherwise will raise an IndexError)
        #   (as of Pandas 0.22)
        except (IndexError, ValueError) as e:
            raise KeyError(str(e)) from e
        loc = key
    else:
        loc = index.get_loc(key)

    # Check if we now have a modified index
    index_was_expanded = index is not base_index

    # Return the index through the end of the loc / slice
    if isinstance(loc, slice):
        # A slice stop is exclusive, so the last covered position is stop - 1
        end = loc.stop - 1
    else:
        end = loc

    return loc, index[:end + 1], index_was_expanded
def _get_index_loc(self, key, base_index=None):
    """
    Get the location of a specific key in an index

    Parameters
    ----------
    key : label
        The key for which to find the location if the underlying index is
        a DateIndex or a location if the underlying index is a RangeIndex
        or an Int64Index.
    base_index : pd.Index, optional
        Optionally the base index to search. If None, the model's index is
        searched.

    Returns
    -------
    loc : int or slice
        The location of the key
    index : pd.Index
        The index through the end of `loc`; it is taken from the original
        index unless the index had to be expanded to accommodate `key`.
    index_was_expanded : bool
        Whether or not the index was expanded to accommodate `key`.

    Raises
    ------
    KeyError
        If, for an Int64Index or RangeIndex, indexing with `key` raises an
        IndexError or a ValueError.

    Notes
    -----
    If `key` is past the end of the given index, and the index is a
    RangeIndex, an Int64Index or a date index, this function extends the
    index up to and including key, and then returns the location in the new
    index.
    """
    if base_index is None:
        base_index = self._index

    index = base_index
    date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
    int_index = isinstance(base_index, Int64Index)
    range_index = isinstance(base_index, RangeIndex)
    index_class = type(base_index)
    nobs = len(index)

    # Special handling for RangeIndex
    if range_index and isinstance(key, (int, long, np.integer)):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > nobs - 1:
            # Prefer the public attributes; fall back to the private ones
            # when the public ones are not available.
            try:
                base_index_start = base_index.start
                base_index_step = base_index.step
            except AttributeError:
                base_index_start = base_index._start
                base_index_step = base_index._step
            stop = base_index_start + (key + 1) * base_index_step
            index = RangeIndex(start=base_index_start,
                               stop=stop,
                               step=base_index_step)

    # Special handling for Int64Index
    if (not range_index and int_index and not date_index and
            isinstance(key, (int, long, np.integer))):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > base_index[-1]:
            index = Int64Index(np.arange(base_index[0], int(key + 1)))

    # Special handling for date indexes
    if date_index:
        # Imported here so this function does not depend on the module's
        # import block providing the two range factories.
        from pandas import date_range, period_range

        # Build expanded indexes with the range factories rather than the
        # index class constructors, whose start/periods keywords are
        # deprecated.
        if index_class is DatetimeIndex:
            index_fn = date_range
        else:
            index_fn = period_range

        # Integer key (i.e. already given a location)
        if isinstance(key, (int, long, np.integer)):
            # Negative indices (that lie in the Index)
            if key < 0 and -key < nobs:
                key = index[nobs + key]
            # Out-of-sample (note that we include key itself in the new
            # index)
            elif key > len(base_index) - 1:
                index = index_fn(start=base_index[0],
                                 periods=int(key + 1),
                                 freq=base_index.freq)
                key = index[-1]
            else:
                key = index[key]
        # Other key types (i.e. string date or some datetime-like object)
        else:
            # Convert the key to the appropriate date-like object
            if index_class is PeriodIndex:
                date_key = Period(key, freq=base_index.freq)
            else:
                date_key = Timestamp(key)

            # Out-of-sample
            if date_key > base_index[-1]:
                # First create an index that may not always include `key`
                index = index_fn(start=base_index[0], end=date_key,
                                 freq=base_index.freq)

                # Now make sure we include `key`
                if not index[-1] == date_key:
                    index = index_fn(start=base_index[0],
                                     periods=len(index) + 1,
                                     freq=base_index.freq)

    # Get the location
    if date_index:
        # (note that get_loc will throw a KeyError if key is invalid)
        loc = index.get_loc(key)
    elif int_index or range_index:
        # For Int64Index and RangeIndex, key is assumed to be the location
        # and not an index value (this assumption is required to support
        # RangeIndex)
        try:
            index[key]
        # We want to raise a KeyError in this case, to keep the exception
        # consistent across index types.
        # - Attempting to index with an out-of-bound location (e.g.
        #   index[10] on an index of length 9) will raise an IndexError
        #   (as of Pandas 0.22)
        # - Attempting to index with a type that cannot be cast to integer
        #   (e.g. a non-numeric string) will raise a ValueError if the
        #   index is RangeIndex (otherwise will raise an IndexError)
        #   (as of Pandas 0.22)
        except (IndexError, ValueError) as e:
            raise KeyError(str(e))
        loc = key
    else:
        loc = index.get_loc(key)

    # Check if we now have a modified index
    index_was_expanded = index is not base_index

    # Return the index through the end of the loc / slice
    if isinstance(loc, slice):
        # A slice stop is exclusive, so the last covered position is
        # stop - 1; using stop itself would return one element too many.
        end = loc.stop - 1
    else:
        end = loc

    return loc, index[:end + 1], index_was_expanded
def _get_index_loc(self, key, base_index=None):
    """
    Get the location of a specific key in an index

    Parameters
    ----------
    key : label
        The key for which to find the location if the underlying index is
        a date index, or a location if the underlying index is a
        RangeIndex or an Int64Index.
    base_index : pd.Index, optional
        Optionally the base index to search. If None, `self._index` is
        searched.

    Returns
    -------
    loc : int or slice
        The location of the key
    index : pd.Index
        The index through the end of `loc`; it is taken from the original
        index unless the index had to be expanded to accommodate `key`.
    index_was_expanded : bool
        Whether or not the index was expanded to accommodate `key`.

    Raises
    ------
    KeyError
        If, for an Int64Index or RangeIndex, indexing with `key` raises an
        IndexError or a ValueError.

    Notes
    -----
    If `key` is past the end of the given index, and the index is a
    RangeIndex, an Int64Index or a date index, this function extends the
    index up to and including key, and then returns the location in the new
    index.
    """
    if base_index is None:
        base_index = self._index

    index = base_index
    date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
    int_index = isinstance(base_index, Int64Index)
    range_index = isinstance(base_index, RangeIndex)
    index_class = type(base_index)
    nobs = len(index)

    # Special handling for RangeIndex
    if range_index and isinstance(key, (int, long, np.integer)):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > nobs - 1:
            # See gh5835. Remove the except after pandas 0.25 required.
            try:
                base_index_start = base_index.start
                base_index_step = base_index.step
            except AttributeError:
                base_index_start = base_index._start
                base_index_step = base_index._step
            stop = base_index_start + (key + 1) * base_index_step
            index = RangeIndex(start=base_index_start,
                               stop=stop,
                               step=base_index_step)

    # Special handling for Int64Index
    if (not range_index and int_index and not date_index and
            isinstance(key, (int, long, np.integer))):
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > base_index[-1]:
            index = Int64Index(np.arange(base_index[0], int(key + 1)))

    # Special handling for date indexes
    if date_index:
        # Use index type to choose creation function
        if index_class is DatetimeIndex:
            index_fn = date_range
        else:
            index_fn = period_range
        # Integer key (i.e. already given a location)
        if isinstance(key, (int, long, np.integer)):
            # Negative indices (that lie in the Index)
            if key < 0 and -key < nobs:
                key = index[nobs + key]
            # Out-of-sample (note that we include key itself in the new
            # index)
            elif key > len(base_index) - 1:
                index = index_fn(start=base_index[0],
                                 periods=int(key + 1),
                                 freq=base_index.freq)
                key = index[-1]
            else:
                key = index[key]
        # Other key types (i.e. string date or some datetime-like object)
        else:
            # Convert the key to the appropriate date-like object
            if index_class is PeriodIndex:
                date_key = Period(key, freq=base_index.freq)
            else:
                date_key = Timestamp(key)

            # Out-of-sample
            if date_key > base_index[-1]:
                # First create an index that may not always include `key`
                index = index_fn(start=base_index[0], end=date_key,
                                 freq=base_index.freq)

                # Now make sure we include `key`
                if not index[-1] == date_key:
                    index = index_fn(start=base_index[0],
                                     periods=len(index) + 1,
                                     freq=base_index.freq)

    # Get the location
    if date_index:
        # (note that get_loc will throw a KeyError if key is invalid)
        loc = index.get_loc(key)
    elif int_index or range_index:
        # For Int64Index and RangeIndex, key is assumed to be the location
        # and not an index value (this assumption is required to support
        # RangeIndex)
        try:
            index[key]
        # We want to raise a KeyError in this case, to keep the exception
        # consistent across index types.
        # - Attempting to index with an out-of-bound location (e.g.
        #   index[10] on an index of length 9) will raise an IndexError
        #   (as of Pandas 0.22)
        # - Attempting to index with a type that cannot be cast to integer
        #   (e.g. a non-numeric string) will raise a ValueError if the
        #   index is RangeIndex (otherwise will raise an IndexError)
        #   (as of Pandas 0.22)
        except (IndexError, ValueError) as e:
            raise KeyError(str(e))
        loc = key
    else:
        loc = index.get_loc(key)

    # Check if we now have a modified index
    index_was_expanded = index is not base_index

    # Return the index through the end of the loc / slice
    if isinstance(loc, slice):
        # A slice stop is exclusive, so the last covered position is
        # stop - 1; using stop itself would return one element too many.
        end = loc.stop - 1
    else:
        end = loc

    return loc, index[:end + 1], index_was_expanded