def _maybe_convert_i8(self, key): """ Maybe convert a given key to its equivalent i8 value(s). Used as a preprocessing step prior to IntervalTree queries (self._engine), which expects numeric data. Parameters ---------- key : scalar or list-like The key that should maybe be converted to i8. Returns ------- scalar or list-like The original key if no conversion occurred, int if converted scalar, Int64Index if converted list-like. """ original = key if is_list_like(key): key = ensure_index(key) if not self._needs_i8_conversion(key): return original scalar = is_scalar(key) if is_interval_dtype(key) or isinstance(key, Interval): # convert left/right and reconstruct left = self._maybe_convert_i8(key.left) right = self._maybe_convert_i8(key.right) constructor = Interval if scalar else IntervalIndex.from_arrays return constructor(left, right, closed=self.closed) if scalar: # Timestamp/Timedelta key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) if lib.is_period(key): key_i8 = key.ordinal elif isinstance(key_i8, Timestamp): key_i8 = key_i8.value elif isinstance(key_i8, (np.datetime64, np.timedelta64)): key_i8 = key_i8.view("i8") else: # DatetimeIndex/TimedeltaIndex key_dtype, key_i8 = key.dtype, Index(key.asi8) if key.hasnans: # convert NaT from its i8 value to np.nan so it's not viewed # as a valid value, maybe causing errors (e.g. is_overlapping) key_i8 = key_i8.where(~key._isnan) # ensure consistency with IntervalIndex subtype # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], # ExtensionDtype]" has no attribute "subtype" subtype = self.dtype.subtype # type: ignore[union-attr] if not is_dtype_equal(subtype, key_dtype): raise ValueError( f"Cannot index an IntervalIndex of subtype {subtype} with " f"values of dtype {key_dtype}") return key_i8
def infer_dtype_from_scalar(val, pandas_dtype=False): """ interpret the dtype from a scalar Parameters ---------- pandas_dtype : bool, default False whether to infer dtype including pandas extension types. If False, scalar belongs to pandas extension types is inferred as object """ dtype = np.object_ # a 1-element ndarray if isinstance(val, np.ndarray): msg = "invalid ndarray passed to infer_dtype_from_scalar" if val.ndim != 0: raise ValueError(msg) dtype = val.dtype val = val.item() elif isinstance(val, str): # If we create an empty array using a string to infer # the dtype, NumPy will only allocate one character per entry # so this is kind of bad. Alternately we could use np.repeat # instead of np.empty (but then you still don't want things # coming out as np.str_! dtype = np.object_ elif isinstance(val, (np.datetime64, datetime)): val = tslibs.Timestamp(val) if val is tslibs.NaT or val.tz is None: dtype = np.dtype("M8[ns]") else: if pandas_dtype: dtype = DatetimeTZDtype(unit="ns", tz=val.tz) else: # return datetimetz as object return np.object_, val val = val.value elif isinstance(val, (np.timedelta64, timedelta)): val = tslibs.Timedelta(val).value dtype = np.dtype("m8[ns]") elif is_bool(val): dtype = np.bool_ elif is_integer(val): if isinstance(val, np.integer): dtype = type(val) else: dtype = np.int64 elif is_float(val): if isinstance(val, np.floating): dtype = type(val) else: dtype = np.float64 elif is_complex(val): dtype = np.complex_ elif pandas_dtype: if lib.is_period(val): dtype = PeriodDtype(freq=val.freq) val = val.ordinal return dtype, val
def test_is_period(self): assert lib.is_period(pd.Period('2011-01', freq='M')) assert not lib.is_period(pd.PeriodIndex(['2011-01'], freq='M')) assert not lib.is_period(pd.Timestamp('2011-01')) assert not lib.is_period(1) assert not lib.is_period(np.nan)
def test_is_period(self): assert lib.is_period(pd.Period('2011-01', freq='M')) assert not lib.is_period(pd.PeriodIndex(['2011-01'], freq='M')) assert not lib.is_period(pd.Timestamp('2011-01')) assert not lib.is_period(1) assert not lib.is_period(np.nan)
def infer_dtype_from_scalar(val, pandas_dtype=False): """ interpret the dtype from a scalar Parameters ---------- pandas_dtype : bool, default False whether to infer dtype including pandas extension types. If False, scalar belongs to pandas extension types is inferred as object """ dtype = np.object_ # a 1-element ndarray if isinstance(val, np.ndarray): msg = "invalid ndarray passed to _infer_dtype_from_scalar" if val.ndim != 0: raise ValueError(msg) dtype = val.dtype val = val.item() elif isinstance(val, string_types): # If we create an empty array using a string to infer # the dtype, NumPy will only allocate one character per entry # so this is kind of bad. Alternately we could use np.repeat # instead of np.empty (but then you still don't want things # coming out as np.str_! dtype = np.object_ elif isinstance(val, (np.datetime64, datetime)): val = tslib.Timestamp(val) if val is tslib.NaT or val.tz is None: dtype = np.dtype('M8[ns]') else: if pandas_dtype: dtype = DatetimeTZDtype(unit='ns', tz=val.tz) else: # return datetimetz as object return np.object_, val val = val.value elif isinstance(val, (np.timedelta64, timedelta)): val = tslib.Timedelta(val).value dtype = np.dtype('m8[ns]') elif is_bool(val): dtype = np.bool_ elif is_integer(val): if isinstance(val, np.integer): dtype = type(val) else: dtype = np.int64 elif is_float(val): if isinstance(val, np.floating): dtype = type(val) else: dtype = np.float64 elif is_complex(val): dtype = np.complex_ elif pandas_dtype: if lib.is_period(val): dtype = PeriodDtype(freq=val.freq) val = val.ordinal return dtype, val
def test_is_period(self): assert lib.is_period(pd.Period("2011-01", freq="M")) assert not lib.is_period(pd.PeriodIndex(["2011-01"], freq="M")) assert not lib.is_period(pd.Timestamp("2011-01")) assert not lib.is_period(1) assert not lib.is_period(np.nan)
def test_is_period(self): self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) self.assertFalse(lib.is_period(1)) self.assertFalse(lib.is_period(np.nan))
def test_is_period(self): self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) self.assertFalse(lib.is_period(1)) self.assertFalse(lib.is_period(np.nan))