def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ Create index with target's values (move/add/delete values as necessary) Returns ------- new_index : pd.Index Resulting index indexer : np.ndarray or None Indices of output values in original index """ if method is not None: raise NotImplementedError("argument method is not implemented for " "CategoricalIndex.reindex") if level is not None: raise NotImplementedError("argument level is not implemented for " "CategoricalIndex.reindex") if limit is not None: raise NotImplementedError("argument limit is not implemented for " "CategoricalIndex.reindex") target = ibase._ensure_index(target) if not is_categorical_dtype(target) and not target.is_unique: raise ValueError("cannot reindex with a non-unique indexer") indexer, missing = self.get_indexer_non_unique(np.array(target)) new_target = self.take(indexer) # filling in missing if needed if len(missing): cats = self.categories.get_indexer(target) if (cats == -1).any(): # coerce to a regular index here! result = Index(np.array(self), name=self.name) new_target, indexer, _ = result._reindex_non_unique( np.array(target)) else: codes = new_target.codes.copy() codes[indexer == -1] = cats[missing] new_target = self._create_from_codes(codes) # we always want to return an Index type here # to be consistent with .reindex for other index types (e.g. they don't # coerce based on the actual values, only on the dtype) # unless we had an inital Categorical to begin with # in which case we are going to conform to the passed Categorical new_target = np.asarray(new_target) if is_categorical_dtype(target): new_target = target._shallow_copy(new_target, name=self.name) else: new_target = Index(new_target, name=self.name) return new_target, indexer
def index_method(self, *args, **kwargs): result = method(self, *args, **kwargs) # Index.__new__ will choose the appropriate subclass to return result = Index(result) if pin_name: result.name = self.name return result
def __sub__(self, other): from pandas import Index other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented # scalar others elif other is NaT: result = self._sub_nat() elif isinstance(other, (Tick, timedelta, np.timedelta64)): result = self._add_delta(-other) elif isinstance(other, DateOffset): # specifically _not_ a Tick result = self._add_offset(-other) elif isinstance(other, (datetime, np.datetime64)): result = self._sub_datelike(other) elif is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these result = self.shift(-other) elif isinstance(other, Period): result = self._sub_period(other) # array-like others elif is_timedelta64_dtype(other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(-other) elif is_offsetlike(other): # Array/Index of DateOffset objects result = self._addsub_offset_array(other, operator.sub) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] result = self._sub_datelike(other) elif isinstance(other, Index): raise TypeError("cannot subtract {cls} and {typ}" .format(cls=type(self).__name__, typ=type(other).__name__)) elif is_integer_dtype(other) and self.freq is None: # GH#19123 raise NullFrequencyError("Cannot shift with no freq") elif is_float_dtype(other): # Explicitly catch invalid dtypes raise TypeError("cannot subtract {dtype}-dtype from {cls}" .format(dtype=other.dtype, cls=type(self).__name__)) else: # pragma: no cover return NotImplemented if result is NotImplemented: return NotImplemented elif not isinstance(result, Index): # Index.__new__ will choose appropriate subclass for dtype result = Index(result) res_name = ops.get_op_result_name(self, other) result.name = res_name return result
def intersection(self, other, sort=False): self._validate_sort_keyword(sort) self._assert_can_do_setop(other) if self.equals(other): return self._get_reconciled_name_object(other) if len(self) == 0: return self.copy() if len(other) == 0: return other.copy() if not isinstance(other, type(self)): result = Index.intersection(self, other, sort=sort) if isinstance(result, type(self)): if result.freq is None: result.freq = to_offset(result.inferred_freq) return result elif (other.freq is None or self.freq is None or other.freq != self.freq or not other.freq.isAnchored() or (not self.is_monotonic or not other.is_monotonic)): result = Index.intersection(self, other, sort=sort) # Invalidate the freq of `result`, which may not be correct at # this point, depending on the values. result.freq = None if hasattr(self, 'tz'): result = self._shallow_copy(result._values, name=result.name, tz=result.tz, freq=None) else: result = self._shallow_copy(result._values, name=result.name, freq=None) if result.freq is None: result.freq = to_offset(result.inferred_freq) return result # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other else: left, right = other, self # after sorting, the intersection always starts with the right index # and ends with the index of which the last elements is smallest end = min(left[-1], right[-1]) start = right[0] if end < start: return type(self)(data=[]) else: lslice = slice(*left.slice_locs(start, end)) left_chunk = left.values[lslice] return self._shallow_copy(left_chunk)
def wrap_arithmetic_op(self, other, result): if result is NotImplemented: return NotImplemented if not isinstance(result, Index): # Index.__new__ will choose appropriate subclass for dtype result = Index(result) res_name = ops.get_op_result_name(self, other) result.name = res_name return result
def __add__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented # scalar others elif other is NaT: result = self._add_nat() elif isinstance(other, (Tick, timedelta, np.timedelta64)): result = self._add_delta(other) elif isinstance(other, DateOffset): # specifically _not_ a Tick result = self._add_offset(other) elif isinstance(other, (datetime, np.datetime64)): result = self._add_datelike(other) elif is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these result = self.shift(other) # array-like others elif is_timedelta64_dtype(other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(other) elif is_offsetlike(other): # Array/Index of DateOffset objects result = self._addsub_offset_array(other, operator.add) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] return self._add_datelike(other) elif is_integer_dtype(other): result = self._addsub_int_array(other, operator.add) elif is_float_dtype(other) or is_period_dtype(other): # Explicitly catch invalid dtypes raise TypeError("cannot add {dtype}-dtype to {cls}" .format(dtype=other.dtype, cls=type(self).__name__)) elif is_categorical_dtype(other): # Categorical op will raise; defer explicitly return NotImplemented else: # pragma: no cover return NotImplemented if result is NotImplemented: return NotImplemented elif not isinstance(result, Index): # Index.__new__ will choose appropriate subclass for dtype result = Index(result) res_name = ops.get_op_result_name(self, other) result.name = res_name return result
def union(self, other): """ Specialized union for TimedeltaIndex objects. If combine overlapping ranges with the same DateOffset, will be much faster than Index.union Parameters ---------- other : TimedeltaIndex or array-like Returns ------- y : Index or TimedeltaIndex """ self._assert_can_do_setop(other) if len(other) == 0 or self.equals(other) or len(self) == 0: return super(TimedeltaIndex, self).union(other) if not isinstance(other, TimedeltaIndex): try: other = TimedeltaIndex(other) except (TypeError, ValueError): pass this, other = self, other if this._can_fast_union(other): return this._fast_union(other) else: result = Index.union(this, other) if isinstance(result, TimedeltaIndex): if result.freq is None: result.freq = to_offset(result.inferred_freq) return result
def _union_indexes(indexes, sort=True): if len(indexes) == 0: raise AssertionError('Must have at least 1 Index to union') if len(indexes) == 1: result = indexes[0] if isinstance(result, list): result = Index(sorted(result)) return result indexes, kind = _sanitize_and_check(indexes) def _unique_indices(inds): def conv(i): if isinstance(i, Index): i = i.tolist() return i return Index( lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort)) if kind == 'special': result = indexes[0] if hasattr(result, 'union_many'): return result.union_many(indexes[1:]) else: for other in indexes[1:]: result = result.union(other) return result elif kind == 'array': index = indexes[0] for other in indexes[1:]: if not index.equals(other): if sort is None: # TODO: remove once pd.concat sort default changes warnings.warn(_sort_msg, FutureWarning, stacklevel=8) sort = True return _unique_indices(indexes) name = _get_consensus_names(indexes)[0] if name != index.name: index = index._shallow_copy(name=name) return index else: # kind='list' return _unique_indices(indexes)
def wrap_arithmetic_op(self, other, result): if result is NotImplemented: return NotImplemented if isinstance(result, tuple): # divmod, rdivmod assert len(result) == 2 return (wrap_arithmetic_op(self, other, result[0]), wrap_arithmetic_op(self, other, result[1])) if not isinstance(result, Index): # Index.__new__ will choose appropriate subclass for dtype result = Index(result) res_name = ops.get_op_result_name(self, other) result.name = res_name return result
def __add__(self, other): from pandas import DateOffset other = lib.item_from_zerodim(other) if isinstance(other, ABCSeries): return NotImplemented # scalar others elif other is NaT: result = self._add_nat() elif isinstance(other, (DateOffset, timedelta, np.timedelta64)): result = self._add_delta(other) elif isinstance(other, (datetime, np.datetime64)): result = self._add_datelike(other) elif is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these result = self.shift(other) # array-like others elif is_timedelta64_dtype(other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(other) elif is_offsetlike(other): # Array/Index of DateOffset objects result = self._addsub_offset_array(other, operator.add) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] return self._add_datelike(other) elif is_integer_dtype(other) and self.freq is None: # GH#19123 raise NullFrequencyError("Cannot shift with no freq") else: # pragma: no cover return NotImplemented if result is NotImplemented: return NotImplemented elif not isinstance(result, Index): # Index.__new__ will choose appropriate subclass for dtype result = Index(result) res_name = ops.get_op_result_name(self, other) result.name = res_name return result
def join(self, other, how='left', level=None, return_indexers=False): """ See Index.join """ if _is_convertible_to_index(other): try: other = TimedeltaIndex(other) except (TypeError, ValueError): pass return Index.join(self, other, how=how, level=level, return_indexers=return_indexers)
def _get_combined_index(indexes, intersect=False, sort=False): # TODO: handle index names! indexes = _get_distinct_objs(indexes) if len(indexes) == 0: index = Index([]) elif len(indexes) == 1: index = indexes[0] elif intersect: index = indexes[0] for other in indexes[1:]: index = index.intersection(other) else: index = _union_indexes(indexes, sort=sort) index = ensure_index(index) if sort: try: index = index.sort_values() except TypeError: pass return index
def get_loc(self, key, method=None, tolerance=None): """ Get integer location for requested label Returns ------- loc : int """ if is_list_like(key) or (isinstance(key, datetime) and key is not NaT): # GH#20464 datetime check here is to ensure we don't allow # datetime objects to be incorrectly treated as timedelta # objects; NaT is a special case because it plays a double role # as Not-A-Timedelta raise TypeError if isna(key): key = NaT if tolerance is not None: # try converting tolerance now, so errors don't get swallowed by # the try/except clauses below tolerance = self._convert_tolerance(tolerance, np.asarray(key)) if _is_convertible_to_td(key): key = Timedelta(key) return Index.get_loc(self, key, method, tolerance) try: return Index.get_loc(self, key, method, tolerance) except (KeyError, ValueError, TypeError): try: return self._get_string_slice(key) except (TypeError, KeyError, ValueError): pass try: stamp = Timedelta(key) return Index.get_loc(self, stamp, method, tolerance) except (KeyError, ValueError): raise KeyError(key)
def _union_indexes(indexes): if len(indexes) == 0: raise AssertionError('Must have at least 1 Index to union') if len(indexes) == 1: result = indexes[0] if isinstance(result, list): result = Index(sorted(result)) return result indexes, kind = _sanitize_and_check(indexes) def _unique_indices(inds): def conv(i): if isinstance(i, Index): i = i.tolist() return i return Index(lib.fast_unique_multiple_list([conv(i) for i in inds])) if kind == 'special': result = indexes[0] if hasattr(result, 'union_many'): return result.union_many(indexes[1:]) else: for other in indexes[1:]: result = result.union(other) return result elif kind == 'array': index = indexes[0] for other in indexes[1:]: if not index.equals(other): return _unique_indices(indexes) name = _get_consensus_names(indexes)[0] if name != index.name: index = index._shallow_copy(name=name) return index else: return _unique_indices(indexes)
def _get_combined_index(indexes, intersect=False, sort=False): """ Return the union or intersection of indexes. Parameters ---------- indexes : list of Index or list objects When intersect=True, do not accept list of lists. intersect : bool, default False If True, calculate the intersection between indexes. Otherwise, calculate the union. sort : bool, default False Whether the result index should come out sorted or not. Returns ------- Index """ # TODO: handle index names! indexes = _get_distinct_objs(indexes) if len(indexes) == 0: index = Index([]) elif len(indexes) == 1: index = indexes[0] elif intersect: index = indexes[0] for other in indexes[1:]: index = index.intersection(other) else: index = _union_indexes(indexes, sort=sort) index = ensure_index(index) if sort: try: index = index.sort_values() except TypeError: pass return index
def get_loc(self, key, method=None, tolerance=None): """ Get integer location for requested label Returns ------- loc : int """ if is_bool_indexer(key): raise TypeError if isnull(key): key = NaT if tolerance is not None: # try converting tolerance now, so errors don't get swallowed by # the try/except clauses below tolerance = self._convert_tolerance(tolerance) if _is_convertible_to_td(key): key = Timedelta(key) return Index.get_loc(self, key, method, tolerance) try: return Index.get_loc(self, key, method, tolerance) except (KeyError, ValueError, TypeError): try: return self._get_string_slice(key) except (TypeError, KeyError, ValueError): pass try: stamp = Timedelta(key) return Index.get_loc(self, stamp, method, tolerance) except (KeyError, ValueError): raise KeyError(key)
def intersection(self, other): """ Specialized intersection for TimedeltaIndex objects. May be much faster than Index.intersection Parameters ---------- other : TimedeltaIndex or array-like Returns ------- y : Index or TimedeltaIndex """ self._assert_can_do_setop(other) if self.equals(other): return self._get_reconciled_name_object(other) if not isinstance(other, TimedeltaIndex): try: other = TimedeltaIndex(other) except (TypeError, ValueError): pass result = Index.intersection(self, other) return result if len(self) == 0: return self if len(other) == 0: return other # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other else: left, right = other, self end = min(left[-1], right[-1]) start = right[0] if end < start: return type(self)(data=[]) else: lslice = slice(*left.slice_locs(start, end)) left_chunk = left.values[lslice] return self._shallow_copy(left_chunk)
def _union(self, other, sort): if len(other) == 0 or self.equals(other) or len(self) == 0: return super()._union(other, sort=sort) if not isinstance(other, TimedeltaIndex): try: other = TimedeltaIndex(other) except (TypeError, ValueError): pass this, other = self, other if this._can_fast_union(other): return this._fast_union(other) else: result = Index._union(this, other, sort=sort) if isinstance(result, TimedeltaIndex): if result.freq is None: result.freq = to_offset(result.inferred_freq) return result
def get_value(self, series, key): """ Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing """ if _is_convertible_to_td(key): key = Timedelta(key) return self.get_value_maybe_box(series, key) try: return com.maybe_box(self, Index.get_value(self, series, key), series, key) except KeyError: try: loc = self._get_string_slice(key) return series[loc] except (TypeError, ValueError, KeyError): pass try: return self.get_value_maybe_box(series, key) except (TypeError, ValueError, KeyError): raise KeyError(key)
def to_tuples(self, na_tuple=True): tuples = self._data.to_tuples(na_tuple=na_tuple) return Index(tuples)
def is_monotonic_decreasing(self): return Index(self.codes).is_monotonic_decreasing
def astype(self, dtype, copy: bool = True): with rewrite_exception("IntervalArray", type(self).__name__): new_values = self._values.astype(dtype, copy=copy) if is_interval_dtype(new_values.dtype): return self._shallow_copy(new_values) return Index.astype(self, dtype, copy=copy)
def reindex(self, target, method=None, level=None, limit=None, tolerance=None) -> tuple[Index, np.ndarray | None]: """ Create index with target's values (move/add/delete values as necessary) Returns ------- new_index : pd.Index Resulting index indexer : np.ndarray[np.intp] or None Indices of output values in original index """ if method is not None: raise NotImplementedError( "argument method is not implemented for CategoricalIndex.reindex" ) if level is not None: raise NotImplementedError( "argument level is not implemented for CategoricalIndex.reindex" ) if limit is not None: raise NotImplementedError( "argument limit is not implemented for CategoricalIndex.reindex" ) target = ibase.ensure_index(target) if self.equals(target): indexer = None missing = np.array([], dtype=np.intp) else: indexer, missing = self.get_indexer_non_unique(np.array(target)) if len(self.codes) and indexer is not None: new_target = self.take(indexer) else: new_target = target # filling in missing if needed if len(missing): cats = self.categories.get_indexer(target) if not isinstance(cats, CategoricalIndex) or (cats == -1).any(): # coerce to a regular index here! result = Index(np.array(self), name=self.name) new_target, indexer, _ = result._reindex_non_unique(target) else: codes = new_target.codes.copy() codes[indexer == -1] = cats[missing] cat = self._data._from_backing_data(codes) new_target = type(self)._simple_new(cat, name=self.name) # we always want to return an Index type here # to be consistent with .reindex for other index types (e.g. they don't # coerce based on the actual values, only on the dtype) # unless we had an initial Categorical to begin with # in which case we are going to conform to the passed Categorical new_target = np.asarray(new_target) if is_categorical_dtype(target): cat = Categorical(new_target, dtype=target.dtype) new_target = type(self)._simple_new(cat, name=self.name) else: new_target = Index(new_target, name=self.name) return new_target, indexer
def get_duplicates(self): values = Index.get_duplicates(self) return self._simple_new(values)
def strftime(self, date_format) -> Index: arr = self._data.strftime(date_format) return Index(arr, name=self.name)
def slice_indexer(self, start=None, end=None, step=None, kind=None): """ Return indexer for specified label slice. Index.slice_indexer, customized to handle time slicing. In addition to functionality provided by Index.slice_indexer, does the following: - if both `start` and `end` are instances of `datetime.time`, it invokes `indexer_between_time` - if `start` and `end` are both either string or None perform value-based selection in non-monotonic cases. """ # For historical reasons DatetimeIndex supports slices between two # instances of datetime.time as if it were applying a slice mask to # an array of (self.hour, self.minute, self.seconds, self.microsecond). if isinstance(start, time) and isinstance(end, time): if step is not None and step != 1: raise ValueError("Must have step size of 1 with time slices") return self.indexer_between_time(start, end) if isinstance(start, time) or isinstance(end, time): raise KeyError("Cannot mix time and non-time slice keys") # Pandas supports slicing with dates, treated as datetimes at midnight. # https://github.com/pandas-dev/pandas/issues/31501 if isinstance(start, date) and not isinstance(start, datetime): start = datetime.combine(start, time(0, 0)) if isinstance(end, date) and not isinstance(end, datetime): end = datetime.combine(end, time(0, 0)) try: return Index.slice_indexer(self, start, end, step, kind=kind) except KeyError: # For historical reasons DatetimeIndex by default supports # value-based partial (aka string) slices on non-monotonic arrays, # let's try that. if (start is None or isinstance( start, str)) and (end is None or isinstance(end, str)): mask = np.array(True) deprecation_mask = np.array(True) if start is not None: start_casted = self._maybe_cast_slice_bound( start, "left", kind) mask = start_casted <= self deprecation_mask = start_casted == self if end is not None: end_casted = self._maybe_cast_slice_bound( end, "right", kind) mask = (self <= end_casted) & mask deprecation_mask = (end_casted == self) | deprecation_mask if not deprecation_mask.any(): warnings.warn( "Value based partial slicing on non-monotonic DatetimeIndexes " "with non-existing keys is deprecated and will raise a " "KeyError in a future Version.", FutureWarning, stacklevel=5, ) indexer = mask.nonzero()[0][::step] if len(indexer) == len(self): return slice(None) else: return indexer else: raise
def _convert_listlike_datetimes( arg, format: Optional[str], name: Label = None, tz: Optional[Timezone] = None, unit: Optional[str] = None, errors: Optional[str] = None, infer_datetime_format: Optional[bool] = None, dayfirst: Optional[bool] = None, yearfirst: Optional[bool] = None, exact: Optional[bool] = None, ): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parsed name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- Index-like of parsed dates """ if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype="O") arg_dtype = getattr(arg, "dtype", None) # these are shortcutable if is_datetime64tz_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) if tz == "utc": arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass elif tz: # DatetimeArray, DatetimeIndex return arg.tz_localize(tz) return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, "_values", arg) # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): result = arg.astype(f"datetime64[{unit}]") tz_parsed = None else: result, tz_parsed = tslib.array_with_unit_to_datetime( arg, unit, errors=errors) if errors == "ignore": result = Index(result, name=name) else: result = DatetimeIndex(result, name=name) # GH 23758: We may still need to localize the result with tz # GH 25546: Apply tz_parsed first (from arg), then tz (from caller) # result will be naive but in UTC try: result = result.tz_localize("UTC").tz_convert(tz_parsed) except AttributeError: # Regular Index from 'ignore' path return result if tz is not None: if result.tz is None: result = result.tz_localize(tz) else: result = result.tz_convert(tz) return result elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, datetime, list, tuple, 1-d array, or Series" ) # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg try: arg, _ = maybe_convert_dtype(arg, copy=False) except TypeError: if errors == "coerce": result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) return DatetimeIndex(result, name=name) elif errors == "ignore": result = Index(arg, name=name) return result raise arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None tz_parsed = None result = None if format is not None: try: # shortcut formatting here if format == "%Y%m%d": try: # pass orig_arg as float-dtype may have been converted to # datetime64[ns] orig_arg = ensure_object(orig_arg) result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime) as err: raise ValueError( "cannot convert the input to '%Y%m%d' date format" ) from err # fallback if result is None: try: result, timezones = array_strptime(arg, format, exact=exact, errors=errors) if "%Z" in format or "%z" in format: return _return_parsed_timezone_results( result, timezones, tz, name) except tslibs.OutOfBoundsDatetime: if errors == "raise": raise elif errors == "coerce": result = np.empty(arg.shape, dtype="M8[ns]") iresult = result.view("i8") iresult.fill(tslibs.iNaT) else: result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == "raise": raise elif errors == "coerce": result = np.empty(arg.shape, dtype="M8[ns]") iresult = result.view("i8") iresult.fill(tslibs.iNaT) else: result = arg except ValueError as e: # Fallback to try to convert datetime objects if timezone-aware # datetime objects are found without passing `utc=True` try: values, tz = conversion.datetime_to_datetime64(arg) dta = DatetimeArray(values, dtype=tz_to_dtype(tz)) return DatetimeIndex._simple_new(dta, name=name) except (ValueError, TypeError): raise e if result is None: assert format is None or infer_datetime_format utc = tz == "utc" result, tz_parsed = objects_to_datetime64ns( arg, dayfirst=dayfirst, yearfirst=yearfirst, utc=utc, errors=errors, require_iso8601=require_iso8601, allow_object=True, ) if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array # is in UTC dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed)) return DatetimeIndex._simple_new(dta, name=name) utc = tz == "utc" return _box_as_indexlike(result, utc=utc, name=name)
def f(self): result = fget(self) if is_bool_dtype(result): # return numpy array b/c there is no BoolIndex return result return Index(result, name=self.name)
def get_duplicates(self): values = Index.get_duplicates(self) return self._simple_new(values)
def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ Create index with target's values (move/add/delete values as necessary) Returns ------- new_index : pd.Index Resulting index indexer : np.ndarray or None Indices of output values in original index """ if method is not None: raise NotImplementedError("argument method is not implemented for " "CategoricalIndex.reindex") if level is not None: raise NotImplementedError("argument level is not implemented for " "CategoricalIndex.reindex") if limit is not None: raise NotImplementedError("argument limit is not implemented for " "CategoricalIndex.reindex") target = ibase._ensure_index(target) if not is_categorical_dtype(target) and not target.is_unique: raise ValueError("cannot reindex with a non-unique indexer") indexer, missing = self.get_indexer_non_unique(np.array(target)) new_target = self.take(indexer) # filling in missing if needed if len(missing): cats = self.categories.get_indexer(target) if (cats == -1).any(): # coerce to a regular index here! result = Index(np.array(self), name=self.name) new_target, indexer, _ = result._reindex_non_unique( np.array(target)) else: codes = new_target.codes.copy() codes[indexer == -1] = cats[missing] new_target = self._create_from_codes(codes) # we always want to return an Index type here # to be consistent with .reindex for other index types (e.g. they don't # coerce based on the actual values, only on the dtype) # unless we had an inital Categorical to begin with # in which case we are going to conform to the passed Categorical new_target = np.asarray(new_target) if is_categorical_dtype(target): new_target = target._shallow_copy(new_target, name=self.name) else: new_target = Index(new_target, name=self.name) return new_target, indexer
def intersection(self, other, sort=False): """ Specialized intersection for DatetimeIndex/TimedeltaIndex. May be much faster than Index.intersection Parameters ---------- other : Same type as self or array-like sort : False or None, default False Sort the resulting index if possible. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 Changed the default to ``False`` to match the behaviour from before 0.24.0. .. versionchanged:: 0.25.0 The `sort` keyword is added Returns ------- y : Index or same type as self """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) res_name = get_op_result_name(self, other) if self.equals(other): return self._get_reconciled_name_object(other) if len(self) == 0: return self.copy() if len(other) == 0: return other.copy() if not isinstance(other, type(self)): result = Index.intersection(self, other, sort=sort) if isinstance(result, type(self)): if result.freq is None: # TODO: no tests rely on this; needed? result = result._with_freq("infer") assert result.name == res_name return result elif not self._can_fast_intersect(other): result = Index.intersection(self, other, sort=sort) assert result.name == res_name # We need to invalidate the freq because Index.intersection # uses _shallow_copy on a view of self._data, which will preserve # self.freq if we're not careful. result = result._with_freq(None)._with_freq("infer") return result # to make our life easier, "sort" the two ranges if self[0] <= other[0]: left, right = self, other else: left, right = other, self # after sorting, the intersection always starts with the right index # and ends with the index of which the last elements is smallest end = min(left[-1], right[-1]) start = right[0] if end < start: return type(self)(data=[], dtype=self.dtype, freq=self.freq, name=res_name) else: lslice = slice(*left.slice_locs(start, end)) left_chunk = left._values[lslice] return type(self)._simple_new(left_chunk, name=res_name)
def _box_values_as_index(self): """ return object Index which contains boxed values """ from pandas.core.index import Index return Index(self._box_values(self.asi8), name=self.name, dtype=object)
def strftime(self, date_format): return Index(self.format(date_format=date_format), dtype=compat.text_type)
def total_seconds(self): result = TimedeltaArrayMixin.total_seconds(self) return Index(result, name=self.name)
def left(self) -> Index: return Index(self._data.left, copy=False)
def map(self, mapper): """ Map values using input correspondence (a dict, Series, or function). Maps the values (their categories, not the codes) of the index to new categories. If the mapping correspondence is one-to-one the result is a :class:`~pandas.CategoricalIndex` which has the same order property as the original, otherwise an :class:`~pandas.Index` is returned. If a `dict` or :class:`~pandas.Series` is used any unmapped category is mapped to `NaN`. Note that if this happens an :class:`~pandas.Index` will be returned. Parameters ---------- mapper : function, dict, or Series Mapping correspondence. Returns ------- pandas.CategoricalIndex or pandas.Index Mapped index. See Also -------- Index.map : Apply a mapping correspondence on an :class:`~pandas.Index`. Series.map : Apply a mapping correspondence on a :class:`~pandas.Series`. Series.apply : Apply more complex functions on a :class:`~pandas.Series`. Examples -------- >>> idx = pd.CategoricalIndex(['a', 'b', 'c']) >>> idx CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') >>> idx.map(lambda x: x.upper()) CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'], ordered=False, dtype='category') >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'}) CategoricalIndex(['first', 'second', 'third'], categories=['first', 'second', 'third'], ordered=False, dtype='category') If the mapping is one-to-one the ordering of the categories is preserved: >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True) >>> idx CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=True, dtype='category') >>> idx.map({'a': 3, 'b': 2, 'c': 1}) CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True, dtype='category') If the mapping is not one-to-one an :class:`~pandas.Index` is returned: >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'}) Index(['first', 'second', 'first'], dtype='object') If a `dict` is used, all unmapped categories are mapped to `NaN` and the result is an :class:`~pandas.Index`: >>> idx.map({'a': 'first', 'b': 'second'}) Index(['first', 'second', nan], dtype='object') """ mapped = self._values.map(mapper) return Index(mapped, name=self.name)
def right(self) -> Index: return Index(self._data.right, copy=False)
def _delegate_method(self, name, *args, **kwargs): result = operator.methodcaller(name, *args, **kwargs)(self._data) if name not in self._raw_methods: result = Index(result, name=self.name) return result
def mid(self) -> Index: return Index(self._data.mid, copy=False)
def _union_indexes(indexes, sort=True): """ Return the union of indexes. The behavior of sort and names is not consistent. Parameters ---------- indexes : list of Index or list objects sort : bool, default True Whether the result index should come out sorted or not. Returns ------- Index """ if len(indexes) == 0: raise AssertionError('Must have at least 1 Index to union') if len(indexes) == 1: result = indexes[0] if isinstance(result, list): result = Index(sorted(result)) return result indexes, kind = _sanitize_and_check(indexes) def _unique_indices(inds): """ Convert indexes to lists and concatenate them, removing duplicates. The final dtype is inferred. Parameters ---------- inds : list of Index or list objects Returns ------- Index """ def conv(i): if isinstance(i, Index): i = i.tolist() return i return Index( lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort)) if kind == 'special': result = indexes[0] if hasattr(result, 'union_many'): return result.union_many(indexes[1:]) else: for other in indexes[1:]: result = result.union(other) return result elif kind == 'array': index = indexes[0] for other in indexes[1:]: if not index.equals(other): if sort is None: # TODO: remove once pd.concat sort default changes warnings.warn(_sort_msg, FutureWarning, stacklevel=8) sort = True return _unique_indices(indexes) name = _get_consensus_names(indexes)[0] if name != index.name: index = index._shallow_copy(name=name) return index else: # kind='list' return _unique_indices(indexes)
def length(self) -> Index: return Index(self._data.length, copy=False)
def _delegate_property_get(self, name, *args, **kwargs): result = getattr(self._data, name) if name not in self._raw_properties: result = Index(result, name=self.name) return result
def _convert_listlike_datetimes( arg, format: str | None, name: Hashable = None, tz: Timezone | None = None, unit: str | None = None, errors: str = "raise", infer_datetime_format: bool = False, dayfirst: bool | None = None, yearfirst: bool | None = None, exact: bool = True, ): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parsed name : object None or string for the Index name tz : object None or 'utc' unit : str None or string of the frequency of the passed data errors : str error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : bool, default False inferring format behavior from to_datetime dayfirst : bool dayfirst parsing behavior from to_datetime yearfirst : bool yearfirst parsing behavior from to_datetime exact : bool, default True exact format matching behavior from to_datetime Returns ------- Index-like of parsed dates """ if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype="O") arg_dtype = getattr(arg, "dtype", None) # these are shortcutable if is_datetime64tz_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) if tz == "utc": arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg_dtype): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass elif tz: # DatetimeArray, DatetimeIndex return arg.tz_localize(tz) return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") return _to_datetime_with_unit(arg, unit, name, tz, errors) elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, datetime, list, tuple, 1-d array, or Series" ) # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg try: arg, _ = maybe_convert_dtype(arg, copy=False, tz=timezones.maybe_get_tz(tz)) except TypeError: if errors == "coerce": npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) return DatetimeIndex(npvalues, name=name) elif errors == "ignore": idx = Index(arg, name=name) return idx raise arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None if format is not None: res = _to_datetime_with_format(arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format) if res is not None: return res assert format is None or infer_datetime_format utc = tz == "utc" result, tz_parsed = objects_to_datetime64ns( arg, dayfirst=dayfirst, yearfirst=yearfirst, utc=utc, errors=errors, require_iso8601=require_iso8601, allow_object=True, ) if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array # is in UTC dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed)) return DatetimeIndex._simple_new(dta, name=name) utc = tz == "utc" return _box_as_indexlike(result, utc=utc, name=name)
def to_tuples(self): return Index(com._asarray_tuplesafe(zip(self.left, self.right)))
def f(self): result = fget(self) return Index(result, name=self.name)
def wrapper(self, other): result = getattr(TimedeltaArrayMixin, opname)(self, other) if is_bool_dtype(result): # support of bool dtype indexers return result return Index(result)
def __sub__(self, other): from pandas import Index other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented # scalar others elif other is NaT: result = self._sub_nat() elif isinstance(other, (Tick, timedelta, np.timedelta64)): result = self._add_delta(-other) elif isinstance(other, DateOffset): # specifically _not_ a Tick result = self._add_offset(-other) elif isinstance(other, (datetime, np.datetime64)): result = self._sub_datelike(other) elif is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these result = self.shift(-other) elif isinstance(other, Period): result = self._sub_period(other) # array-like others elif is_timedelta64_dtype(other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(-other) elif is_offsetlike(other): # Array/Index of DateOffset objects result = self._addsub_offset_array(other, operator.sub) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] result = self._sub_datelike(other) elif is_period_dtype(other): # PeriodIndex result = self._sub_period_array(other) elif is_integer_dtype(other): result = self._addsub_int_array(other, operator.sub) elif isinstance(other, Index): raise TypeError("cannot subtract {cls} and {typ}" .format(cls=type(self).__name__, typ=type(other).__name__)) elif is_float_dtype(other): # Explicitly catch invalid dtypes raise TypeError("cannot subtract {dtype}-dtype from {cls}" .format(dtype=other.dtype, cls=type(self).__name__)) elif is_categorical_dtype(other): # Categorical op will raise; defer explicitly return NotImplemented else: # pragma: no cover return NotImplemented if result is NotImplemented: return NotImplemented elif not isinstance(result, Index): # Index.__new__ will choose appropriate subclass for dtype result = Index(result) res_name = ops.get_op_result_name(self, other) result.name = res_name return result
def melt( frame: DataFrame, id_vars=None, value_vars=None, var_name=None, value_name="value", col_level=None, ) -> DataFrame: # TODO: what about the existing index? # If multiindex, gather names of columns on all level for checking presence # of `id_vars` and `value_vars` if isinstance(frame.columns, ABCMultiIndex): cols = [x for c in frame.columns for x in c] else: cols = list(frame.columns) if id_vars is not None: if not is_list_like(id_vars): id_vars = [id_vars] elif isinstance(frame.columns, ABCMultiIndex) and not isinstance(id_vars, list): raise ValueError( "id_vars must be a list of tuples when columns are a MultiIndex" ) else: # Check that `id_vars` are in frame id_vars = list(id_vars) missing = Index(com.flatten(id_vars)).difference(cols) if not missing.empty: raise KeyError("The following 'id_vars' are not present " "in the DataFrame: {missing}" "".format(missing=list(missing))) else: id_vars = [] if value_vars is not None: if not is_list_like(value_vars): value_vars = [value_vars] elif isinstance(frame.columns, ABCMultiIndex) and not isinstance(value_vars, list): raise ValueError( "value_vars must be a list of tuples when columns are a MultiIndex" ) else: value_vars = list(value_vars) # Check that `value_vars` are in frame missing = Index(com.flatten(value_vars)).difference(cols) if not missing.empty: raise KeyError("The following 'value_vars' are not present in " "the DataFrame: {missing}" "".format(missing=list(missing))) frame = frame.loc[:, id_vars + value_vars] else: frame = frame.copy() if col_level is not None: # allow list or other? # frame is a copy frame.columns = frame.columns.get_level_values(col_level) if var_name is None: if isinstance(frame.columns, ABCMultiIndex): if len(frame.columns.names) == len(set(frame.columns.names)): var_name = frame.columns.names else: var_name = [ "variable_{i}".format(i=i) for i in range(len(frame.columns.names)) ] else: var_name = [ frame.columns.name if frame.columns.name is not None else "variable" ] if isinstance(var_name, str): var_name = [var_name] N, K = frame.shape K -= len(id_vars) mdata = {} for col in id_vars: id_data = frame.pop(col) if is_extension_array_dtype(id_data): id_data = concat([id_data] * K, ignore_index=True) else: id_data = np.tile(id_data.values, K) mdata[col] = id_data mcolumns = id_vars + var_name + [value_name] mdata[value_name] = frame.values.ravel("F") for i, col in enumerate(var_name): # asanyarray will keep the columns as an Index mdata[col] = np.asanyarray( frame.columns._get_level_values(i)).repeat(N) return frame._constructor(mdata, columns=mcolumns)
def reindex(self, target, method=None, level=None, limit=None, tolerance=None) -> tuple[Index, npt.NDArray[np.intp] | None]: """ Create index with target's values (move/add/delete values as necessary) Returns ------- new_index : pd.Index Resulting index indexer : np.ndarray[np.intp] or None Indices of output values in original index """ if method is not None: raise NotImplementedError( "argument method is not implemented for CategoricalIndex.reindex" ) if level is not None: raise NotImplementedError( "argument level is not implemented for CategoricalIndex.reindex" ) if limit is not None: raise NotImplementedError( "argument limit is not implemented for CategoricalIndex.reindex" ) target = ibase.ensure_index(target) if self.equals(target): indexer = None missing = np.array([], dtype=np.intp) else: indexer, missing = self.get_indexer_non_unique(target) if not self.is_unique: # GH#42568 warnings.warn( "reindexing with a non-unique Index is deprecated and will " "raise in a future version.", FutureWarning, stacklevel=find_stack_level(), ) new_target: Index if len(self) and indexer is not None: new_target = self.take(indexer) else: new_target = target # filling in missing if needed if len(missing): cats = self.categories.get_indexer(target) if not isinstance(target, CategoricalIndex) or (cats == -1).any(): new_target, indexer, _ = super()._reindex_non_unique(target) else: # error: "Index" has no attribute "codes" codes = new_target.codes.copy() # type: ignore[attr-defined] codes[indexer == -1] = cats[missing] cat = self._data._from_backing_data(codes) new_target = type(self)._simple_new(cat, name=self.name) # we always want to return an Index type here # to be consistent with .reindex for other index types (e.g. they don't # coerce based on the actual values, only on the dtype) # unless we had an initial Categorical to begin with # in which case we are going to conform to the passed Categorical if is_categorical_dtype(target): cat = Categorical(new_target, dtype=target.dtype) new_target = type(self)._simple_new(cat, name=self.name) else: # e.g. test_reindex_with_categoricalindex, test_reindex_duplicate_target new_target_array = np.asarray(new_target) new_target = Index._with_infer(new_target_array, name=self.name) return new_target, indexer
def astype(self, dtype, copy: bool = True): with rewrite_exception("IntervalArray", type(self).__name__): new_values = self._values.astype(dtype, copy=copy) return Index(new_values, dtype=new_values.dtype, name=self.name)
def astype(self, dtype, copy=True): res_data = self._data.astype(dtype, copy=copy) return Index(res_data, name=self.name)