Exemplo n.º 1
0
    def reindex(self, target, method=None, level=None, limit=None,
                tolerance=None):
        """
        Create index with target's values (move/add/delete values as necessary)

        Returns
        -------
        new_index : pd.Index
            Resulting index
        indexer : np.ndarray or None
            Indices of output values in original index

        """

        if method is not None:
            raise NotImplementedError("argument method is not implemented for "
                                      "CategoricalIndex.reindex")
        if level is not None:
            raise NotImplementedError("argument level is not implemented for "
                                      "CategoricalIndex.reindex")
        if limit is not None:
            raise NotImplementedError("argument limit is not implemented for "
                                      "CategoricalIndex.reindex")

        target = ibase._ensure_index(target)

        if not is_categorical_dtype(target) and not target.is_unique:
            raise ValueError("cannot reindex with a non-unique indexer")

        indexer, missing = self.get_indexer_non_unique(np.array(target))
        new_target = self.take(indexer)

        # filling in missing if needed
        if len(missing):
            cats = self.categories.get_indexer(target)

            if (cats == -1).any():
                # coerce to a regular index here!
                result = Index(np.array(self), name=self.name)
                new_target, indexer, _ = result._reindex_non_unique(
                    np.array(target))

            else:

                codes = new_target.codes.copy()
                codes[indexer == -1] = cats[missing]
                new_target = self._create_from_codes(codes)

        # we always want to return an Index type here
        # to be consistent with .reindex for other index types (e.g. they don't
        # coerce based on the actual values, only on the dtype)
        # unless we had an inital Categorical to begin with
        # in which case we are going to conform to the passed Categorical
        new_target = np.asarray(new_target)
        if is_categorical_dtype(target):
            new_target = target._shallow_copy(new_target, name=self.name)
        else:
            new_target = Index(new_target, name=self.name)

        return new_target, indexer
Exemplo n.º 2
0
    def index_method(self, *args, **kwargs):
        result = method(self, *args, **kwargs)

        # Index.__new__ will choose the appropriate subclass to return
        result = Index(result)
        if pin_name:
            result.name = self.name
        return result
Exemplo n.º 3
0
        def __sub__(self, other):
            from pandas import Index

            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._sub_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(-other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._sub_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(-other)
            elif isinstance(other, Period):
                result = self._sub_period(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.sub)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                result = self._sub_datelike(other)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {cls} and {typ}"
                                .format(cls=type(self).__name__,
                                        typ=type(other).__name__))
            elif is_integer_dtype(other) and self.freq is None:
                # GH#19123
                raise NullFrequencyError("Cannot shift with no freq")

            elif is_float_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Exemplo n.º 4
0
    def intersection(self, other, sort=False):
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)

        if self.equals(other):
            return self._get_reconciled_name_object(other)

        if len(self) == 0:
            return self.copy()
        if len(other) == 0:
            return other.copy()

        if not isinstance(other, type(self)):
            result = Index.intersection(self, other, sort=sort)
            if isinstance(result, type(self)):
                if result.freq is None:
                    result.freq = to_offset(result.inferred_freq)
            return result

        elif (other.freq is None or self.freq is None or
              other.freq != self.freq or
              not other.freq.isAnchored() or
              (not self.is_monotonic or not other.is_monotonic)):
            result = Index.intersection(self, other, sort=sort)

            # Invalidate the freq of `result`, which may not be correct at
            # this point, depending on the values.
            result.freq = None
            if hasattr(self, 'tz'):
                result = self._shallow_copy(result._values, name=result.name,
                                            tz=result.tz, freq=None)
            else:
                result = self._shallow_copy(result._values, name=result.name,
                                            freq=None)
            if result.freq is None:
                result.freq = to_offset(result.inferred_freq)
            return result

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        # after sorting, the intersection always starts with the right index
        # and ends with the index of which the last elements is smallest
        end = min(left[-1], right[-1])
        start = right[0]

        if end < start:
            return type(self)(data=[])
        else:
            lslice = slice(*left.slice_locs(start, end))
            left_chunk = left.values[lslice]
            return self._shallow_copy(left_chunk)
Exemplo n.º 5
0
def wrap_arithmetic_op(self, other, result):
    if result is NotImplemented:
        return NotImplemented

    if not isinstance(result, Index):
        # Index.__new__ will choose appropriate subclass for dtype
        result = Index(result)

    res_name = ops.get_op_result_name(self, other)
    result.name = res_name
    return result
Exemplo n.º 6
0
        def __add__(self, other):
            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._add_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._add_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.add)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                return self._add_datelike(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.add)
            elif is_float_dtype(other) or is_period_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot add {dtype}-dtype to {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Exemplo n.º 7
0
    def union(self, other):
        """
        Specialized union for TimedeltaIndex objects. If combine
        overlapping ranges with the same DateOffset, will be much
        faster than Index.union

        Parameters
        ----------
        other : TimedeltaIndex or array-like

        Returns
        -------
        y : Index or TimedeltaIndex
        """
        self._assert_can_do_setop(other)

        if len(other) == 0 or self.equals(other) or len(self) == 0:
            return super(TimedeltaIndex, self).union(other)

        if not isinstance(other, TimedeltaIndex):
            try:
                other = TimedeltaIndex(other)
            except (TypeError, ValueError):
                pass
        this, other = self, other

        if this._can_fast_union(other):
            return this._fast_union(other)
        else:
            result = Index.union(this, other)
            if isinstance(result, TimedeltaIndex):
                if result.freq is None:
                    result.freq = to_offset(result.inferred_freq)
            return result
Exemplo n.º 8
0
def _union_indexes(indexes, sort=True):
    if len(indexes) == 0:
        raise AssertionError('Must have at least 1 Index to union')
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            result = Index(sorted(result))
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds):
        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))

    if kind == 'special':
        result = indexes[0]

        if hasattr(result, 'union_many'):
            return result.union_many(indexes[1:])
        else:
            for other in indexes[1:]:
                result = result.union(other)
            return result
    elif kind == 'array':
        index = indexes[0]
        for other in indexes[1:]:
            if not index.equals(other):

                if sort is None:
                    # TODO: remove once pd.concat sort default changes
                    warnings.warn(_sort_msg, FutureWarning, stacklevel=8)
                    sort = True

                return _unique_indices(indexes)

        name = _get_consensus_names(indexes)[0]
        if name != index.name:
            index = index._shallow_copy(name=name)
        return index
    else:  # kind='list'
        return _unique_indices(indexes)
Exemplo n.º 9
0
def wrap_arithmetic_op(self, other, result):
    if result is NotImplemented:
        return NotImplemented

    if isinstance(result, tuple):
        # divmod, rdivmod
        assert len(result) == 2
        return (wrap_arithmetic_op(self, other, result[0]),
                wrap_arithmetic_op(self, other, result[1]))

    if not isinstance(result, Index):
        # Index.__new__ will choose appropriate subclass for dtype
        result = Index(result)

    res_name = ops.get_op_result_name(self, other)
    result.name = res_name
    return result
Exemplo n.º 10
0
        def __add__(self, other):
            from pandas import DateOffset

            other = lib.item_from_zerodim(other)
            if isinstance(other, ABCSeries):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._add_nat()
            elif isinstance(other, (DateOffset, timedelta, np.timedelta64)):
                result = self._add_delta(other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._add_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.add)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                return self._add_datelike(other)
            elif is_integer_dtype(other) and self.freq is None:
                # GH#19123
                raise NullFrequencyError("Cannot shift with no freq")
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Exemplo n.º 11
0
    def join(self, other, how='left', level=None, return_indexers=False):
        """
        See Index.join
        """
        if _is_convertible_to_index(other):
            try:
                other = TimedeltaIndex(other)
            except (TypeError, ValueError):
                pass

        return Index.join(self, other, how=how, level=level,
                          return_indexers=return_indexers)
Exemplo n.º 12
0
def _get_combined_index(indexes, intersect=False, sort=False):
    # TODO: handle index names!
    indexes = _get_distinct_objs(indexes)
    if len(indexes) == 0:
        index = Index([])
    elif len(indexes) == 1:
        index = indexes[0]
    elif intersect:
        index = indexes[0]
        for other in indexes[1:]:
            index = index.intersection(other)
    else:
        index = _union_indexes(indexes, sort=sort)
        index = ensure_index(index)

    if sort:
        try:
            index = index.sort_values()
        except TypeError:
            pass
    return index
Exemplo n.º 13
0
    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label

        Returns
        -------
        loc : int
        """
        if is_list_like(key) or (isinstance(key, datetime) and key is not NaT):
            # GH#20464 datetime check here is to ensure we don't allow
            #   datetime objects to be incorrectly treated as timedelta
            #   objects; NaT is a special case because it plays a double role
            #   as Not-A-Timedelta
            raise TypeError

        if isna(key):
            key = NaT

        if tolerance is not None:
            # try converting tolerance now, so errors don't get swallowed by
            # the try/except clauses below
            tolerance = self._convert_tolerance(tolerance, np.asarray(key))

        if _is_convertible_to_td(key):
            key = Timedelta(key)
            return Index.get_loc(self, key, method, tolerance)

        try:
            return Index.get_loc(self, key, method, tolerance)
        except (KeyError, ValueError, TypeError):
            try:
                return self._get_string_slice(key)
            except (TypeError, KeyError, ValueError):
                pass

            try:
                stamp = Timedelta(key)
                return Index.get_loc(self, stamp, method, tolerance)
            except (KeyError, ValueError):
                raise KeyError(key)
Exemplo n.º 14
0
def _union_indexes(indexes):
    if len(indexes) == 0:
        raise AssertionError('Must have at least 1 Index to union')
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            result = Index(sorted(result))
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds):
        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(lib.fast_unique_multiple_list([conv(i) for i in inds]))

    if kind == 'special':
        result = indexes[0]

        if hasattr(result, 'union_many'):
            return result.union_many(indexes[1:])
        else:
            for other in indexes[1:]:
                result = result.union(other)
            return result
    elif kind == 'array':
        index = indexes[0]
        for other in indexes[1:]:
            if not index.equals(other):
                return _unique_indices(indexes)

        name = _get_consensus_names(indexes)[0]
        if name != index.name:
            index = index._shallow_copy(name=name)
        return index
    else:
        return _unique_indices(indexes)
Exemplo n.º 15
0
def _get_combined_index(indexes, intersect=False, sort=False):
    """
    Return the union or intersection of indexes.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    sort : bool, default False
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index
    """

    # TODO: handle index names!
    indexes = _get_distinct_objs(indexes)
    if len(indexes) == 0:
        index = Index([])
    elif len(indexes) == 1:
        index = indexes[0]
    elif intersect:
        index = indexes[0]
        for other in indexes[1:]:
            index = index.intersection(other)
    else:
        index = _union_indexes(indexes, sort=sort)
        index = ensure_index(index)

    if sort:
        try:
            index = index.sort_values()
        except TypeError:
            pass
    return index
Exemplo n.º 16
0
    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label

        Returns
        -------
        loc : int
        """

        if is_bool_indexer(key):
            raise TypeError

        if isnull(key):
            key = NaT

        if tolerance is not None:
            # try converting tolerance now, so errors don't get swallowed by
            # the try/except clauses below
            tolerance = self._convert_tolerance(tolerance)

        if _is_convertible_to_td(key):
            key = Timedelta(key)
            return Index.get_loc(self, key, method, tolerance)

        try:
            return Index.get_loc(self, key, method, tolerance)
        except (KeyError, ValueError, TypeError):
            try:
                return self._get_string_slice(key)
            except (TypeError, KeyError, ValueError):
                pass

            try:
                stamp = Timedelta(key)
                return Index.get_loc(self, stamp, method, tolerance)
            except (KeyError, ValueError):
                raise KeyError(key)
Exemplo n.º 17
0
    def intersection(self, other):
        """
        Specialized intersection for TimedeltaIndex objects. May be much faster
        than Index.intersection

        Parameters
        ----------
        other : TimedeltaIndex or array-like

        Returns
        -------
        y : Index or TimedeltaIndex
        """
        self._assert_can_do_setop(other)

        if self.equals(other):
            return self._get_reconciled_name_object(other)

        if not isinstance(other, TimedeltaIndex):
            try:
                other = TimedeltaIndex(other)
            except (TypeError, ValueError):
                pass
            result = Index.intersection(self, other)
            return result

        if len(self) == 0:
            return self
        if len(other) == 0:
            return other
        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        end = min(left[-1], right[-1])
        start = right[0]

        if end < start:
            return type(self)(data=[])
        else:
            lslice = slice(*left.slice_locs(start, end))
            left_chunk = left.values[lslice]
            return self._shallow_copy(left_chunk)
Exemplo n.º 18
0
    def _union(self, other, sort):
        if len(other) == 0 or self.equals(other) or len(self) == 0:
            return super()._union(other, sort=sort)

        if not isinstance(other, TimedeltaIndex):
            try:
                other = TimedeltaIndex(other)
            except (TypeError, ValueError):
                pass
        this, other = self, other

        if this._can_fast_union(other):
            return this._fast_union(other)
        else:
            result = Index._union(this, other, sort=sort)
            if isinstance(result, TimedeltaIndex):
                if result.freq is None:
                    result.freq = to_offset(result.inferred_freq)
            return result
Exemplo n.º 19
0
    def get_value(self, series, key):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing
        """

        if _is_convertible_to_td(key):
            key = Timedelta(key)
            return self.get_value_maybe_box(series, key)

        try:
            return com.maybe_box(self, Index.get_value(self, series, key),
                                 series, key)
        except KeyError:
            try:
                loc = self._get_string_slice(key)
                return series[loc]
            except (TypeError, ValueError, KeyError):
                pass

            try:
                return self.get_value_maybe_box(series, key)
            except (TypeError, ValueError, KeyError):
                raise KeyError(key)
Exemplo n.º 20
0
 def to_tuples(self, na_tuple=True):
     tuples = self._data.to_tuples(na_tuple=na_tuple)
     return Index(tuples)
Exemplo n.º 21
0
 def is_monotonic_decreasing(self):
     return Index(self.codes).is_monotonic_decreasing
Exemplo n.º 22
0
 def astype(self, dtype, copy: bool = True):
     with rewrite_exception("IntervalArray", type(self).__name__):
         new_values = self._values.astype(dtype, copy=copy)
     if is_interval_dtype(new_values.dtype):
         return self._shallow_copy(new_values)
     return Index.astype(self, dtype, copy=copy)
Exemplo n.º 23
0
    def reindex(self,
                target,
                method=None,
                level=None,
                limit=None,
                tolerance=None) -> tuple[Index, np.ndarray | None]:
        """
        Create index with target's values (move/add/delete values as necessary)

        Returns
        -------
        new_index : pd.Index
            Resulting index
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index

        """
        if method is not None:
            raise NotImplementedError(
                "argument method is not implemented for CategoricalIndex.reindex"
            )
        if level is not None:
            raise NotImplementedError(
                "argument level is not implemented for CategoricalIndex.reindex"
            )
        if limit is not None:
            raise NotImplementedError(
                "argument limit is not implemented for CategoricalIndex.reindex"
            )

        target = ibase.ensure_index(target)

        if self.equals(target):
            indexer = None
            missing = np.array([], dtype=np.intp)
        else:
            indexer, missing = self.get_indexer_non_unique(np.array(target))

        if len(self.codes) and indexer is not None:
            new_target = self.take(indexer)
        else:
            new_target = target

        # filling in missing if needed
        if len(missing):
            cats = self.categories.get_indexer(target)

            if not isinstance(cats, CategoricalIndex) or (cats == -1).any():
                # coerce to a regular index here!
                result = Index(np.array(self), name=self.name)
                new_target, indexer, _ = result._reindex_non_unique(target)
            else:

                codes = new_target.codes.copy()
                codes[indexer == -1] = cats[missing]
                cat = self._data._from_backing_data(codes)
                new_target = type(self)._simple_new(cat, name=self.name)

        # we always want to return an Index type here
        # to be consistent with .reindex for other index types (e.g. they don't
        # coerce based on the actual values, only on the dtype)
        # unless we had an initial Categorical to begin with
        # in which case we are going to conform to the passed Categorical
        new_target = np.asarray(new_target)
        if is_categorical_dtype(target):
            cat = Categorical(new_target, dtype=target.dtype)
            new_target = type(self)._simple_new(cat, name=self.name)
        else:
            new_target = Index(new_target, name=self.name)

        return new_target, indexer
Exemplo n.º 24
0
 def get_duplicates(self):
     values = Index.get_duplicates(self)
     return self._simple_new(values)
Exemplo n.º 25
0
 def strftime(self, date_format) -> Index:
     arr = self._data.strftime(date_format)
     return Index(arr, name=self.name)
Exemplo n.º 26
0
    def slice_indexer(self, start=None, end=None, step=None, kind=None):
        """
        Return indexer for specified label slice.
        Index.slice_indexer, customized to handle time slicing.

        In addition to functionality provided by Index.slice_indexer, does the
        following:

        - if both `start` and `end` are instances of `datetime.time`, it
          invokes `indexer_between_time`
        - if `start` and `end` are both either string or None perform
          value-based selection in non-monotonic cases.

        """
        # For historical reasons DatetimeIndex supports slices between two
        # instances of datetime.time as if it were applying a slice mask to
        # an array of (self.hour, self.minute, self.seconds, self.microsecond).
        if isinstance(start, time) and isinstance(end, time):
            if step is not None and step != 1:
                raise ValueError("Must have step size of 1 with time slices")
            return self.indexer_between_time(start, end)

        if isinstance(start, time) or isinstance(end, time):
            raise KeyError("Cannot mix time and non-time slice keys")

        # Pandas supports slicing with dates, treated as datetimes at midnight.
        # https://github.com/pandas-dev/pandas/issues/31501
        if isinstance(start, date) and not isinstance(start, datetime):
            start = datetime.combine(start, time(0, 0))
        if isinstance(end, date) and not isinstance(end, datetime):
            end = datetime.combine(end, time(0, 0))

        try:
            return Index.slice_indexer(self, start, end, step, kind=kind)
        except KeyError:
            # For historical reasons DatetimeIndex by default supports
            # value-based partial (aka string) slices on non-monotonic arrays,
            # let's try that.
            if (start is None or isinstance(
                    start, str)) and (end is None or isinstance(end, str)):
                mask = np.array(True)
                deprecation_mask = np.array(True)
                if start is not None:
                    start_casted = self._maybe_cast_slice_bound(
                        start, "left", kind)
                    mask = start_casted <= self
                    deprecation_mask = start_casted == self

                if end is not None:
                    end_casted = self._maybe_cast_slice_bound(
                        end, "right", kind)
                    mask = (self <= end_casted) & mask
                    deprecation_mask = (end_casted == self) | deprecation_mask

                if not deprecation_mask.any():
                    warnings.warn(
                        "Value based partial slicing on non-monotonic DatetimeIndexes "
                        "with non-existing keys is deprecated and will raise a "
                        "KeyError in a future Version.",
                        FutureWarning,
                        stacklevel=5,
                    )
                indexer = mask.nonzero()[0][::step]
                if len(indexer) == len(self):
                    return slice(None)
                else:
                    return indexer
            else:
                raise
Exemplo n.º 27
0
def _convert_listlike_datetimes(
    arg,
    format: Optional[str],
    name: Label = None,
    tz: Optional[Timezone] = None,
    unit: Optional[str] = None,
    errors: Optional[str] = None,
    infer_datetime_format: Optional[bool] = None,
    dayfirst: Optional[bool] = None,
    yearfirst: Optional[bool] = None,
    exact: Optional[bool] = None,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates
    """

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable
    if is_datetime64tz_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, "_values", arg)

        # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
        # because it expects an ndarray argument
        if isinstance(arg, IntegerArray):
            result = arg.astype(f"datetime64[{unit}]")
            tz_parsed = None
        else:

            result, tz_parsed = tslib.array_with_unit_to_datetime(
                arg, unit, errors=errors)

        if errors == "ignore":

            result = Index(result, name=name)
        else:
            result = DatetimeIndex(result, name=name)
        # GH 23758: We may still need to localize the result with tz
        # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
        # result will be naive but in UTC
        try:
            result = result.tz_localize("UTC").tz_convert(tz_parsed)
        except AttributeError:
            # Regular Index from 'ignore' path
            return result
        if tz is not None:
            if result.tz is None:
                result = result.tz_localize(tz)
            else:
                result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(result, name=name)
        elif errors == "ignore":
            result = Index(arg, name=name)
            return result
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError,
                        tslibs.OutOfBoundsDatetime) as err:
                    raise ValueError(
                        "cannot convert the input to '%Y%m%d' date format"
                    ) from err

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(arg,
                                                       format,
                                                       exact=exact,
                                                       errors=errors)
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
                return DatetimeIndex._simple_new(dta, name=name)
            except (ValueError, TypeError):
                raise e

    if result is None:
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
Exemplo n.º 28
0
 def f(self):
     result = fget(self)
     if is_bool_dtype(result):
         # return numpy array b/c there is no BoolIndex
         return result
     return Index(result, name=self.name)
Exemplo n.º 29
0
 def get_duplicates(self):
     values = Index.get_duplicates(self)
     return self._simple_new(values)
Exemplo n.º 30
0
    def reindex(self,
                target,
                method=None,
                level=None,
                limit=None,
                tolerance=None):
        """
        Create index with target's values (move/add/delete values as necessary)

        Returns
        -------
        new_index : pd.Index
            Resulting index
        indexer : np.ndarray or None
            Indices of output values in original index

        """

        if method is not None:
            raise NotImplementedError("argument method is not implemented for "
                                      "CategoricalIndex.reindex")
        if level is not None:
            raise NotImplementedError("argument level is not implemented for "
                                      "CategoricalIndex.reindex")
        if limit is not None:
            raise NotImplementedError("argument limit is not implemented for "
                                      "CategoricalIndex.reindex")

        target = ibase._ensure_index(target)

        if not is_categorical_dtype(target) and not target.is_unique:
            raise ValueError("cannot reindex with a non-unique indexer")

        indexer, missing = self.get_indexer_non_unique(np.array(target))
        new_target = self.take(indexer)

        # filling in missing if needed
        if len(missing):
            cats = self.categories.get_indexer(target)

            if (cats == -1).any():
                # coerce to a regular index here!
                result = Index(np.array(self), name=self.name)
                new_target, indexer, _ = result._reindex_non_unique(
                    np.array(target))

            else:

                codes = new_target.codes.copy()
                codes[indexer == -1] = cats[missing]
                new_target = self._create_from_codes(codes)

        # we always want to return an Index type here
        # to be consistent with .reindex for other index types (e.g. they don't
        # coerce based on the actual values, only on the dtype)
        # unless we had an inital Categorical to begin with
        # in which case we are going to conform to the passed Categorical
        new_target = np.asarray(new_target)
        if is_categorical_dtype(target):
            new_target = target._shallow_copy(new_target, name=self.name)
        else:
            new_target = Index(new_target, name=self.name)

        return new_target, indexer
Exemplo n.º 31
0
    def intersection(self, other, sort=False):
        """
        Specialized intersection for DatetimeIndex/TimedeltaIndex.

        May be much faster than Index.intersection

        Parameters
        ----------
        other : Same type as self or array-like
        sort : False or None, default False
            Sort the resulting index if possible.

            .. versionadded:: 0.24.0

            .. versionchanged:: 0.24.1

               Changed the default to ``False`` to match the behaviour
               from before 0.24.0.

            .. versionchanged:: 0.25.0

               The `sort` keyword is added

        Returns
        -------
        y : Index or same type as self
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        res_name = get_op_result_name(self, other)

        if self.equals(other):
            return self._get_reconciled_name_object(other)

        if len(self) == 0:
            return self.copy()
        if len(other) == 0:
            return other.copy()

        if not isinstance(other, type(self)):
            result = Index.intersection(self, other, sort=sort)
            if isinstance(result, type(self)):
                if result.freq is None:
                    # TODO: no tests rely on this; needed?
                    result = result._with_freq("infer")
            assert result.name == res_name
            return result

        elif not self._can_fast_intersect(other):
            result = Index.intersection(self, other, sort=sort)
            assert result.name == res_name
            # We need to invalidate the freq because Index.intersection
            #  uses _shallow_copy on a view of self._data, which will preserve
            #  self.freq if we're not careful.
            result = result._with_freq(None)._with_freq("infer")
            return result

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        # after sorting, the intersection always starts with the right index
        # and ends with the index of which the last elements is smallest
        end = min(left[-1], right[-1])
        start = right[0]

        if end < start:
            return type(self)(data=[],
                              dtype=self.dtype,
                              freq=self.freq,
                              name=res_name)
        else:
            lslice = slice(*left.slice_locs(start, end))
            left_chunk = left._values[lslice]
            return type(self)._simple_new(left_chunk, name=res_name)
Exemplo n.º 32
0
 def _box_values_as_index(self):
     """
     return object Index which contains boxed values
     """
     from pandas.core.index import Index
     return Index(self._box_values(self.asi8), name=self.name, dtype=object)
Exemplo n.º 33
0
 def strftime(self, date_format):
     return Index(self.format(date_format=date_format),
                  dtype=compat.text_type)
Exemplo n.º 34
0
 def total_seconds(self):
     result = TimedeltaArrayMixin.total_seconds(self)
     return Index(result, name=self.name)
Exemplo n.º 35
0
 def left(self) -> Index:
     return Index(self._data.left, copy=False)
Exemplo n.º 36
0
    def map(self, mapper):
        """
        Map values using input correspondence (a dict, Series, or function).

        Maps the values (their categories, not the codes) of the index to new
        categories. If the mapping correspondence is one-to-one the result is a
        :class:`~pandas.CategoricalIndex` which has the same order property as
        the original, otherwise an :class:`~pandas.Index` is returned.

        If a `dict` or :class:`~pandas.Series` is used any unmapped category is
        mapped to `NaN`. Note that if this happens an :class:`~pandas.Index`
        will be returned.

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.

        Returns
        -------
        pandas.CategoricalIndex or pandas.Index
            Mapped index.

        See Also
        --------
        Index.map : Apply a mapping correspondence on an
            :class:`~pandas.Index`.
        Series.map : Apply a mapping correspondence on a
            :class:`~pandas.Series`.
        Series.apply : Apply more complex functions on a
            :class:`~pandas.Series`.

        Examples
        --------
        >>> idx = pd.CategoricalIndex(['a', 'b', 'c'])
        >>> idx
        CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
                          ordered=False, dtype='category')
        >>> idx.map(lambda x: x.upper())
        CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'],
                         ordered=False, dtype='category')
        >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'})
        CategoricalIndex(['first', 'second', 'third'], categories=['first',
                         'second', 'third'], ordered=False, dtype='category')

        If the mapping is one-to-one the ordering of the categories is
        preserved:

        >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True)
        >>> idx
        CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'],
                         ordered=True, dtype='category')
        >>> idx.map({'a': 3, 'b': 2, 'c': 1})
        CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True,
                         dtype='category')

        If the mapping is not one-to-one an :class:`~pandas.Index` is returned:

        >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'})
        Index(['first', 'second', 'first'], dtype='object')

        If a `dict` is used, all unmapped categories are mapped to `NaN` and
        the result is an :class:`~pandas.Index`:

        >>> idx.map({'a': 'first', 'b': 'second'})
        Index(['first', 'second', nan], dtype='object')
        """
        mapped = self._values.map(mapper)
        return Index(mapped, name=self.name)
Exemplo n.º 37
0
 def right(self) -> Index:
     return Index(self._data.right, copy=False)
Exemplo n.º 38
0
 def _delegate_method(self, name, *args, **kwargs):
     result = operator.methodcaller(name, *args, **kwargs)(self._data)
     if name not in self._raw_methods:
         result = Index(result, name=self.name)
     return result
Exemplo n.º 39
0
 def mid(self) -> Index:
     return Index(self._data.mid, copy=False)
Exemplo n.º 40
0
def _union_indexes(indexes, sort=True):
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool, default True
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index
    """
    if len(indexes) == 0:
        raise AssertionError('Must have at least 1 Index to union')
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            result = Index(sorted(result))
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds):
        """
        Convert indexes to lists and concatenate them, removing duplicates.

        The final dtype is inferred.

        Parameters
        ----------
        inds : list of Index or list objects

        Returns
        -------
        Index
        """
        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(
            lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))

    if kind == 'special':
        result = indexes[0]

        if hasattr(result, 'union_many'):
            return result.union_many(indexes[1:])
        else:
            for other in indexes[1:]:
                result = result.union(other)
            return result
    elif kind == 'array':
        index = indexes[0]
        for other in indexes[1:]:
            if not index.equals(other):

                if sort is None:
                    # TODO: remove once pd.concat sort default changes
                    warnings.warn(_sort_msg, FutureWarning, stacklevel=8)
                    sort = True

                return _unique_indices(indexes)

        name = _get_consensus_names(indexes)[0]
        if name != index.name:
            index = index._shallow_copy(name=name)
        return index
    else:  # kind='list'
        return _unique_indices(indexes)
Exemplo n.º 41
0
 def length(self) -> Index:
     return Index(self._data.length, copy=False)
Exemplo n.º 42
0
 def _delegate_property_get(self, name, *args, **kwargs):
     result = getattr(self._data, name)
     if name not in self._raw_properties:
         result = Index(result, name=self.name)
     return result
Exemplo n.º 43
0
def _convert_listlike_datetimes(
    arg,
    format: str | None,
    name: Hashable = None,
    tz: Timezone | None = None,
    unit: str | None = None,
    errors: str = "raise",
    infer_datetime_format: bool = False,
    dayfirst: bool | None = None,
    yearfirst: bool | None = None,
    exact: bool = True,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : str
        None or string of the frequency of the passed data
    errors : str
        error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : bool, default False
        inferring format behavior from to_datetime
    dayfirst : bool
        dayfirst parsing behavior from to_datetime
    yearfirst : bool
        yearfirst parsing behavior from to_datetime
    exact : bool, default True
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable
    if is_datetime64tz_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        return _to_datetime_with_unit(arg, unit, name, tz, errors)
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg,
                                     copy=False,
                                     tz=timezones.maybe_get_tz(tz))
    except TypeError:
        if errors == "coerce":
            npvalues = np.array(["NaT"],
                                dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(npvalues, name=name)
        elif errors == "ignore":
            idx = Index(arg, name=name)
            return idx
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    if format is not None:
        res = _to_datetime_with_format(arg, orig_arg, name, tz, format, exact,
                                       errors, infer_datetime_format)
        if res is not None:
            return res

    assert format is None or infer_datetime_format
    utc = tz == "utc"
    result, tz_parsed = objects_to_datetime64ns(
        arg,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        utc=utc,
        errors=errors,
        require_iso8601=require_iso8601,
        allow_object=True,
    )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
Exemplo n.º 44
0
 def to_tuples(self):
     return Index(com._asarray_tuplesafe(zip(self.left, self.right)))
Exemplo n.º 45
0
 def f(self):
     result = fget(self)
     return Index(result, name=self.name)
Exemplo n.º 46
0
 def wrapper(self, other):
     result = getattr(TimedeltaArrayMixin, opname)(self, other)
     if is_bool_dtype(result):
         # support of bool dtype indexers
         return result
     return Index(result)
Exemplo n.º 47
0
        def __sub__(self, other):
            from pandas import Index

            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._sub_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(-other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._sub_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(-other)
            elif isinstance(other, Period):
                result = self._sub_period(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.sub)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                result = self._sub_datelike(other)
            elif is_period_dtype(other):
                # PeriodIndex
                result = self._sub_period_array(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.sub)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {cls} and {typ}"
                                .format(cls=type(self).__name__,
                                        typ=type(other).__name__))
            elif is_float_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Exemplo n.º 48
0
def melt(
    frame: DataFrame,
    id_vars=None,
    value_vars=None,
    var_name=None,
    value_name="value",
    col_level=None,
) -> DataFrame:
    # TODO: what about the existing index?
    # If multiindex, gather names of columns on all level for checking presence
    # of `id_vars` and `value_vars`
    if isinstance(frame.columns, ABCMultiIndex):
        cols = [x for c in frame.columns for x in c]
    else:
        cols = list(frame.columns)

    if id_vars is not None:
        if not is_list_like(id_vars):
            id_vars = [id_vars]
        elif isinstance(frame.columns,
                        ABCMultiIndex) and not isinstance(id_vars, list):
            raise ValueError(
                "id_vars must be a list of tuples when columns are a MultiIndex"
            )
        else:
            # Check that `id_vars` are in frame
            id_vars = list(id_vars)
            missing = Index(com.flatten(id_vars)).difference(cols)
            if not missing.empty:
                raise KeyError("The following 'id_vars' are not present "
                               "in the DataFrame: {missing}"
                               "".format(missing=list(missing)))
    else:
        id_vars = []

    if value_vars is not None:
        if not is_list_like(value_vars):
            value_vars = [value_vars]
        elif isinstance(frame.columns,
                        ABCMultiIndex) and not isinstance(value_vars, list):
            raise ValueError(
                "value_vars must be a list of tuples when columns are a MultiIndex"
            )
        else:
            value_vars = list(value_vars)
            # Check that `value_vars` are in frame
            missing = Index(com.flatten(value_vars)).difference(cols)
            if not missing.empty:
                raise KeyError("The following 'value_vars' are not present in "
                               "the DataFrame: {missing}"
                               "".format(missing=list(missing)))
        frame = frame.loc[:, id_vars + value_vars]
    else:
        frame = frame.copy()

    if col_level is not None:  # allow list or other?
        # frame is a copy
        frame.columns = frame.columns.get_level_values(col_level)

    if var_name is None:
        if isinstance(frame.columns, ABCMultiIndex):
            if len(frame.columns.names) == len(set(frame.columns.names)):
                var_name = frame.columns.names
            else:
                var_name = [
                    "variable_{i}".format(i=i)
                    for i in range(len(frame.columns.names))
                ]
        else:
            var_name = [
                frame.columns.name
                if frame.columns.name is not None else "variable"
            ]
    if isinstance(var_name, str):
        var_name = [var_name]

    N, K = frame.shape
    K -= len(id_vars)

    mdata = {}
    for col in id_vars:
        id_data = frame.pop(col)
        if is_extension_array_dtype(id_data):
            id_data = concat([id_data] * K, ignore_index=True)
        else:
            id_data = np.tile(id_data.values, K)
        mdata[col] = id_data

    mcolumns = id_vars + var_name + [value_name]

    mdata[value_name] = frame.values.ravel("F")
    for i, col in enumerate(var_name):
        # asanyarray will keep the columns as an Index
        mdata[col] = np.asanyarray(
            frame.columns._get_level_values(i)).repeat(N)

    return frame._constructor(mdata, columns=mcolumns)
Exemplo n.º 49
0
    def reindex(self,
                target,
                method=None,
                level=None,
                limit=None,
                tolerance=None) -> tuple[Index, npt.NDArray[np.intp] | None]:
        """
        Create index with target's values (move/add/delete values as necessary)

        Returns
        -------
        new_index : pd.Index
            Resulting index
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index

        """
        if method is not None:
            raise NotImplementedError(
                "argument method is not implemented for CategoricalIndex.reindex"
            )
        if level is not None:
            raise NotImplementedError(
                "argument level is not implemented for CategoricalIndex.reindex"
            )
        if limit is not None:
            raise NotImplementedError(
                "argument limit is not implemented for CategoricalIndex.reindex"
            )

        target = ibase.ensure_index(target)

        if self.equals(target):
            indexer = None
            missing = np.array([], dtype=np.intp)
        else:
            indexer, missing = self.get_indexer_non_unique(target)
            if not self.is_unique:
                # GH#42568
                warnings.warn(
                    "reindexing with a non-unique Index is deprecated and will "
                    "raise in a future version.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

        new_target: Index
        if len(self) and indexer is not None:
            new_target = self.take(indexer)
        else:
            new_target = target

        # filling in missing if needed
        if len(missing):
            cats = self.categories.get_indexer(target)

            if not isinstance(target, CategoricalIndex) or (cats == -1).any():
                new_target, indexer, _ = super()._reindex_non_unique(target)
            else:
                # error: "Index" has no attribute "codes"
                codes = new_target.codes.copy()  # type: ignore[attr-defined]
                codes[indexer == -1] = cats[missing]
                cat = self._data._from_backing_data(codes)
                new_target = type(self)._simple_new(cat, name=self.name)

        # we always want to return an Index type here
        # to be consistent with .reindex for other index types (e.g. they don't
        # coerce based on the actual values, only on the dtype)
        # unless we had an initial Categorical to begin with
        # in which case we are going to conform to the passed Categorical
        if is_categorical_dtype(target):
            cat = Categorical(new_target, dtype=target.dtype)
            new_target = type(self)._simple_new(cat, name=self.name)
        else:
            # e.g. test_reindex_with_categoricalindex, test_reindex_duplicate_target
            new_target_array = np.asarray(new_target)
            new_target = Index._with_infer(new_target_array, name=self.name)

        return new_target, indexer
Exemplo n.º 50
0
 def astype(self, dtype, copy: bool = True):
     with rewrite_exception("IntervalArray", type(self).__name__):
         new_values = self._values.astype(dtype, copy=copy)
     return Index(new_values, dtype=new_values.dtype, name=self.name)
Exemplo n.º 51
0
 def astype(self, dtype, copy=True):
     res_data = self._data.astype(dtype, copy=copy)
     return Index(res_data, name=self.name)