Exemplo n.º 1
0
def _get_combined_index(
    indexes: List[Index],
    intersect: bool = False,
    sort: bool = False,
    copy: bool = False,
) -> Index:
    """
    Return the union or intersection of indexes.

    Parameters
    ----------
    indexes : list of Index or list objects
        When intersect=True, do not accept list of lists.
    intersect : bool, default False
        If True, calculate the intersection between indexes. Otherwise,
        calculate the union.
    sort : bool, default False
        Whether the result index should come out sorted or not.
    copy : bool, default False
        If True, return a copy of the combined index.

    Returns
    -------
    Index
    """
    # TODO: handle index names!
    indexes = _get_distinct_objs(indexes)
    if len(indexes) == 0:
        index = Index([])
    elif len(indexes) == 1:
        index = indexes[0]
    elif intersect:
        index = indexes[0]
        for other in indexes[1:]:
            index = index.intersection(other)
    else:
        index = union_indexes(indexes, sort=sort)
        index = ensure_index(index)

    if sort:
        try:
            index = index.sort_values()
        except TypeError:
            pass

    # GH 29879
    if copy:
        index = index.copy()

    return index
Exemplo n.º 2
0
    def _intersection(self, other: Index, sort=False) -> Index:
        """
        intersection specialized to the case with matching dtypes.
        """
        if len(self) == 0:
            return self.copy()._get_reconciled_name_object(other)
        if len(other) == 0:
            return other.copy()._get_reconciled_name_object(self)

        if not isinstance(other, type(self)):
            result = Index.intersection(self, other, sort=sort)
            if isinstance(result, type(self)):
                if result.freq is None:
                    # TODO: no tests rely on this; needed?
                    result = result._with_freq("infer")
            return result

        elif not self._can_fast_intersect(other):
            result = Index._intersection(self, other, sort=sort)
            # We need to invalidate the freq because Index._intersection
            #  uses _shallow_copy on a view of self._data, which will preserve
            #  self.freq if we're not careful.
            result = self._wrap_setop_result(other, result)
            return result._with_freq(None)._with_freq("infer")

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        # after sorting, the intersection always starts with the right index
        # and ends with the index of which the last elements is smallest
        end = min(left[-1], right[-1])
        start = right[0]

        if end < start:
            result = self[:0]
        else:
            lslice = slice(*left.slice_locs(start, end))
            left_chunk = left._values[lslice]
            # error: Argument 1 to "_simple_new" of "DatetimeIndexOpsMixin" has
            # incompatible type "Union[ExtensionArray, Any]"; expected
            # "Union[DatetimeArray, TimedeltaArray, PeriodArray]"  [arg-type]
            result = type(self)._simple_new(left_chunk)  # type: ignore[arg-type]

        return self._wrap_setop_result(other, result)
Exemplo n.º 3
0
    def _intersection(self, other: Index, sort=False) -> Index:
        """
        intersection specialized to the case with matching dtypes.
        """
        other = cast("DatetimeTimedeltaMixin", other)
        if len(self) == 0:
            return self.copy()._get_reconciled_name_object(other)
        if len(other) == 0:
            return other.copy()._get_reconciled_name_object(self)

        elif not self._can_fast_intersect(other):
            result = Index._intersection(self, other, sort=sort)
            # We need to invalidate the freq because Index._intersection
            #  uses _shallow_copy on a view of self._data, which will preserve
            #  self.freq if we're not careful.
            # At this point we should have result.dtype == self.dtype
            #  and type(result) is type(self._data)
            result = self._wrap_setop_result(other, result)
            return result._with_freq(None)._with_freq("infer")

        # to make our life easier, "sort" the two ranges
        if self[0] <= other[0]:
            left, right = self, other
        else:
            left, right = other, self

        # after sorting, the intersection always starts with the right index
        # and ends with the index of which the last elements is smallest
        end = min(left[-1], right[-1])
        start = right[0]

        if end < start:
            result = self[:0]
        else:
            lslice = slice(*left.slice_locs(start, end))
            result = left._values[lslice]

        return result