示例#1
0
def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Helper function for `arr.astype(common_dtype)` but handling all special
    cases.
    """
    if (is_categorical_dtype(arr.dtype) and isinstance(dtype, np.dtype)
            and np.issubdtype(dtype, np.integer)):
        # problem case: categorical of int -> gives int as result dtype,
        # but categorical can contain NAs -> fall back to object dtype
        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array
        arr = cast(SparseArray, arr)
        return arr.to_dense().astype(dtype, copy=False)

    if (isinstance(arr, np.ndarray) and arr.dtype.kind in ["m", "M"]
            and dtype is np.dtype("object")):
        # wrap datetime-likes in EA to ensure astype(object) gives Timestamp/Timedelta
        # this can happen when concat_compat is called directly on arrays (when arrays
        # are not coming from Index/Series._values), eg in BlockManager.quantile
        arr = array(arr)

    if is_extension_array_dtype(dtype):
        if isinstance(arr, np.ndarray):
            # numpy's astype cannot handle ExtensionDtypes
            return array(arr, dtype=dtype, copy=False)
    return arr.astype(dtype, copy=False)
示例#2
0
def dispatch_to_extension_op(
    op, left: Union[ABCExtensionArray, np.ndarray], right: Any,
):
    """
    Apply the binary operator ``op`` to ``left`` and ``right``, where at
    least one side is expected to be backed by an ExtensionArray.

    Parameters
    ----------
    op : binary operator
    left : ExtensionArray or np.ndarray
    right : object

    Returns
    -------
    ExtensionArray or np.ndarray
        2-tuple of these if op is divmod or rdivmod
    """
    # NB: callers are expected to have unboxed both operands already, so
    #  neither should be a Series or Index here.

    is_raw_datetimelike = left.dtype.kind in "mM" and isinstance(left, np.ndarray)
    if is_raw_datetimelike:
        # datetime64/timedelta64 ndarrays are promoted to
        #  DatetimeArray/TimedeltaArray.  Other ndarrays are deliberately not
        #  wrapped in PandasArray, which behaves poorly with e.g. IntegerArray.
        left = array(left)

    # If the ExtensionArray does not define the op, the call raises TypeError,
    #  which is left to propagate.
    return op(left, right)
示例#3
0
def dispatch_to_extension_op(op, left, right):
    """
    Apply the binary operator ``op`` to ``left`` and ``right``, where at
    least one side is expected to be a Series backed by an ExtensionArray.

    Both operands are unboxed with ``extract_array`` before ``op`` is called.
    A ``NullFrequencyError`` raised by ``op`` is converted into ``TypeError``.
    """
    if left.dtype.kind in "mM":
        # datetime64/timedelta64 data is promoted to
        #  DatetimeArray/TimedeltaArray.  Other data is deliberately not
        #  wrapped in PandasArray, which behaves poorly with e.g. IntegerArray.
        left = array(left)

    # Strip Series/Index wrappers so the op runs on the underlying arrays.
    lvalues = extract_array(left, extract_numpy=True)
    rvalues = extract_array(right, extract_numpy=True)

    try:
        # If the ExtensionArray does not define the op, this raises TypeError,
        #  which is left to propagate.
        return op(lvalues, rvalues)
    except NullFrequencyError:
        # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
        #  on add/sub of integers (or int-like); surface that as a TypeError.
        template = "incompatible type for a datetime/timedelta operation [{name}]"
        raise TypeError(template.format(name=op.__name__))
示例#4
0
def dispatch_to_extension_op(
    op,
    left: Union[ABCExtensionArray, np.ndarray],
    right: Any,
    keep_null_freq: bool = False,
):
    """
    Apply the binary operator ``op`` to ``left`` and ``right``, where at
    least one side is expected to be backed by an ExtensionArray.

    Parameters
    ----------
    op : binary operator
    left : ExtensionArray or np.ndarray
    right : object
    keep_null_freq : bool, default False
        If True, a NullFrequencyError raised by ``op`` propagates unchanged;
        otherwise it is replaced by a TypeError.

    Returns
    -------
    ExtensionArray or np.ndarray
        2-tuple of these if op is divmod or rdivmod

    Raises
    ------
    TypeError
        When ``op`` raises NullFrequencyError and ``keep_null_freq`` is False.
    """
    # NB: callers are expected to have unboxed both operands already, so
    #  neither should be a Series or Index here.

    is_raw_datetimelike = left.dtype.kind in "mM" and isinstance(left, np.ndarray)
    if is_raw_datetimelike:
        # datetime64/timedelta64 ndarrays are promoted to
        #  DatetimeArray/TimedeltaArray.  Other ndarrays are deliberately not
        #  wrapped in PandasArray, which behaves poorly with e.g. IntegerArray.
        left = array(left)

    try:
        # If the ExtensionArray does not define the op, this raises TypeError,
        #  which is left to propagate.
        return op(left, right)
    except NullFrequencyError:
        # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError
        #  on add/sub of integers (or int-like); by default that is surfaced
        #  as a TypeError.
        if not keep_null_freq:
            template = "incompatible type for a datetime/timedelta operation [{name}]"
            raise TypeError(template.format(name=op.__name__))
        # TODO: remove keep_null_freq after Timestamp+int deprecation
        #  GH#22535 is enforced
        raise
示例#5
0
    def __eq__(self, other):
        # ensure pandas array for list-like and eliminate non-interval scalars
        if is_list_like(other):
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")
            other = array(other)
        elif not isinstance(other, Interval):
            # non-interval scalar -> no matches
            return np.zeros(len(self), dtype=bool)

        # determine the dtype of the elements we want to compare
        if isinstance(other, Interval):
            other_dtype = pandas_dtype("interval")
        elif not is_categorical_dtype(other.dtype):
            other_dtype = other.dtype
        else:
            # for categorical defer to categories for dtype
            other_dtype = other.categories.dtype

            # extract intervals if we have interval categories with matching closed
            if is_interval_dtype(other_dtype):
                if self.closed != other.categories.closed:
                    return np.zeros(len(self), dtype=bool)
                other = other.categories.take(other.codes)

        # interval-like -> need same closed and matching endpoints
        if is_interval_dtype(other_dtype):
            if self.closed != other.closed:
                return np.zeros(len(self), dtype=bool)
            return (self._left == other.left) & (self._right == other.right)

        # non-interval/non-object dtype -> no matches
        if not is_object_dtype(other_dtype):
            return np.zeros(len(self), dtype=bool)

        # object dtype -> iteratively check for intervals
        result = np.zeros(len(self), dtype=bool)
        for i, obj in enumerate(other):
            # need object to be an Interval with same closed and endpoints
            if (
                isinstance(obj, Interval)
                and self.closed == obj.closed
                and self._left[i] == obj.left
                and self._right[i] == obj.right
            ):
                result[i] = True

        return result
示例#6
0
    def _cmp_method(self, other, op):
        # ensure pandas array for list-like and eliminate non-interval scalars
        if is_list_like(other):
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")
            other = array(other)
        elif not isinstance(other, Interval):
            # non-interval scalar -> no matches
            return invalid_comparison(self, other, op)

        # determine the dtype of the elements we want to compare
        if isinstance(other, Interval):
            other_dtype = pandas_dtype("interval")
        elif not is_categorical_dtype(other.dtype):
            other_dtype = other.dtype
        else:
            # for categorical defer to categories for dtype
            other_dtype = other.categories.dtype

            # extract intervals if we have interval categories with matching closed
            if is_interval_dtype(other_dtype):
                if self.closed != other.categories.closed:
                    return invalid_comparison(self, other, op)

                other = other.categories.take(
                    other.codes,
                    allow_fill=True,
                    fill_value=other.categories._na_value)

        # interval-like -> need same closed and matching endpoints
        if is_interval_dtype(other_dtype):
            if self.closed != other.closed:
                return invalid_comparison(self, other, op)
            elif not isinstance(other, Interval):
                other = type(self)(other)

            if op is operator.eq:
                return (self._left == other.left) & (self._right
                                                     == other.right)
            elif op is operator.ne:
                return (self._left != other.left) | (self._right !=
                                                     other.right)
            elif op is operator.gt:
                return (self._left > other.left) | (
                    (self._left == other.left) & (self._right > other.right))
            elif op is operator.ge:
                return (self == other) | (self > other)
            elif op is operator.lt:
                return (self._left < other.left) | (
                    (self._left == other.left) & (self._right < other.right))
            else:
                # operator.lt
                return (self == other) | (self < other)

        # non-interval/non-object dtype -> no matches
        if not is_object_dtype(other_dtype):
            return invalid_comparison(self, other, op)

        # object dtype -> iteratively check for intervals
        result = np.zeros(len(self), dtype=bool)
        for i, obj in enumerate(other):
            try:
                result[i] = op(self[i], obj)
            except TypeError:
                if obj is NA:
                    # comparison with np.nan returns NA
                    # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
                    result[i] = op is operator.ne
                else:
                    raise
        return result