Exemplo n.º 1
0
def quantile_ea_compat(values: ExtensionArray, qs, interpolation: str,
                       axis: int) -> ExtensionArray:
    """
    Compute quantiles of an ExtensionArray through quantile_with_mask.

    The array's data and NA mask are promoted with ``np.atleast_2d`` so
    that a (N,)-shaped ExtensionArray looks like (1, N) to the non-EA code.
    Unless the dtype is sparse, the result is converted back with
    ``_from_factorized``.

    Parameters
    ----------
    values : ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation: str
    axis : int

    Returns
    -------
    ExtensionArray
    """
    # TODO(EA2D): the 2D pretence is not needed once EAs can be 2D

    # asarray needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(values.isna()))

    raw, fill_value = values._values_for_factorize()
    # error: No overload variant of "atleast_2d" matches argument type "ExtensionArray"
    raw_2d = np.atleast_2d(raw)  # type: ignore[call-overload]

    result = quantile_with_mask(raw_2d, na_mask, fill_value, qs,
                                interpolation, axis)

    if is_sparse(values.dtype):
        # sparse results are handed back exactly as computed
        # error: Incompatible return value type (got "ndarray", expected "ExtensionArray")
        return result  # type: ignore[return-value]

    # shape[0] should be 1 as long as EAs are 1D
    if result.ndim == 1:
        # i.e. qs was originally a scalar
        assert result.shape == (1, ), result.shape
        flat = result
    else:
        assert result.shape == (1, len(qs)), result.shape
        flat = result[0]

    return type(values)._from_factorized(flat, values)
Exemplo n.º 2
0
    def _ea_wrap_cython_operation(
        self,
        values: ExtensionArray,
        min_count: int,
        ngroups: int,
        comp_ids: np.ndarray,
        **kwargs,
    ) -> ArrayLike:
        """
        Run the cython operation on an ExtensionArray.

        The array is converted to an ndarray, handed to
        ``_cython_op_ndim_compat``, and the output is rebuilt with
        ``_reconstruct_ea_result`` unless the operation is "rank".
        """
        # TODO: general case implementation overridable by EAs.
        if isinstance(values, BaseMaskedArray) and self.uses_mask():
            # masked arrays have a dedicated path when the op supports masks
            return self._masked_ea_wrap_cython_operation(
                values,
                min_count=min_count,
                ngroups=ngroups,
                comp_ids=comp_ids,
                **kwargs,
            )

        if isinstance(values, (DatetimeArray, PeriodArray, TimedeltaArray)):
            # All of the functions implemented here are ordinal, so we can
            #  operate on the tz-naive equivalents
            ndarr = values._ndarray.view("M8[ns]")
        elif isinstance(values.dtype, (BooleanDtype, IntegerDtype)):
            # IntegerArray or BooleanArray: NA becomes NaN in a float64 array
            ndarr = values.to_numpy("float64", na_value=np.nan)
        elif isinstance(values.dtype, FloatingDtype):
            # FloatingArray: keep the underlying numpy float dtype
            ndarr = values.to_numpy(values.dtype.numpy_dtype, na_value=np.nan)
        elif isinstance(values.dtype, StringDtype):
            # StringArray: object array with NaN for missing entries
            ndarr = values.to_numpy(object, na_value=np.nan)
        else:
            raise NotImplementedError(
                f"function is not implemented for this dtype: {values.dtype}"
            )

        out = self._cython_op_ndim_compat(
            ndarr,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=comp_ids,
            mask=None,
            **kwargs,
        )

        if self.how not in ["rank"]:
            return self._reconstruct_ea_result(values, out)

        # i.e. how in WrappedCythonOp.cast_blocklist, since
        #  other cast_blocklist methods dont go through cython_operation
        return out
Exemplo n.º 3
0
def _quantile_ea_fallback(values: ExtensionArray, qs: np.ndarray,
                          interpolation: str) -> ExtensionArray:
    """
    Quantile path for ExtensionArray subclasses that do not implement
    `_from_factorized`, e.g. IntegerArray.

    The data and NA mask are promoted to 2D, passed to
    `_quantile_with_mask`, and the single result row is rebuilt with
    `_from_sequence` using the original dtype.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    na_mask = np.atleast_2d(np.asarray(values.isna()))
    data_2d = np.atleast_2d(np.asarray(values))

    quantiles = _quantile_with_mask(
        data_2d,
        mask=na_mask,
        fill_value=values.dtype.na_value,
        qs=qs,
        interpolation=interpolation,
    )
    # exactly one row is expected because the input was 1D
    assert quantiles.ndim == 2
    assert quantiles.shape[0] == 1
    return type(values)._from_sequence(quantiles[0], dtype=values.dtype)
Exemplo n.º 4
0
Arquivo: ops.py Projeto: tnir/pandas
    def _reconstruct_ea_result(
        self, values: ExtensionArray, res_values: np.ndarray
    ) -> ExtensionArray:
        """
        Build an ExtensionArray from the ndarray produced for ``values``.

        String and masked dtypes go through ``_from_sequence``; datetime-like
        arrays are rebuilt from their backing data. Any other input raises
        NotImplementedError.
        """
        if isinstance(values.dtype, StringDtype):
            str_dtype = values.dtype
            return str_dtype.construct_array_type()._from_sequence(
                res_values, dtype=str_dtype
            )

        if isinstance(values.dtype, BaseMaskedDtype):
            # the result dtype is derived from the underlying numpy dtype
            result_np_dtype = self._get_result_dtype(values.dtype.numpy_dtype)
            masked_dtype = BaseMaskedDtype.from_numpy_dtype(result_np_dtype)
            return masked_dtype.construct_array_type()._from_sequence(
                res_values, dtype=masked_dtype
            )

        if isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)):
            # In to_cython_values we took a view as M8[ns]; undo it here
            assert res_values.dtype == "M8[ns]"
            backing = res_values.view(values._ndarray.dtype)
            return values._from_backing_data(backing)

        raise NotImplementedError
Exemplo n.º 5
0
def _reorder_for_extension_array_stack(
    arr: ExtensionArray, n_rows: int, n_columns: int
) -> ExtensionArray:
    """
    Put the values of stacked extension arrays into their final order.

    The indirect stacking method used for EAs leaves the values
    column-by-column; one extra ``take`` interleaves them row-by-row.

    Parameters
    ----------
    arr : ExtensionArray
    n_rows, n_columns : int
        The number of rows and columns in the original DataFrame.

    Returns
    -------
    taken : ExtensionArray
        The original `arr` with elements re-ordered appropriately

    Examples
    --------
    >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
    >>> _reorder_for_extension_array_stack(arr, 2, 3)
    array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1')

    >>> _reorder_for_extension_array_stack(arr, 3, 2)
    array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1')
    """
    # Lay the flat positions out as (column, row), then read them back in
    # column-major order, which yields
    # [c0r0, c1r0, c2r0, ...,
    #  c0r1, c1r1, c2r1, ...]
    positions = np.arange(n_rows * n_columns)
    by_column = positions.reshape(n_columns, n_rows)
    indexer = by_column.ravel(order="F")
    return arr.take(indexer)
Exemplo n.º 6
0
def nargminmax(values: ExtensionArray, method: str, axis: int = 0):
    """
    np.argmin/argmax for an ExtensionArray, with missing values handled.

    Parameters
    ----------
    values : ExtensionArray
    method : {"argmax", "argmin"}
    axis : int, default 0

    Returns
    -------
    int
    """
    assert method in {"argmax", "argmin"}
    reducer = np.argmax if method == "argmax" else np.argmin

    na_mask = np.asarray(isna(values))
    data = values._values_for_argsort()

    if data.ndim <= 1:
        return _nanargminmax(data, na_mask, reducer)

    if not na_mask.any():
        # nothing missing: numpy can reduce along the axis directly
        return reducer(data, axis=axis)

    # with missing values, reduce one 1D slice at a time together with its mask
    pairs = zip(data, na_mask) if axis == 1 else zip(data.T, na_mask.T)
    return np.array([_nanargminmax(vals, m, reducer) for vals, m in pairs])
Exemplo n.º 7
0
def _quantile_ea_fallback(values: ExtensionArray, qs: npt.NDArray[np.float64],
                          interpolation: str) -> ExtensionArray:
    """
    Quantile path for ExtensionArray subclasses that do not implement
    `_from_factorized`, e.g. IntegerArray.

    The single result row is rebuilt with `_from_sequence`; if that raises
    TypeError, a 2D float64 ndarray is returned instead.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    na_mask = np.atleast_2d(np.asarray(values.isna()))
    data_2d = np.atleast_2d(np.asarray(values))

    quantiles = _quantile_with_mask(
        data_2d,
        mask=na_mask,
        fill_value=values.dtype.na_value,
        qs=qs,
        interpolation=interpolation,
    )
    # exactly one row is expected because the input was 1D
    assert quantiles.ndim == 2
    assert quantiles.shape[0] == 1
    row = quantiles[0]

    try:
        return type(values)._from_sequence(row, dtype=values.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        return np.atleast_2d(np.asarray(row, dtype=np.float64))
Exemplo n.º 8
0
    def _reconstruct_ea_result(
        self, values: ExtensionArray, res_values: np.ndarray
    ) -> ExtensionArray:
        """
        Build an ExtensionArray from the ndarray produced for ``values``.

        String and masked dtypes go through ``_from_sequence``; datetime-like
        arrays are rebuilt from their backing data. Any other input raises
        NotImplementedError.
        """
        if isinstance(values.dtype, StringDtype):
            str_dtype = values.dtype
            str_array_cls = str_dtype.construct_array_type()
            return str_array_cls._from_sequence(res_values, dtype=str_dtype)

        if isinstance(values.dtype, BaseMaskedDtype):
            # the result dtype is derived from the underlying numpy dtype
            result_np_dtype = self._get_result_dtype(values.dtype.numpy_dtype)
            masked_dtype = BaseMaskedDtype.from_numpy_dtype(result_np_dtype)
            masked_array_cls = masked_dtype.construct_array_type()
            return masked_array_cls._from_sequence(res_values, dtype=masked_dtype)

        if isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)):
            # In to_cython_values we took a view as M8[ns]; undo it here
            assert res_values.dtype == "M8[ns]"
            backing = res_values.view(values._ndarray.dtype)
            return values._from_backing_data(backing)

        raise NotImplementedError
Exemplo n.º 9
0
    def shift(self, periods=1, fill_value=None):
        """
        Shift values by desired number.

        This is a direct call to ``ExtensionArray.shift`` with ``self``,
        ``periods`` and ``fill_value`` passed through unchanged.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        periods : int, default 1
            The number of periods to shift. Negative values are allowed
            for shifting backwards.
        fill_value : optional, default None
            Forwarded to ``ExtensionArray.shift``.
            NOTE(review): presumably ``None`` means newly introduced
            positions are filled with ``self.dtype.na_value`` (NaT) --
            confirm against ``ExtensionArray.shift``.

            .. versionadded:: 0.24.0

        Returns
        -------
        shifted : PeriodArray
        """
        # TODO(DatetimeArray): remove
        # The semantics for Index.shift differ from EA.shift, so the
        # ExtensionArray implementation is named explicitly here instead of
        # relying on whichever ``shift`` the MRO would pick.
        return ExtensionArray.shift(self, periods, fill_value=fill_value)
Exemplo n.º 10
0
def quantile_ea_compat(values: ExtensionArray, qs, interpolation: str,
                       axis: int) -> ExtensionArray:
    """
    Compute quantiles of an ExtensionArray through quantile_with_mask.

    The array's data and NA mask are promoted with ``np.atleast_2d`` so
    that a (N,)-shaped ExtensionArray looks like (1, N) to the non-EA code.
    Unless the dtype is sparse, the result is converted back with
    ``_from_factorized``.

    Parameters
    ----------
    values : ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation: str
    axis : int

    Returns
    -------
    ExtensionArray
    """
    # TODO(EA2D): the 2D pretence is not needed once EAs can be 2D

    # asarray needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(values.isna()))

    raw, fill_value = values._values_for_factorize()
    raw_2d = np.atleast_2d(raw)

    result = quantile_with_mask(raw_2d, na_mask, fill_value, qs,
                                interpolation, axis)

    if is_sparse(values.dtype):
        # sparse results are handed back exactly as computed
        return result

    # shape[0] should be 1 as long as EAs are 1D
    if result.ndim == 1:
        # i.e. qs was originally a scalar
        assert result.shape == (1, ), result.shape
        flat = result
    else:
        assert result.shape == (1, len(qs)), result.shape
        flat = result[0]

    return type(values)._from_factorized(flat, values)
Exemplo n.º 11
0
Arquivo: ops.py Projeto: tnir/pandas
    def _ea_wrap_cython_operation(
        self,
        values: ExtensionArray,
        min_count: int,
        ngroups: int,
        comp_ids: np.ndarray,
        **kwargs,
    ) -> ArrayLike:
        """
        Run the cython operation on an ExtensionArray.

        Masked arrays and ordered Categoricals have mask-aware paths when the
        operation uses a mask. Everything else is converted with
        ``_ea_to_cython_values`` and, unless the operation is in
        ``cast_blocklist``, rebuilt with ``_reconstruct_ea_result``.
        """

        def run_op(ndarr, mask):
            # the ndarray-level call shared by both non-masked-array paths
            return self._cython_op_ndim_compat(
                ndarr,
                min_count=min_count,
                ngroups=ngroups,
                comp_ids=comp_ids,
                mask=mask,
                **kwargs,
            )

        if isinstance(values, BaseMaskedArray) and self.uses_mask():
            return self._masked_ea_wrap_cython_operation(
                values,
                min_count=min_count,
                ngroups=ngroups,
                comp_ids=comp_ids,
                **kwargs,
            )

        if isinstance(values, Categorical) and self.uses_mask():
            assert self.how == "rank"  # the only one implemented ATM
            assert values.ordered  # checked earlier
            na_mask = values.isna()

            # If we ever have more than just "rank" here, we'll need to do
            #  `if self.how in self.cast_blocklist` like we do for other dtypes.
            return run_op(values._ndarray, na_mask)

        out = run_op(self._ea_to_cython_values(values), None)

        if self.how not in self.cast_blocklist:
            return self._reconstruct_ea_result(values, out)

        # i.e. how in ["rank"], since other cast_blocklist methods dont go
        #  through cython_operation
        return out
Exemplo n.º 12
0
def _quantile_ea_compat(
    values: ExtensionArray, qs: np.ndarray, interpolation: str
) -> ExtensionArray:
    """
    Compute quantiles of an ExtensionArray through _quantile_with_mask.

    The array's data and NA mask are promoted with ``np.atleast_2d`` so
    that a (N,)-shaped ExtensionArray looks like (1, N) to the non-EA code.
    Unless the dtype is sparse, the result is converted back with
    ``_from_factorized``.

    Parameters
    ----------
    values : ExtensionArray
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    ExtensionArray
    """
    # TODO(EA2D): the 2D pretence is not needed once EAs can be 2D

    # asarray needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(values.isna()))

    raw, fill_value = values._values_for_factorize()
    raw_2d = np.atleast_2d(raw)

    result = _quantile_with_mask(raw_2d, na_mask, fill_value, qs, interpolation)

    if is_sparse(values.dtype):
        # sparse results are handed back exactly as computed
        # error: Incompatible return value type (got "ndarray", expected "ExtensionArray")
        return result  # type: ignore[return-value]

    # shape[0] should be 1 as long as EAs are 1D
    if values.ndim == 2:
        # i.e. DatetimeArray: the 2D result is wrapped as-is
        return type(values)._from_factorized(result, values)

    assert result.shape == (1, len(qs)), result.shape
    return type(values)._from_factorized(result[0], values)
Exemplo n.º 13
0
Arquivo: ops.py Projeto: tnir/pandas
 def _ea_to_cython_values(self, values: ExtensionArray) -> np.ndarray:
     """
     Convert a supported ExtensionArray into an ndarray.

     Raises NotImplementedError for any dtype not handled below.
     """
     # GH#43682
     if isinstance(values, (DatetimeArray, PeriodArray, TimedeltaArray)):
         # All of the functions implemented here are ordinal, so we can
         #  operate on the tz-naive equivalents
         return values._ndarray.view("M8[ns]")

     if isinstance(values.dtype, (BooleanDtype, IntegerDtype)):
         # IntegerArray or BooleanArray: NA becomes NaN in a float64 array
         return values.to_numpy("float64", na_value=np.nan)

     if isinstance(values.dtype, FloatingDtype):
         # FloatingArray: keep the underlying numpy float dtype
         return values.to_numpy(values.dtype.numpy_dtype, na_value=np.nan)

     if isinstance(values.dtype, StringDtype):
         # StringArray: object array with NaN for missing entries
         return values.to_numpy(object, na_value=np.nan)

     raise NotImplementedError(
         f"function is not implemented for this dtype: {values.dtype}"
     )
Exemplo n.º 14
0
    def shift(self, periods=1):
        """
        Shift values by desired number.

        This is a direct call to ``ExtensionArray.shift`` with ``self`` and
        ``periods``; no fill value is passed.

        NOTE(review): newly introduced missing values are presumably filled
        with ``self.dtype.na_value`` by ``ExtensionArray.shift`` -- confirm
        against that implementation.

        .. versionadded:: 0.24.0

        Parameters
        ----------
        periods : int, default 1
            The number of periods to shift. Negative values are allowed
            for shifting backwards.

        Returns
        -------
        shifted : PeriodArray
        """
        # TODO(DatetimeArray): remove
        # The semantics for Index.shift differ from EA.shift, so the
        # ExtensionArray implementation is named explicitly here instead of
        # relying on whichever ``shift`` the MRO would pick.
        return ExtensionArray.shift(self, periods)
Exemplo n.º 15
0
    def _ea_wrap_cython_operation(
        self,
        values: ExtensionArray,
        min_count: int,
        ngroups: int,
        comp_ids: np.ndarray,
        **kwargs,
    ) -> ArrayLike:
        """
        Run the cython operation on an ExtensionArray.

        The array is converted to an ndarray according to its type, handed
        to ``_cython_op_ndim_compat``, and the output is wrapped back into an
        array of the appropriate type unless the operation is "rank".
        """
        # TODO: general case implementation overridable by EAs.
        if isinstance(values, BaseMaskedArray) and self.uses_mask():
            return self._masked_ea_wrap_cython_operation(
                values,
                min_count=min_count,
                ngroups=ngroups,
                comp_ids=comp_ids,
                **kwargs,
            )

        def run_op(ndarr):
            # every branch below issues the same unmasked ndarray-level call
            return self._cython_op_ndim_compat(
                ndarr,
                min_count=min_count,
                ngroups=ngroups,
                comp_ids=comp_ids,
                mask=None,
                **kwargs,
            )

        def wrap_with_result_dtype(out):
            # rebuild an array using the result dtype for this operation
            result_dtype = self._get_result_dtype(values.dtype)
            array_cls = result_dtype.construct_array_type()
            return array_cls._from_sequence(out, dtype=result_dtype)

        # In every branch: "rank" is in WrappedCythonOp.cast_blocklist (the
        #  other cast_blocklist methods dont go through cython_operation),
        #  so its output is returned without re-wrapping.

        if isinstance(values, (DatetimeArray, PeriodArray)):
            # All of the functions implemented here are ordinal, so we can
            #  operate on the tz-naive equivalents
            out = run_op(values._ndarray.view("M8[ns]"))
            if self.how in ["rank"]:
                # preserve float64 dtype
                return out
            return type(values)(out.view("i8"), dtype=values.dtype)

        if isinstance(values, TimedeltaArray):
            # We have an ExtensionArray but not ExtensionDtype
            out = run_op(values._ndarray)
            if self.how in ["rank"]:
                # preserve float64 dtype
                return out
            # otherwise the output has the same dtype as the original values
            return type(values)(out)

        if isinstance(values.dtype, (BooleanDtype, _IntegerDtype)):
            # IntegerArray or BooleanArray: NA becomes NaN in a float64 array
            out = run_op(values.to_numpy("float64", na_value=np.nan))
            if self.how in ["rank"]:
                return out
            return wrap_with_result_dtype(out)

        if isinstance(values.dtype, FloatingDtype):
            # FloatingArray: keep the underlying numpy float dtype
            out = run_op(
                values.to_numpy(values.dtype.numpy_dtype, na_value=np.nan)
            )
            if self.how in ["rank"]:
                return out
            return wrap_with_result_dtype(out)

        raise NotImplementedError(
            f"function is not implemented for this dtype: {values.dtype}")
Exemplo n.º 16
0
    def _ea_wrap_cython_operation(
        self,
        values: ExtensionArray,
        min_count: int,
        ngroups: int,
        comp_ids: np.ndarray,
        **kwargs,
    ) -> ArrayLike:
        """
        Run the cython operation on an ExtensionArray.

        The array is converted to an ndarray according to its dtype, handed
        to ``_cython_op_ndim_compat``, and the output is wrapped back into an
        array of the appropriate type unless the operation is "rank".
        """
        # TODO: general case implementation overridable by EAs.

        def run_op(ndarr):
            # every branch below issues the same unmasked ndarray-level call
            return self._cython_op_ndim_compat(
                ndarr,
                min_count=min_count,
                ngroups=ngroups,
                comp_ids=comp_ids,
                mask=None,
                **kwargs,
            )

        def wrap_with_result_dtype(out):
            # rebuild an array using the result dtype for this operation
            result_dtype = self.get_result_dtype(values.dtype)
            # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]"
            # has no attribute "construct_array_type"
            array_cls = result_dtype.construct_array_type()  # type: ignore[union-attr]
            return array_cls._from_sequence(out, dtype=result_dtype)

        # In every branch: "rank" is in WrappedCythonOp.cast_blocklist (the
        #  other cast_blocklist methods dont go through cython_operation),
        #  so its output is returned without re-wrapping.

        if is_datetime64tz_dtype(values.dtype) or is_period_dtype(
                values.dtype):
            # All of the functions implemented here are ordinal, so we can
            #  operate on the tz-naive equivalents
            # error: Argument 1 to "_cython_op_ndim_compat" of
            # "WrappedCythonOp" has incompatible type
            # "Union[ExtensionArray, ndarray]"; expected "ndarray"
            out = run_op(values.view("M8[ns]"))  # type: ignore[arg-type]
            if self.how in ["rank"]:
                # preserve float64 dtype
                return out

            as_i8 = out.astype("i8", copy=False)
            # error: Too many arguments for "ExtensionArray"
            return type(values)(  # type: ignore[call-arg]
                as_i8, dtype=values.dtype)

        if is_integer_dtype(values.dtype) or is_bool_dtype(values.dtype):
            # IntegerArray or BooleanArray: NA becomes NaN in a float64 array
            out = run_op(values.to_numpy("float64", na_value=np.nan))
            if self.how in ["rank"]:
                return out
            return wrap_with_result_dtype(out)

        if is_float_dtype(values.dtype):
            # FloatingArray: keep the underlying numpy float dtype
            # error: "ExtensionDtype" has no attribute "numpy_dtype"
            out = run_op(
                values.to_numpy(
                    values.dtype.numpy_dtype,  # type: ignore[attr-defined]
                    na_value=np.nan,
                )
            )
            if self.how in ["rank"]:
                return out
            return wrap_with_result_dtype(out)

        raise NotImplementedError(
            f"function is not implemented for this dtype: {values.dtype}")
Exemplo n.º 17
0
 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
     """
     Delegate to ``ExtensionArray._putmask`` with ``mask`` and ``value``.

     Nothing is returned.
     """
     # the super() method NDArrayBackedExtensionArray._putmask uses
     # np.putmask which doesn't properly handle None/pd.NA, so using the
     # base class implementation that uses __setitem__
     ExtensionArray._putmask(self, mask, value)