Пример #1
0
    def _str_repeat(self, repeats):
        if is_scalar(repeats):

            def scalar_rep(x):
                try:
                    return bytes.__mul__(x, repeats)
                except TypeError:
                    return str.__mul__(x, repeats)

            return self._str_map(scalar_rep, dtype=str)
        else:
            from pandas.core.arrays.string_ import StringArray

            def rep(x, r):
                if x is libmissing.NA:
                    return x
                try:
                    return bytes.__mul__(x, r)
                except TypeError:
                    return str.__mul__(x, r)

            repeats = np.asarray(repeats, dtype=object)
            result = libops.vec_binop(np.asarray(self), repeats, rep)
            if isinstance(self, StringArray):
                # Not going through map, so we have to do this here.
                result = StringArray._from_sequence(result)
            return result
Пример #2
0
    def astype(self, dtype, copy: bool = True) -> ArrayLike:
        """
        Convert this array to a NumPy array or ExtensionArray of ``dtype``.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type the array should be cast to.
        copy : bool, default True
            Whether to copy the data even when it is not required. With
            ``copy=False`` a copy is only made when the current dtype and
            the requested dtype differ.

        Returns
        -------
        ndarray or ExtensionArray
            NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray
            whose dtype is ``dtype``.

        Raises
        ------
        TypeError
            if incompatible type with a FloatingDtype, equivalent of
            same_kind casting
        """
        from pandas.core.arrays.string_ import StringArray, StringDtype

        dtype = pandas_dtype(dtype)

        # Fast path: the requested dtype is exactly the current one.
        if self.dtype == dtype:
            if copy:
                return self.copy()
            # copy=False hands back the very same object
            return self

        # Fast path: target is another nullable masked dtype.
        if isinstance(dtype, BaseMaskedDtype):
            # TODO deal with NaNs
            values = self._data.astype(dtype.numpy_dtype, copy=copy)
            # The mask is copied exactly when the data was copied; this does
            # not depend directly on the ``copy`` keyword.
            if values is self._data:
                new_mask = self._mask
            else:
                new_mask = self._mask.copy()
            array_cls = dtype.construct_array_type()
            return array_cls(values, new_mask, copy=False)

        if isinstance(dtype, StringDtype):
            return StringArray._from_sequence(self, copy=False)

        # General coercion through a NumPy array.
        if is_float_dtype(dtype):
            # For astype, dtype=float also implies na_value=np.nan
            na_kwargs = {"na_value": np.nan}
        elif is_datetime64_dtype(dtype):
            na_kwargs = {"na_value": np.datetime64("NaT")}
        else:
            na_kwargs = {}

        converted = self.to_numpy(dtype=dtype, **na_kwargs)
        return astype_nansafe(converted, dtype, copy=False)
Пример #3
0
def data_for_grouping():
    """Return an 8-element StringArray: B, B, NA, NA, A, A, B, C."""
    values = ["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]
    return StringArray._from_sequence(values)
Пример #4
0
def data_missing_for_sorting():
    """Return a 3-element StringArray ["B", NA, "A"]."""
    values = ["B", pd.NA, "A"]
    return StringArray._from_sequence(values)
Пример #5
0
def data_for_sorting():
    """Return a 3-element StringArray ["B", "C", "A"] with no missing values."""
    values = ["B", "C", "A"]
    return StringArray._from_sequence(values)
Пример #6
0
def data_missing():
    """Return a two-element StringArray: a missing value followed by "A"."""
    values = [pd.NA, "A"]
    return StringArray._from_sequence(values)
Пример #7
0
def data():
    """Return a StringArray of 100 random ASCII letters.

    The sample is redrawn until its first two entries differ.
    """
    alphabet = list(string.ascii_letters)
    while True:
        letters = np.random.choice(alphabet, size=100)
        # Accept the draw only once positions 0 and 1 hold different letters.
        if letters[0] != letters[1]:
            break

    return StringArray._from_sequence(letters)
Пример #8
0
def test_from_sequence_wrong_dtype_raises():
    """Exercise ``_from_sequence`` with string dtypes of either storage.

    For ``ArrowStringArray`` the calls expected to raise ``AssertionError``
    are those given ``"string[python]"``, ``StringDtype("python")``, or a
    bare ``StringDtype()`` built while the ``string_storage`` option is
    ``"python"``. For ``StringArray`` it is the mirror image with
    ``"pyarrow"``. Every other call is expected to complete without raising.
    """

    def build(array_cls, dtype):
        # A fresh list literal per call, as at a plain call site.
        return array_cls._from_sequence(["a", None, "c"], dtype=dtype)

    def storage(name):
        return pd.option_context("string_storage", name)

    def rejects():
        return pytest.raises(AssertionError, match=None)

    # ArrowStringArray: the plain "string" alias is accepted under either option.
    with storage("python"):
        build(ArrowStringArray, "string")

    with storage("pyarrow"):
        build(ArrowStringArray, "string")

    with rejects():
        build(ArrowStringArray, "string[python]")

    build(ArrowStringArray, "string[pyarrow]")

    with rejects():
        with storage("python"):
            build(ArrowStringArray, StringDtype())

    with storage("pyarrow"):
        build(ArrowStringArray, StringDtype())

    with rejects():
        build(ArrowStringArray, StringDtype("python"))

    build(ArrowStringArray, StringDtype("pyarrow"))

    # StringArray: same matrix, with the storages swapped.
    with storage("python"):
        build(StringArray, "string")

    with storage("pyarrow"):
        build(StringArray, "string")

    build(StringArray, "string[python]")

    with rejects():
        build(StringArray, "string[pyarrow]")

    with storage("python"):
        build(StringArray, StringDtype())

    with rejects():
        with storage("pyarrow"):
            build(StringArray, StringDtype())

    build(StringArray, StringDtype("python"))

    with rejects():
        build(StringArray, StringDtype("pyarrow"))
Пример #9
0
def data_for_grouping():
    """Return an 8-element StringArray built from B, B, nan, nan, A, A, B, C."""
    values = ["B", "B", np.nan, np.nan, "A", "A", "B", "C"]
    return StringArray._from_sequence(values)