示例#1
0
    def _str_len(self):
        """
        Compute the length of every string element.

        Returns
        -------
        The ``pc.utf8_length`` result converted through
        ``Int64Dtype().__from_arrow__``, or whatever the parent class
        returns when ``pa_version_under4p0`` is set.
        """
        if not pa_version_under4p0:
            lengths = pc.utf8_length(self._data)
            return Int64Dtype().__from_arrow__(lengths)

        # ``pa_version_under4p0`` is set: warn, then defer to the parent class.
        fallback_performancewarning(version="4")
        return super()._str_len()
示例#2
0
文件: array.py 项目: tnir/pandas
    def argsort(
        self,
        ascending: bool = True,
        kind: str = "quicksort",
        na_position: str = "last",
        *args,
        **kwargs,
    ) -> np.ndarray:
        """
        Return the indices that would sort this array.

        Parameters
        ----------
        ascending : bool, default True
            Sort in ascending order when true, descending otherwise.
        kind : str, default "quicksort"
            Only forwarded to the parent implementation on the fallback
            path; the pyarrow path does not use it.
        na_position : str, default "last"
            ``"last"`` puts nulls at the end, ``"first"`` at the start. Any
            other value is handed to the parent implementation.
        *args, **kwargs
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        np.ndarray
            Sort indices converted to ``np.intp``.
        """
        order = "ascending" if ascending else "descending"
        null_placement = {"last": "at_end", "first": "at_start"}.get(na_position)

        if null_placement is None or pa_version_under7p0:
            # Although pc.array_sort_indices exists in version 6
            # there's a bug that affects the pa.ChunkedArray backing
            # https://issues.apache.org/jira/browse/ARROW-12042
            fallback_performancewarning("7")
            return super().argsort(
                ascending=ascending, kind=kind, na_position=na_position
            )

        result = pc.array_sort_indices(
            self._data, order=order, null_placement=null_placement
        )
        # No ``pa_version_under2p0`` special case is needed here: a pyarrow
        # older than 2 is also older than 7 and has already returned through
        # the fallback above, so such a branch could never execute.
        return result.to_numpy().astype(np.intp, copy=False)
示例#3
0
    def _str_isspace(self):
        """
        Evaluate ``pc.utf8_is_space`` on the underlying data.

        Returns
        -------
        The kernel result converted through
        ``BooleanDtype().__from_arrow__``, or whatever the parent class
        returns when ``pa_version_under2p0`` is set.
        """
        if not pa_version_under2p0:
            is_space = pc.utf8_is_space(self._data)
            return BooleanDtype().__from_arrow__(is_space)

        # ``pa_version_under2p0`` is set: warn, then defer to the parent class.
        fallback_performancewarning(version="2")
        return super()._str_isspace()
示例#4
0
    def _str_endswith(self, pat: str, na=None):
        """
        Test whether each element ends with the literal ``pat``.

        Parameters
        ----------
        pat : str
            Suffix to look for. It is regex-escaped before being matched.
        na : optional
            Forwarded unchanged to the underlying matcher.

        Returns
        -------
        The result of ``self._str_contains`` for the anchored pattern, or
        whatever the parent class returns when ``pa_version_under4p0`` is set.
        """
        if not pa_version_under4p0:
            # Escape the literal, anchor it at the end, and reuse the regex
            # based ``_str_contains``.
            anchored = re.escape(pat) + "$"
            return self._str_contains(anchored, na=na, regex=True)

        fallback_performancewarning(version="4")
        return super()._str_endswith(pat, na)
示例#5
0
    def isin(self, values):
        """
        Check, element by element, whether each value is contained in ``values``.

        Parameters
        ----------
        values : iterable
            Candidate values. Only those that pyarrow types as string or
            null are kept for the lookup.

        Returns
        -------
        np.ndarray
            Boolean array, or whatever the parent class returns when
            ``pa_version_under2p0`` is set.
        """
        if pa_version_under2p0:
            fallback_performancewarning(version="2")
            return super().isin(values)

        # Keep only the candidates whose pyarrow scalar type is string or null.
        accepted_types = (pa.string(), pa.null())
        value_set = []
        for value in values:
            scalar = pa.scalar(value, from_pandas=True)
            if scalar.type in accepted_types:
                value_set.append(scalar.as_py())

        # With an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0
        # returns True for null values, so short-circuit to an all-False array.
        if not value_set:
            return np.zeros(len(self), dtype=bool)

        # In pyarrow 2.0.0 skip_null is ignored but is a required keyword; in
        # pyarrow 3.0.0+ passing it raises "unexpected keyword argument".
        extra = {"skip_null": True} if pa_version_under3p0 else {}

        membership = pc.is_in(self._data, value_set=pa.array(value_set), **extra)
        # pyarrow 2.0.0 returned nulls, so the dtype is given explicitly to
        # convert those nulls to False.
        return np.array(membership, dtype=np.bool_)
示例#6
0
    def _str_rstrip(self, to_strip=None):
        """
        Strip trailing characters from every element.

        Parameters
        ----------
        to_strip : optional
            Characters to remove, passed as ``characters`` to
            ``pc.utf8_rtrim``. When None, ``pc.utf8_rtrim_whitespace`` is
            used instead.

        Returns
        -------
        A new array of the same type as ``self``, or whatever the parent
        class returns when ``pa_version_under4p0`` is set.
        """
        if not pa_version_under4p0:
            stripped = (
                pc.utf8_rtrim_whitespace(self._data)
                if to_strip is None
                else pc.utf8_rtrim(self._data, characters=to_strip)
            )
            return type(self)(stripped)

        # ``pa_version_under4p0`` is set: warn, then defer to the parent class.
        fallback_performancewarning(version="4")
        return super()._str_rstrip(to_strip)
示例#7
0
文件: array.py 项目: YarShev/pandas
    def unique(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
        """
        Build an ArrowExtensionArray holding the unique values.

        Returns
        -------
        ArrowExtensionArray
            ``pc.unique`` applied to the underlying data and wrapped in the
            same type as ``self``, or the parent class result when
            ``pa_version_under2p0`` is set.
        """
        if not pa_version_under2p0:
            distinct = pc.unique(self._data)
            return type(self)(distinct)

        # ``pa_version_under2p0`` is set: warn, then defer to the parent class.
        fallback_performancewarning(version="2")
        return super().unique()
示例#8
0
文件: array.py 项目: YarShev/pandas
    def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
        """
        Build an ArrowExtensionArray with the NA values removed.

        Returns
        -------
        ArrowExtensionArray
            ``pc.drop_null`` applied to the underlying data and wrapped in
            the same type as ``self``, or the parent class result when
            ``pa_version_under6p0`` is set.
        """
        if not pa_version_under6p0:
            without_nulls = pc.drop_null(self._data)
            return type(self)(without_nulls)

        # ``pa_version_under6p0`` is set: warn, then defer to the parent class.
        fallback_performancewarning(version="6")
        return super().dropna()
示例#9
0
            return type(self)(result)
        else:
            # This is when the result type is object. We reach this when
            # -> We know the result type is truly object (e.g. .encode returns bytes
            #    or .findall returns a list).
            # -> We don't know the result type. E.g. `.get` can return anything.
            return lib.map_infer_mask(arr, f, mask.view("uint8"))

    def _str_contains(self,
                      pat,
                      case=True,
                      flags=0,
                      na=np.nan,
                      regex: bool = True):
        if flags:
            fallback_performancewarning()
            return super()._str_contains(pat, case, flags, na, regex)

        if regex:
            if pa_version_under4p0 or case is False:
                fallback_performancewarning(version="4")
                return super()._str_contains(pat, case, flags, na, regex)
            else:
                result = pc.match_substring_regex(self._data, pat)
        else:
            if case:
                result = pc.match_substring(self._data, pat)
            else:
                result = pc.match_substring(pc.utf8_upper(self._data),
                                            pat.upper())
        result = BooleanDtype().__from_arrow__(result)