示例#1
0
文件: numerical.py 项目: wphicks/cudf
 def find_and_replace(self, to_replace, replacement, all_nan):
     """
     Return col with *to_replace* replaced with *replacement*.

     Parameters
     ----------
     to_replace
         Values to search for. Normalized against ``self.dtype`` with
         ``_normalize_find_and_replace_input``.
     replacement
         Values to substitute. A single replacement is broadcast to the
         length of *to_replace* when more than one value is searched for.
     all_nan : bool
         When true, *replacement* is converted straight to a column of
         ``self.dtype`` instead of going through the normalization helper.

     Returns
     -------
     The result of ``libcudf.replace.replace`` applied to a copy of this
     column, or an unmodified copy when there is nothing to search for.
     """
     to_replace_col = _normalize_find_and_replace_input(
         self.dtype, to_replace
     )
     if all_nan:
         replacement_col = column.as_column(replacement, dtype=self.dtype)
     else:
         replacement_col = _normalize_find_and_replace_input(
             self.dtype, replacement
         )
     if len(replacement_col) == 1:
         if len(to_replace_col) == 0:
             # One replacement but no values to look for: nothing can be
             # substituted, so hand back an untouched copy rather than
             # forwarding columns of different lengths to libcudf.
             return self.copy()
         if len(to_replace_col) > 1:
             # Repeat the single replacement so both columns line up
             # element for element.
             replacement_col = column.as_column(
                 utils.scalar_broadcast_to(
                     replacement[0], (len(to_replace_col),), self.dtype
                 )
             )
     replaced = self.copy()
     # Bring all three columns to mutually compatible numeric types.
     to_replace_col, replacement_col, replaced = numeric_normalize_types(
         to_replace_col, replacement_col, replaced
     )
     return libcudf.replace.replace(
         replaced, to_replace_col, replacement_col
     )
示例#2
0
    def find_and_replace(
        self,
        to_replace: ColumnLike,
        replacement: ColumnLike,
        all_nan: bool = False,
    ) -> NumericalColumn:
        """
        Return col with *to_replace* replaced with *replacement*.

        Parameters
        ----------
        to_replace : ColumnLike
            Values to search for.
        replacement : ColumnLike
            Values to substitute. A single replacement is broadcast when
            more than one value is searched for.
        all_nan : bool, default False
            When true, *replacement* is converted straight to a column of
            ``self.dtype`` instead of going through the normalization
            helper.

        Returns
        -------
        NumericalColumn
            A copy of this column when neither input is numerical or when
            there is nothing to search for; otherwise the result of
            ``libcudf.replace.replace``.

        Raises
        ------
        TypeError
            If *to_replace* and *replacement* convert to different column
            types.
        """

        # When every entry of `to_replace`/`replacement` is `None`, the
        # dtype of the resulting column is inferred as `string`, yet such
        # input is also a valid float64 column. Cast all-null columns to
        # self.dtype so the type checks below see a matching column type.
        old_values = column.as_column(to_replace)
        if old_values.null_count == len(old_values):
            old_values = old_values.astype(self.dtype)

        new_values = column.as_column(replacement)
        if new_values.null_count == len(new_values):
            new_values = new_values.astype(self.dtype)

        if type(old_values) != type(new_values):
            raise TypeError(
                "to_replace and value should be of same types,"
                f"got to_replace dtype: {old_values.dtype} and "
                f"value dtype: {new_values.dtype}")

        # Neither side is numerical, so there is nothing to do here.
        if not (isinstance(old_values, NumericalColumn)
                or isinstance(new_values, NumericalColumn)):
            return self.copy()

        # Rebuild both columns from the raw inputs, normalized against
        # this column's dtype.
        old_values = _normalize_find_and_replace_input(self.dtype, to_replace)
        if all_nan:
            new_values = column.as_column(replacement, dtype=self.dtype)
        else:
            new_values = _normalize_find_and_replace_input(
                self.dtype, replacement)

        if len(new_values) == 1:
            search_count = len(old_values)
            if search_count > 1:
                # Repeat the single replacement once per searched value.
                new_values = column.as_column(
                    utils.scalar_broadcast_to(
                        replacement[0], (search_count, ), self.dtype))
            elif search_count == 0:
                return self.copy()

        old_values, new_values, replaced = numeric_normalize_types(
            old_values, new_values, self)

        # Pair old/new values in a frame so that repeated search values
        # can be collapsed, keeping the last pairing for each.
        pairs = cudf.DataFrame._from_data({
            "old": old_values,
            "new": new_values
        })
        pairs = pairs.drop_duplicates(
            subset=["old"], keep="last", ignore_index=True)

        if pairs._data["old"].null_count == 1:
            # A null search value is handled via fillna, then removed
            # from the pairs passed on to libcudf.
            null_rows = pairs._data["old"].isnull()
            fill_value = pairs._data["new"][null_rows][0]
            replaced = replaced.fillna(fill_value)
            pairs = pairs.dropna(subset=["old"])

        return libcudf.replace.replace(
            replaced, pairs._data["old"], pairs._data["new"])
示例#3
0
 def find_and_replace(self, to_replace, replacement, all_nan):
     """
     Return col with *to_replace* replaced with *replacement*.

     Both inputs are converted to columns; when *all_nan* is true the
     replacement column is created with ``self.dtype``, otherwise its
     dtype is left for ``column.as_column`` to decide. The columns and a
     copy of this column are then passed through
     ``numeric_normalize_types`` before calling
     ``libcudf.replace.replace``.
     """
     old_values = column.as_column(to_replace)
     if all_nan:
         new_values = column.as_column(replacement, dtype=self.dtype)
     else:
         new_values = column.as_column(replacement, dtype=None)
     old_values, new_values, target = numeric_normalize_types(
         old_values, new_values, self.copy())
     return libcudf.replace.replace(target, old_values, new_values)
示例#4
0
    def find_and_replace(
        self,
        to_replace: ColumnLike,
        replacement: ColumnLike,
        all_nan: bool = False,
    ) -> NumericalColumn:
        """
        Return col with *to_replace* replaced with *replacement*.

        Parameters
        ----------
        to_replace : ColumnLike
            Values to search for.
        replacement : ColumnLike
            Values to substitute. A single replacement is broadcast when
            more than one value is searched for.
        all_nan : bool, default False
            When true, *replacement* is converted straight to a column of
            ``self.dtype`` instead of going through the normalization
            helper.

        Returns
        -------
        NumericalColumn
            A copy of this column when neither input is numerical or when
            there is nothing to search for; otherwise the result of
            ``libcudf.replace.replace``.

        Raises
        ------
        TypeError
            If *to_replace* and *replacement* convert to different column
            types.
        """
        old_values = as_column(to_replace)
        new_values = as_column(replacement)

        if type(old_values) != type(new_values):
            raise TypeError(
                "to_replace and value should be of same types,"
                f"got to_replace dtype: {old_values.dtype} and "
                f"value dtype: {new_values.dtype}"
            )

        # Neither side is numerical, so there is nothing to do here.
        if not (
            isinstance(old_values, NumericalColumn)
            or isinstance(new_values, NumericalColumn)
        ):
            return self.copy()

        # Rebuild both columns from the raw inputs, normalized against
        # this column's dtype.
        old_values = _normalize_find_and_replace_input(
            self.dtype, to_replace
        )
        if all_nan:
            new_values = column.as_column(replacement, dtype=self.dtype)
        else:
            new_values = _normalize_find_and_replace_input(
                self.dtype, replacement
            )

        result = self.copy()
        if len(new_values) == 1:
            search_count = len(old_values)
            if search_count > 1:
                # Repeat the single replacement once per searched value.
                new_values = column.as_column(
                    utils.scalar_broadcast_to(
                        replacement[0], (search_count,), self.dtype
                    )
                )
            elif search_count == 0:
                return result

        old_values, new_values, result = numeric_normalize_types(
            old_values, new_values, result
        )
        return libcudf.replace.replace(result, old_values, new_values)
示例#5
0
    def find_and_replace(
        self,
        to_replace: ColumnLike,
        replacement: ColumnLike,
        all_nan: bool = False,
    ) -> NumericalColumn:
        """
        Return col with *to_replace* replaced with *replacement*.

        Parameters
        ----------
        to_replace : ColumnLike
            Values to search for.
        replacement : ColumnLike
            Values to substitute. A single replacement is broadcast when
            more than one value is searched for.
        all_nan : bool, default False
            When true, *replacement* is converted straight to a column of
            ``self.dtype`` instead of going through the normalization
            helper.

        Returns
        -------
        NumericalColumn
            A copy of this column when neither input is numerical or when
            there is nothing to search for; otherwise the result of
            ``libcudf.replace.replace``.

        Raises
        ------
        TypeError
            If *to_replace* and *replacement* convert to different column
            types.
        """
        old_values = column.as_column(to_replace)
        new_values = column.as_column(replacement)

        if type(old_values) != type(new_values):
            raise TypeError(
                "to_replace and value should be of same types,"
                f"got to_replace dtype: {old_values.dtype} and "
                f"value dtype: {new_values.dtype}")

        # Neither side is numerical, so there is nothing to do here.
        if not (isinstance(old_values, NumericalColumn)
                or isinstance(new_values, NumericalColumn)):
            return self.copy()

        # Rebuild both columns from the raw inputs, normalized against
        # this column's dtype.
        old_values = _normalize_find_and_replace_input(self.dtype, to_replace)
        if all_nan:
            new_values = column.as_column(replacement, dtype=self.dtype)
        else:
            new_values = _normalize_find_and_replace_input(
                self.dtype, replacement)

        result = self.copy()
        if len(new_values) == 1:
            search_count = len(old_values)
            if search_count > 1:
                # Repeat the single replacement once per searched value.
                new_values = column.as_column(
                    utils.scalar_broadcast_to(
                        replacement[0], (search_count, ), self.dtype))
            elif search_count == 0:
                return result

        old_values, new_values, result = numeric_normalize_types(
            old_values, new_values, result)

        # Pair old/new values in a frame so that repeated search values
        # can be collapsed, keeping the last pairing for each.
        pairs = cudf.DataFrame({"old": old_values, "new": new_values})
        pairs = pairs.drop_duplicates(
            subset=["old"], keep="last", ignore_index=True)

        if pairs._data["old"].null_count == 1:
            # A null search value is handled via fillna, then removed
            # from the pairs passed on to libcudf.
            null_rows = pairs._data["old"].isna()
            fill_value = pairs._data["new"][null_rows][0]
            result = result.fillna(fill_value)
            pairs = pairs.dropna(subset=["old"])

        return libcudf.replace.replace(
            result, pairs["old"]._column, pairs["new"]._column)
示例#6
0
    def append(self, other):
        """
        Append a collection of Index objects together.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index

        Raises
        ------
        TypeError
            If a single non-empty ``other`` is appended to a non-empty
            index and exactly the pair is reported as mixing object and
            non-object dtypes, or if any item to be concatenated is not
            an Index.

        Examples
        --------
        >>> import cudf
        >>> idx = cudf.Index([1, 2, 10, 100])
        >>> idx
        Int64Index([1, 2, 10, 100], dtype='int64')
        >>> other = cudf.Index([200, 400, 50])
        >>> other
        Int64Index([200, 400, 50], dtype='int64')
        >>> idx.append(other)
        Int64Index([1, 2, 10, 100, 200, 400, 50], dtype='int64')

        append accepts list of Index objects

        >>> idx.append([other, other])
        Int64Index([1, 2, 10, 100, 200, 400, 50, 200, 400, 50], dtype='int64')
        """

        if is_list_like(other):
            to_concat = [self, *other]
        else:
            this = self
            # The length is checked on the raw argument, before it is
            # converted to an Index below.
            if not len(other):
                # Nothing to add: concatenate `self` alone.
                to_concat = [self]

            other = cudf.Index(other)

            if not len(self):
                to_concat = [other]

            if len(self) and len(other):
                if is_mixed_with_object_dtype(this, other):
                    object_dtype = cudf.dtype("object")
                    # Report whichever side is *not* the object dtype.
                    if this.dtype == object_dtype:
                        got_dtype = other.dtype
                    else:
                        got_dtype = this.dtype
                    raise TypeError(
                        "cudf does not support appending an Index of "
                        f"dtype `{object_dtype}` with an Index "
                        f"of dtype `{got_dtype}`, please type-cast "
                        "either one of them to same dtypes.")

                numeric_values = isinstance(
                    self._values, cudf.core.column.NumericalColumn)
                if numeric_values and self.dtype != other.dtype:
                    # Differing numeric dtypes are normalized first.
                    this, other = numeric_normalize_types(self, other)
                to_concat = [this, other]

        if not all(isinstance(item, BaseIndex) for item in to_concat):
            raise TypeError("all inputs must be Index")

        return self._concat(to_concat)