Пример #1
0
    def difference(self, other, sort=None):
        """
        Return a new Index with elements from the index that are not in
        `other`.

        This is the set difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
            Converted with ``cudf.Index(other)`` before the comparison.
        sort : False or None, default None
            Whether to sort the resulting index.

            * None : Sort the result with ``sort_values()``. The sort is
              skipped when `other` is empty.
            * False : Do not sort the result.

        Returns
        -------
        difference : Index

        Raises
        ------
        ValueError
            If `sort` is anything other than None or False.

        Examples
        --------
        >>> import cudf
        >>> idx1 = cudf.Index([2, 1, 3, 4])
        >>> idx1
        Int64Index([2, 1, 3, 4], dtype='int64')
        >>> idx2 = cudf.Index([3, 4, 5, 6])
        >>> idx2
        Int64Index([3, 4, 5, 6], dtype='int64')
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')
        >>> idx1.difference(idx2, sort=False)
        Int64Index([2, 1], dtype='int64')
        """
        # Membership is tested against a tuple rather than a set: a set
        # lookup hashes `sort`, so an unhashable argument (e.g. a list)
        # would escape as TypeError instead of the intended ValueError.
        if sort not in (None, False):
            raise ValueError(f"The 'sort' keyword only takes the values "
                             f"of None or False; {sort} was passed.")

        other = cudf.Index(other)

        if is_mixed_with_object_dtype(self, other):
            # Object and non-object dtypes are not joined against each
            # other; the result is simply a copy of this index.
            difference = self.copy()
        else:
            difference = self.join(other, how="leftanti")
            # Cast back so the result keeps this index's dtype when the
            # two inputs had different dtypes.
            if self.dtype != other.dtype:
                difference = difference.astype(self.dtype)

        if sort is None and len(other):
            return difference.sort_values()

        return difference
Пример #2
0
    def add_categories(self, new_categories, inplace=False):
        """
        Extend the set of categories with new, currently unused ones.

        The values in `new_categories` are appended after the existing
        categories, so they occupy the last/highest positions and are
        not referenced by any element right after this call.

        Parameters
        ----------

        new_categories : category or list-like of category
            The categories to append to the existing ones.

        inplace : bool, default False
            Forwarded to ``_return_or_inplace``: whether to add the
            categories inplace or return a copy of this categorical
            with the added categories.

        Returns
        -------
        cat
            Categorical with new categories added or
            None if inplace.

        Raises
        ------
        TypeError
            If exactly one of the existing/new category dtypes is
            object-like (as decided by ``is_mixed_with_object_dtype``).
        ValueError
            If any of the new categories is already an existing category.

        Examples
        --------
        >>> import cudf
        >>> s = cudf.Series([1, 2], dtype="category")
        >>> s
        0    1
        1    2
        dtype: category
        Categories (2, int64): [1, 2]
        >>> s.cat.add_categories([0, 3, 4])
        0    1
        1    2
        dtype: category
        Categories (5, int64): [1, 2, 0, 3, 4]
        >>> s
        0    1
        1    2
        dtype: category
        Categories (2, int64): [1, 2]
        >>> s.cat.add_categories([0, 3, 4], inplace=True)
        >>> s
        0    1
        1    2
        dtype: category
        Categories (5, int64): [1, 2, 0, 3, 4]
        """

        existing = self._column.categories

        # An empty input carries no dtype information of its own, so it
        # borrows the dtype of the existing categories; otherwise the
        # dtype is left for as_column to infer.
        if len(new_categories) == 0:
            requested_dtype = existing.dtype
        else:
            requested_dtype = None
        incoming = column.as_column(new_categories, dtype=requested_dtype)

        if is_mixed_with_object_dtype(existing, incoming):
            raise TypeError(
                f"cudf does not support adding categories with existing "
                f"categories of dtype `{existing.dtype}` and new "
                f"categories of dtype `{incoming.dtype}`, please "
                f"type-cast new_categories to the same type as "
                f"existing categories.")

        # Bring both sides to one dtype before comparing/concatenating.
        shared_dtype = np.find_common_type(
            [existing.dtype, incoming.dtype], [])
        incoming = incoming.astype(shared_dtype)
        existing = existing.astype(shared_dtype)

        overlaps = existing.isin(incoming).any()
        if overlaps:
            raise ValueError("new categories must not include old categories")

        combined = existing.append(incoming)

        # Only rebuild the column when the category set actually changed.
        current_col = self._column
        if self._categories_equal(combined):
            out_col = current_col
        else:
            out_col = self._set_categories(existing, combined)

        return self._return_or_inplace(out_col, inplace=inplace)
Пример #3
0
    def append(self, other):
        """
        Concatenate this Index with one or more other Index objects.

        Parameters
        ----------
        other : Index or list/tuple of indices
            A list-like argument is treated as a collection of indices
            that are concatenated after this one, in order. Anything
            else is converted with ``cudf.Index(other)``.

        Returns
        -------
        appended : Index

        Raises
        ------
        TypeError
            If a single non-empty `other` mixes object and non-object
            dtypes with a non-empty `self`, or if any element to be
            concatenated is not an Index.

        Examples
        --------
        >>> import cudf
        >>> idx = cudf.Index([1, 2, 10, 100])
        >>> idx
        Int64Index([1, 2, 10, 100], dtype='int64')
        >>> other = cudf.Index([200, 400, 50])
        >>> other
        Int64Index([200, 400, 50], dtype='int64')
        >>> idx.append(other)
        Int64Index([1, 2, 10, 100, 200, 400, 50], dtype='int64')

        append accepts list of Index objects

        >>> idx.append([other, other])
        Int64Index([1, 2, 10, 100, 200, 400, 50, 200, 400, 50], dtype='int64')
        """

        if is_list_like(other):
            # Collection of indices: self first, then each given element.
            pieces = [self, *other]
        else:
            # Emptiness is judged on the argument as passed, before it
            # is converted to an Index.
            other_given_empty = len(other) == 0
            other = cudf.Index(other)

            if other_given_empty:
                # short-circuit: only self takes part in the concat
                pieces = [self]

            if not len(self):
                # Nothing on the left side; the result is just `other`.
                pieces = [other]
            elif len(other):
                left = self
                if is_mixed_with_object_dtype(left, other):
                    # Report whichever side is NOT the object dtype.
                    if left.dtype == cudf.dtype("object"):
                        got_dtype = other.dtype
                    else:
                        got_dtype = left.dtype
                    raise TypeError(
                        f"cudf does not support appending an Index of "
                        f"dtype `{cudf.dtype('object')}` with an Index "
                        f"of dtype `{got_dtype}`, please type-cast "
                        f"either one of them to same dtypes.")

                # Numeric indices of differing dtypes are normalized to
                # compatible types before concatenation.
                is_numeric = isinstance(
                    self._values, cudf.core.column.NumericalColumn)
                if is_numeric and self.dtype != other.dtype:
                    left, other = numeric_normalize_types(self, other)
                pieces = [left, other]

        if not all(isinstance(piece, BaseIndex) for piece in pieces):
            raise TypeError("all inputs must be Index")

        return self._concat(pieces)