示例#1
0
def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase,
                     how: str) -> Tuple[ColumnBase, ColumnBase]:
    """Cast a pair of join-key columns to a common dtype.

    Parameters
    ----------
    lcol, rcol : ColumnBase
        The left and right key columns.
    how : str
        The join type. ``"left"``, ``"leftsemi"`` and ``"leftanti"`` are
        treated specially (see below); every other value takes the generic
        path.

    Returns
    -------
    Tuple[ColumnBase, ColumnBase]
        The left and right columns, cast where needed. When both dtypes
        already compare equal the inputs are returned unchanged.

    Raises
    ------
    TypeError
        If the dtypes differ and at least one side is a decimal dtype.
    """
    common_type = None

    ltype = lcol.dtype
    rtype = rcol.dtype

    # Categorical keys follow their own rules.
    left_is_categorical = isinstance(ltype, CategoricalDtype)
    right_is_categorical = isinstance(rtype, CategoricalDtype)
    if left_is_categorical and right_is_categorical:
        return _match_categorical_dtypes_both(cast(CategoricalColumn, lcol),
                                              cast(CategoricalColumn, rcol),
                                              how)
    elif left_is_categorical or right_is_categorical:
        if left_is_categorical:
            # Left-style joins keep the left categorical dtype and cast the
            # right side to it.
            if how in {"left", "leftsemi", "leftanti"}:
                return lcol, rcol.astype(ltype)
            common_type = ltype.categories.dtype
        else:
            common_type = rtype.categories.dtype
        # Otherwise both sides are cast to the dtype of the categories.
        return lcol.astype(common_type), rcol.astype(common_type)

    if is_dtype_equal(ltype, rtype):
        return lcol, rcol

    if is_decimal_dtype(ltype) or is_decimal_dtype(rtype):
        raise TypeError(
            "Decimal columns can only be merged with decimal columns "
            "of the same precision and scale")

    if (np.issubdtype(ltype, np.number)) and (np.issubdtype(rtype, np.number)):
        # Same kind: take the larger dtype. Mixed kinds: use NumPy's
        # promotion rules. np.result_type replaces np.find_common_type,
        # which is deprecated since NumPy 1.25 and removed in NumPy 2.0.
        common_type = (max(ltype, rtype) if ltype.kind == rtype.kind else
                       np.result_type(ltype, rtype))

    elif np.issubdtype(ltype, np.datetime64) and np.issubdtype(
            rtype, np.datetime64):
        common_type = max(ltype, rtype)

    # NOTE(review): for any other dtype combination common_type is still
    # None at this point and is passed to astype() as-is -- confirm that this
    # is the intended behaviour.

    if how == "left":
        # Prefer keeping the left dtype when the right values (nulls filled
        # with 0 for the check) fit into it.
        if rcol.fillna(0).can_cast_safely(ltype):
            return lcol, rcol.astype(ltype)
        else:
            warnings.warn(f"Can't safely cast column from {rtype} to {ltype}, "
                          f"upcasting to {common_type}.")

    return lcol.astype(common_type), rcol.astype(common_type)
示例#2
0
    def fillna(self, fill_value=None, method=None):
        """Fill null entries using ``fill_value`` or a fill ``method``.

        Parameters
        ----------
        fill_value : scalar or column-like, optional
            Replacement for nulls. A NaT-like value (as judged by
            ``cudf.utils.utils.isnat``) is routed to ``_fillna_natwise``.
            A scalar is wrapped in a ``cudf.Scalar``; anything else is
            converted with ``column.as_column(..., nan_as_null=False)``.
        method : optional
            Forwarded to the parent class ``fillna`` when ``fill_value`` is
            None.

        Returns
        -------
        The result of ``_fillna_natwise``, ``ColumnBase.fillna`` or the parent
        class ``fillna``, depending on the branch taken.
        """
        if fill_value is not None:
            if cudf.utils.utils.isnat(fill_value):
                return _fillna_natwise(self)
            col = self
            if is_scalar(fill_value):
                # Must be bound for every scalar: previously only the
                # np.timedelta64 branch assigned it, so any other scalar
                # raised UnboundLocalError when building the cudf.Scalar
                # below. None leaves the dtype choice to cudf.Scalar.
                dtype = None
                if isinstance(fill_value, np.timedelta64):
                    # Bring the fill value and the column to one shared
                    # dtype before filling.
                    dtype = determine_out_dtype(self.dtype, fill_value.dtype)
                    fill_value = fill_value.astype(dtype)
                    col = col.astype(dtype)
                if not isinstance(fill_value, cudf.Scalar):
                    fill_value = cudf.Scalar(fill_value, dtype=dtype)
            else:
                fill_value = column.as_column(fill_value, nan_as_null=False)

            return ColumnBase.fillna(col, fill_value)
        else:
            return super().fillna(method=method)
示例#3
0
def _match_join_keys(lcol: ColumnBase, rcol: ColumnBase,
                     how: str) -> Tuple[ColumnBase, ColumnBase]:
    """Cast a pair of join-key columns to a common dtype.

    Parameters
    ----------
    lcol, rcol : ColumnBase
        The left and right key columns.
    how : str
        The join type. ``"left"`` tries to keep the left dtype; every other
        value takes the generic path.

    Returns
    -------
    Tuple[ColumnBase, ColumnBase]
        The left and right columns, cast where needed. When both dtypes
        already compare equal the inputs are returned unchanged.

    Raises
    ------
    TypeError
        If the dtypes differ and at least one side is a ``Decimal64Dtype``.
    """
    common_type = None

    ltype = lcol.dtype
    rtype = rcol.dtype

    # Categorical keys are handled entirely by a dedicated helper.
    if isinstance(ltype, CategoricalDtype) or isinstance(
            rtype, CategoricalDtype):
        return _match_categorical_dtypes(lcol, rcol, how)

    if pd.api.types.is_dtype_equal(ltype, rtype):
        return lcol, rcol

    if isinstance(ltype, cudf.Decimal64Dtype) or isinstance(
            rtype, cudf.Decimal64Dtype):
        raise TypeError(
            "Decimal columns can only be merged with decimal columns "
            "of the same precision and scale")

    if (np.issubdtype(ltype, np.number)) and (np.issubdtype(rtype, np.number)):
        # Same kind: take the larger dtype. Mixed kinds: use NumPy's
        # promotion rules. np.result_type replaces np.find_common_type,
        # which is deprecated since NumPy 1.25 and removed in NumPy 2.0.
        common_type = (max(ltype, rtype) if ltype.kind == rtype.kind else
                       np.result_type(ltype, rtype))

    elif np.issubdtype(ltype, np.datetime64) and np.issubdtype(
            rtype, np.datetime64):
        common_type = max(ltype, rtype)

    # NOTE(review): for any other dtype combination common_type is still
    # None at this point and is passed to astype() as-is -- confirm that this
    # is the intended behaviour.

    if how == "left":
        # Prefer keeping the left dtype when the right values (nulls filled
        # with 0 for the check) fit into it.
        if rcol.fillna(0).can_cast_safely(ltype):
            return lcol, rcol.astype(ltype)
        else:
            # Both fragments must be f-strings; the second one used to be a
            # plain literal, so the message showed "{common_type}" verbatim.
            warnings.warn(f"Can't safely cast column from {rtype} to {ltype}, "
                          f"upcasting to {common_type}.")

    return lcol.astype(common_type), rcol.astype(common_type)