def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Subtract a timestamp series/index or a ``datetime.datetime`` from ``left``.

    Both operands are cast to long before subtracting, so the result is an
    integer (in seconds, per the emitted warning) instead of the
    'timedelta64[ns]' that pandas produces. A ``UserWarning`` describing this
    difference is emitted on every supported call.

    Raises
    ------
    TypeError
        If ``right`` is neither an ``IndexOpsMixin`` with a timestamp Spark type
        nor a ``datetime.datetime``.
    """
    # Operand validation is delegated to a helper defined elsewhere in the project.
    _sanitize_list_like(right)

    # Timestamp subtraction casts its arguments to integer on purpose: pandas
    # returns 'timedelta64[ns]' when subtracting 'datetime64[ns]' values.
    msg = (
        "Note that there is a behavior difference of timestamp subtraction. "
        "The timestamp subtraction returns an integer in seconds, "
        "whereas pandas returns 'timedelta64[ns]'."
    )

    if isinstance(right, IndexOpsMixin):
        right_is_timestamp = isinstance(
            right.spark.data_type, (TimestampType, TimestampNTZType)
        )
    else:
        right_is_timestamp = False

    if right_is_timestamp:
        warnings.warn(msg, UserWarning)
        return left.astype("long") - right.astype("long")

    if not isinstance(right, datetime.datetime):
        raise TypeError("Datetime subtraction can only be applied to datetime series.")

    warnings.warn(msg, UserWarning)
    left_as_long = left.astype("long").spark.column
    right_as_long = self._cast_spark_column_timestamp_to_long(SF.lit(right))
    # The resulting column is described as a plain int64 / LongType field.
    long_field = left._internal.data_fields[0].copy(
        dtype=np.dtype("int64"), spark_type=LongType()
    )
    return cast(
        SeriesOrIndex,
        left._with_new_scol(left_as_long - right_as_long, field=long_field),
    )
def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Apply reflected multiplication (``Column.__rmul__``) with a scalar ``right``.

    ``right`` must be a ``numbers.Number``; it is passed through
    ``transform_boolean_operand_to_numeric`` before the column operation.

    Raises
    ------
    TypeError
        If ``right`` is not a ``numbers.Number``.
    """
    _sanitize_list_like(right)
    if isinstance(right, numbers.Number):
        numeric_right = transform_boolean_operand_to_numeric(right)
        reflected_multiply = column_op(Column.__rmul__)
        return reflected_multiply(left, numeric_right)
    raise TypeError("Multiplication can not be applied to given types.")
def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Subtract ``right`` from ``left`` using ``Column.__sub__``.

    ``right`` is first converted by ``transform_boolean_operand_to_numeric``,
    targeting the Spark data type of ``left``.

    Raises
    ------
    TypeError
        If ``is_valid_operand_for_numeric_arithmetic`` rejects ``right``.
    """
    _sanitize_list_like(right)
    if is_valid_operand_for_numeric_arithmetic(right):
        subtrahend = transform_boolean_operand_to_numeric(
            right, spark_type=left.spark.data_type
        )
        subtract = column_op(Column.__sub__)
        return subtract(left, subtrahend)
    raise TypeError("Subtraction can not be applied to given types.")
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Apply reflected subtraction (``Column.__rsub__``) with a ``timedelta``.

    Raises
    ------
    TypeError
        If ``right`` is not a ``timedelta`` instance.
    """
    from pyspark.pandas.base import column_op

    _sanitize_list_like(right)
    if not isinstance(right, timedelta):
        raise TypeError("Timedelta subtraction can only be applied to timedelta series.")
    reflected_subtract = column_op(Column.__rsub__)
    return reflected_subtract(left, right)
def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute ``right % left`` for a non-boolean numeric scalar ``right``.

    ``left`` is converted with ``transform_boolean_operand_to_numeric`` to the
    Spark type derived from ``type(right)`` before the modulo is taken.

    Raises
    ------
    TypeError
        If ``right`` is not a ``numbers.Number`` or is a ``bool``.
    """
    _sanitize_list_like(right)
    right_is_plain_number = isinstance(right, numbers.Number) and not isinstance(right, bool)
    if not right_is_plain_number:
        raise TypeError(
            "Modulo can not be applied to %s and the given type." % self.pretty_name
        )
    target_type = as_spark_type(type(right))
    numeric_left = transform_boolean_operand_to_numeric(left, spark_type=target_type)
    return right % numeric_left
def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute ``right ** left`` for a numeric scalar base ``right``.

    When ``right == 1`` the result is ``right`` itself for every row; otherwise
    ``Column.__rpow__`` is applied.

    Raises
    ------
    TypeError
        If ``right`` is not a ``numbers.Number``.
    """
    _sanitize_list_like(right)
    if not isinstance(right, numbers.Number):
        raise TypeError("Exponentiation can not be applied to given types.")

    def rpow_func(left: Column, right: Any) -> Column:
        # A base of exactly 1 short-circuits to the base regardless of the exponent column.
        base_is_one = SF.lit(right == 1)
        return F.when(base_is_one, right).otherwise(Column.__rpow__(left, right))

    base = transform_boolean_operand_to_numeric(right)
    return column_op(rpow_func)(left, base)
def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute the reflected modulo ``right % left`` for a numeric scalar ``right``.

    The result is built as ``((right % left) + left) % left``.

    Raises
    ------
    TypeError
        If ``right`` is not a ``numbers.Number``.
    """
    _sanitize_list_like(right)
    if not isinstance(right, numbers.Number):
        raise TypeError("Modulo can not be applied to given types.")

    def rmod(left: Column, right: Any) -> Column:
        # NOTE(review): adding the divisor and reducing again presumably aligns the
        # sign of the result with the divisor, as in Python/pandas -- confirm.
        raw_remainder = right % left
        return (raw_remainder + left) % left

    dividend = transform_boolean_operand_to_numeric(right)
    return column_op(rmod)(left, dividend)
def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute ``left % right``, built as ``((left % right) + right) % right``.

    ``right`` is converted with ``transform_boolean_operand_to_numeric`` to the
    Spark data type of ``left`` before the operation.

    Raises
    ------
    TypeError
        If ``is_valid_operand_for_numeric_arithmetic`` rejects ``right``.
    """
    _sanitize_list_like(right)
    if not is_valid_operand_for_numeric_arithmetic(right):
        raise TypeError("Modulo can not be applied to given types.")

    def mod(left: Column, right: Any) -> Column:
        # NOTE(review): the second reduction presumably makes the result follow the
        # sign of the divisor, as in Python/pandas -- confirm.
        raw_remainder = left % right
        return (raw_remainder + right) % right

    divisor = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
    return column_op(mod)(left, divisor)
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Multiply ``left`` by ``right``, or repeat a string operand.

    If ``right`` is an ``IndexOpsMixin`` with a ``StringType`` Spark type, the
    result is ``SF.repeat`` applied with ``right`` as the first operand and
    ``left`` as the second. Otherwise ``Column.__mul__`` is used after
    ``right`` is converted by ``transform_boolean_operand_to_numeric``.

    Raises
    ------
    TypeError
        If ``right`` is not a string series/index and
        ``is_valid_operand_for_numeric_arithmetic`` rejects it.
    """
    _sanitize_list_like(right)

    right_is_string_ops = isinstance(right, IndexOpsMixin) and isinstance(
        right.spark.data_type, StringType
    )
    if right_is_string_ops:
        # Operands are swapped on purpose: the string side drives the repeat.
        repeat_strings = column_op(SF.repeat)
        return repeat_strings(right, left)

    if is_valid_operand_for_numeric_arithmetic(right):
        multiplier = transform_boolean_operand_to_numeric(
            right, spark_type=left.spark.data_type
        )
        return column_op(Column.__mul__)(left, multiplier)

    raise TypeError("Multiplication can not be applied to given types.")
def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Prepend the ``bytes`` literal ``right`` to every value of ``left``.

    Raises
    ------
    TypeError
        If ``right`` is not a ``bytes`` instance.
    """
    _sanitize_list_like(right)
    if not isinstance(right, bytes):
        raise TypeError(
            "Concatenation can not be applied to %s and the given type." % self.pretty_name
        )
    # The literal goes first because this is the reflected form (right + left).
    prefixed = F.concat(SF.lit(right), left.spark.column)
    return cast(SeriesOrIndex, left._with_new_scol(prefixed))
def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Concatenate ``left`` with a binary series/index or a ``bytes`` literal.

    Raises
    ------
    TypeError
        If ``right`` is neither an ``IndexOpsMixin`` with a ``BinaryType`` Spark
        type nor a ``bytes`` instance.
    """
    _sanitize_list_like(right)

    if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, BinaryType):
        suffix = right
    elif isinstance(right, bytes):
        # Plain bytes are wrapped as a Spark literal column.
        suffix = SF.lit(right)
    else:
        raise TypeError(
            "Concatenation can not be applied to %s and the given type." % self.pretty_name
        )

    return column_op(F.concat)(left, suffix)
def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute the logical OR of ``left`` and ``right``.

    A non-``Column`` ``right`` is turned into a Spark literal; values for which
    ``pd.isna`` is true become a null literal.
    """
    _sanitize_list_like(right)

    def or_func(left: Column, right: Any) -> Column:
        if isinstance(right, Column):
            other = right
        elif pd.isna(right):
            # Missing scalars are represented as a null literal.
            other = SF.lit(None)
        else:
            other = SF.lit(right)
        return left | other

    return column_op(or_func)(left, right)
def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute the reflected true division ``right / left`` for a numeric scalar.

    Rows where ``left == 0`` yield ``inf / right``; all other rows yield
    ``right / left``.

    Raises
    ------
    TypeError
        If ``right`` is not a ``numbers.Number``.
    """
    _sanitize_list_like(right)
    if not isinstance(right, numbers.Number):
        raise TypeError("True division can not be applied to given types.")

    def rtruediv(left: Column, right: Any) -> Column:
        zero_divisor_result = SF.lit(np.inf).__div__(right)
        return F.when(left == 0, zero_divisor_result).otherwise(
            SF.lit(right).__truediv__(left)
        )

    numerator = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
    return numpy_column_op(rtruediv)(left, numerator)
def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute the true division ``left / right``.

    Where ``right != 0`` or ``right`` is null the plain quotient is used;
    otherwise the result is ``inf / left``.

    Raises
    ------
    TypeError
        If ``is_valid_operand_for_numeric_arithmetic`` rejects ``right``.
    """
    _sanitize_list_like(right)
    if not is_valid_operand_for_numeric_arithmetic(right):
        raise TypeError("True division can not be applied to given types.")

    def truediv(left: Column, right: Any) -> Column:
        divisor_usable = SF.lit(right != 0) | SF.lit(right).isNull()
        return F.when(divisor_usable, left.__div__(right)).otherwise(
            SF.lit(np.inf).__div__(left)
        )

    divisor = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
    return numpy_column_op(truediv)(left, divisor)
def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Subtract a day-time interval series/index or a ``timedelta`` from ``left``.

    Raises
    ------
    TypeError
        If ``right`` is neither an ``IndexOpsMixin`` with a
        ``DayTimeIntervalType`` Spark type nor a ``timedelta``.
    """
    from pyspark.pandas.base import column_op

    _sanitize_list_like(right)

    right_is_interval_ops = isinstance(right, IndexOpsMixin) and isinstance(
        right.spark.data_type, DayTimeIntervalType
    )
    if not (right_is_interval_ops or isinstance(right, timedelta)):
        raise TypeError("Timedelta subtraction can only be applied to timedelta series.")

    subtract = column_op(Column.__sub__)
    return subtract(left, right)
def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Subtract ``right`` from ``left`` after converting ``left`` to a numeric type.

    The target Spark type is derived from ``type(right)`` for a scalar number,
    or taken from ``right.spark.data_type`` for a series/index.

    Raises
    ------
    TypeError
        If ``is_valid_operand_for_numeric_arithmetic(right, allow_bool=False)``
        rejects ``right``.
    """
    _sanitize_list_like(right)
    if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
        raise TypeError(
            "Subtraction can not be applied to %s and the given type." % self.pretty_name
        )

    if isinstance(right, numbers.Number):
        target_type = as_spark_type(type(right))
    else:
        assert isinstance(right, IndexOpsMixin)
        target_type = right.spark.data_type

    numeric_left = transform_boolean_operand_to_numeric(left, spark_type=target_type)
    return numeric_left - right
def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute ``right - left`` for a ``datetime.date`` scalar ``right``.

    The result is the negated ``F.datediff(left, right)`` cast to long, i.e.
    an integer (in days, per the emitted warning) rather than pandas's
    'timedelta64[ns]'. A ``UserWarning`` describing this difference is emitted.

    Raises
    ------
    TypeError
        If ``right`` is not a ``datetime.date`` or is a ``datetime.datetime``.
    """
    _sanitize_list_like(right)

    # Date subtraction casts its arguments to integer on purpose: pandas returns
    # 'timedelta64[ns]' (in days) from a date subtraction.
    msg = (
        "Note that there is a behavior difference of date subtraction. "
        "The date subtraction returns an integer in days, "
        "whereas pandas returns 'timedelta64[ns]'."
    )

    right_is_plain_date = isinstance(right, datetime.date) and not isinstance(
        right, datetime.datetime
    )
    if not right_is_plain_date:
        raise TypeError("Date subtraction can only be applied to date series.")

    warnings.warn(msg, UserWarning)
    left_minus_right = column_op(F.datediff)(left, SF.lit(right)).astype("long")
    # Negate because this is the reflected form (right - left).
    return -left_minus_right
def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute the logical OR of ``left`` and ``right`` with nulls mapped to False.

    If ``right`` is a series/index with an extension dtype, the operation is
    delegated to ``right.__or__(left)``. Otherwise a missing scalar ``right``
    yields a literal False, and any row where ``left`` or the OR result is
    null yields False.
    """
    _sanitize_list_like(right)

    if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes):
        return right.__or__(left)

    def or_func(left: Column, right: Any) -> Column:
        if not isinstance(right, Column) and pd.isna(right):
            return SF.lit(False)
        disjunction = left | SF.lit(right)
        has_null = left.isNull() | disjunction.isNull()
        return F.when(has_null, False).otherwise(disjunction)

    return column_op(or_func)(left, right)
def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute ``left ** right``.

    Rows where ``left == 1`` yield ``left``; otherwise rows where ``right == 0``
    yield 1; all remaining rows use ``Column.__pow__``.

    Raises
    ------
    TypeError
        If ``is_valid_operand_for_numeric_arithmetic`` rejects ``right``.
    """
    _sanitize_list_like(right)
    if not is_valid_operand_for_numeric_arithmetic(right):
        raise TypeError("Exponentiation can not be applied to given types.")

    def pow_func(left: Column, right: Any) -> Column:
        # The two special cases are checked in order before the general power.
        special_cases = F.when(left == 1, left).when(SF.lit(right) == 0, 1)
        return special_cases.otherwise(Column.__pow__(left, right))

    exponent = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type)
    return column_op(pow_func)(left, exponent)
def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute the logical XOR of ``left`` and a boolean-typed ``right``.

    Both sides are cast to integer, combined with ``bitwiseXOR`` and cast back
    to boolean. A missing scalar ``right`` becomes a null literal.

    Raises
    ------
    TypeError
        If ``_is_boolean_type`` rejects ``right``.
    """
    _sanitize_list_like(right)
    if not _is_boolean_type(right):
        raise TypeError("XOR can not be applied to given types.")

    def xor_func(left: Column, right: Any) -> Column:
        if isinstance(right, Column):
            other = right
        elif pd.isna(right):
            other = SF.lit(None)
        else:
            other = SF.lit(right)
        left_as_int = left.cast("integer")
        return left_as_int.bitwiseXOR(other.cast("integer")).cast("boolean")

    return column_op(xor_func)(left, right)
def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Multiply a boolean ``left`` by ``right``.

    A ``bool`` scalar or a series/index with a ``BooleanType`` Spark type is
    handled as ``left.__and__(right)``. For any other number or series/index,
    ``left`` is converted to the matching numeric Spark type and multiplied.

    Raises
    ------
    TypeError
        If ``is_valid_operand_for_numeric_arithmetic`` rejects ``right``.
    """
    _sanitize_list_like(right)
    if not is_valid_operand_for_numeric_arithmetic(right):
        raise TypeError(
            "Multiplication can not be applied to %s and the given type." % self.pretty_name
        )

    # bool must be tested before numbers.Number, since bool is itself a Number.
    if isinstance(right, bool):
        return left.__and__(right)

    if isinstance(right, numbers.Number):
        target_type = as_spark_type(type(right))
    else:
        assert isinstance(right, IndexOpsMixin)
        if isinstance(right, IndexOpsMixin) and isinstance(right.spark.data_type, BooleanType):
            return left.__and__(right)
        target_type = right.spark.data_type

    numeric_left = transform_boolean_operand_to_numeric(left, spark_type=target_type)
    return numeric_left * right
def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Compute the logical XOR of ``left`` and ``right`` with nulls mapped to False.

    If ``right`` is a series/index with an extension dtype, the operation is
    delegated to ``right ^ left``. Otherwise both sides are cast to integer,
    combined with ``bitwiseXOR``, cast back to boolean, and null results are
    replaced by False.

    Raises
    ------
    TypeError
        If ``right`` is not an extension-dtype series/index and
        ``_is_valid_for_logical_operator`` rejects it.
    """
    _sanitize_list_like(right)

    if isinstance(right, IndexOpsMixin) and isinstance(right.dtype, extension_dtypes):
        return right ^ left

    if not _is_valid_for_logical_operator(right):
        raise TypeError("XOR can not be applied to given types.")

    def xor_func(left: Column, right: Any) -> Column:
        if isinstance(right, Column):
            other = right
        elif pd.isna(right):
            other = SF.lit(None)
        else:
            other = SF.lit(right)
        xored = left.cast("integer").bitwiseXOR(other.cast("integer")).cast("boolean")
        return F.when(xored.isNull(), False).otherwise(xored)

    return column_op(xor_func)(left, right)
def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Concatenate two array-typed series/indexes with ``F.concat``.

    The element types must be equal, unless both are ``NumericType``.

    Raises
    ------
    TypeError
        If ``right`` is not an ``IndexOpsMixin`` with an ``ArrayType`` Spark
        type, or if the element types differ and are not both numeric.
    """
    _sanitize_list_like(right)

    right_is_array_ops = isinstance(right, IndexOpsMixin) and isinstance(
        right.spark.data_type, ArrayType
    )
    if not right_is_array_ops:
        raise TypeError(
            "Concatenation can not be applied to %s and the given type." % self.pretty_name
        )

    left_element = cast(ArrayType, left.spark.data_type).elementType
    right_element = cast(ArrayType, right.spark.data_type).elementType
    if left_element != right_element:
        both_numeric = isinstance(left_element, NumericType) and isinstance(
            right_element, NumericType
        )
        if not both_numeric:
            raise TypeError(
                "Concatenation can only be applied to %s of the same type" % self.pretty_name
            )

    return column_op(F.concat)(left, right)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Return the ``left > right`` comparison built from ``Column.__gt__``."""
    from pyspark.pandas.base import column_op

    _sanitize_list_like(right)
    greater_than = column_op(Column.__gt__)
    return greater_than(left, right)
def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Return the ``left != right`` comparison via ``_compare``.

    The comparison is flagged to ``_compare`` as an equality comparison.
    """
    _sanitize_list_like(right)
    not_equal = Column.__ne__
    return _compare(left, right, not_equal, is_equality_comparison=True)
def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Return the ``left > right`` comparison built from ``Column.__gt__``."""
    _sanitize_list_like(right)
    greater_than = column_op(Column.__gt__)
    return greater_than(left, right)
def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Return the ``left >= right`` comparison via ``_compare``."""
    _sanitize_list_like(right)
    greater_or_equal = Column.__ge__
    return _compare(left, right, greater_or_equal)
def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
    """Reject XOR for this data type.

    Raises
    ------
    TypeError
        Always, once ``_sanitize_list_like(right)`` has returned.
    """
    _sanitize_list_like(right)
    unsupported = TypeError("XOR can not be applied to given types.")
    raise unsupported