Exemplo n.º 1
0
    def astype(self, index_ops: IndexOpsLike,
               dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        """Convert ``index_ops`` to the requested ``dtype``.

        The requested dtype is first resolved through
        ``pandas_on_spark_type`` into a ``(dtype, spark_type)`` pair.
        Categorical, boolean and non-string targets are delegated to the
        matching ``_as_*_type`` helper.  String targets are built here: the
        underlying Spark column is used as a boolean condition and mapped to
        the text ``"True"`` / ``"False"``.

        Parameters
        ----------
        index_ops
            Object whose Spark column is converted.
        dtype
            Target dtype, in any form ``pandas_on_spark_type`` accepts.

        Returns
        -------
        The converted object, as produced by the delegate helper or by
        ``index_ops._with_new_scol``.
        """
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        if isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        if not isinstance(spark_type, StringType):
            return _as_other_type(index_ops, dtype, spark_type)

        # String target: render the boolean column as text.
        column = index_ops.spark.column
        bool_text = F.when(column, "True").otherwise("False")

        if isinstance(dtype, extension_dtypes):
            # No ``otherwise`` here, so null inputs are not given a
            # replacement string.
            scol = F.when(column.isNotNull(), bool_text)
        else:
            # Null inputs are replaced by the text form of ``None``.
            scol = F.when(column.isNull(), str(None)).otherwise(bool_text)

        column_name = index_ops._internal.data_spark_column_names[0]
        return index_ops._with_new_scol(
            scol.alias(column_name),
            field=InternalField(dtype=dtype),
        )
Exemplo n.º 2
0
    def astype(self, index_ops: T_IndexOps, dtype: Union[str, type,
                                                         Dtype]) -> T_IndexOps:
        """Convert ``index_ops`` to the requested ``dtype``.

        The requested dtype is first resolved through
        ``pandas_on_spark_type`` into a ``(dtype, spark_type)`` pair.
        Categorical, boolean and non-string targets are delegated to the
        matching ``_as_*_type`` helper.  For string targets the Spark column
        is cast to ``spark_type`` and null entries are replaced by
        ``str(pd.NaT)``.

        Parameters
        ----------
        index_ops
            Object whose Spark column is converted.
        dtype
            Target dtype, in any form ``pandas_on_spark_type`` accepts.

        Returns
        -------
        The converted object, as produced by the delegate helper or by
        ``index_ops._with_new_scol``.
        """
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        elif isinstance(spark_type, StringType):
            # The extension-dtype and plain-dtype paths previously built the
            # exact same expression in two separate branches; they are
            # merged here.
            # NOTE(review): the earlier code flagged the extension-dtype
            # case with "seems like a pandas' bug?" - i.e. nulls becoming
            # str(pd.NaT) even for extension string dtypes. Behavior kept.
            column = index_ops.spark.column
            scol = F.when(column.isNull(), str(pd.NaT)).otherwise(
                column.cast(spark_type))
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=InternalField(dtype=dtype),
            )
        else:
            return _as_other_type(index_ops, dtype, spark_type)
Exemplo n.º 3
0
    def astype(self, index_ops: IndexOpsLike,
               dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        """Convert ``index_ops`` to the requested ``dtype``.

        The requested dtype is first resolved through
        ``pandas_on_spark_type`` into a ``(dtype, spark_type)`` pair.
        Categorical, boolean and non-string targets are delegated to the
        matching ``_as_*_type`` helper.  String targets are built here: the
        underlying Spark column is used as a boolean condition and mapped to
        the text ``"True"`` / ``"False"``, and the first data field is copied
        with the new dtype, Spark type and nullability.

        Parameters
        ----------
        index_ops
            Object whose Spark column is converted.
        dtype
            Target dtype, in any form ``pandas_on_spark_type`` accepts.

        Returns
        -------
        The converted object, as produced by the delegate helper or by
        ``index_ops._with_new_scol``.
        """
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        if isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        if not isinstance(spark_type, StringType):
            return _as_other_type(index_ops, dtype, spark_type)

        # String target: render the boolean column as text.
        column = index_ops.spark.column
        bool_text = F.when(column, "True").otherwise("False")

        if isinstance(dtype, extension_dtypes):
            # No ``otherwise`` here, so null inputs get no replacement
            # string; nullability is taken from the source column.
            scol = F.when(column.isNotNull(), bool_text)
            nullable = index_ops.spark.nullable
        else:
            # Null inputs are replaced by a literal string, so the result is
            # declared non-nullable.  The literal depends on the ops class.
            if isinstance(self, BooleanExtensionOps):
                missing_text = str(pd.NA)
            else:
                missing_text = str(None)
            scol = F.when(column.isNull(), missing_text).otherwise(bool_text)
            nullable = False

        updated_field = index_ops._internal.data_fields[0].copy(
            dtype=dtype, spark_type=spark_type, nullable=nullable)
        return index_ops._with_new_scol(scol, field=updated_field)
Exemplo n.º 4
0
def _non_fractional_astype(index_ops: IndexOpsLike, dtype: Dtype,
                           spark_type: DataType) -> IndexOpsLike:
    """Dispatch an ``astype`` request to the matching conversion helper.

    The helper is chosen from the already-resolved ``dtype`` /
    ``spark_type`` pair, checked in this order: categorical dtype, boolean
    Spark type, string Spark type, anything else.

    Parameters
    ----------
    index_ops
        Object to convert.
    dtype
        Resolved target dtype.
    spark_type
        Spark type corresponding to ``dtype``.

    Returns
    -------
    Whatever the selected ``_as_*_type`` helper returns.
    """
    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)

    if isinstance(spark_type, BooleanType):
        return _as_bool_type(index_ops, dtype)

    if isinstance(spark_type, StringType):
        # The string helper is given the text form of NaN ("nan") as its
        # ``null_str`` argument.
        nan_text = str(np.nan)
        return _as_string_type(index_ops, dtype, null_str=nan_text)

    return _as_other_type(index_ops, dtype, spark_type)
Exemplo n.º 5
0
    def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        """Convert ``index_ops`` to the requested ``dtype``.

        The requested dtype is resolved through ``pandas_on_spark_type``
        into a ``(dtype, spark_type)`` pair, and the conversion is then
        delegated to the matching ``_as_*_type`` helper, checked in this
        order: categorical dtype, boolean Spark type, string Spark type,
        anything else.

        Parameters
        ----------
        index_ops
            Object to convert.
        dtype
            Target dtype, in any form ``pandas_on_spark_type`` accepts.

        Returns
        -------
        Whatever the selected ``_as_*_type`` helper returns.
        """
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)

        if isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)

        if isinstance(spark_type, StringType):
            # The string helper is given the text form of NaN ("nan") as its
            # ``null_str`` argument.
            nan_text = str(np.nan)
            return _as_string_type(index_ops, dtype, null_str=nan_text)

        return _as_other_type(index_ops, dtype, spark_type)
Exemplo n.º 6
0
    def astype(self, index_ops: Union["Index", "Series"],
               dtype: Union[str, type, Dtype]) -> Union["Index", "Series"]:
        """Convert ``index_ops`` to the requested ``dtype``.

        The requested dtype is resolved through ``pandas_on_spark_type``
        into a ``(dtype, spark_type)`` pair, and the conversion is then
        delegated to the matching ``_as_*_type`` helper, checked in this
        order: categorical dtype, boolean Spark type, string Spark type,
        anything else.

        Parameters
        ----------
        index_ops
            Index or Series to convert.
        dtype
            Target dtype, in any form ``pandas_on_spark_type`` accepts.

        Returns
        -------
        Whatever the selected ``_as_*_type`` helper returns.
        """
        dtype, spark_type = pandas_on_spark_type(dtype)

        # Exactly one helper runs; its result is returned unchanged.
        if isinstance(dtype, CategoricalDtype):
            converted = _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            converted = _as_bool_type(index_ops, dtype)
        elif isinstance(spark_type, StringType):
            converted = _as_string_type(index_ops, dtype)
        else:
            converted = _as_other_type(index_ops, dtype, spark_type)
        return converted