Пример #1
0
def orc_type(field):
    """Translate a pyarrow type into the corresponding pyorc type object.

    The argument is tested against a fixed sequence of ``pa.types.is_*``
    predicates; the builder paired with the first predicate that accepts it
    produces the result. List types are converted recursively through their
    ``value_type``, and decimal types carry over ``precision`` and ``scale``.

    Args:
        field: The object handed to the ``pa.types`` predicates.
            NOTE(review): despite the name this looks like a pyarrow
            ``DataType`` rather than a ``Field`` (it is read via
            ``precision``/``scale``/``value_type``) -- confirm with callers.

    Returns:
        A freshly constructed pyorc type instance.

    Raises:
        ValueError: If no predicate accepts ``field``.
    """
    checks = pa.types

    # Ordered (predicate, builder) pairs. Builders are lambdas so that the
    # pyorc object is only created for the branch that actually matches.
    conversions = (
        (checks.is_boolean, lambda t: pyorc.Boolean()),
        (checks.is_int8, lambda t: pyorc.TinyInt()),
        (checks.is_int16, lambda t: pyorc.SmallInt()),
        (checks.is_int32, lambda t: pyorc.Int()),
        (checks.is_int64, lambda t: pyorc.BigInt()),
        (checks.is_float32, lambda t: pyorc.Float()),
        (checks.is_float64, lambda t: pyorc.Double()),
        (checks.is_decimal, lambda t: pyorc.Decimal(t.precision, t.scale)),
        # Nested element type is resolved by recursing into this function.
        (checks.is_list, lambda t: pyorc.Array(orc_type(t.value_type))),
        (checks.is_timestamp, lambda t: pyorc.Timestamp()),
        (checks.is_date, lambda t: pyorc.Date()),
        (checks.is_binary, lambda t: pyorc.Binary()),
        (checks.is_string, lambda t: pyorc.String()),
    )

    for accepts, build in conversions:
        if accepts(field):
            return build(field)

    raise ValueError('Cannot Convert %s' % field)
Пример #2
0
    pd.StringDtype(): ["string", "null"],
    cudf.dtype("bool_"): "boolean",
    cudf.dtype("int16"): "int",
    cudf.dtype("int32"): "int",
    cudf.dtype("int64"): "long",
    cudf.dtype("O"): "string",
    cudf.dtype("str"): "string",
    cudf.dtype("float32"): "float",
    cudf.dtype("float64"): "double",
    cudf.dtype("<M8[ns]"): {"type": "long", "logicalType": "timestamp-millis"},
    cudf.dtype("<M8[ms]"): {"type": "long", "logicalType": "timestamp-millis"},
    cudf.dtype("<M8[us]"): {"type": "long", "logicalType": "timestamp-micros"},
}

PANDAS_TO_ORC_TYPES = {
    cudf.dtype("int8"): pyorc.TinyInt(),
    pd.Int8Dtype(): pyorc.TinyInt(),
    pd.Int16Dtype(): pyorc.SmallInt(),
    pd.Int32Dtype(): pyorc.Int(),
    pd.Int64Dtype(): pyorc.BigInt(),
    pd.BooleanDtype(): pyorc.Boolean(),
    cudf.dtype("bool_"): pyorc.Boolean(),
    cudf.dtype("int16"): pyorc.SmallInt(),
    cudf.dtype("int32"): pyorc.Int(),
    cudf.dtype("int64"): pyorc.BigInt(),
    cudf.dtype("O"): pyorc.String(),
    pd.StringDtype(): pyorc.String(),
    cudf.dtype("float32"): pyorc.Float(),
    cudf.dtype("float64"): pyorc.Double(),
    cudf.dtype("<M8[ns]"): pyorc.Timestamp(),
    cudf.dtype("<M8[ms]"): pyorc.Timestamp(),
Пример #3
0
    np.dtype("<M8[ns]"): {
        "type": "long",
        "logicalType": "timestamp-millis"
    },
    np.dtype("<M8[ms]"): {
        "type": "long",
        "logicalType": "timestamp-millis"
    },
    np.dtype("<M8[us]"): {
        "type": "long",
        "logicalType": "timestamp-micros"
    },
}

PANDAS_TO_ORC_TYPES = {
    np.dtype("int8"): pyorc.TinyInt(),
    pd.Int8Dtype(): pyorc.TinyInt(),
    pd.Int16Dtype(): pyorc.SmallInt(),
    pd.Int32Dtype(): pyorc.Int(),
    pd.Int64Dtype(): pyorc.BigInt(),
    pd.BooleanDtype(): pyorc.Boolean(),
    np.dtype("bool_"): pyorc.Boolean(),
    np.dtype("int16"): pyorc.SmallInt(),
    np.dtype("int32"): pyorc.Int(),
    np.dtype("int64"): pyorc.BigInt(),
    np.dtype("O"): pyorc.String(),
    pd.StringDtype(): pyorc.String(),
    np.dtype("float32"): pyorc.Float(),
    np.dtype("float64"): pyorc.Double(),
    np.dtype("<M8[ns]"): pyorc.Timestamp(),
    np.dtype("<M8[ms]"): pyorc.Timestamp(),