示例#1
0
    def test_kudu_schema_convert(self):
        spec = [
            # name, type, is_nullable, is_primary_key
            ('a', dt.Int8(False), 'int8', False, True),
            ('b', dt.Int16(False), 'int16', False, True),
            ('c', dt.Int32(False), 'int32', False, False),
            ('d', dt.Int64(True), 'int64', True, False),
            ('e', dt.String(True), 'string', True, False),
            ('f', dt.Boolean(False), 'bool', False, False),
            ('g', dt.Float(False), 'float', False, False),
            ('h', dt.Double(True), 'double', True, False),
            # TODO
            # ('i', 'binary', False, False),
            ('j', dt.Timestamp(True), 'timestamp', True, False),
        ]

        builder = kudu.schema_builder()
        primary_keys = []
        ibis_types = []
        for name, itype, type_, is_nullable, is_primary_key in spec:
            builder.add_column(name, type_, nullable=is_nullable)

            if is_primary_key:
                primary_keys.append(name)

            ibis_types.append((name, itype))

        builder.set_primary_keys(primary_keys)
        kschema = builder.build()

        ischema = ksupport.schema_kudu_to_ibis(kschema)
        expected = ibis.schema(ibis_types)

        assert_equal(ischema, expected)
示例#2
0
def test_map_get_with_null_on_not_nullable(null_value):
    map_type = dt.Map(dt.string, dt.Int16(nullable=False))
    value = ibis.literal({'A': 1000, 'B': 2000}).cast(map_type)
    assert value.type() == map_type
    with pytest.raises(IbisTypeError):
        assert value.get('C', null_value)
示例#3
0
def sa_smallint(_, satype, nullable=True):
    return dt.Int16(nullable=nullable)
示例#4
0
def parse(text: str) -> DataType:
    @p.generate
    def datetime():
        yield dt.spaceless_string("datetime64", "datetime")
        timezone = yield parened_string.optional()
        return dt.Timestamp(timezone=timezone, nullable=False)

    primitive = (
        datetime
        | dt.spaceless_string("null", "nothing").result(dt.null)
        | dt.spaceless_string("bigint", "int64").result(
            dt.Int64(nullable=False))
        | dt.spaceless_string("double", "float64").result(
            dt.Float64(nullable=False))
        | dt.spaceless_string("float32", "float").result(
            dt.Float32(nullable=False))
        | dt.spaceless_string("smallint", "int16", "int2").result(
            dt.Int16(nullable=False))
        | dt.spaceless_string("date32", "date").result(dt.Date(nullable=False))
        | dt.spaceless_string("time").result(dt.Time(nullable=False))
        | dt.spaceless_string("tinyint", "int8", "int1", "boolean",
                              "bool").result(dt.Int8(nullable=False))
        | dt.spaceless_string("integer", "int32", "int4", "int").result(
            dt.Int32(nullable=False))
        | dt.spaceless_string("uint64").result(dt.UInt64(nullable=False))
        | dt.spaceless_string("uint32").result(dt.UInt32(nullable=False))
        | dt.spaceless_string("uint16").result(dt.UInt16(nullable=False))
        | dt.spaceless_string("uint8").result(dt.UInt8(nullable=False))
        | dt.spaceless_string("uuid").result(dt.UUID(nullable=False))
        | dt.spaceless_string(
            "longtext",
            "mediumtext",
            "tinytext",
            "text",
            "longblob",
            "mediumblob",
            "tinyblob",
            "blob",
            "varchar",
            "char",
            "string",
        ).result(dt.String(nullable=False)))

    @p.generate
    def parened_string():
        yield dt.LPAREN
        s = yield dt.RAW_STRING
        yield dt.RPAREN
        return s

    @p.generate
    def nullable():
        yield dt.spaceless_string("nullable")
        yield dt.LPAREN
        parsed_ty = yield ty
        yield dt.RPAREN
        return parsed_ty(nullable=True)

    @p.generate
    def fixed_string():
        yield dt.spaceless_string("fixedstring")
        yield dt.LPAREN
        yield dt.NUMBER
        yield dt.RPAREN
        return dt.String(nullable=False)

    @p.generate
    def decimal():
        yield dt.spaceless_string("decimal", "numeric")
        precision, scale = yield dt.LPAREN.then(
            p.seq(dt.PRECISION.skip(dt.COMMA), dt.SCALE)).skip(dt.RPAREN)
        return dt.Decimal(precision, scale, nullable=False)

    @p.generate
    def paren_type():
        yield dt.LPAREN
        value_type = yield ty
        yield dt.RPAREN
        return value_type

    @p.generate
    def array():
        yield dt.spaceless_string("array")
        value_type = yield paren_type
        return dt.Array(value_type, nullable=False)

    @p.generate
    def map():
        yield dt.spaceless_string("map")
        yield dt.LPAREN
        key_type = yield ty
        yield dt.COMMA
        value_type = yield ty
        yield dt.RPAREN
        return dt.Map(key_type, value_type, nullable=False)

    at_least_one_space = p.regex(r"\s+")

    @p.generate
    def nested():
        yield dt.spaceless_string("nested")
        yield dt.LPAREN

        field_names_types = yield (p.seq(
            dt.SPACES.then(dt.FIELD.skip(at_least_one_space)),
            ty).combine(lambda field, ty:
                        (field, dt.Array(ty, nullable=False))).sep_by(
                            dt.COMMA))
        yield dt.RPAREN
        return dt.Struct.from_tuples(field_names_types, nullable=False)

    @p.generate
    def struct():
        yield dt.spaceless_string("tuple")
        yield dt.LPAREN
        field_names_types = yield (p.seq(
            dt.SPACES.then(dt.FIELD.skip(at_least_one_space).optional()),
            ty,
        ).combine(lambda field, ty: (field, ty)).sep_by(dt.COMMA))
        yield dt.RPAREN
        return dt.Struct.from_tuples(
            [(field_name if field_name is not None else f"f{i:d}", typ)
             for i, (field_name, typ) in enumerate(field_names_types)],
            nullable=False,
        )

    ty = (nullable
          | nested
          | primitive
          | fixed_string
          | decimal
          | array
          | map
          | struct)
    return ty.parse(text)
示例#5
0
    sql_types = ["toFixedString('foo', 8) AS fixedstring_col"]
    if parse_version(con.version).base_version >= '1.1.54337':
        sql_types.append(
            "toDateTime('2018-07-02 00:00:00', 'UTC') AS datetime_col")
    sql = 'SELECT {}'.format(', '.join(sql_types))
    df = con.sql(sql).execute()
    assert df.fixedstring_col.dtype.name == 'object'
    if parse_version(con.version).base_version >= '1.1.54337':
        assert df.datetime_col.dtype.name == 'datetime64[ns]'


@pytest.mark.parametrize(
    ('ch_type', 'ibis_type'),
    [
        ('Array(Int8)', dt.Array(dt.Int8(nullable=False))),
        ('Array(Int16)', dt.Array(dt.Int16(nullable=False))),
        ('Array(Int32)', dt.Array(dt.Int32(nullable=False))),
        ('Array(Int64)', dt.Array(dt.Int64(nullable=False))),
        ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False))),
        ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False))),
        ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False))),
        ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False))),
        ('Array(Float32)', dt.Array(dt.Float32(nullable=False))),
        ('Array(Float64)', dt.Array(dt.Float64(nullable=False))),
        ('Array(String)', dt.Array(dt.String(nullable=False))),
        ('Array(FixedString(32))', dt.Array(dt.String(nullable=False))),
        ('Array(Date)', dt.Array(dt.Date(nullable=False))),
        ('Array(DateTime)', dt.Array(dt.Timestamp(nullable=False))),
        ('Array(DateTime64)', dt.Array(dt.Timestamp(nullable=False))),
        ('Array(Nothing)', dt.Array(dt.Null(nullable=False))),
        ('Array(Null)', dt.Array(dt.Null(nullable=False))),