def test_string_col_is_unicode(backend, alltypes, df):
    """string_col maps to an ibis String dtype and executes to ``str`` values."""

    # A named def instead of an assigned lambda (PEP 8 / E731 — the original
    # even carried a `# noqa: E731` acknowledging the violation).
    def is_text_type(value):
        return isinstance(value, str)

    dtype = alltypes.string_col.type()
    assert dtype == dt.String(nullable=dtype.nullable)
    # Both the raw pandas frame and the executed expression hold unicode text.
    assert df.string_col.map(is_text_type).all()
    result = alltypes.string_col.execute()
    assert result.map(is_text_type).all()
def test_kudu_schema_convert(self):
    """A Kudu schema converted to ibis matches the hand-built expectation."""
    # (column name, expected ibis type, kudu type string, nullable, primary key)
    columns = [
        ('a', dt.Int8(False), 'int8', False, True),
        ('b', dt.Int16(False), 'int16', False, True),
        ('c', dt.Int32(False), 'int32', False, False),
        ('d', dt.Int64(True), 'int64', True, False),
        ('e', dt.String(True), 'string', True, False),
        ('f', dt.Boolean(False), 'bool', False, False),
        ('g', dt.Float(False), 'float', False, False),
        ('h', dt.Double(True), 'double', True, False),
        # TODO
        # ('i', 'binary', False, False),
        ('j', dt.Timestamp(True), 'timestamp', True, False),
    ]

    builder = kudu.schema_builder()
    key_names = []
    expected_fields = []
    for name, ibis_type, kudu_type, nullable, is_key in columns:
        builder.add_column(name, kudu_type, nullable=nullable)
        if is_key:
            key_names.append(name)
        expected_fields.append((name, ibis_type))
    builder.set_primary_keys(key_names)

    kschema = builder.build()
    converted = ksupport.schema_kudu_to_ibis(kschema)
    assert_equal(converted, ibis.schema(expected_fields))
def test_nullable_column_propagated():
    """Per-column nullability is preserved and propagates through addition."""
    t = ibis.table(
        [
            ('a', dt.Int32(nullable=True)),
            ('b', dt.Int32(nullable=False)),
            ('c', dt.String(nullable=False)),
            ('d', dt.double),  # nullable by default
            ('f', dt.Double(nullable=False)),
        ]
    )

    # Nullability of each declared column survives table construction.
    expected_nullability = {'a': True, 'b': False, 'c': False, 'd': True, 'f': False}
    for name, is_nullable in expected_nullability.items():
        assert t[name].type().nullable is is_nullable

    # A sum is nullable when either operand is nullable.
    assert (t.a + t.d).type().nullable is True
    assert (t.b + t.d).type().nullable is True
    # ...and non-nullable only when both operands are non-nullable.
    assert (t.b + t.f).type().nullable is False
def test_literal_none_to_nullable_colum(alltypes):
    """A ``None`` literal cast to nullable string executes to an all-NULL column."""
    # GH: 2985
    nrows = alltypes.count().execute()
    null_string = ibis.literal(None, dt.String(nullable=True))
    expr = alltypes.mutate(null_string.name('nullable_string_column'))
    result = expr['nullable_string_column'].execute()
    expected = pd.Series([None] * nrows, name='nullable_string_column')
    tm.assert_series_equal(result, expected)
def sa_string(_, satype, nullable=True):
    """Translate a SQLAlchemy string type into an ibis ``String`` dtype."""
    ibis_type = dt.String(nullable=nullable)
    return ibis_type
# Casting double_col: each target type translates to an explicit CAST, except
# a cast to the column's own (non-nullable double) type, which is elided.
@pytest.mark.parametrize(('to_type', 'expected'), [
    ('int8', 'CAST(`double_col` AS Int8)'),
    ('int16', 'CAST(`double_col` AS Int16)'),
    ('float', 'CAST(`double_col` AS Float32)'),
    # alltypes.double_col is non-nullable
    (dt.Double(nullable=False), '`double_col`')
])
def test_cast_double_col(alltypes, translate, to_type, expected):
    # The cast expression must translate to the exact expected SQL fragment.
    expr = alltypes.double_col.cast(to_type)
    assert translate(expr) == expected


# Casting string_col: same pattern — a cast to the column's own type is a
# no-op in the generated SQL; other targets get an explicit CAST.
@pytest.mark.parametrize(('to_type', 'expected'), [
    ('int8', 'CAST(`string_col` AS Int8)'),
    ('int16', 'CAST(`string_col` AS Int16)'),
    (dt.String(nullable=False), '`string_col`'),
    ('timestamp', 'CAST(`string_col` AS DateTime)'),
    ('date', 'CAST(`string_col` AS Date)')
])
def test_cast_string_col(alltypes, translate, to_type, expected):
    expr = alltypes.string_col.cast(to_type)
    assert translate(expr) == expected


# Expected failure: the backend offers no decimal type to cast to.
@pytest.mark.xfail(raises=AssertionError,
                   reason='Clickhouse doesn\'t have decimal type')
def test_decimal_cast():
    assert False


@pytest.mark.parametrize('column', [
def test_nullable_non_nullable_field():
    """A non-nullable field type yields an empty tuple from ``nullable``."""
    table = ibis.table([('a', dt.String(nullable=False))])
    field_type = table.a.type()
    assert nullable(field_type) == ()
def fixed_string():
    # Parser generator for a FixedString(N) type spelling: consume the
    # keyword and the parenthesized width, then map the whole thing to
    # ibis's variable-length String type (the fixed width is discarded).
    yield dt.spaceless_string("fixedstring")
    yield dt.LPAREN
    yield dt.NUMBER
    yield dt.RPAREN
    return dt.String(nullable=False)
def parse(text: str) -> DataType:
    """Parse a ClickHouse-style type string (e.g. ``Nullable(Array(Int8))``)
    into an ibis ``DataType``.

    Each ``@p.generate`` function below is a parsy parser built from the
    token primitives on ``dt`` (``LPAREN``, ``spaceless_string``, ...).
    Types are constructed with ``nullable=False`` by default; only the
    ``Nullable(...)`` wrapper flips that flag.
    """

    @p.generate
    def datetime():
        # datetime64/datetime with an optional parenthesized timezone string.
        yield dt.spaceless_string("datetime64", "datetime")
        timezone = yield parened_string.optional()
        return dt.Timestamp(timezone=timezone, nullable=False)

    # Scalar types. Alternatives are tried left-to-right, so more specific
    # spellings precede shorter ones they would otherwise shadow.
    primitive = (
        datetime
        | dt.spaceless_string("null", "nothing").result(dt.null)
        | dt.spaceless_string("bigint", "int64").result(
            dt.Int64(nullable=False))
        | dt.spaceless_string("double", "float64").result(
            dt.Float64(nullable=False))
        | dt.spaceless_string("float32", "float").result(
            dt.Float32(nullable=False))
        | dt.spaceless_string("smallint", "int16", "int2").result(
            dt.Int16(nullable=False))
        | dt.spaceless_string("date32", "date").result(dt.Date(nullable=False))
        | dt.spaceless_string("time").result(dt.Time(nullable=False))
        | dt.spaceless_string("tinyint", "int8", "int1", "boolean",
                              "bool").result(dt.Int8(nullable=False))
        | dt.spaceless_string("integer", "int32", "int4", "int").result(
            dt.Int32(nullable=False))
        | dt.spaceless_string("uint64").result(dt.UInt64(nullable=False))
        | dt.spaceless_string("uint32").result(dt.UInt32(nullable=False))
        | dt.spaceless_string("uint16").result(dt.UInt16(nullable=False))
        | dt.spaceless_string("uint8").result(dt.UInt8(nullable=False))
        | dt.spaceless_string("uuid").result(dt.UUID(nullable=False))
        # All text/blob spellings collapse to the ibis String type.
        | dt.spaceless_string(
            "longtext",
            "mediumtext",
            "tinytext",
            "text",
            "longblob",
            "mediumblob",
            "tinyblob",
            "blob",
            "varchar",
            "char",
            "string",
        ).result(dt.String(nullable=False)))

    @p.generate
    def parened_string():
        # '(...)' wrapping a raw string; returns the inner string unchanged.
        yield dt.LPAREN
        s = yield dt.RAW_STRING
        yield dt.RPAREN
        return s

    @p.generate
    def nullable():
        # Nullable(T): parse the inner type, then rebuild it nullable.
        # References `ty` (defined at the bottom) — the generator body only
        # runs at parse time, after `ty` exists.
        yield dt.spaceless_string("nullable")
        yield dt.LPAREN
        parsed_ty = yield ty
        yield dt.RPAREN
        return parsed_ty(nullable=True)

    @p.generate
    def fixed_string():
        # FixedString(N) maps to String; the width is consumed and dropped.
        yield dt.spaceless_string("fixedstring")
        yield dt.LPAREN
        yield dt.NUMBER
        yield dt.RPAREN
        return dt.String(nullable=False)

    @p.generate
    def decimal():
        # Decimal(precision, scale) / Numeric(precision, scale).
        yield dt.spaceless_string("decimal", "numeric")
        precision, scale = yield dt.LPAREN.then(
            p.seq(dt.PRECISION.skip(dt.COMMA),
                  dt.SCALE)).skip(dt.RPAREN)
        return dt.Decimal(precision, scale, nullable=False)

    @p.generate
    def paren_type():
        # A parenthesized type expression, e.g. the element type in Array(...).
        yield dt.LPAREN
        value_type = yield ty
        yield dt.RPAREN
        return value_type

    @p.generate
    def array():
        yield dt.spaceless_string("array")
        value_type = yield paren_type
        return dt.Array(value_type, nullable=False)

    @p.generate
    def map():
        yield dt.spaceless_string("map")
        yield dt.LPAREN
        key_type = yield ty
        yield dt.COMMA
        value_type = yield ty
        yield dt.RPAREN
        return dt.Map(key_type, value_type, nullable=False)

    at_least_one_space = p.regex(r"\s+")

    @p.generate
    def nested():
        # Nested(name T, ...): a struct whose field types are arrays of T.
        yield dt.spaceless_string("nested")
        yield dt.LPAREN
        field_names_types = yield (p.seq(
            dt.SPACES.then(dt.FIELD.skip(at_least_one_space)),
            ty).combine(lambda field, ty:
                        (field, dt.Array(ty, nullable=False))).sep_by(
                            dt.COMMA))
        yield dt.RPAREN
        return dt.Struct.from_tuples(field_names_types, nullable=False)

    @p.generate
    def struct():
        # Tuple(name T, ...); field names are optional, so unnamed fields
        # receive positional names f0, f1, ...
        yield dt.spaceless_string("tuple")
        yield dt.LPAREN
        field_names_types = yield (p.seq(
            dt.SPACES.then(dt.FIELD.skip(at_least_one_space).optional()),
            ty,
        ).combine(lambda field, ty: (field, ty)).sep_by(dt.COMMA))
        yield dt.RPAREN
        return dt.Struct.from_tuples(
            [(field_name if field_name is not None else f"f{i:d}", typ)
             for i, (field_name, typ) in enumerate(field_names_types)],
            nullable=False,
        )

    # Top-level alternation; order matters for overlapping prefixes.
    ty = (nullable
          | nested
          | primitive
          | fixed_string
          | decimal
          | array
          | map
          | struct)
    return ty.parse(text)
@pytest.mark.parametrize( ('ch_type', 'ibis_type'), [ ('Array(Int8)', dt.Array(dt.Int8(nullable=False))), ('Array(Int16)', dt.Array(dt.Int16(nullable=False))), ('Array(Int32)', dt.Array(dt.Int32(nullable=False))), ('Array(Int64)', dt.Array(dt.Int64(nullable=False))), ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False))), ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False))), ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False))), ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False))), ('Array(Float32)', dt.Array(dt.Float32(nullable=False))), ('Array(Float64)', dt.Array(dt.Float64(nullable=False))), ('Array(String)', dt.Array(dt.String(nullable=False))), ('Array(FixedString(32))', dt.Array(dt.String(nullable=False))), ('Array(Date)', dt.Array(dt.Date(nullable=False))), ('Array(DateTime)', dt.Array(dt.Timestamp(nullable=False))), ('Array(DateTime64)', dt.Array(dt.Timestamp(nullable=False))), ('Array(Nothing)', dt.Array(dt.Null(nullable=False))), ('Array(Null)', dt.Array(dt.Null(nullable=False))), ('Array(Array(Int8))', dt.Array(dt.Array(dt.Int8(nullable=False)))), ( 'Array(Array(Array(Int8)))', dt.Array(dt.Array(dt.Array(dt.Int8(nullable=False)))), ), ( 'Array(Array(Array(Array(Int8))))', dt.Array(dt.Array(dt.Array(dt.Array(dt.Int8(nullable=False))))), ),
def test_string_col_is_unicode(backend, alltypes, df):
    """string_col maps to an ibis String dtype and executes to ``str`` values."""

    # Defined locally so the test is self-contained: the original referenced
    # `is_text_type` without defining it here, relying on a module-level
    # helper — the sibling version of this test defines the predicate inline.
    def is_text_type(value):
        return isinstance(value, str)

    dtype = alltypes.string_col.type()
    assert dtype == dt.String(nullable=dtype.nullable)
    assert df.string_col.map(is_text_type).all()
    result = alltypes.string_col.execute()
    assert result.map(is_text_type).all()
def sa_oracle_VARCHAR2(_, satype, nullable=True):
    """Translate an Oracle ``VARCHAR2`` column type into an ibis ``String``."""
    ibis_type = dt.String(nullable=nullable)
    return ibis_type
def sa_oracle_LONG(_, satype, nullable=True):
    """Translate an Oracle ``LONG`` column type into an ibis ``String``."""
    ibis_type = dt.String(nullable=nullable)
    return ibis_type
'CAST(`double_col` AS Float64)', id="float64", ), ], ) def test_cast_double_col(alltypes, translate, to_type, expected): expr = alltypes.double_col.cast(to_type) assert translate(expr) == expected @pytest.mark.parametrize( ('to_type', 'expected'), [ ('int8', 'CAST(`string_col` AS Nullable(Int8))'), ('int16', 'CAST(`string_col` AS Nullable(Int16))'), (dt.String(nullable=False), 'CAST(`string_col` AS String)'), ('timestamp', 'CAST(`string_col` AS Nullable(DateTime64(6)))'), ('date', 'CAST(`string_col` AS Nullable(Date))'), ( '!map<string, int64>', 'CAST(`string_col` AS Map(Nullable(String), Nullable(Int64)))', ), ( '!struct<a: string, b: int64>', ('CAST(`string_col` AS ' 'Tuple(a Nullable(String), b Nullable(Int64)))'), ), ], ) def test_cast_string_col(alltypes, translate, to_type, expected): expr = alltypes.string_col.cast(to_type)
def test_insert_with_more_columns(temporary_alltypes, df):
    """Inserting a frame that carries an unknown extra column must raise."""
    records = df[:10].copy()
    records['non_existing_column'] = 'raise on me'
    with pytest.raises(AssertionError):
        temporary_alltypes.insert(records)


@pytest.mark.parametrize(
    ("query", "expected_schema"),
    [
        (
            "SELECT 1 as a, 2 + dummy as b",
            ibis.schema(
                dict(a=dt.UInt8(nullable=False), b=dt.UInt16(nullable=False))
            ),
        ),
        (
            "SELECT string_col, sum(double_col) as b FROM functional_alltypes GROUP BY string_col",  # noqa: E501
            ibis.schema(
                dict(
                    string_col=dt.String(nullable=True),
                    b=dt.Float64(nullable=True),
                )
            ),
        ),
    ],
)
def test_get_schema_using_query(con, query, expected_schema):
    """The schema inferred from a raw query matches the expected ibis schema."""
    assert con._get_schema_using_query(query) == expected_schema
('Array(Int16)', dt.Array(dt.Int16(nullable=False), nullable=False)), ('Array(Int32)', dt.Array(dt.Int32(nullable=False), nullable=False)), ('Array(Int64)', dt.Array(dt.Int64(nullable=False), nullable=False)), ('Array(UInt8)', dt.Array(dt.UInt8(nullable=False), nullable=False)), ('Array(UInt16)', dt.Array(dt.UInt16(nullable=False), nullable=False)), ('Array(UInt32)', dt.Array(dt.UInt32(nullable=False), nullable=False)), ('Array(UInt64)', dt.Array(dt.UInt64(nullable=False), nullable=False)), ( 'Array(Float32)', dt.Array(dt.Float32(nullable=False), nullable=False), ), ( 'Array(Float64)', dt.Array(dt.Float64(nullable=False), nullable=False), ), ('Array(String)', dt.Array(dt.String(nullable=False), nullable=False)), ( 'Array(FixedString(32))', dt.Array(dt.String(nullable=False), nullable=False), ), ('Array(Date)', dt.Array(dt.Date(nullable=False), nullable=False)), ( 'Array(DateTime)', dt.Array(dt.Timestamp(nullable=False), nullable=False), ), ( 'Array(DateTime64)', dt.Array(dt.Timestamp(nullable=False), nullable=False), ), ('Array(Nothing)', dt.Array(dt.null, nullable=False)), ('Array(Null)', dt.Array(dt.null, nullable=False)),