def _type_from_cursor_info(descr, field) -> dt.DataType:
    """Construct an ibis type from MySQL field descr and field result metadata.

    This method is complex because the MySQL protocol is complex.

    Types are not encoded in a self contained way, meaning you need
    multiple pieces of information coming from the result set metadata to
    determine the most precise type for a field. Even then, the decoding is
    not high fidelity in some cases: UUIDs for example are decoded as
    strings, because the protocol does not appear to preserve the logical
    type, only the physical type.

    Parameters
    ----------
    descr
        Seven-item cursor description entry. Only the type code (index 1),
        the field length (index 4) and the scale (index 5) are used.
    field
        Field result metadata exposing ``flags`` and ``charsetnr``.

    Returns
    -------
    dt.DataType
        The inferred type, always constructed with ``nullable=True``.

    Raises
    ------
    NotImplementedError
        If the type code has no entry in ``_type_codes``.
    AssertionError
        If a ``BIT`` field reports a length greater than 64.
    """
    from pymysql.connections import TEXT_TYPES

    _, type_code, _, _, field_length, scale, _ = descr
    flags = _FieldFlags(field.flags)
    typename = _type_codes.get(type_code)
    if typename is None:
        raise NotImplementedError(
            f"MySQL type code {type_code:d} is not supported")

    # Start from the generic mapping, then refine it with the extra metadata.
    typ = _type_mapping[typename]

    if typename in ("DECIMAL", "NEWDECIMAL"):
        precision = _decimal_length_to_precision(
            length=field_length,
            scale=scale,
            is_unsigned=flags.is_unsigned,
        )
        typ = partial(typ, precision=precision, scale=scale)
    elif typename == "BIT":
        # Pick the narrowest integer type able to hold ``field_length`` bits.
        if field_length <= 8:
            typ = dt.int8
        elif field_length <= 16:
            typ = dt.int16
        elif field_length <= 32:
            typ = dt.int32
        elif field_length <= 64:
            typ = dt.int64
        else:
            # Raise explicitly instead of ``assert False``: assert statements
            # are stripped under ``python -O``, which would let an
            # out-of-range length silently keep the generic mapping chosen
            # above.
            raise AssertionError("invalid field length for BIT type")
    else:
        if flags.is_set:
            # sets are limited to strings
            typ = dt.Set(dt.string)
        elif flags.is_unsigned and flags.is_num:
            # Swap in the unsigned counterpart, e.g. ``Int32`` -> ``UInt32``.
            typ = getattr(dt, f"U{typ.__name__}")
        elif type_code in TEXT_TYPES:
            # binary text
            if field.charsetnr == MY_CHARSET_BIN:
                typ = dt.Binary
            else:
                typ = dt.String

    # projection columns are always nullable
    return typ(nullable=True)
def test_scalar_parameter_set():
    """A set-literal type spec yields a scalar parameter typed as a Set."""
    param_expr = ibis.param({dt.int64})

    node = param_expr.op()
    assert isinstance(node, ops.ScalarParameter)

    inferred = param_expr.type()
    assert inferred.equals(dt.Set(dt.int64))
def test_validate_type(): assert dt.validate_type is dt.dtype @pytest.mark.parametrize( ('spec', 'expected'), [ ('ARRAY<DOUBLE>', dt.Array(dt.double)), ('array<array<string>>', dt.Array(dt.Array(dt.string))), ('map<string, double>', dt.Map(dt.string, dt.double)), ( 'map<int64, array<map<string, int8>>>', dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))), ), ('set<uint8>', dt.Set(dt.uint8)), ([dt.uint8], dt.Array(dt.uint8)), ([dt.float32, dt.float64], dt.Array(dt.float64)), ({dt.string}, dt.Set(dt.string)), ('point', dt.point), ('point;4326', dt.point), ('point;4326:geometry', dt.point), ('point;4326:geography', dt.point), ('linestring', dt.linestring), ('linestring;4326', dt.linestring), ('linestring;4326:geometry', dt.linestring), ('linestring;4326:geography', dt.linestring), ('polygon', dt.polygon), ('polygon;4326', dt.polygon), ('polygon;4326:geometry', dt.polygon), ('polygon;4326:geography', dt.polygon),
("char(32)", dt.string), ("char byte", dt.binary), ("varchar(42)", dt.string), ("mediumtext", dt.string), ("text", dt.string), ("binary(42)", dt.binary), ("varbinary(42)", dt.binary), ("bit(1)", dt.int8), ("bit(9)", dt.int16), ("bit(17)", dt.int32), ("bit(33)", dt.int64), # mariadb doesn't have a distinct json type ("json", dt.string), ("enum('small', 'medium', 'large')", dt.string), ("inet6", dt.string), ("set('a', 'b', 'c', 'd')", dt.Set(dt.string)), ("mediumblob", dt.binary), ("blob", dt.binary), ("uuid", dt.string), ] @pytest.mark.parametrize( ("mysql_type", "expected_type"), [ param(mysql_type, ibis_type, id=mysql_type) for mysql_type, ibis_type in MYSQL_TYPES ], ) def test_get_schema_from_query(con, mysql_type, expected_type): raw_name = ibis.util.guid()
import ibis.expr.datatypes as dt
from ibis.common import IbisTypeError


def test_validate_type():
    """``dt.validate_type`` must be the very same object as ``dt.dtype``."""
    assert dt.validate_type is dt.dtype


@pytest.mark.parametrize(
    ('spec', 'expected'),
    [
        # Container types spelled as strings (case-insensitive, nestable).
        ('ARRAY<DOUBLE>', dt.Array(dt.double)),
        ('array<array<string>>', dt.Array(dt.Array(dt.string))),
        ('map<string, double>', dt.Map(dt.string, dt.double)),
        (
            'map<int64, array<map<string, int8>>>',
            dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))),
        ),
        ('set<uint8>', dt.Set(dt.uint8)),
        # Container types spelled as Python literals.
        ([dt.uint8], dt.Array(dt.uint8)),
        ([dt.float32, dt.float64], dt.Array(dt.float64)),
        ({dt.string}, dt.Set(dt.string)),
        # Geospatial types, with optional ";srid" and ":geotype" suffixes.
        ('point', dt.point),
        ('point;4326', dt.point),
        ('point;4326:geometry', dt.point),
        ('point;4326:geography', dt.point),
        ('linestring', dt.linestring),
        ('linestring;4326', dt.linestring),
        ('linestring;4326:geometry', dt.linestring),
        ('linestring;4326:geography', dt.linestring),
        ('polygon', dt.polygon),
        ('polygon;4326', dt.polygon),
        ('polygon;4326:geometry', dt.polygon),
        ('polygon;4326:geography', dt.polygon),
        ('multipolygon', dt.multipolygon),
        ('multipolygon;4326', dt.multipolygon),
        ('multipolygon;4326:geometry', dt.multipolygon),
        ('multipolygon;4326:geography', dt.multipolygon),
    ],
)
def test_dtype(spec, expected):
    """``dt.dtype`` turns each spec into a type equal to ``expected``."""
    parsed = dt.dtype(spec)
    assert parsed == expected
"TIMESTAMP": lambda nullable: dt.Timestamp( timezone="UTC", nullable=nullable, ), "LONGLONG": dt.Int64, "INT24": dt.Int32, "DATE": dt.Date, "TIME": dt.Time, "DATETIME": dt.Timestamp, "YEAR": dt.Int16, "VARCHAR": dt.String, "BIT": dt.Int8, "JSON": dt.JSON, "NEWDECIMAL": dt.Decimal, "ENUM": dt.String, "SET": lambda nullable: dt.Set(dt.string, nullable=nullable), "TINY_BLOB": dt.Binary, "MEDIUM_BLOB": dt.Binary, "LONG_BLOB": dt.Binary, "BLOB": dt.Binary, "VAR_STRING": dt.String, "STRING": dt.String, "GEOMETRY": dt.Geometry, } class _FieldFlags: """Flags used to disambiguate field types. Gaps in the flag numbers are because we do not map in flags that are of no use in determining the field's type, such as whether the field is a primary
# import ibis_bigquery.datatypes import pytest @pytest.mark.parametrize( ("spec", "expected"), [ ("ARRAY<DOUBLE>", dt.Array(dt.double)), ("array<array<string>>", dt.Array(dt.Array(dt.string))), ("map<string, double>", dt.Map(dt.string, dt.double)), ( "map<int64, array<map<string, int8>>>", dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))), ), ("set<uint8>", dt.Set(dt.uint8)), ([dt.uint8], dt.Array(dt.uint8)), ([dt.float32, dt.float64], dt.Array(dt.float64)), ({dt.string}, dt.Set(dt.string)), ("geography", dt.Geography()), ("geography;4326", dt.Geography(srid=4326)), ("geography;2000", dt.Geography(srid=2000)), ("geometry", dt.Geometry()), ("point", dt.point), ("point;4326", dt.point), ("point;4326:geometry", dt.point), ("point;4326:geography", dt.point), ("linestring", dt.linestring), ("linestring;4326", dt.linestring), ("linestring;4326:geometry", dt.linestring), ("linestring;4326:geography", dt.linestring),