示例#1
0
def _type_from_cursor_info(descr, field) -> dt.DataType:
    """Construct an ibis type from MySQL field descr and field result metadata.

    This method is complex because the MySQL protocol is complex.

    Types are not encoded in a self contained way, meaning you need
    multiple pieces of information coming from the result set metadata to
    determine the most precise type for a field. Even then, the decoding is
    not high fidelity in some cases: UUIDs for example are decoded as
    strings, because the protocol does not appear to preserve the logical
    type, only the physical type.

    Parameters
    ----------
    descr
        Seven-element description tuple for the field. Only the type code
        (index 1), the field length (index 4) and the scale (index 5) are
        used.
    field
        Field result metadata; its ``flags`` and ``charsetnr`` attributes
        are read.

    Returns
    -------
    dt.DataType
        The inferred type, always constructed with ``nullable=True``.

    Raises
    ------
    NotImplementedError
        If the type code has no entry in ``_type_codes``.
    AssertionError
        If a ``BIT`` field reports a length greater than 64.
    """
    from pymysql.connections import TEXT_TYPES

    _, type_code, _, _, field_length, scale, _ = descr
    flags = _FieldFlags(field.flags)
    typename = _type_codes.get(type_code)
    if typename is None:
        raise NotImplementedError(
            f"MySQL type code {type_code:d} is not supported")

    # Start from the default mapping; the branches below refine it using
    # the extra metadata (length, scale, flags, charset).
    typ = _type_mapping[typename]

    if typename in ("DECIMAL", "NEWDECIMAL"):
        precision = _decimal_length_to_precision(
            length=field_length,
            scale=scale,
            is_unsigned=flags.is_unsigned,
        )
        typ = partial(typ, precision=precision, scale=scale)
    elif typename == "BIT":
        # pick the narrowest integer type that can hold `field_length` bits
        if field_length <= 8:
            typ = dt.int8
        elif field_length <= 16:
            typ = dt.int16
        elif field_length <= 32:
            typ = dt.int32
        elif field_length <= 64:
            typ = dt.int64
        else:
            # Raise explicitly instead of `assert False`: assert statements
            # are removed under `python -O`, which would silently keep the
            # default mapping for an invalid field length.
            raise AssertionError("invalid field length for BIT type")
    else:
        if flags.is_set:
            # sets are limited to strings
            typ = dt.Set(dt.string)
        elif flags.is_unsigned and flags.is_num:
            # swap in the "U"-prefixed counterpart of the mapped type
            typ = getattr(dt, f"U{typ.__name__}")
        elif type_code in TEXT_TYPES:
            # binary text
            if field.charsetnr == MY_CHARSET_BIN:
                typ = dt.Binary
            else:
                typ = dt.String

    # projection columns are always nullable
    return typ(nullable=True)
示例#2
0
def test_scalar_parameter_set():
    """A set-literal spec given to ``ibis.param`` yields a Set-typed parameter."""
    param_expr = ibis.param({dt.int64})

    # the underlying operation must be a scalar parameter node ...
    node = param_expr.op()
    assert isinstance(node, ops.ScalarParameter)

    # ... and its type must be a set of int64
    expected_type = dt.Set(dt.int64)
    assert param_expr.type().equals(expected_type)
示例#3
0
def test_validate_type():
    """``dt.validate_type`` must be the very same object as ``dt.dtype``."""
    validator = dt.validate_type
    assert validator is dt.dtype


@pytest.mark.parametrize(
    ('spec', 'expected'),
    [
        ('ARRAY<DOUBLE>', dt.Array(dt.double)),
        ('array<array<string>>', dt.Array(dt.Array(dt.string))),
        ('map<string, double>', dt.Map(dt.string, dt.double)),
        (
            'map<int64, array<map<string, int8>>>',
            dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))),
        ),
        ('set<uint8>', dt.Set(dt.uint8)),
        ([dt.uint8], dt.Array(dt.uint8)),
        ([dt.float32, dt.float64], dt.Array(dt.float64)),
        ({dt.string}, dt.Set(dt.string)),
        ('point', dt.point),
        ('point;4326', dt.point),
        ('point;4326:geometry', dt.point),
        ('point;4326:geography', dt.point),
        ('linestring', dt.linestring),
        ('linestring;4326', dt.linestring),
        ('linestring;4326:geometry', dt.linestring),
        ('linestring;4326:geography', dt.linestring),
        ('polygon', dt.polygon),
        ('polygon;4326', dt.polygon),
        ('polygon;4326:geometry', dt.polygon),
        ('polygon;4326:geography', dt.polygon),
示例#4
0
    ("char(32)", dt.string),
    ("char byte", dt.binary),
    ("varchar(42)", dt.string),
    ("mediumtext", dt.string),
    ("text", dt.string),
    ("binary(42)", dt.binary),
    ("varbinary(42)", dt.binary),
    ("bit(1)", dt.int8),
    ("bit(9)", dt.int16),
    ("bit(17)", dt.int32),
    ("bit(33)", dt.int64),
    # mariadb doesn't have a distinct json type
    ("json", dt.string),
    ("enum('small', 'medium', 'large')", dt.string),
    ("inet6", dt.string),
    ("set('a', 'b', 'c', 'd')", dt.Set(dt.string)),
    ("mediumblob", dt.binary),
    ("blob", dt.binary),
    ("uuid", dt.string),
]


@pytest.mark.parametrize(
    ("mysql_type", "expected_type"),
    [
        param(mysql_type, ibis_type, id=mysql_type)
        for mysql_type, ibis_type in MYSQL_TYPES
    ],
)
def test_get_schema_from_query(con, mysql_type, expected_type):
    raw_name = ibis.util.guid()
示例#5
0
import ibis.expr.datatypes as dt
from ibis.common import IbisTypeError


def test_validate_type():
    """``dt.validate_type`` must be the very same object as ``dt.dtype``."""
    validator = dt.validate_type
    assert validator is dt.dtype


@pytest.mark.parametrize(
    ('spec', 'expected'),
    [
        # container types spelled as strings
        ('ARRAY<DOUBLE>', dt.Array(dt.double)),
        ('array<array<string>>', dt.Array(dt.Array(dt.string))),
        ('map<string, double>', dt.Map(dt.string, dt.double)),
        (
            'map<int64, array<map<string, int8>>>',
            dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))),
        ),
        ('set<uint8>', dt.Set(dt.uint8)),
        # container types spelled as Python list / set literals
        ([dt.uint8], dt.Array(dt.uint8)),
        ([dt.float32, dt.float64], dt.Array(dt.float64)),
        ({dt.string}, dt.Set(dt.string)),
        # geospatial specs; the `;4326` and `:geometry` / `:geography`
        # suffixes do not change the expected type in these cases
        ('point', dt.point),
        ('point;4326', dt.point),
        ('point;4326:geometry', dt.point),
        ('point;4326:geography', dt.point),
        ('linestring', dt.linestring),
        ('linestring;4326', dt.linestring),
        ('linestring;4326:geometry', dt.linestring),
        ('linestring;4326:geography', dt.linestring),
        ('polygon', dt.polygon),
        ('polygon;4326', dt.polygon),
        ('polygon;4326:geometry', dt.polygon),
        ('polygon;4326:geography', dt.polygon),
        ('multipolygon', dt.multipolygon),
        ('multipolygon;4326', dt.multipolygon),
        ('multipolygon;4326:geometry', dt.multipolygon),
        ('multipolygon;4326:geography', dt.multipolygon),
    ],
)
def test_dtype(spec, expected):
    """``dt.dtype`` turns each spec into the expected ibis data type."""
    parsed = dt.dtype(spec)
    assert parsed == expected
示例#6
0
    "TIMESTAMP": lambda nullable: dt.Timestamp(
        timezone="UTC",
        nullable=nullable,
    ),
    "LONGLONG": dt.Int64,
    "INT24": dt.Int32,
    "DATE": dt.Date,
    "TIME": dt.Time,
    "DATETIME": dt.Timestamp,
    "YEAR": dt.Int16,
    "VARCHAR": dt.String,
    "BIT": dt.Int8,
    "JSON": dt.JSON,
    "NEWDECIMAL": dt.Decimal,
    "ENUM": dt.String,
    "SET": lambda nullable: dt.Set(dt.string, nullable=nullable),
    "TINY_BLOB": dt.Binary,
    "MEDIUM_BLOB": dt.Binary,
    "LONG_BLOB": dt.Binary,
    "BLOB": dt.Binary,
    "VAR_STRING": dt.String,
    "STRING": dt.String,
    "GEOMETRY": dt.Geometry,
}


class _FieldFlags:
    """Flags used to disambiguate field types.

    Gaps in the flag numbers are because we do not map in flags that are of no
    use in determining the field's type, such as whether the field is a primary
# import ibis_bigquery.datatypes
import pytest


@pytest.mark.parametrize(
    ("spec", "expected"),
    [
        ("ARRAY<DOUBLE>", dt.Array(dt.double)),
        ("array<array<string>>", dt.Array(dt.Array(dt.string))),
        ("map<string, double>", dt.Map(dt.string, dt.double)),
        (
            "map<int64, array<map<string, int8>>>",
            dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))),
        ),
        ("set<uint8>", dt.Set(dt.uint8)),
        ([dt.uint8], dt.Array(dt.uint8)),
        ([dt.float32, dt.float64], dt.Array(dt.float64)),
        ({dt.string}, dt.Set(dt.string)),
        ("geography", dt.Geography()),
        ("geography;4326", dt.Geography(srid=4326)),
        ("geography;2000", dt.Geography(srid=2000)),
        ("geometry", dt.Geometry()),
        ("point", dt.point),
        ("point;4326", dt.point),
        ("point;4326:geometry", dt.point),
        ("point;4326:geography", dt.point),
        ("linestring", dt.linestring),
        ("linestring;4326", dt.linestring),
        ("linestring;4326:geometry", dt.linestring),
        ("linestring;4326:geography", dt.linestring),