def _can_cast_implicit(self, typename):
    """Decide whether this map value may be implicitly cast to ``typename``.

    Non-map targets are always rejected.  For map targets the cast is
    accepted when the parent class accepts it, or when this value's own
    type equals ``Map(null, null)`` or ``Map(any, any)``.
    """
    # Anything that is not a map type is rejected outright.
    if not isinstance(typename, dt.Map):
        return False

    current_type = self.type()

    # Defer to the parent implementation first.
    inherited = super(MapValue, self)._can_cast_implicit(typename)
    if inherited:
        return inherited

    # Fall back to the two placeholder map types.
    matches_null_map = current_type.equals(dt.Map(dt.null, dt.null))
    if matches_null_map:
        return matches_null_map
    return current_type.equals(dt.Map(dt.any, dt.any))
def test_struct_with_string_types():
    """A struct built from type strings equals one built from datatypes."""
    string_fields = [
        ('a', 'map<double, string>'),
        ('b', 'array<map<string, array<int32>>>'),
        ('c', 'array<string>'),
        ('d', 'int8'),
    ]
    result = dt.Struct.from_tuples(string_fields)

    typed_fields = [
        ('a', dt.Map(dt.double, dt.string)),
        ('b', dt.Array(dt.Map(dt.string, dt.Array(dt.int32)))),
        ('c', dt.Array(dt.string)),
        ('d', dt.int8),
    ]
    expected = dt.Struct.from_tuples(typed_fields)

    assert result == expected
def higher_precedence(left, right):
    """Return the type with the higher precedence out of ``left``/``right``.

    Scalar types found in ``_SCALAR_TYPE_PRECEDENCE`` are compared by their
    rank there (``left`` wins ties).  Array, map and struct types are
    rebuilt by recursing into their component types, dispatching on the
    kind of ``left`` only.

    Raises
    ------
    TypeError
        If two structs have different field names, or if ``left`` is none
        of the handled kinds.
    """
    ranks = _SCALAR_TYPE_PRECEDENCE
    left_key = left.name.lower()
    right_key = right.name.lower()

    if left_key in ranks and right_key in ranks:
        # Strictly-greater comparison so that equal ranks resolve to left.
        return right if ranks[right_key] > ranks[left_key] else left

    # TODO(phillipc): Ensure that left and right are API compatible

    if isinstance(left, dt.Array):
        element_type = higher_precedence(left.value_type, right.value_type)
        return dt.Array(element_type)

    if isinstance(left, dt.Map):
        key_type = higher_precedence(left.key_type, right.key_type)
        value_type = higher_precedence(left.value_type, right.value_type)
        return dt.Map(key_type, value_type)

    if isinstance(left, dt.Struct):
        if left.names != right.names:
            raise TypeError('Struct names are not equal')
        field_types = [
            higher_precedence(left_field, right_field)
            for left_field, right_field in zip(left.types, right.types)
        ]
        return dt.Struct(left.names, field_types)

    raise TypeError(
        'Cannot compute precedence for {} and {} types'.format(left, right)
    )
def from_pyarrow_map(
    arrow_type: pa.MapType, nullable: bool = True
) -> dt.DataType:
    """Build a ``dt.Map`` from a pyarrow map type.

    The arrow type's ``key_type`` and ``item_type`` are each converted with
    ``dt.dtype``; ``nullable`` is forwarded to the resulting map type.
    """
    key_dtype = dt.dtype(arrow_type.key_type)
    value_dtype = dt.dtype(arrow_type.item_type)
    return dt.Map(key_dtype, value_dtype, nullable=nullable)
def map():
    # Token sequence consumed: "map", LPAREN, <type>, COMMA, <type>, RPAREN.
    yield dt.spaceless_string("map")
    yield dt.LPAREN
    parsed_key = yield ty
    yield dt.COMMA
    parsed_value = yield ty
    yield dt.RPAREN
    # The resulting map type is always built as non-nullable here.
    map_type = dt.Map(parsed_key, parsed_value, nullable=False)
    return map_type
def test_complex_datatype_parse(benchmark):
    """Check and benchmark ``dt.parse`` on a nested type string."""
    type_str = "array<struct<a: array<string>, b: map<string, array<int64>>>>"

    struct_fields = {
        'a': dt.Array(dt.string),
        'b': dt.Map(dt.string, dt.Array(dt.int64)),
    }
    expected = dt.Array(dt.Struct.from_dict(struct_fields))

    assert dt.parse(type_str) == expected
    benchmark(dt.parse, type_str)
def infer_literal_type(value):
    """Map a Python literal to the corresponding ``dt`` datatype.

    Checks run in a fixed order (e.g. ``bool`` before integers,
    ``datetime`` before ``date``, ``OrderedDict`` before ``dict``) and the
    first match wins.

    Raises
    ------
    TypeError
        For an empty ``OrderedDict``.
    com.InputTypeError
        When ``value`` matches none of the handled kinds.
    """
    import ibis.expr.rules as rules

    if value is None or value is null:
        return dt.null

    # Scalars.
    if isinstance(value, bool):
        return dt.boolean
    if isinstance(value, compat.integer_types):
        return rules.int_literal_class(value)
    if isinstance(value, float):
        return dt.double
    if isinstance(value, six.string_types):
        return dt.string

    # Temporal values.
    if isinstance(value, datetime.timedelta):
        return dt.interval
    if isinstance(value, datetime.datetime):
        return dt.timestamp
    if isinstance(value, datetime.date):
        return dt.date
    if isinstance(value, datetime.time):
        return dt.time

    # Containers.
    if isinstance(value, list):
        if value:
            elements = [literal(item) for item in value]
            return dt.Array(rules.highest_precedence_type(elements))
        return dt.Array(dt.null)

    if isinstance(value, collections.OrderedDict):
        if not value:
            raise TypeError('Empty struct type not supported')
        field_names = list(value.keys())
        field_types = [literal(element).type() for element in value.values()]
        return dt.Struct(field_names, field_types)

    if isinstance(value, dict):
        if not value:
            return dt.Map(dt.null, dt.null)
        # Resolve the key type completely before touching the values.
        key_type = rules.highest_precedence_type(
            [literal(key) for key in value.keys()]
        )
        value_type = rules.highest_precedence_type(
            [literal(item) for item in value.values()]
        )
        return dt.Map(key_type, value_type)

    raise com.InputTypeError(value)
import ibis import ibis.expr.datatypes as dt from ibis.common.exceptions import IbisTypeError def test_validate_type(): assert dt.validate_type is dt.dtype @pytest.mark.parametrize( ('spec', 'expected'), [ ('ARRAY<DOUBLE>', dt.Array(dt.double)), ('array<array<string>>', dt.Array(dt.Array(dt.string))), ('map<string, double>', dt.Map(dt.string, dt.double)), ( 'map<int64, array<map<string, int8>>>', dt.Map(dt.int64, dt.Array(dt.Map(dt.string, dt.int8))), ), ('set<uint8>', dt.Set(dt.uint8)), ([dt.uint8], dt.Array(dt.uint8)), ([dt.float32, dt.float64], dt.Array(dt.float64)), ({dt.string}, dt.Set(dt.string)), ('point', dt.point), ('point;4326', dt.point), ('point;4326:geometry', dt.point), ('point;4326:geography', dt.point), ('linestring', dt.linestring), ('linestring;4326', dt.linestring), ('linestring;4326:geometry', dt.linestring),
[ pytest.param(dt.int8, 26, ibis.literal(26)), pytest.param(dt.int16, 26, ibis.literal(26)), pytest.param(dt.int32, 26, ibis.literal(26)), pytest.param(dt.int64, 26, ibis.literal(26)), pytest.param(dt.uint8, 26, ibis.literal(26)), pytest.param(dt.uint16, 26, ibis.literal(26)), pytest.param(dt.uint32, 26, ibis.literal(26)), pytest.param(dt.uint64, 26, ibis.literal(26)), pytest.param(dt.float32, 26, ibis.literal(26)), pytest.param(dt.float64, 26.4, ibis.literal(26.4)), pytest.param(dt.double, 26.3, ibis.literal(26.3)), pytest.param(dt.string, 'bar', ibis.literal('bar')), pytest.param(dt.Array(dt.float), [3.4, 5.6], ibis.literal([3.4, 5.6])), pytest.param( dt.Map(dt.string, dt.Array(dt.boolean)), { 'a': [True, False], 'b': [True] }, ibis.literal({ 'a': [True, False], 'b': [True] }), id='map_literal', ), ], ) def test_valid_value(dtype, value, expected): result = rlz.value(dtype, value) assert result.equals(expected)
def test_nested_map():
    """``dt.validate_type`` handles a map nested inside an array in a map."""
    result = dt.validate_type('map<int64, array<map<string, int8>>>')

    inner_map = dt.Map(dt.string, dt.int8)
    expected = dt.Map(dt.int64, dt.Array(inner_map))

    assert result == expected
def test_nested_map():
    """``dt.dtype`` handles a map nested inside an array in a map."""
    inner_map = dt.Map(dt.string, dt.int8)
    expected = dt.Map(dt.int64, dt.Array(inner_map))

    parsed = dt.dtype('map<int64, array<map<string, int8>>>')

    assert parsed == expected
C=dt.Timestamp('UTC'), D=dt.Timestamp('UTC'), E=dt.int8, F=dt.int8, G=dt.uint64, H=dt.uint32, I=dt.uint16, J=dt.uint8, K=dt.uuid, L=dt.string, M=dt.string, N=dt.string, O=dt.string, P=dt.string, Q=dt.Array(dt.int32), R=dt.Map(dt.string, dt.int64), S=dt.Struct.from_dict( dict( a=dt.int32, b=dt.string, c=dt.Array(dt.Map(dt.string, dt.Array(dt.float64))), ) ), ) @pytest.mark.parametrize( ("column", "type"), [ param(colname, type, id=type.lower()) for colname, type in [
def spark_map_dtype_to_ibis_dtype(spark_type_obj, nullable=True):
    """Convert a Spark map type object into a ``dt.Map``.

    The key and value types are converted with ``dt.dtype``; the value
    type's nullability is taken from ``valueContainsNull`` and ``nullable``
    applies to the map type itself.
    """
    ibis_key = dt.dtype(spark_type_obj.keyType)
    ibis_value = dt.dtype(
        spark_type_obj.valueType,
        nullable=spark_type_obj.valueContainsNull,
    )
    return dt.Map(ibis_key, ibis_value, nullable=nullable)
def test_complex_datatype_builtins(benchmark, func):
    """Benchmark ``func`` applied to a nested array-of-struct datatype."""
    struct_fields = {
        'a': dt.Array(dt.string),
        'b': dt.Map(dt.string, dt.Array(dt.int64)),
    }
    datatype = dt.Array(dt.Struct.from_dict(struct_fields))
    benchmark(func, datatype)
[ pytest.param( [ obj for _, obj in inspect.getmembers( dt, lambda obj: isinstance(obj, dt.DataType), ) ], id="singletons", ), pytest.param( dt.Array( dt.Struct.from_dict( dict( a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64)), ))), id="complex", ), ], ) def test_eq_datatypes(benchmark, dtypes): def eq(a, b): assert a == b benchmark(eq, dtypes, copy.deepcopy(dtypes)) def multiple_joins(table, num_joins): for _ in range(num_joins): table = table.mutate(dummy=ibis.literal(""))
def _validate(self, args, i):
    """Validate argument ``i`` and cast a ``Map(any, any)`` to the target.

    The parent validation runs first.  If the validated argument's type
    equals ``Map(any, any)`` it is cast to the single type held in
    ``self.types``; otherwise it is returned unchanged.
    """
    validated = super(MapValueTyped, self)._validate(args, i)
    (target_type,) = self.types

    if not validated.type().equals(dt.Map(dt.any, dt.any)):
        return validated
    return validated.cast(target_type)
def __init__(self, key_type, value_type, *args, **kwargs):
    """Initialise the parent with ``dt.Map(key_type, value_type)``.

    Remaining positional and keyword arguments are forwarded unchanged.
    """
    map_type = dt.Map(key_type, value_type)
    super(MapValueTyped, self).__init__(map_type, *args, **kwargs)
def test_scalar_param_map(backend, con):
    """Indexing a map-typed scalar parameter returns the bound value's item."""
    value = {'a': 'ghi', 'b': 'def', 'c': 'abc'}
    map_param = ibis.param(dt.Map(dt.string, dt.string))

    lookup = map_param['b']
    bindings = {map_param: value}
    result = con.execute(lookup, params=bindings)

    assert result == value['b']
def test_array_with_string_value_type():
    """``dt.Array`` accepts its value type as a type string."""
    assert dt.Array('int32') == dt.Array(dt.int32)

    from_string = dt.Array(dt.Array('array<map<string, double>>'))
    explicit = dt.Array(
        dt.Array(dt.Array(dt.Map(dt.string, dt.double)))
    )
    assert from_string == explicit
def test_map_with_string_value_type():
    """``dt.Map`` accepts its key and value types as type strings."""
    scalar_from_strings = dt.Map('int32', 'double')
    assert scalar_from_strings == dt.Map(dt.int32, dt.double)

    nested_from_strings = dt.Map('int32', 'array<double>')
    assert nested_from_strings == dt.Map(dt.int32, dt.Array(dt.double))
('dtype', 'value', 'expected'), [ pytest.param(dt.int8, 26, ibis.literal(26)), pytest.param(dt.int16, 26, ibis.literal(26)), pytest.param(dt.int32, 26, ibis.literal(26)), pytest.param(dt.int64, 26, ibis.literal(26)), pytest.param(dt.uint8, 26, ibis.literal(26)), pytest.param(dt.uint16, 26, ibis.literal(26)), pytest.param(dt.uint32, 26, ibis.literal(26)), pytest.param(dt.uint64, 26, ibis.literal(26)), pytest.param(dt.float32, 26, ibis.literal(26)), pytest.param(dt.float64, 26.4, ibis.literal(26.4)), pytest.param(dt.double, 26.3, ibis.literal(26.3)), pytest.param(dt.string, 'bar', ibis.literal('bar')), pytest.param(dt.Array(dt.float), [3.4, 5.6], ibis.literal([3.4, 5.6])), pytest.param(dt.Map(dt.string, dt.Array(dt.boolean)), { 'a': [True, False], 'b': [True] }, ibis.literal({ 'a': [True, False], 'b': [True] }), id='map_literal'), ], ) def test_valid_value(dtype, value, expected): result = rlz.value(dtype, value) assert result.equals(expected)
def df3(npartitions): pandas_df = pd.DataFrame({ 'key': list('ac'), 'other_value': [4.0, 6.0], 'key2': list('ae'), 'key3': list('fe'), }) return dd.from_pandas(pandas_df, npartitions=npartitions) t_schema = { 'decimal': dt.Decimal(4, 3), 'array_of_float64': dt.Array(dt.double), 'array_of_int64': dt.Array(dt.int64), 'array_of_strings': dt.Array(dt.string), 'map_of_strings_integers': dt.Map(dt.string, dt.int64), 'map_of_integers_strings': dt.Map(dt.int64, dt.string), 'map_of_complex_values': dt.Map(dt.string, dt.Array(dt.int64)), } @pytest.fixture(scope='module') def t(client): return client.table('df', schema=t_schema) @pytest.fixture(scope='module') def lahman(batting_df, awards_players_df): return connect({ 'batting': batting_df, 'awards_players': awards_players_df
def test_map_get_with_compatible_value_bigger():
    """A default wider than the map's int8 values gives an int16 result."""
    map_literal = ibis.literal({'A': 1, 'B': 2})
    lookup = map_literal.get('C', 3000)

    assert map_literal.type() == dt.Map(dt.string, dt.int8)
    assert lookup.type() == dt.int16
decimal = value(dt.Decimal) floating = value(dt.float64) date = value(dt.date) time = value(dt.time) timestamp = value(dt.Timestamp) category = value(dt.category) temporal = one_of([timestamp, date, time]) strict_numeric = one_of([integer, floating, decimal]) soft_numeric = one_of([integer, floating, decimal, boolean]) numeric = soft_numeric set_ = value(dt.Set) array = value(dt.Array) struct = value(dt.Struct) mapping = value(dt.Map(dt.any, dt.any)) geospatial = value(dt.GeoSpatial) point = value(dt.Point) linestring = value(dt.LineString) polygon = value(dt.Polygon) multilinestring = value(dt.MultiLineString) multipoint = value(dt.MultiPoint) multipolygon = value(dt.MultiPolygon) @validator def interval(arg, units=None): arg = value(dt.Interval, arg) unit = arg.type().unit if units is not None and unit not in units:
# pandas types (pd.Timestamp('2015-01-01 12:00:00', tz='US/Eastern'), dt.Timestamp('US/Eastern')), # parametric types (list('abc'), dt.Array(dt.string)), ([1, 2, 3], dt.Array(dt.int8)), ([1, 128], dt.Array(dt.int16)), ([1, 128, 32768], dt.Array(dt.int32)), ([1, 128, 32768, 2147483648], dt.Array(dt.int64)), ({ 'a': 1, 'b': 2, 'c': 3 }, dt.Map(dt.string, dt.int8)), ({ 1: 2, 3: 4, 5: 6 }, dt.Map(dt.int8, dt.int8)), ({ 'a': [1.0, 2.0], 'b': [], 'c': [3.0] }, dt.Map(dt.string, dt.Array(dt.double))), (OrderedDict([('a', 1), ('b', list('abc')), ('c', OrderedDict([('foo', [1.0, 2.0])]))]), dt.Struct.from_tuples( [('a', dt.int8), ('b', dt.Array(dt.string)), ('c', dt.Struct.from_tuples([('foo', dt.Array(dt.double))]))]))
def test_map_get_with_compatible_value_smaller():
    """A default narrower than the map's int16 values still gives int16."""
    map_literal = ibis.literal({'A': 1000, 'B': 2000})
    lookup = map_literal.get('C', 3)

    assert map_literal.type() == dt.Map(dt.string, dt.int16)
    assert lookup.type() == dt.int16
def test_whole_schema():
    """A table declared with type strings has the expected typed schema."""
    # Column type specs, spelled exactly as they are passed to ``ibis.table``.
    address_spec = ('struct<city: string, street: string, '
                    'street_number: int32, zip: int16>')
    orders_spec = """array<struct< oid: int64, status: string, totalprice: decimal(12, 2), order_date: string, items: array<struct< iid: int64, name: string, price: decimal(12, 2), discount_perc: decimal(12, 2), shipdate: string >> >>"""  # noqa: E501
    web_visits_spec = ('map<string, struct<user_agent: string, '
                       'client_ip: string, visit_date: string, '
                       'duration_ms: int32>>')
    support_calls_spec = ('array<struct<agent_id: int64, '
                          'call_date: string, duration_ms: int64, '
                          'issue_resolved: boolean, '
                          'agent_comment: string>>')

    columns = [
        ('cid', 'int64'),
        ('mktsegment', 'string'),
        ('address', address_spec),
        ('phone_numbers', 'array<string>'),
        ('orders', orders_spec),
        ('web_visits', web_visits_spec),
        ('support_calls', support_calls_spec),
    ]
    customers = ibis.table(columns, name='customers')

    # Expected datatypes, assembled from the innermost structs outwards.
    address_type = dt.Struct.from_tuples([
        ('city', dt.string),
        ('street', dt.string),
        ('street_number', dt.int32),
        ('zip', dt.int16),
    ])
    item_type = dt.Struct.from_tuples([
        ('iid', dt.int64),
        ('name', dt.string),
        ('price', dt.Decimal(12, 2)),
        ('discount_perc', dt.Decimal(12, 2)),
        ('shipdate', dt.string),
    ])
    order_type = dt.Struct.from_tuples([
        ('oid', dt.int64),
        ('status', dt.string),
        ('totalprice', dt.Decimal(12, 2)),
        ('order_date', dt.string),
        ('items', dt.Array(item_type)),
    ])
    visit_type = dt.Struct.from_tuples([
        ('user_agent', dt.string),
        ('client_ip', dt.string),
        ('visit_date', dt.string),
        ('duration_ms', dt.int32),
    ])
    call_type = dt.Struct.from_tuples([
        ('agent_id', dt.int64),
        ('call_date', dt.string),
        ('duration_ms', dt.int64),
        ('issue_resolved', dt.boolean),
        ('agent_comment', dt.string),
    ])

    expected = ibis.Schema.from_tuples([
        ('cid', dt.int64),
        ('mktsegment', dt.string),
        ('address', address_type),
        ('phone_numbers', dt.Array(dt.string)),
        ('orders', dt.Array(order_type)),
        ('web_visits', dt.Map(dt.string, visit_type)),
        ('support_calls', dt.Array(call_type)),
    ])

    assert customers.schema() == expected
def test_map_get_with_null_on_not_nullable(null_value):
    """A null default on a map with non-nullable values raises IbisTypeError."""
    non_nullable_values = dt.Int16(nullable=False)
    map_type = dt.Map(dt.string, non_nullable_values)

    value = ibis.literal({'A': 1000, 'B': 2000}).cast(map_type)
    assert value.type() == map_type

    with pytest.raises(IbisTypeError):
        assert value.get('C', null_value)
def test_map():
    """``dt.validate_type`` turns a map type string into a ``dt.Map``."""
    result = dt.validate_type('map<string, double>')
    expected = dt.Map(dt.string, dt.double)
    assert result == expected