def decode(cls, encoding, spec, df):
    return cls(df, spec['expression'],
               encoding.decode('dtype', spec['dtype']),
               encoding.decode('dtype', spec['dtype_item']),
               flatten=spec['flatten'],
               unique_limit=spec['unique_limit'],
               selection=spec['selection'])

def decode(cls, encoding, spec, df, nthreads):
    return cls(df, spec['expression'],
               encoding.decode('dtype', spec['dtype']),
               encoding.decode('dtype', spec['dtype_item']),
               flatten=spec['flatten'],
               unique_limit=spec['unique_limit'],
               selection=spec['selection'],
               return_inverse=spec['return_inverse'],
               nthreads=nthreads)

def decode(cls, encoding, spec, df):
    spec = spec.copy()
    spec['op'] = encoding.decode('_op', spec['op'])
    spec['dtype'] = encoding.decode('dtype', spec['dtype'])
    selection_waslist = spec.pop('selection_waslist')
    if selection_waslist:
        spec['selection'] = spec.pop('selections')
    else:
        spec['selection'] = spec.pop('selections')[0]
    spec['limits'] = list(zip(spec.pop('minima'), spec.pop('maxima')))
    return cls(df, **spec)

def decode(cls, encoding, spec, df):
    grid = encoding.decode('grid', spec['grid'])
    task = cls(df, grid)
    aggs = encoding.decode_list('aggregation', spec['aggregations'])
    for agg in aggs:
        agg._prepare_types(df)
        task.add_aggregation_operation(agg)
    return task

def test_encoding():
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('blobtest', {'someblob': b'1234'})
    wiredata = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(wiredata, encoding)
    values = encoding.decode('blobtest', data)
    assert values['someblob'] == b'1234'

def decode(cls, encoding, spec, df):
    # aggs = [vaex.agg._from_spec(agg_spec) for agg_spec in spec['aggregations']]
    aggs = encoding.decode_list('aggregation', spec['aggregations'])
    dtypes = encoding.decode_dict('dtype', spec['dtypes'])
    grid = encoding.decode('grid', spec['grid'])
    # dtypes = {expr: _deserialize_type(type_spec) for expr, type_spec in spec['dtypes'].items()}
    for agg in aggs:
        agg._prepare_types(df)
    return cls(df, grid, aggs, dtypes)

def rebuild_dataframe_vaex(df):
    # encode and decode round trip
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('dataframe', df)
    blob = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(blob, encoding)
    return encoding.decode('dataframe', data)

def rebuild_dataset_vaex(ds):
    # encode and decode round trip
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('dataset', ds)
    blob = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(blob, encoding)
    return encoding.decode('dataset', data)

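# A hedged sketch, not part of the vaex source: the encode -> serialize -> deserialize -> decode
# round trip is repeated verbatim in rebuild_dataframe_vaex, rebuild_dataset_vaex and the
# test_encoding_* tests. A generic helper could factor it out; the name `roundtrip` and its
# signature are assumptions for illustration, only the vaex.encoding calls are taken from the
# surrounding code.
import vaex.encoding


def roundtrip(kind, value):
    # encode with a fresh Encoding, push through the wire format, then decode with a new Encoding
    encoding = vaex.encoding.Encoding()
    data = encoding.encode(kind, value)
    blob = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(blob, encoding)
    return encoding.decode(kind, data)


# e.g. rebuild_dataset_vaex(ds) would then reduce to roundtrip('dataset', ds)
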
def test_encoding_numpy():
    x = np.arange(10, dtype='>f4')
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('ndarray', x)
    wiredata = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(wiredata, encoding)
    value = encoding.decode('ndarray', data)
    assert np.all(value == x)

def test_encoding_numpy_datetime():
    x = np.arange('2001', '2005', dtype='M')
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('ndarray', x)
    wiredata = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(wiredata, encoding)
    value = encoding.decode('ndarray', data)
    assert np.all(value == x)

def test_encoding_numpy_string_objects():
    x = np.array(['vaex', 'is', None, 'fast'])
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('ndarray', x)
    wiredata = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(wiredata, encoding)
    value = encoding.decode('ndarray', data)
    assert np.all(value == x)

def test_encoding_arrow(array_factory_arrow):
    x = array_factory_arrow(np.arange(10, dtype='f4'))
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('arrow-array', x)
    wiredata = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(wiredata, encoding)
    value = encoding.decode('arrow-array', data)
    assert value.to_pylist() == x.to_pylist()

def test_encoding_dtype():
    dtype = np.dtype('>f8')
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('dtype', dtype)
    wiredata = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(wiredata, encoding)
    print(data)
    value = encoding.decode('dtype', data)
    assert value == dtype
    assert value.is_numpy

def decode(encoding, binner_spec, nthreads):
    # reconstruct a binner instance from its serialized spec
    type = binner_spec['binner-type']
    dtype = encoding.decode('dtype', binner_spec['dtype'])
    if type == 'ordinal':
        cls = vaex.utils.find_type_from_dtype(vaex.superagg, "BinnerOrdinal_", dtype)
        return cls(nthreads, binner_spec['expression'], binner_spec['count'], binner_spec['minimum'], False)
    elif type == 'scalar':
        cls = vaex.utils.find_type_from_dtype(vaex.superagg, "BinnerScalar_", dtype)
        return cls(nthreads, binner_spec["expression"], binner_spec["minimum"], binner_spec["maximum"], binner_spec["count"])
    elif type == "hash":
        cls = vaex.utils.find_type_from_dtype(vaex.superagg, "BinnerHash_", dtype)
        # hash binners share a hash map, cached on the encoding by object id
        hash_map_unique_id = binner_spec["hash_map_unique"]
        if encoding.has_object(hash_map_unique_id):
            hash_map_unique = encoding.get_object(hash_map_unique_id)
        else:
            hash_map_unique_spec = encoding.get_object_spec(hash_map_unique_id)
            hash_map_unique = encoding.decode("hash-map-unique", hash_map_unique_spec)
            encoding.set_object(hash_map_unique_id, hash_map_unique)
        return cls(binner_spec["expression"], hash_map_unique._internal)
    else:
        raise ValueError('Cannot deserialize: %r' % binner_spec)

def create_task_part():
    nonlocal memory_usage
    task_part = encoding.decode('task-part-cpu', spec, df=task.df, nthreads=nthreads)
    memory_usage += task_part.memory_usage()
    for task_checker in task_checkers:
        task_checker.add_task(task)
    if task.requires_fingerprint:
        task_part.fingerprint = task.fingerprint()
    return task_part

def test_encoding_numpy_masked():
    x = np.arange(10, dtype='>f4')
    mask = x > 4
    x = np.ma.array(x, mask=mask)
    encoding = vaex.encoding.Encoding()
    data = encoding.encode('ndarray', x)
    wiredata = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    data = vaex.encoding.deserialize(wiredata, encoding)
    value = encoding.decode('ndarray', data)
    assert np.all(value == x)
    assert np.all(value.mask == x.mask)

def rebuild_with_skip(ds, skip):
    repr(ds)  # for coverage
    # encode and decode round trip, skipping serialization of the object `skip`
    encoding = vaex.encoding.Encoding()
    encoding.set_object_spec(skip.id, None)  # this will cause it to skip serialization
    data = encoding.encode('dataset', ds)
    assert encoding._object_specs[skip.id] is None
    del encoding._object_specs[skip.id]
    blob = vaex.encoding.serialize(data, encoding)
    encoding = vaex.encoding.Encoding()
    encoding.set_object(skip.id, skip)
    data = vaex.encoding.deserialize(blob, encoding)
    return encoding.decode('dataset', data)

def decode(encoding, binner_spec):
    type = binner_spec['binner-type']
    dtype = encoding.decode('dtype', binner_spec['dtype'])
    if type == 'ordinal':
        cls = vaex.utils.find_type_from_dtype(vaex.superagg, "BinnerOrdinal_", dtype)
        return cls(binner_spec['expression'], binner_spec['count'], binner_spec['minimum'])
    elif type == 'scalar':
        cls = vaex.utils.find_type_from_dtype(vaex.superagg, "BinnerScalar_", dtype)
        return cls(binner_spec['expression'], binner_spec['minimum'], binner_spec['maximum'], binner_spec['count'])
    else:
        raise ValueError('Cannot deserialize: %r' % binner_spec)

def decode(cls, encoding, spec, df):
    binners = tuple(encoding.decode_list('binner', spec['binners']))
    agg = encoding.decode('aggregation', spec['aggregation'])
    task = cls(df, binners, agg)
    return task

def decode(cls, encoding, spec, df, nthreads):
    spec = spec.copy()
    spec['op'] = encoding.decode('_op', spec['op'])
    spec['dtype'] = encoding.decode('dtype', spec['dtype'])
    return cls(df, **spec)

def decode(cls, encoding, spec, df):
    spec = spec.copy()
    del spec['task']
    spec['op'] = encoding.decode('_op', spec['op'])
    spec['dtype'] = encoding.decode('dtype', spec['dtype'])
    return cls(df, **spec)

def create_task_part():
    return encoding.decode('task-part-cpu', spec, df=run.df)