def test_compression_takes_advantage_of_itemsize():
    """Serialized frames must compress better when the real itemsize is kept.

    The same int64 data is serialized once with its own dtype and once as a
    ``u1`` view; after ``maybe_compress`` the former must be smaller.
    """
    pytest.importorskip('lz4')
    blosc = pytest.importorskip('blosc')
    x = np.arange(1000000, dtype='i8')

    # Sanity check on blosc itself: the matching typesize wins.
    with_itemsize = blosc.compress(x.data, typesize=8)
    without_itemsize = blosc.compress(x.data, typesize=1)
    assert len(with_itemsize) < len(without_itemsize)

    def compressed_nbytes(arr):
        # Serialize, compress every frame, and total the resulting sizes.
        _, frames = serialize(arr)
        packed = [maybe_compress(frame)[1] for frame in frames]
        return sum(map(nbytes, packed))

    assert compressed_nbytes(x) < compressed_nbytes(x.view('u1'))
def serialize_numpy_ndarray(x):
    """Serialize a NumPy array into a ``(header, frames)`` pair.

    Arrays whose dtype contains Python objects are pickled whole.  Any other
    array is made contiguous and exposed as raw bytes; when ``blosc`` is
    truthy those bytes are split with ``frame_split_size`` and every piece is
    compressed using the dtype's itemsize as blosc typesize.
    """
    if x.dtype.hasobject:
        return {'pickle': True}, [pickle.dumps(x)]

    size = itemsize(x.dtype)
    # Kind 'V' dtypes are described by ``.descr`` instead of a type string.
    dt = x.dtype.descr if x.dtype.kind == 'V' else x.dtype.str

    # cannot get .data attribute from discontiguous
    x = np.ascontiguousarray(x)
    header = {'dtype': dt, 'strides': x.strides, 'shape': x.shape}
    raw = x.view('u1').data

    if not blosc:
        frames = [raw]
    else:
        pieces = frame_split_size([raw])
        if sys.version_info.major == 2:
            pieces = [ensure_bytes(piece) for piece in pieces]
        frames = [
            blosc.compress(piece, typesize=size, cname='lz4', clevel=5)
            for piece in pieces
        ]
        header['compression'] = ['blosc'] * len(frames)

    header['lengths'] = [x.nbytes]
    return header, frames
def convert(values):
    """Convert the numpy values to a list or to a (compressed) byte string.

    Object-dtype arrays are returned as a plain list.  Every other array is
    flattened and dumped to raw bytes, which are then compressed with zlib or
    blosc depending on the enclosing ``compressor`` setting, or returned
    uncompressed when no (available) compressor is selected.
    """
    dtype = values.dtype
    if needs_i8_conversion(dtype):
        values = values.view('i8')
    v = values.ravel()

    # Object arrays are returned as they are, whatever the compressor; this
    # single check replaces the unreachable copies inside each branch.
    if dtype == np.object_:
        return v.tolist()

    # ``tobytes`` is the supported spelling of the removed ``tostring``.
    raw = v.tobytes()

    if compressor == 'zlib':
        return zlib.compress(raw)
    if compressor == 'blosc' and _BLOSC:
        # typesize comes from the original dtype, before any i8 view.
        return blosc.compress(raw, typesize=dtype.itemsize)

    # ndarray (on original dtype)
    return raw
def recv_loop(self):
    """Forward newly written samples from the shared array to the send socket.

    Each message on ``recv_socket`` carries an absolute position.  The samples
    written since the previous position (capped at half the buffer width) are
    sliced out of the shared array, optionally blosc-compressed, and sent
    together with the position as a multipart message.

    Raises:
        ValueError: if ``self.compress`` is neither ``None`` nor ``'blosc'``.
    """
    last_pos = None
    np_array = self.sharedmem_stream['shared_array'].to_numpy_array()
    # Floor division keeps the slice bounds integral on Python 3 as well.
    half_size = np_array.shape[1] // 2

    while self.running:
        message = self.recv_socket.recv()
        abs_pos = msgpack.loads(message)
        if last_pos is None:
            # First message only establishes the reference position.
            last_pos = abs_pos
            continue

        new = abs_pos - last_pos
        if new > half_size:
            new = half_size
        head = abs_pos % half_size + half_size
        tail = head - new
        chunk = np_array[self.channel_mask, tail:head].transpose()

        if self.compress is None:
            buf = chunk
        elif self.compress == 'blosc':  # equality, not identity, for strings
            buf = blosc.compress(chunk.tobytes(),
                                 typesize=chunk.dtype.itemsize, clevel=9)
        else:
            # Previously this fell through with ``buf`` unbound or stale.
            raise ValueError(
                'Unsupported compression: {!r}'.format(self.compress))

        self.send_socket.send_multipart([msgpack.dumps(abs_pos), buf])
        last_pos = abs_pos
def setUp(self):
    """ Copy params from the Layer setUpClass

    Also logs the Django user in and, when ``self.test_data_loaded`` is
    falsy, flushes both redis databases and posts a random uint8 volume
    through the Cutout view so that read tests have data available.
    """
    # Setup config
    self.kvio_config = self.layer.kvio_config
    self.state_config = self.layer.state_config
    self.object_store_config = self.layer.object_store_config
    self.user = self.layer.user

    # Log Django User in
    self.client.force_login(self.user)

    if not self.test_data_loaded:
        # Flush cache between tests
        client = redis.StrictRedis(host=self.kvio_config['cache_host'],
                                   port=6379, db=1, decode_responses=False)
        client.flushdb()
        client = redis.StrictRedis(host=self.state_config['cache_state_host'],
                                   port=6379, db=1, decode_responses=False)
        client.flushdb()

        # load some data for reading
        self.test_data_8 = np.random.randint(1, 254, (16, 1024, 1024), dtype=np.uint8)
        # blosc's typesize is the element size in bytes (1 for uint8), not
        # the number of bits.
        bb = blosc.compress(self.test_data_8,
                            typesize=self.test_data_8.dtype.itemsize)

        # Post data to the database
        factory = APIRequestFactory()
        request = factory.post('/' + version + '/cutout/col1/exp1/channel1/0/0:1024/0:1024/0:16/', bb,
                               content_type='application/blosc')
        force_authenticate(request, user=self.user)
        _ = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='channel1',
                             resolution='0', x_range='0:1024', y_range='0:1024', z_range='0:16',
                             t_range=None)
        self.test_data_loaded = True
def test_bitshuffle_not_multiple(self):
    """Check the fix for #133.

    Bit-shuffles a buffer whose length (27266) is not a multiple of the
    typesize (8) and verifies the trailing bytes survive the round trip.
    """
    payload = numpy.ones(27266, dtype='uint8').tobytes()
    packed = blosc.compress(payload, typesize=8, shuffle=blosc.BITSHUFFLE)
    tail = blosc.decompress(packed)[-3:]
    self.assertEqual(tail, b'\x01\x01\x01')
def convert(values):
    """Convert the numpy values to a list, an array or compressed bytes.

    With the ``zlib`` or (available) ``blosc`` compressor, object arrays are
    returned as a list and every other array as compressed raw bytes.
    Without compression, float64/int64 data is returned as the flattened
    array itself and anything else as a list.
    """
    dtype = values.dtype
    if needs_i8_conversion(dtype):
        values = values.view("i8")
    v = values.ravel()

    if compressor == "zlib":
        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array; ``tobytes`` replaces the removed
        # ``ndarray.tostring``
        return zlib.compress(v.tobytes())

    elif compressor == "blosc" and _BLOSC:
        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array
        return blosc.compress(v.tobytes(), typesize=dtype.itemsize)

    # ndarray (on original dtype)
    if dtype == "float64" or dtype == "int64":
        return v

    # as a list
    return v.tolist()
def test_get_object_bounding_box_single_cuboid(self):
    """ Test getting the bounding box of a object

    Posts a uint64 volume filled with id 4, reads it back to confirm the
    round trip, then queries the bounding box of id 4.
    """
    test_mat = np.ones((128, 128, 16))
    test_mat[0:128, 0:128, 0:16] = 4
    test_mat = test_mat.astype(np.uint64)
    test_mat = test_mat.reshape((16, 128, 128))
    # blosc's typesize is the element size in bytes (8 for uint64), not bits.
    bb = blosc.compress(test_mat, typesize=test_mat.dtype.itemsize)

    # Create request
    factory = APIRequestFactory()
    request = factory.post('/' + version + '/cutout/col1/exp1/bbchan1/0/1536:1664/1536:1664/0:16/', bb,
                           content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                resolution='0', x_range='1536:1664', y_range='1536:1664', z_range='0:16',
                                t_range=None)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get('/' + version + '/cutout/col1/exp1/bbchan1/0/1536:1664/1536:1664/0:16/',
                          accepts='application/blosc')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                resolution='0', x_range='1536:1664', y_range='1536:1664', z_range='0:16',
                                t_range=None).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress; ``frombuffer`` replaces the deprecated binary mode of
    # ``np.fromstring``.
    raw_data = blosc.decompress(response.content)
    data_mat = np.frombuffer(raw_data, dtype=np.uint64)
    data_mat = np.reshape(data_mat, (16, 128, 128), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)

    # get the bounding box

    # Create request
    factory = APIRequestFactory()
    request = factory.get('/' + version + '/boundingbox/col1/exp1/bbchan1/0/4')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = BoundingBox.as_view()(request, collection='col1', experiment='exp1', channel='bbchan1',
                                     resolution='0', id='4')
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    bounds = response.data
    self.assertEqual(bounds['t_range'], [0, 1])
    self.assertEqual(bounds['x_range'], [1536, 2048])
    self.assertEqual(bounds['y_range'], [1536, 2048])
    self.assertEqual(bounds['z_range'], [0, 16])
def test_compress_input_types(self):
    """Every supported input type must compress to the same bytes.

    The reference result is produced from a ``bytes`` object; text strings
    and ``buffer`` are only exercised on Python 2, ``memoryview`` only where
    it exists.
    """
    import numpy as np

    raw = b'0123456789'
    # assume the expected answer was compressed from bytes
    expected = blosc.compress(raw, typesize=1)

    if not PY3X:
        # Python 3 can't compress unicode
        self.assertEqual(expected, blosc.compress(u'0123456789', typesize=1))
        # And the basic string is unicode
        self.assertEqual(expected, blosc.compress('0123456789', typesize=1))

    # now for all the things that support the buffer interface
    if not PY3X:
        # Python 3 no longer has the buffer
        packed = blosc.compress(buffer(raw), typesize=1)
        self.assertEqual(expected, packed)

    if not PY26:
        # memoryview doesn't exist on Python 2.6
        packed = blosc.compress(memoryview(raw), typesize=1)
        self.assertEqual(expected, packed)

    packed = blosc.compress(bytearray(raw), typesize=1)
    self.assertEqual(expected, packed)

    packed = blosc.compress(np.array([raw]), typesize=1)
    self.assertEqual(expected, packed)
def pickle(obj, filepath):
    """Pickle *obj* and write it to *filepath* as blosc-compressed chunks.

    The pickled bytes are cut into pieces of at most ``blosc.MAX_BUFFERSIZE``
    bytes; each piece is compressed on its own and the results are written
    back to back.
    """
    payload = pickle_.dumps(obj, -1)
    total = len(payload)
    with open(filepath, 'wb') as f:
        for start in range(0, total, blosc.MAX_BUFFERSIZE):
            stop = min(start + blosc.MAX_BUFFERSIZE, total)
            f.write(blosc.compress(payload[start:stop], typesize=8))
def test_get_cbuffer_sizes(self):
    """``get_cbuffer_sizes`` must report the input, compressed and block sizes."""
    payload = b'0123456789' * 100000
    blosc.set_blocksize(2**16)
    packed = blosc.compress(payload, typesize=1)
    sizes = blosc.get_cbuffer_sizes(packed)

    self.assertEqual(sizes[0], 1000000)
    # One cannot be sure of the exact compressed bytes, so round to KB
    kib = 2**10
    self.assertEqual(sizes[1] // kib, 4354 // kib)
    self.assertEqual(sizes[2], 2**16)
def test_all_filters(self):
    """Round-trip a buffer through every shuffle filter that is available."""
    payload = b'0123456789' * 100
    filters = [blosc.NOSHUFFLE, blosc.SHUFFLE]
    # BITFILTER only works properly from 1.8.0 on
    bitshuffle_ok = (LooseVersion(blosc.blosclib_version)
                     >= LooseVersion("1.8.0"))
    if bitshuffle_ok:
        filters += [blosc.BITSHUFFLE]

    for shuffle_mode in filters:
        packed = blosc.compress(payload, typesize=1, shuffle=shuffle_mode)
        self.assertEqual(payload, blosc.decompress(packed))
def write_one(self, arr, n):
    """Insert *arr* as a single row of ``self.table``, keyed by *n*.

    The array is stored blosc-compressed when ``self.compress`` is truthy,
    otherwise as a raw buffer.
    """
    conn = self.engine.connect()
    if not self.compress:
        payload = np.getbuffer(arr)
    else:
        payload = blosc.compress(arr.tostring(),
                                 typesize=arr.dtype.itemsize,
                                 clevel=blosc_comp)
    statement = self.table.insert().values(arraybuffer=payload, num=n)
    conn.execute(statement)
def test_channel_uint64_cuboid_aligned_no_offset_no_time_blosc(self):
    """ Test uint64 data, cuboid aligned, no offset, no time samples

    Posts a volume of ones, reads it back to confirm the round trip, then
    checks that the ids endpoint reports exactly id '1'.
    """
    test_mat = np.ones((128, 128, 16))
    test_mat = test_mat.astype(np.uint64)
    test_mat = test_mat.reshape((16, 128, 128))
    # blosc's typesize is the element size in bytes (8 for uint64), not bits.
    bb = blosc.compress(test_mat, typesize=test_mat.dtype.itemsize)

    # Create request
    factory = APIRequestFactory()
    request = factory.post('/' + version + '/cutout/col1/exp1/layer1/0/0:128/0:128/0:16/', bb,
                           content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                resolution='0', x_range='0:128', y_range='0:128', z_range='0:16',
                                t_range=None)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get('/' + version + '/cutout/col1/exp1/layer1/0/0:128/0:128/0:16/',
                          accepts='application/blosc')

    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Cutout.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                                resolution='0', x_range='0:128', y_range='0:128', z_range='0:16',
                                t_range=None).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress; ``frombuffer`` replaces the deprecated binary mode of
    # ``np.fromstring``.
    raw_data = blosc.decompress(response.content)
    data_mat = np.frombuffer(raw_data, dtype=np.uint64)
    data_mat = np.reshape(data_mat, (16, 128, 128), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)

    # get the ids present in the region

    # Create request
    factory = APIRequestFactory()
    request = factory.get('/' + version + '/ids/col1/exp1/layer1/0/0:128/0:128/0:16/')
    # log in user
    force_authenticate(request, user=self.user)

    # Make request
    response = Ids.as_view()(request, collection='col1', experiment='exp1', channel='layer1',
                             resolution='0', x_range='0:128', y_range='0:128', z_range='0:16',
                             t_range=None)
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.data['ids'], ['1'])
def test_full_volume(self):
    """Run ``dsv.handler`` against one queued target cube with four stored cubes.

    Sets up the SQS queue, the DynamoDB ``s3_index`` table and the S3 bucket,
    stores four zero-filled compressed cubes, then invokes the handler.
    """
    # Create cubes_arn and populate with one target cube
    sqs = boto3.client('sqs')
    sqs.create_queue(QueueName='cubes_arn')
    sqs.send_message(QueueUrl='cubes_arn', MessageBody='[0,0,0]')

    # Create s3_index table
    ddb = boto3.client('dynamodb')
    ddb.create_table(
        TableName='s3_index',
        AttributeDefinitions=[
            {"AttributeName": "object-key", "AttributeType": "S"},
            {"AttributeName": "version-node", "AttributeType": "N"},
            {"AttributeName": "lookup-key", "AttributeType": "S"},
        ],
        KeySchema=[
            {"AttributeName": "object-key", "KeyType": "HASH"},
            {"AttributeName": "version-node", "KeyType": "RANGE"},
        ],
        GlobalSecondaryIndexes=[
            {
                "IndexName": "lookup-key-index",
                "KeySchema": [
                    {"AttributeName": "lookup-key", "KeyType": "HASH"},
                ],
                "Projection": {"ProjectionType": "KEYS_ONLY"},
                "ProvisionedThroughput": {
                    "ReadCapacityUnits": 15,
                    "WriteCapacityUnits": 15,
                },
            },
        ],
        ProvisionedThroughput={
            "ReadCapacityUnits": 15,
            "WriteCapacityUnits": 15,
        })

    # Create the s3_bucket Bucket
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket='s3_bucket')

    # Create cube of data
    data = np.zeros([16, 512, 512], dtype=np.uint8, order='C')
    # blosc's typesize is the element size in bytes (1 for uint8), not bits.
    data = blosc.compress(data, typesize=data.dtype.itemsize)

    # Put cube data for the target cubes
    for key in [dsv.HashedKey(None, 1, 1, 1, 0, 0, 0, version=0),
                dsv.HashedKey(None, 1, 1, 1, 0, 0, 1, version=0),
                dsv.HashedKey(None, 1, 1, 1, 0, 0, 2, version=0),
                dsv.HashedKey(None, 1, 1, 1, 0, 0, 3, version=0)]:
        s3.put_object(Bucket='s3_bucket', Key=key, Body=data)

    dsv.handler(args, None)
def create_protein(record, raw_record):
    """Build a ``UniprotProtein`` from a parsed record and its raw form.

    The raw record is stored blosc-compressed (zlib codec, level 9); extra
    fields come from ``get_refs(record)``.
    """
    annotations = record.annotations
    genome = annotations['organism']
    taxid = annotations['ncbi_taxid'][0]
    updated = _get_date(record)
    packed_record = blosc.compress(raw_record, 1, cname='zlib', clevel=9)
    return UniprotProtein(
        id=record.id,
        genome=genome,
        taxid=taxid,
        description=record.description,
        updated=updated,
        raw_record=packed_record,
        **get_refs(record)
    )
def fset(self, inst, value):
    """Store *value* on *inst* through its backing ``NumpyArrayTable`` row.

    The row is created on first use.  ``None`` clears every row field and
    drops the cached ``<name>_array`` attribute.  Otherwise the shape, dtype,
    optional units and the (possibly compressed) bytes of *value* are written
    to the row and *value* itself is cached on *inst*.
    """
    row_attr = 'NumpyArrayTable__' + self.name
    cache_attr = self.name + '_array'

    nprow = getattr(inst, row_attr)
    if nprow is None:
        nprow = self.NumpyArrayTableClass()
        setattr(inst, row_attr, nprow)

    if value is None:
        if hasattr(inst, cache_attr):
            delattr(inst, cache_attr)
        for field in ('shape', 'dtype', 'blob', 'units', 'compress'):
            setattr(nprow, field, None)
        return

    if self.arraytype == np.ndarray:
        assert type(value) in (np.ndarray, np.memmap), 'Value is not np.array or np.memmap but {}'.format(type(value))
    if self.arraytype == pq.Quantity:
        assert type(value) == pq.Quantity, '{} {} {} value is not pq.Quantity'.format(inst.__class__.__name__, self.name, value)

    # Shape is persisted as comma-separated dimensions, e.g. "3,4".
    nprow.shape = ','.join('{}'.format(dim) for dim in value.shape)
    nprow.dtype = value.dtype.str

    if self.compress == 'blosc':
        blob = blosc.compress(value.tostring(),
                              typesize=value.dtype.itemsize, clevel=9)
    else:
        # The other codecs work on a buffer, which needs C-contiguous data.
        if value.flags['C_CONTIGUOUS']:
            buf = value.data
        else:
            buf = np.array(value, copy=True).data

        if self.compress == 'zlib':
            blob = zlib.compress(buf)
        elif self.compress == 'lz4':
            blob = lz4.compress(buf)
        elif self.compress == 'snappy':
            blob = snappy.compress(buf)
        else:
            blob = buf

    nprow.compress = self.compress
    nprow.blob = blob

    if self.arraytype == pq.Quantity:
        nprow.units = value.dimensionality.string

    setattr(inst, cache_attr, value)
def test_decode_blosc_header_basic():
    """Decode the header of a default-compressed buffer and check key fields."""
    # ``linspace`` needs an integer sample count (``2e4`` is a float), and
    # ``tobytes`` replaces the removed ``ndarray.tostring``.
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    # Only compare the fields listed in ``expected``.
    header_slice = dict((k, header[k]) for k in expected.keys())
    nt.assert_equal(expected, header_slice)
def compress(data, method, *args, **kwds):
    """Compress *data* with the named *method*.

    An empty method returns *data* untouched.  ``'blosc-<cname>'`` methods
    are forwarded to ``blosc.compress`` with ``cname`` set accordingly; extra
    positional and keyword arguments are passed through.

    Raises:
        ValueError: if the method passes ``_check_method`` but is not a
            ``blosc-`` method.
    """
    if method == '':
        return data

    _check_method(method)

    prefix = 'blosc-'
    if not method.startswith(prefix):
        raise ValueError("Unknown compression method '%s'" % method)

    kwds['cname'] = method[len(prefix):]
    return blosc.compress(data, *args, **kwds)
def test_decode_blosc_header():
    """Check decoded blosc headers for default, unshuffled and incompressible data."""
    # ``linspace`` needs an integer sample count (``2e4`` is a float), and
    # ``tobytes`` replaces the removed ``ndarray.tostring``.
    array_ = np.linspace(0, 100, int(2e4)).tobytes()

    # basic test case
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)

    # deactivate shuffle
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)

    # uncompressible data
    array_ = np.asarray(np.random.randn(23),
                        dtype=np.float32).tobytes()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 88,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 3,  # 1 for shuffle 2 for non-compressed
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
def test_decode_blosc_header_deactivate_shuffle():
    """With shuffle disabled the decoded header must carry no flags."""
    array_ = np.ones(16000, dtype=np.uint8)

    blosc_args = BloscArgs()
    blosc_args.shuffle = False
    header = decode_blosc_header(blosc.compress(array_, **blosc_args))

    expected = {
        'versionlz': 1,
        'version': 2,
        'flags': 0,  # no shuffle flag
        'nbytes': len(array_),
        'typesize': blosc_args.typesize,
    }
    # Restrict the comparison to the fields listed in ``expected``.
    observed = {}
    for field in expected.keys():
        observed[field] = header[field]
    nt.assert_equal(expected, observed)
def write_one(self, arr, n):
    """Insert *arr* as one header row plus blosc-compressed chunk rows.

    A row holding *n* and the array size goes into ``self.table``; the array
    is then cut into ``self.chunksize``-element pieces, each stored compressed
    in ``self.table2`` together with its chunk number.
    """
    conn = self.engine.connect()
    ins = self.table.insert().values(num=n, arrsize=arr.size)
    result = conn.execute(ins)
    table1_id = result.inserted_primary_key[0]

    pos = 0
    # Floor division: ``range`` rejects the float that ``/`` yields on
    # Python 3.  The ``+ 1`` (kept from the original) also writes a trailing
    # empty chunk when the size is an exact multiple of the chunk size.
    for buffernum in range(arr.size // self.chunksize + 1):
        arr_chunk = arr[pos:pos + self.chunksize]
        # ``tobytes`` replaces the removed ``ndarray.tostring``.
        smallbuffer = blosc.compress(arr_chunk.tobytes(),
                                     typesize=arr.dtype.itemsize,
                                     clevel=blosc_comp)
        ins = self.table2.insert().values(table1_id=table1_id,
                                          buffernum=buffernum,
                                          smallbuffer=smallbuffer)
        conn.execute(ins)
        pos += self.chunksize
def test_cbuffer_validate(self):
    """``cbuffer_validate`` must accept a compressed buffer in any container."""
    import numpy as np

    packed = blosc.compress(b'0123456789' * 1000)

    # now for all the things that support the buffer interface
    self.assertTrue(blosc.cbuffer_validate(packed))
    if not PY3X:
        # Python 3 no longer has the buffer
        self.assertTrue(blosc.cbuffer_validate(buffer(packed)))

    as_view = memoryview(packed)
    self.assertTrue(blosc.cbuffer_validate(as_view))
    as_bytearray = bytearray(packed)
    self.assertTrue(blosc.cbuffer_validate(as_bytearray))
    as_ndarray = np.array([packed])
    self.assertTrue(blosc.cbuffer_validate(as_ndarray))
def pack_array(self, data):
    """Method to serialize and compress data using the blosc compressor.

    Assumes the datatype of the passed in array if the datatype property is
    not set.

    Args:
        data (np.ndarray): The array to pack

    Returns:
        (bytes): The resulting serialized and compressed byte array
    """
    if not self.datatype:
        self.datatype = data.dtype

    # blosc's typesize is the element size in *bytes*.  Passing the size in
    # bits (itemsize * 8) defeats the shuffle filter and exceeds blosc's
    # maximum typesize (255) for 32-byte dtypes.  The typesize is recorded in
    # the compressed header, so decompression is unaffected.
    return blosc.compress(data, typesize=np.dtype(self.datatype).itemsize)
def maybe_compress(payload, min_size=1e4, sample_size=1e4, nsamples=5):
    """
    Maybe compress payload

    1.  Small messages are left alone
    2.  A few samples of the payload are compressed first; if that does not
        help, the original is returned
    3.  The full payload is then compressed; if it does not compress well,
        the original is returned
    4.  Otherwise the compressed result is returned

    Returns a ``(compression_name_or_None, data)`` pair.
    """
    compression = dask.config.get('distributed.comm.compression')
    if compression == 'auto':
        compression = default_compression

    # Nothing configured, too small to bother, or too large (compression
    # libraries often fail on huge buffers).
    if not compression or len(payload) < min_size or len(payload) > 2**31:
        return None, payload

    min_size = int(min_size)
    sample_size = int(sample_size)

    compress = compressions[compression]['compress']

    # Compress a sample, return original if not very compressed
    sample = byte_sample(payload, sample_size, nsamples)
    if len(compress(sample)) > 0.9 * len(sample):
        return None, payload

    is_view = type(payload) is memoryview
    nbytes = payload.itemsize * len(payload) if is_view else len(payload)

    if default_compression and blosc and is_view:
        # Blosc does itemsize-aware shuffling, resulting in better compression
        compressed = blosc.compress(payload, typesize=payload.itemsize,
                                    cname='lz4', clevel=5)
        compression = 'blosc'
    else:
        compressed = compress(ensure_bytes(payload))

    if len(compressed) > 0.9 * nbytes:
        # full data not very compressible
        return None, payload
    return compression, compressed
def test_decode_blosc_header_uncompressible_data():
    """Decode the header produced for random (incompressible) float32 data."""
    # ``tobytes`` replaces the removed ``ndarray.tostring``.
    array_ = np.asarray(np.random.randn(255),
                        dtype=np.float32).tobytes()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 1016,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                # 0x13 == 0x01 | 0x02 | 0x10:
                # 0x01 for shuffle, 0x02 for non-compressed, and
                # 0x10 (bit 4, not the value 4) for small blocksize
                'flags': 0x13,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
def test_decompress_input_types(self):
    """A compressed buffer must decompress identically from any container."""
    import numpy as np

    # assume the expected answer was compressed from bytes
    expected = b'0123456789'
    packed = blosc.compress(expected, typesize=1)

    # now for all the things that support the buffer interface
    self.assertEqual(expected, blosc.decompress(packed))
    if not PY3X:
        # Python 3 no longer has the buffer
        self.assertEqual(expected, blosc.decompress(buffer(packed)))

    from_view = blosc.decompress(memoryview(packed))
    self.assertEqual(expected, from_view)
    from_bytearray = blosc.decompress(bytearray(packed))
    self.assertEqual(expected, from_bytearray)
    from_ndarray = blosc.decompress(np.array([packed]))
    self.assertEqual(expected, from_ndarray)
def stateByAddingScreen(self, screen, frameNumber):
    """Return a new ``State`` whose newest screen is *screen*.

    The screen is reduced to one uint8 channel with a weighted sum over its
    last axis, zoomed, resized in place to (84, 84, 1) and, when
    ``State.useCompression`` is set, blosc-compressed.  The new state keeps
    the three most recent previous screens; a state without ``screens`` is
    seeded with four references to the new one.
    """
    channel_weights = np.array([.299, .587, .114])
    frame = np.dot(screen, channel_weights).astype(np.uint8)
    frame = ndimage.zoom(frame, (0.4, 0.525))
    frame.resize((84, 84, 1))

    if State.useCompression:
        flat = np.reshape(frame, 84 * 84)
        frame = blosc.compress(flat.tobytes(), typesize=1)

    newState = State()
    if not hasattr(self, 'screens'):
        newState.screens = [frame] * 4
    else:
        newState.screens = self.screens[:3]
        newState.screens.insert(0, frame)
    return newState
def pack_file(x, fn, encoding='utf8'):
    """ Pack numpy array into filename

    Object-dtype arrays are written as msgpack+blosc, everything else goes
    through bloscpack.

    >>> pack_file(np.array([1, 2, 3]), 'foo.blp')  # doctest: +SKIP

    See also:
        unpack_file
    """
    if x.dtype == 'O':
        packed = msgpack.packb(x.tolist(), encoding=encoding)
        payload = blosc.compress(packed, 1)
        with open(fn, 'wb') as f:
            f.write(payload)
    else:
        bloscpack.pack_ndarray_file(x, fn)
def __init__(self, map_name, *args, **kwargs):
    """Set up server state for the map called *map_name*.

    Loads and compresses the map file, opens the client UDP socket, resets
    the client/object/zombie registries, builds the client-code dispatch
    table and finally initialises physics.
    """
    from zombie.common import map_file_path, udp_socket
    from zombie.constants import CLIENT_CODES
    from blosc import compress
    super(Server, self).__init__(*args, **kwargs)

    self.map_name = map_name
    self.map_file_path = map_file_path(map_name)
    # Read through a context manager so the file handle is closed promptly.
    with open(self.map_file_path, 'rb') as map_file:
        self.map_data = compress(map_file.read(), 8)

    self.client_socket = udp_socket()
    self.clients = {}
    self.client_id_counter = 1
    self.objects = {}
    self.object_id_counter = 0
    self.map_objects = {}
    self.zombies = {}
    self.zombie_id_counter = 0

    # Maps each client code to the method handling it.
    self.client_code_actions = {
        CLIENT_CODES['start_move_forwards']: self.start_move_forwards,
        CLIENT_CODES['end_move_forwards']: self.end_move,
        CLIENT_CODES['start_move_backwards']: self.start_move_backwards,
        CLIENT_CODES['end_move_backwards']: self.end_move,
        CLIENT_CODES['start_strafe_left']: self.start_strafe_left,
        CLIENT_CODES['end_strafe_left']: self.end_strafe,
        CLIENT_CODES['start_strafe_right']: self.start_strafe_right,
        CLIENT_CODES['end_strafe_right']: self.end_strafe,
        CLIENT_CODES['start_attack']: self.start_attack,
        CLIENT_CODES['end_attack']: self.end_attack,
        CLIENT_CODES['start_use']: self.start_use,
        CLIENT_CODES['end_use']: self.end_use,
        CLIENT_CODES['throw_equipped_item']: self.throw_equipped_item,
        CLIENT_CODES['drop_equipped_item']: self.drop_equipped_item,
        CLIENT_CODES['cursor_motion']: self.cursor_motion,
    }

    self.setup_physics()
def compress(s):
    """Return *s* compressed by blosc with a typesize of 8."""
    # Imported lazily, as in the original.
    import blosc

    packed = blosc.compress(s, typesize=8)
    return packed
def test_start_downsample_get_status_and_check_data(self):
    """A large complex test that verifies all the plumbing for downsample.

    Does not validate data integrity, but does make sure data exists at
    different levels and iso vs. aniso.
    """
    self.dbsetup.insert_downsample_data()

    factory = APIRequestFactory()
    names = {'collection': 'col1',
             'experiment': 'exp_ds_aniso',
             'channel': 'channel1'}
    cutout_url = '/' + version + '/cutout/col1/exp_ds_aniso/channel1/'
    downsample_url = '/' + version + '/downsample/col1/exp_ds_aniso/channel1/'

    def post_cutout(payload):
        # POST the full-resolution volume; returns the un-rendered response.
        request = factory.post(cutout_url + '0/0:1024/0:1024/0:16/', payload,
                               content_type='application/blosc')
        force_authenticate(request, user=self.user)
        return Cutout.as_view()(request, resolution='0', x_range='0:1024',
                                y_range='0:1024', z_range='0:16',
                                t_range=None, **names)

    def get_cutout(resolution, xy_range, z_range, query=''):
        # GET a cutout (same range on x and y) and require a 200 response.
        url = '{}{}/{}/{}/{}/{}'.format(cutout_url, resolution, xy_range,
                                        xy_range, z_range, query)
        request = factory.get(url, accepts='application/blosc')
        force_authenticate(request, user=self.user)
        response = Cutout.as_view()(request, resolution=resolution,
                                    x_range=xy_range, y_range=xy_range,
                                    z_range=z_range, t_range=None,
                                    **names).render()
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        return response

    def decode(response, shape):
        # ``frombuffer`` replaces the deprecated binary ``np.fromstring``.
        raw_data = blosc.decompress(response.content)
        flat = np.frombuffer(raw_data, dtype=np.uint8)
        return np.reshape(flat, shape, order='C')

    def new_status_request():
        request = factory.get(downsample_url, content_type='application/json')
        force_authenticate(request, user=self.user)
        return request

    def read_status(request):
        # Query the downsample status and require a 200 response.
        response = Downsample.as_view()(request, **names).render()
        self.assertEqual(response.status_code, status.HTTP_200_OK)
        return response.data

    # Post some data to the channel
    test_mat = np.random.randint(1, 254, (16, 1024, 1024))
    test_mat = test_mat.astype(np.uint8)
    # blosc's typesize is the element size in bytes (1 for uint8), not bits.
    bb = blosc.compress(test_mat.tobytes(), typesize=test_mat.dtype.itemsize)

    response = post_cutout(bb)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Wait for data to be written
    get_cutout('0', '0:1024', '0:16')

    # Trigger downsample
    request = factory.post(downsample_url, content_type='application/json')
    force_authenticate(request, user=self.user)
    response = Downsample.as_view()(request, **names)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Make Sure status has changed
    status_request = new_status_request()
    info = read_status(status_request)
    self.assertEqual(info["num_hierarchy_levels"], 5)
    self.assertEqual(info["status"], "IN_PROGRESS")

    # Poll (up to 30 times, 2 s apart) until the job leaves IN_PROGRESS
    for _ in range(0, 30):
        if read_status(status_request)["status"] != "IN_PROGRESS":
            break
        time.sleep(2)

    # Verify now downsampled
    info = read_status(status_request)
    self.assertEqual(info["num_hierarchy_levels"], 5)
    self.assertEqual(info["status"], "DOWNSAMPLED")

    # Get data at res 1 and verify it's non-zero
    data_mat_res1_aniso = decode(get_cutout('1', '0:512', '0:16'),
                                 (16, 512, 512))
    self.assertGreater(data_mat_res1_aniso.sum(), 100)

    # Get data at res 1 with iso flag and verify it's non-zero and the same
    # as without flag
    data_mat_res1_iso = decode(get_cutout('1', '0:512', '0:16', '?iso=true'),
                               (16, 512, 512))
    self.assertGreater(data_mat_res1_iso.sum(), 100)
    np.testing.assert_array_equal(data_mat_res1_iso, data_mat_res1_aniso)

    # Get data at res 4 without iso and verify it's non-zero
    data_mat_res4_aniso = decode(get_cutout('4', '0:256', '0:8', '?iso=false'),
                                 (8, 256, 256))
    self.assertGreater(data_mat_res4_aniso.sum(), 1)

    # Get data at res 4 with iso flag and verify it's non-zero and DIFFERENT
    # than without flag
    data_mat_res4_iso = decode(get_cutout('4', '0:256', '0:8', '?iso=true'),
                               (8, 256, 256))
    self.assertGreater(data_mat_res4_iso.sum(), 1)
    self.assertRaises(AssertionError, np.testing.assert_array_equal,
                      data_mat_res4_aniso, data_mat_res4_iso)

    # Post data, invalidating the downsample operation
    response = post_cutout(bb)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Verify now NOT downsampled
    info = read_status(new_status_request())
    self.assertEqual(info["num_hierarchy_levels"], 5)
    self.assertEqual(info["status"], "NOT_DOWNSAMPLED")
def pack_list(in_list, meta_info, out_file, blosc_args,
              offsets=DEFAULT_OFFSETS, checksum=DEFAULT_CHECKSUM):
    """ Main function for compressing a list of buffers.

    Each buffer is compressed with ``blosc.compress`` and written to
    ``out_file`` after the bloscpack header (and the optional offsets
    table); ``meta_info`` is appended as JSON at the end of the file.

    Parameters
    ----------
    in_list : list
        the list of buffers
    meta_info : dict
        dictionary with the associated metainfo
    out_file : str
        the name of the output file
    blosc_args : dict
        dictionary of blosc keyword args; must contain a ``'typesize'`` key
    offsets : bool
        Whether to include offsets.
    checksum : str
        Which checksum to use.

    """
    # XXX Check for empty lists
    # calculate chunk sizes
    nchunks = len(in_list)
    chunk_size = len(in_list[0])
    last_chunk_size = len(in_list[-1])
    # Sum the real buffer lengths: the previous formula
    # (nchunks * chunk_size + last_chunk_size) counted one chunk too many.
    in_list_size = sum(len(chunk) for chunk in in_list)
    print_verbose('input file size: %s' % double_pretty_size(in_list_size))
    # calculate header
    options = create_options(offsets=offsets)
    if offsets:
        offsets_storage = list(itertools.repeat(0, nchunks))
    # set the checksum impl
    checksum_impl = CHECKSUMS_LOOKUP[checksum]
    raw_bloscpack_header = create_bloscpack_header(
        options=options,
        checksum=CHECKSUMS_AVAIL.index(checksum),
        typesize=blosc_args['typesize'],
        chunk_size=chunk_size,
        last_chunk=last_chunk_size,
        nchunks=nchunks
    )
    print_verbose('raw_bloscpack_header: %s' % repr(raw_bloscpack_header),
                  level=DEBUG)
    # write the chunks to the file
    with open(out_file, 'wb') as output_fp:
        output_fp.write(raw_bloscpack_header)
        # preallocate space for the offsets
        if offsets:
            output_fp.write(encode_int64(-1) * nchunks)
        # if nchunks == 1 the last_chunk_size is the size of the single chunk
        for i, current_chunk in enumerate(in_list):
            # store the current position in the file
            if offsets:
                offsets_storage[i] = output_fp.tell()
            # do compression
            compressed = blosc.compress(current_chunk, **blosc_args)
            # write compressed data
            output_fp.write(compressed)
            # float() keeps the ratio a true division even without
            # ``from __future__ import division``
            if len(current_chunk) != 0:
                ratio = "%0.3f" % (float(len(compressed)) / len(current_chunk))
            else:
                ratio = "N/A"
            print_verbose("chunk '%d'%s written, in: %s out: %s ratio: %s" %
                          (i, ' (last)' if i == nchunks - 1 else '',
                           double_pretty_size(len(current_chunk)),
                           double_pretty_size(len(compressed)),
                           ratio),
                          level=DEBUG)
            tail_mess = ""
            if checksum_impl.size > 0:
                # compute the checksum on the compressed data
                digest = checksum_impl(compressed)
                # write digest
                output_fp.write(digest)
                tail_mess += ('checksum (%s): %s ' % (checksum, repr(digest)))
            if offsets:
                tail_mess += ("offset: '%d'" % offsets_storage[i])
            if len(tail_mess) > 0:
                print_verbose(tail_mess, level=DEBUG)
        if offsets:
            # seek back to just after the header, where the placeholder
            # offsets were written
            output_fp.seek(BLOSCPACK_HEADER_LENGTH, 0)
            print_verbose("Writing '%d' offsets: '%s'" %
                          (len(offsets_storage), repr(offsets_storage)),
                          level=DEBUG)
            # write the offsets encoded into the reserved space in the file
            encoded_offsets = b"".join(
                [encode_int64(offset) for offset in offsets_storage])
            print_verbose("Raw offsets: %s" % repr(encoded_offsets),
                          level=DEBUG)
            output_fp.write(encoded_offsets)
            # write the metadata at the end
            output_fp.seek(0, 2)
        json.dump(meta_info, output_fp)
    # measure after the file is closed so buffered data is accounted for
    out_file_size = path.getsize(out_file)
    print_verbose('output file size: %s' % double_pretty_size(out_file_size))
    if in_list_size:
        print_verbose('compression ratio: %f' %
                      (float(out_file_size) / in_list_size))
    else:
        # all input buffers were empty: a ratio is undefined
        print_verbose('compression ratio: N/A')
def test_get_cbuffer_sizes(self):
    """Compare ``blosc.get_cbuffer_sizes`` output against the expected tuple."""
    payload = b'0123456789' * 100000
    blosc.set_blocksize(2**16)
    compressed = blosc.compress(payload, typesize=1)
    sizes = blosc.get_cbuffer_sizes(compressed)
    expected = (1000000, 4354, 2**16)
    self.assertEqual(sizes, expected)
raw = t.arrays(["/Muon_(pt|eta|phi|mass|charge)/"], outputtype=dict, namedecode="ascii") table = awkward.Table(raw) print(table.nbytes / 1e6) for i in range(20): data = [] for fcomp, fdecomp, label in tqdm([ [ lambda x: backports.lzma.compress(x), backports.lzma.decompress, "lzma" ], [lambda x: blosc.compress(x), blosc.decompress, "blosc"], # default should be blosc.SHUFFLE [ lambda x: blosc.compress(x, shuffle=blosc.NOSHUFFLE), blosc.decompress, "blosc_noshuffle" ], [ lambda x: blosc.compress(x, shuffle=blosc.SHUFFLE), blosc.decompress, "blosc_shuffle" ], [ lambda x: blosc.compress(x, shuffle=blosc.BITSHUFFLE), blosc.decompress, "blosc_bitshuffle" ], [ lambda x: blosc.compress(x, cname="zlib"), blosc.decompress,
def fset(self, inst, value):
    """Store ``value`` on ``inst`` and mirror it into its NumpyArrayTable row.

    The row object lives on ``inst`` under ``'NumpyArrayTable__' + self.name``
    and is created from ``self.NumpyArrayTableClass`` when missing.  Passing
    ``None`` clears the row fields and removes the cached
    ``self.name + '_array'`` attribute.  Otherwise the array's shape, dtype
    string, (optionally compressed) bytes and, for quantities, units are
    written to the row and the array itself is cached on ``inst``.

    Raises AssertionError when ``value`` is not exactly of the type required
    by ``self.arraytype``.
    """
    row_attr = 'NumpyArrayTable__' + self.name
    cache_attr = self.name + '_array'

    nprow = getattr(inst, row_attr)
    if nprow is None:
        nprow = self.NumpyArrayTableClass()
        setattr(inst, row_attr, nprow)

    if value is None:
        if hasattr(inst, cache_attr):
            delattr(inst, cache_attr)
        nprow.shape = None
        nprow.dtype = None
        nprow.blob = None
        nprow.units = None
        nprow.compress = None
        return

    # Exact type checks are intentional: isinstance() would also accept
    # subclasses (pq.Quantity derives from np.ndarray).
    if self.arraytype == np.ndarray:
        assert (type(value) == np.ndarray) or (
            type(value) == np.memmap
        ), 'Value is not np.array or np.memmap but {}'.format(type(value))
    if self.arraytype == pq.Quantity:
        assert type(
            value
        ) == pq.Quantity, '{} {} {} value is not pq.Quantity'.format(
            inst.__class__.__name__, self.name, value)

    # Comma-separated dimensions, e.g. "3,4"; empty string for 0-d arrays.
    nprow.shape = ','.join('{}'.format(dim) for dim in value.shape)
    nprow.dtype = value.dtype.str

    if self.compress == 'blosc':
        # tobytes() replaces tostring(), which was removed in NumPy 2.0.
        blob = blosc.compress(value.tobytes(),
                              typesize=value.dtype.itemsize,
                              clevel=9)
    else:
        # .data is only available on contiguous memory, so copy if needed.
        if value.flags['C_CONTIGUOUS']:
            buf = value.data
        else:
            buf = np.array(value, copy=True).data
        if self.compress == 'zlib':
            blob = zlib.compress(buf)
        elif self.compress == 'lz4':
            blob = lz4.compress(buf)
        elif self.compress == 'snappy':
            blob = snappy.compress(buf)
        else:
            blob = buf

    nprow.compress = self.compress
    nprow.blob = blob
    if self.arraytype == pq.Quantity:
        nprow.units = value.dimensionality.string
    setattr(inst, cache_attr, value)
def decompress():
    """Compress ``array`` at clevel 0, then decompress the result.

    ``array`` and ``typesize`` are taken from the enclosing scope; the
    decompressed output is discarded.
    """
    compressed = blosc.compress(array, typesize, clevel=0)
    blosc.decompress(compressed)
def compress():
    """Compress ``array`` at clevel 0 and discard the result.

    ``array`` and ``typesize`` are taken from the enclosing scope.
    """
    blosc.compress(array, typesize=typesize, clevel=0)
def test_basic_codec(self):
    """Round-trip a short byte string through compress/decompress."""
    original = b'0123456789'
    compressed = blosc.compress(original, typesize=1)
    restored = blosc.decompress(compressed)
    self.assertEqual(original, restored)
def _store(self, array, index):
    """Compress ``array`` and keep it under ``index``, marking the chunk set.

    The blosc typesize is the item size of ``self.dtype``.
    """
    # tobytes() replaces tostring(), which was removed in NumPy 2.0.
    self._data[index] = blosc.compress(array.tobytes(),
                                       np.dtype(self.dtype).itemsize)
    self.chunkstatus[index] = self.CHUNKSET
    return
def test_get_clib(self):
    """Every available compressor must report the library named in cname2clib."""
    s = b'0123456789'
    for cname in blosc.compressor_list():
        c = blosc.compress(s, typesize=1, cname=cname)
        clib = blosc.get_clib(c)
        # assertEqual replaces the deprecated assert_ alias (removed in
        # Python 3.12) and reports both values on failure.
        self.assertEqual(clib, blosc.cname2clib[cname])
def test_all_compressors(self):
    """Round-trip the same payload through every available compressor."""
    payload = b'0123456789' * 100
    for codec_name in blosc.compressor_list():
        packed = blosc.compress(payload, typesize=1, cname=codec_name)
        unpacked = blosc.decompress(packed)
        self.assertEqual(payload, unpacked)
def test_delayed_write(self):
    """A test with multiple writes to the same cuboid"""
    url = '/' + version + '/cutout/col1/exp1/channel1/0/0:128/0:128/0:16/'
    view_kwargs = dict(collection='col1', experiment='exp1',
                       channel='channel1', resolution='0',
                       x_range='0:128', y_range='0:128', z_range='0:16',
                       t_range=None)
    factory = APIRequestFactory()

    def post_cuboid(matrix):
        """POST ``matrix`` as a blosc payload and require HTTP 201."""
        # NOTE(review): typesize=8 is kept from the original; blosc
        # typesize is in bytes and the data is uint8 -- confirm intent.
        payload = blosc.compress(matrix.tobytes(), typesize=8)
        request = factory.post(url, payload,
                               content_type='application/blosc')
        # log in user
        force_authenticate(request, user=self.user)
        # Make request
        response = Cutout.as_view()(request, **view_kwargs)
        self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    test_mat = np.random.randint(1, 254, (16, 128, 128))
    test_mat = test_mat.astype(np.uint8)

    # Write the same data twice
    post_cuboid(test_mat)
    post_cuboid(test_mat)

    # Modify a few voxels and write a third time
    test_mat[1, 20, 40] = 5
    test_mat[1, 20, 41] = 5
    test_mat[1, 20, 42] = 5
    post_cuboid(test_mat)

    # Create Request to get data you posted
    # NOTE(review): 'accepts' is kept as in the original; a sibling test
    # uses HTTP_ACCEPT for the same purpose -- confirm which is intended.
    request = factory.get(url, accepts='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)
    # Make request
    response = Cutout.as_view()(request, **view_kwargs).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress; frombuffer replaces the deprecated binary np.fromstring
    raw_data = blosc.decompress(response.content)
    data_mat = np.frombuffer(raw_data, dtype=np.uint8)
    data_mat = np.reshape(data_mat, (16, 128, 128), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)
def test_get_object_bounding_box_single_cuboid(self):
    """ Test getting the bounding box of a object"""
    cutout_url = ('/' + version +
                  '/cutout/col1/exp1/bbchan1/0/1536:1664/1536:1664/0:16/')
    cutout_kwargs = dict(collection='col1', experiment='exp1',
                         channel='bbchan1', resolution='0',
                         x_range='1536:1664', y_range='1536:1664',
                         z_range='0:16', t_range=None)

    # Every voxel carries object id 4
    test_mat = np.ones((128, 128, 16))
    test_mat[0:128, 0:128, 0:16] = 4
    test_mat = test_mat.astype(np.uint64)
    test_mat = test_mat.reshape((16, 128, 128))
    # NOTE(review): typesize=64 is kept from the original; blosc typesize
    # is in bytes and uint64 items are 8 bytes -- confirm intent.
    bb = blosc.compress(test_mat, typesize=64)

    # Create request
    factory = APIRequestFactory()
    request = factory.post(cutout_url, bb, content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)
    # Make request
    response = Cutout.as_view()(request, **cutout_kwargs)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get(cutout_url, accepts='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)
    # Make request
    response = Cutout.as_view()(request, **cutout_kwargs).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress; frombuffer replaces the deprecated binary np.fromstring
    raw_data = blosc.decompress(response.content)
    data_mat = np.frombuffer(raw_data, dtype=np.uint64)
    data_mat = np.reshape(data_mat, (16, 128, 128), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)

    # get the bounding box
    # Create request
    factory = APIRequestFactory()
    request = factory.get('/' + version + '/boundingbox/col1/exp1/bbchan1/0/4')
    # log in user
    force_authenticate(request, user=self.user)
    # Make request
    response = BoundingBox.as_view()(request, collection='col1',
                                     experiment='exp1', channel='bbchan1',
                                     resolution='0', id='4')
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    bb = response.data
    self.assertEqual(bb['t_range'], [0, 1])
    self.assertEqual(bb['x_range'], [1536, 2048])
    self.assertEqual(bb['y_range'], [1536, 2048])
    self.assertEqual(bb['z_range'], [0, 16])
def setUpTestData(cls):
    """Create the shared AWS resources, DB fixtures and seed data once per class.

    Resource creation is slow, so it is done here and stored on the class
    instead of being repeated for every test.  Each AWS resource is created;
    on ``ClientError`` it is deleted and created again.  Finally a random
    uint8 volume is posted to the cutout endpoint so that tests have data
    to read.
    """
    # Setup the helper to create temporary AWS resources
    cls.setup_helper = SetupTests()
    cls.setup_helper.mock = False

    # Create a user in django
    dbsetup = SetupTestDB()
    cls.user = dbsetup.create_user('testuser')
    dbsetup.add_role('resource-manager')
    dbsetup.set_user(cls.user)

    # Populate django models DB
    dbsetup.insert_spatialdb_test_data()

    try:
        cls.setup_helper.create_index_table(
            OBJECTIO_CONFIG["s3_index_table"],
            cls.setup_helper.DYNAMODB_SCHEMA)
    except ClientError:
        cls.setup_helper.delete_index_table(
            OBJECTIO_CONFIG["s3_index_table"])
        cls.setup_helper.create_index_table(
            OBJECTIO_CONFIG["s3_index_table"],
            cls.setup_helper.DYNAMODB_SCHEMA)

    try:
        cls.setup_helper.create_cuboid_bucket(
            OBJECTIO_CONFIG["cuboid_bucket"])
    except ClientError:
        cls.setup_helper.delete_cuboid_bucket(
            OBJECTIO_CONFIG["cuboid_bucket"])
        cls.setup_helper.create_cuboid_bucket(
            OBJECTIO_CONFIG["cuboid_bucket"])

    try:
        OBJECTIO_CONFIG["s3_flush_queue"] = \
            cls.setup_helper.create_flush_queue(FLUSH_QUEUE_NAME)
    except ClientError:
        try:
            cls.setup_helper.delete_flush_queue(
                OBJECTIO_CONFIG["s3_flush_queue"])
        except Exception:
            # Best-effort cleanup; narrowed from a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass
        # presumably waits out the provider's cool-down before a queue
        # with the same name can be re-created -- TODO confirm
        time.sleep(61)
        OBJECTIO_CONFIG["s3_flush_queue"] = \
            cls.setup_helper.create_flush_queue(FLUSH_QUEUE_NAME)

    # load some data for reading
    cls.test_data_8 = np.random.randint(1, 254, (16, 1024, 1024),
                                        dtype=np.uint8)
    bb = blosc.compress(cls.test_data_8, typesize=8)

    # Post data to the database
    factory = APIRequestFactory()
    request = factory.post(
        '/' + version + '/cutout/col1/exp1/channel1/0/0:1024/0:1024/0:16/',
        bb, content_type='application/blosc')
    force_authenticate(request, user=cls.user)
    _ = Cutout.as_view()(request, collection='col1', experiment='exp1',
                         channel='channel1', resolution='0',
                         x_range='0:1024', y_range='0:1024',
                         z_range='0:16', t_range=None)
def decode(self, _id, feature, cls, timeslice):
    """Read the feature for ``_id`` and return it blosc-compressed.

    The feature is instantiated with ``cls`` as its persistence and a fresh
    ``Decoder``; its content is compressed and wrapped in a ``DecodeResult``
    with an octet-stream content type.  ``timeslice`` is not used here.
    """
    source = feature(_id=_id, persistence=cls, decoder=Decoder())
    compressed = blosc.compress(source.read(), typesize=32)
    stream = BytesIO(compressed)
    headers = {'Content-Type': 'application/octet-stream'}
    return DecodeResult(stream, headers)
def send_blosc_pickle(socket, obj, flags=0, protocol=-1):
    """Pickle ``obj``, blosc-compress the pickle and send it over ``socket``.

    Returns whatever ``socket.send`` returns.
    """
    pickled = pickle.dumps(obj, protocol)
    payload = blosc.compress(pickled)
    return socket.send(payload, flags=flags)
def write_one(self, arr, n):
    """Write ``arr`` blosc-compressed to the file ``self.filename.format(n)``.

    The blosc typesize is the array's item size; the compression level comes
    from ``blosc_comp``.
    """
    # The context manager closes the handle even if compression or the
    # write raises (the previous open()/close() pair leaked it).
    with io.open(self.filename.format(n), 'wb') as f:
        # tobytes() replaces tostring(), which was removed in NumPy 2.0.
        buf = blosc.compress(arr.tobytes(),
                             typesize=arr.dtype.itemsize,
                             clevel=blosc_comp)
        f.write(buf)
def encode(self, data):
    """Serialize ``data`` with msgpack, then blosc-compress the bytes.

    Objects msgpack cannot handle natively are passed to
    ``self.serialize_obj``.
    """
    packed = msgpack.packb(data, default=self.serialize_obj,
                           use_bin_type=True)
    return blosc.compress(packed, TYPESIZE)
def dumps(self, obj):
    """Return ``obj`` blosc-compressed with a typesize of 8."""
    compressed = blosc.compress(obj, typesize=8)
    return compressed
def test_channel_uint16_cuboid_unaligned_offset_time_blosc(self):
    """ Test uint16 data, not cuboid aligned, offset, time samples, blosc interface

    Test Requires >=2GB of memory!
    """
    url = ('/' + version +
           '/cutout/col1/exp1/channel2/0/100:600/450:750/20:37/0:3')
    view_kwargs = dict(collection='col1', experiment='exp1',
                       channel='channel2', resolution='0',
                       x_range='100:600', y_range='450:750',
                       z_range='20:37', t_range='0:3')

    test_mat = np.random.randint(1, 2**16 - 1, (3, 17, 300, 500))
    test_mat = test_mat.astype(np.uint16)
    h = test_mat.tobytes()
    # NOTE(review): typesize=16 is kept from the original; blosc typesize
    # is in bytes and uint16 items are 2 bytes -- confirm intent.
    bb = blosc.compress(h, typesize=16)

    # Create request
    factory = APIRequestFactory()
    request = factory.post(url, bb, content_type='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)
    # Make request
    response = Cutout.as_view()(request, **view_kwargs)
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    # Create Request to get data you posted
    request = factory.get(url, HTTP_ACCEPT='application/blosc')
    # log in user
    force_authenticate(request, user=self.user)
    # Make request
    response = Cutout.as_view()(request, **view_kwargs).render()
    self.assertEqual(response.status_code, status.HTTP_200_OK)

    # Decompress; frombuffer replaces the deprecated binary np.fromstring
    raw_data = blosc.decompress(response.content)
    data_mat = np.frombuffer(raw_data, dtype=np.uint16)
    data_mat = np.reshape(data_mat, (3, 17, 300, 500), order='C')

    # Test for data equality (what you put in is what you got back!)
    np.testing.assert_array_equal(data_mat, test_mat)
def test_get_blocksize(self):
    """The block size set before compressing must be reported back."""
    payload = b'0123456789' * 1000
    requested = 2**14
    blosc.set_blocksize(2**14)
    blosc.compress(payload, typesize=1)
    reported = blosc.get_blocksize()
    self.assertEqual(reported, requested)
def serializejson_ndarray(inst):
    """Return the ``(constructor, args, state)`` triple used to serialize ``inst``.

    Depending on ``serialize_parameters`` the array is emitted as:

    * ``"numpy.array"`` with a nested list, when the dtype has no fields and
      the array is small enough to stay human readable;
    * ``"numpyB64"`` with base64 data (blosc-compressed when that is
      configured and actually smaller), booleans being bit-packed first;
    * ``"numpy.ndarray"`` with a raw ``bytearray`` otherwise.

    Raises ``Exception`` when the configured compression has no blosc
    equivalent.
    """
    dtype = inst.dtype
    if dtype.fields is None:
        dtype_str = str(dtype)
        max_size = serialize_parameters.numpy_array_readable_max_size
        if isinstance(max_size, dict):
            # per-dtype limits; dtypes not listed are never emitted as lists
            max_size = max_size.get(dtype_str, 0)
        if max_size is None or inst.size <= max_size:
            # TO REVISIT: not great, because it will later be tested whether
            # all elements are of the same type.
            return ("numpy.array", (inst.tolist(), dtype_str), None)
    else:
        dtype_str = dtype.descr

    if not serialize_parameters.numpy_array_use_numpyB64:
        return (
            "numpy.ndarray",
            (list(inst.shape), dtype_str, bytearray(inst)),
            None,
        )

    is_vector = inst.ndim == 1
    if dtype == bool:
        # Bit-packing loses the element count, so 1-D arrays record their
        # length as well.
        data = numpy.packbits(inst.astype(numpy.uint8))
        len_or_shape = len(inst) if is_vector else list(inst.shape)
    else:
        data = inst
        len_or_shape = None if is_vector else list(inst.shape)
    # Optional middle element of the argument tuple.
    shape_part = () if len_or_shape is None else (len_or_shape,)

    compression = serialize_parameters.bytes_compression
    if compression and data.nbytes >= serialize_parameters.bytes_size_compression_threshold:
        blosc_compression = blosc_compressions.get(compression, None)
        if not blosc_compression:
            raise Exception(f"{compression} compression unknow")
        compressed = blosc.compress(
            numpy.ascontiguousarray(data),
            data.itemsize,
            cname=blosc_compression,
            clevel=serialize_parameters.bytes_compression_level,
        )
        compression = "blosc"
        # Only keep the compressed form when it is actually smaller.
        if len(compressed) < data.nbytes:
            return (
                "numpyB64",
                (b64encode_as_string(compressed), dtype_str)
                + shape_part
                + (compression,),
                None,
            )

    return (
        "numpyB64",
        (b64encode_as_string(numpy.ascontiguousarray(data)), dtype_str)
        + shape_part,
        None,
    )
def _compress(self, packed_binary_frame):
    """Compress ``packed_binary_frame`` with the configured scheme.

    ``self._input_params.compression_scheme`` selects the codec:
    0 zlib, 1 zstd (via ``self._compressor_context``), 2 lz4 frame,
    3 snappy, 4 bz2, 5 lzma, and 6-11 blosc with bit-shuffle using the
    zlib, zstd, lz4, snappy, blosclz and lz4hc backends respectively.

    Raises ``NotImplementedError`` for any other scheme.
    """
    params = self._input_params
    scheme = params.compression_scheme

    if scheme == 0:  # zlib
        return zlib.compress(packed_binary_frame, params.compression_level)
    if scheme == 1:  # zstd
        return self._compressor_context.compress(packed_binary_frame)
    if scheme == 2:  # lz4
        return lz4.frame.compress(
            packed_binary_frame,
            compression_level=params.compression_level,
            store_size=False)
    if scheme == 3:  # snappy
        return snappy.compress(packed_binary_frame)
    if scheme == 4:  # bzip
        return bz2.compress(packed_binary_frame,
                            compresslevel=params.compression_level)
    if scheme == 5:  # lzma
        return lzma.compress(packed_binary_frame,
                             preset=params.compression_level)

    # Schemes 6-11 differ only in the blosc backend name.
    blosc_backends = (
        (6, 'zlib'),
        (7, 'zstd'),
        (8, 'lz4'),
        (9, 'snappy'),
        (10, 'blosclz'),
        (11, 'lz4hc'),
    )
    for code, backend in blosc_backends:
        if scheme == code:
            return blosc.compress(packed_binary_frame,
                                  clevel=params.compression_level,
                                  cname=backend,
                                  shuffle=blosc.BITSHUFFLE)

    raise NotImplementedError('compression scheme not implemented')
def serialize_blosc(obj):
    """Pickle ``obj`` and return the blosc-compressed pickle bytes."""
    pickled = pickle.dumps(obj)
    return blosc.compress(pickled)