def test_decode_blosc_header():
    """Decode raw blosc headers for three cases: default (shuffled),
    shuffle deactivated, and uncompressible data.

    Fixes: ``np.linspace(..., 2e4)`` passed a float ``num`` (rejected by
    modern numpy) and ``.tostring()`` was removed in numpy 2.0 — use
    ``int(2e4)`` and ``.tobytes()`` (an exact alias of the old call).
    """
    # basic test case: compressible data, default args (shuffle on)
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
    # deactivate shuffle
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
    # uncompressible data: stored verbatim, only the header is added
    array_ = np.asarray(np.random.randn(23), dtype=np.float32).tobytes()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 88,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 3,  # 1 for shuffle 2 for non-compressed
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
def test_decode_blosc_header_basic():
    """Decode the header of a default-args compressed buffer and compare
    only the version-stable fields (sizes vary across blosc releases).

    Fixes: float ``num`` for ``np.linspace`` (rejected by modern numpy)
    and ``.tostring()`` (removed in numpy 2.0) — use ``int(2e4)`` and
    ``.tobytes()``.
    """
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    # compare only the keys present in 'expected'
    header_slice = dict((k, header[k]) for k in expected.keys())
    nt.assert_equal(expected, header_slice)
def test_decode_blosc_header():
    """Decode raw blosc headers: default (shuffled), shuffle off, and
    uncompressible input.

    Fixes: ``np.linspace(..., 2e4)`` passed a float ``num`` (rejected by
    modern numpy) and ``.tostring()`` was removed in numpy 2.0 — use
    ``int(2e4)`` and ``.tobytes()``.
    """
    # basic test case
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
    # deactivate shuffle
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
    # uncompressible data
    array_ = np.asarray(np.random.randn(23), dtype=np.float32).tobytes()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 88,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 3,  # 1 for shuffle 2 for non-compressed
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
def test_offsets():
    """Check the chunk-offset table written into a bloscpack file, both
    via the file API and via a file-pointer pack with max_app_chunks=0."""
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # the first chunk starts right after the header and the
            # offset table (8 bytes per prospective entry)
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 368207, 633319, 902306,
                             1173771, 1419535, 1666981, 1913995],
                            offsets)
            # the second offset must point at a valid blosc header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 262144,
                        'ctbytes': 265108,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)
    # same check again, this time without any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
        calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source, sink, nchunks, chunk_size, last_chunk_size,
         bloscpack_args=bloscpack_args)
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal([96, 367567, 632679, 901666,
                     1173131, 1418895, 1666341, 1913355],
                    offsets)
def test_decode_blosc_header_deactivate_shuffle():
    """With shuffle deactivated, the header flags must carry no shuffle bit."""
    array_ = np.ones(16000, dtype=np.uint8)
    blosc_args = BloscArgs()
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    # restrict the comparison to the keys listed in 'expected'
    header_slice = {key: header[key] for key in expected.keys()}
    nt.assert_equal(expected, header_slice)
def test_alternate_cname():
    """Each supported codec name must yield its expected id in the upper
    three bits of the header flags byte.

    Fixes: ``np.linspace(0, 1, 2e6)`` passed a float ``num``, which
    modern numpy rejects — use ``int(2e6)``.
    """
    for cname, int_id in [('blosclz', 0),
                          ('lz4', 1),
                          ('lz4hc', 1),
                          ('snappy', 2),
                          ('zlib', 3)]:
        blosc_args = BloscArgs(cname=cname)
        array_ = np.linspace(0, 1, int(2e6))
        sink = CompressedMemorySink()
        pack_ndarray(array_, sink, blosc_args=blosc_args)
        blosc_header = decode_blosc_header(sink.chunks[0])
        # codec id lives in bits 5-7 of the flags byte
        yield nt.assert_equal, blosc_header['flags'] >> 5, int_id
def test_decode_blosc_header_basic():
    """Decode the header of a default-args compressed buffer and compare
    only the version-stable fields.

    Fixes: ``.tostring()`` was removed in numpy 2.0 — ``.tobytes()`` is
    the exact, long-standing replacement.
    """
    array_ = np.linspace(0, 100, int(2e4)).tobytes()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    # compare only the keys present in 'expected'
    header_slice = dict((k, header[k]) for k in expected.keys())
    nt.assert_equal(expected, header_slice)
def test_decode_blosc_header_deactivate_shuffle():
    """Flags byte must be zero (no shuffle bit) when shuffle is turned off."""
    array_ = np.ones(16000, dtype=np.uint8)
    blosc_args = BloscArgs()
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    # only compare fields that are stable across blosc versions
    header_slice = {field: header[field] for field in expected.keys()}
    nt.assert_equal(expected, header_slice)
def test_decode_blosc_header_uncompressible_data():
    """Uncompressible input is stored verbatim plus a 16-byte header and
    the 'memcpyed' flag is set alongside shuffle/small-blocksize bits.

    Fixes: ``.tostring()`` was removed in numpy 2.0 — use ``.tobytes()``.
    """
    array_ = np.asarray(np.random.randn(255), dtype=np.float32).tobytes()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 1016,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 0x13,  # 1 for shuffle 2 for non-compressed 4 for small blocksize
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
def test_offsets():
    """Validate the offset table of a packed file and of an fp-based pack
    with max_app_chunks=0, comparing only version-stable header fields."""
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # the first chunk begins after the header plus the offset
            # table (8 bytes per prospective entry)
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            nt.assert_equal(offsets[0], first)
            nt.assert_equal(736, offsets[0])
            # the second offset must point at a valid blosc header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            # restrict the comparison to the stable fields above
            blosc_header_slice = dict((key, blosc_header[key])
                                      for key in expected.keys())
            nt.assert_equal(expected, blosc_header_slice)
    # repeat the check, this time without any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
        calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source, sink, nchunks, chunk_size, last_chunk_size,
         bloscpack_args=bloscpack_args)
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal(96, offsets[0])
def test_offsets():
    """Exercise the offset table: exact values for a packed file, then the
    same without max_app_chunks via the file-pointer API."""
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # chunks start right after the header and the offset table,
            # which holds 8 bytes per prospective entry
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 368207, 633319, 902306,
                             1173771, 1419535, 1666981, 1913995],
                            offsets)
            # seek to the second offset and decode the blosc header there
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 262144,
                        'ctbytes': 265108,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)
    # repeat the check, this time without any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
        calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source, sink, nchunks, chunk_size, last_chunk_size,
         bloscpack_args=bloscpack_args)
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal([96, 367567, 632679, 901666,
                     1173131, 1418895, 1666341, 1913355],
                    offsets)
def test_offsets():
    """Verify chunk offsets for a packed file and for a file-pointer pack
    with max_app_chunks=0 (expected values for 128 KiB blocksize build)."""
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # first chunk sits just past the header and the 8-byte-per-entry
            # offset table
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 418578, 736870, 1050327,
                             1363364, 1660766, 1959218, 2257703],
                            offsets)
            # the second offset must land on a decodable blosc header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 131072,
                        'ctbytes': 318288,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)
    # repeat the check, this time without any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
        calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source, sink, nchunks, chunk_size, last_chunk_size,
         bloscpack_args=bloscpack_args)
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal([96, 417938, 736230, 1049687,
                     1362724, 1660126, 1958578, 2257063],
                    offsets)
def test_decode_blosc_header_uncompressible_data_dont_split_false():
    """Uncompressible input with a 1024-byte block: stored verbatim plus
    16 header bytes, flags carry shuffle + non-compressed bits only.

    Fixes: ``.tostring()`` was removed in numpy 2.0 — use ``.tobytes()``.
    """
    array_ = np.asarray(np.random.randn(256), dtype=np.float32).tobytes()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'blocksize': 1024,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'flags': 0x3,  # 1 for shuffle 2 for non-compressed
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)
def test_offsets():
    """Check the first offset and the second chunk's blosc header for a
    packed file, then an fp-based pack with max_app_chunks=0; only
    version-stable header fields are compared."""
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = _read_bloscpack_header(input_fp)
            total_entries = bloscpack_header.total_prospective_chunks
            offsets = _read_offsets(input_fp, bloscpack_header)
            # first chunk follows the header and the offset table
            # (8 bytes per prospective entry)
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            nt.assert_equal(offsets[0], first)
            nt.assert_equal(736, offsets[0])
            # the second offset must point at a valid blosc header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'version': 2,
                        'flags': 1,
                        'nbytes': 2097152,
                        'typesize': 8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            # compare just the fields listed in 'expected'
            blosc_header_slice = dict((key, blosc_header[key])
                                      for key in expected.keys())
            nt.assert_equal(expected, blosc_header_slice)
    # repeat the check, this time without any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
        calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = BloscpackArgs(max_app_chunks=0)
    source = PlainFPSource(input_fp)
    sink = CompressedFPSink(output_fp)
    pack(source, sink, nchunks, chunk_size, last_chunk_size,
         bloscpack_args=bloscpack_args)
    output_fp.seek(0, 0)
    bloscpack_header = _read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header.max_app_chunks)
    offsets = _read_offsets(output_fp, bloscpack_header)
    nt.assert_equal(96, offsets[0])