def pack_unpack_fp(repeats, nchunks=None, chunk_size=None,
                   progress=False, metadata=None):
    blosc_args = DEFAULT_BLOSC_ARGS
    offsets = DEFAULT_OFFSETS
    checksum = DEFAULT_CHECKSUM
    in_fp, out_fp, dcmp_fp = StringIO(), StringIO(), StringIO()
    if progress:
        print("Creating test array")
    create_array_fp(repeats, in_fp, progress=progress)
    in_fp_size = in_fp.tell()
    if progress:
        print("Compressing")
    in_fp.seek(0)
    bloscpack._pack_fp(in_fp, out_fp, in_fp_size,
                       blosc_args, metadata,
                       nchunks, chunk_size,
                       offsets, checksum,
                       DEFAULT_METADATA_ARGS)
    out_fp.seek(0)
    if progress:
        print("Decompressing")
    metadata = bloscpack._unpack_fp(out_fp, dcmp_fp)
    if progress:
        print("Verifying")
    cmp_fp(in_fp, dcmp_fp)
    if metadata:
        return metadata
def test_offsets():
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        bloscpack.pack_file(in_file, out_file, chunk_size='2M')
        with open(out_file, 'r+b') as input_fp:
            bloscpack_header = bloscpack._read_bloscpack_header(input_fp)
            total_entries = bloscpack_header['nchunks'] + \
                bloscpack_header['max_app_chunks']
            offsets = bloscpack._read_offsets(input_fp, bloscpack_header)
            # First chunk should start after header and offsets
            first = BLOSCPACK_HEADER_LENGTH + 8 * total_entries
            # We assume that the others are correct
            nt.assert_equal(offsets[0], first)
            nt.assert_equal([736, 418578, 736870, 1050327,
                             1363364, 1660766, 1959218, 2257703],
                            offsets)
            # try to read the second header
            input_fp.seek(offsets[1], 0)
            blosc_header_raw = input_fp.read(BLOSC_HEADER_LENGTH)
            expected = {'versionlz': 1,
                        'blocksize': 131072,
                        'ctbytes':   318288,
                        'version':   2,
                        'flags':     1,
                        'nbytes':    2097152,
                        'typesize':  8}
            blosc_header = decode_blosc_header(blosc_header_raw)
            nt.assert_equal(expected, blosc_header)

    # now check the same thing again, but w/o any max_app_chunks
    input_fp, output_fp = StringIO(), StringIO()
    create_array_fp(1, input_fp)
    nchunks, chunk_size, last_chunk_size = \
        calculate_nchunks(input_fp.tell(), chunk_size='2M')
    input_fp.seek(0, 0)
    bloscpack_args = DEFAULT_BLOSCPACK_ARGS.copy()
    bloscpack_args['max_app_chunks'] = 0
    bloscpack._pack_fp(input_fp, output_fp,
                       nchunks, chunk_size, last_chunk_size,
                       bloscpack_args=bloscpack_args
                       )
    output_fp.seek(0, 0)
    bloscpack_header = bloscpack._read_bloscpack_header(output_fp)
    nt.assert_equal(0, bloscpack_header['max_app_chunks'])
    offsets = bloscpack._read_offsets(output_fp, bloscpack_header)
    nt.assert_equal([96, 417938, 736230, 1049687,
                     1362724, 1660126, 1958578, 2257063],
                    offsets)
def test_metadata_opportunisitic_compression():
    # make up some metadata that can be compressed with benefit
    test_metadata = ("{'dtype': 'float64', 'shape': [1024], 'others': [],"
                     "'original_container': 'carray'}")
    in_fp, out_fp, dcmp_fp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, in_fp)
    in_fp_size = in_fp.tell()
    in_fp.seek(0)
    bloscpack._pack_fp(in_fp, out_fp, in_fp_size,
                       DEFAULT_BLOSC_ARGS, test_metadata,
                       1, None,
                       DEFAULT_OFFSETS, DEFAULT_CHECKSUM,
                       DEFAULT_METADATA_ARGS)
    out_fp.seek(0)
    raw_header = out_fp.read(32)
    header = decode_bloscpack_header(raw_header)
    raw_options = header['options']
    options = decode_options(raw_options)
    #nt.assert_true(options['compress_meta'])

    # now do the same thing, but use badly compressible metadata
    test_metadata = "abc"
    in_fp, out_fp, dcmp_fp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, in_fp)
    in_fp_size = in_fp.tell()
    in_fp.seek(0)
    bloscpack._pack_fp(in_fp, out_fp, in_fp_size,
                       DEFAULT_BLOSC_ARGS, test_metadata,
                       1, None,
                       DEFAULT_OFFSETS, DEFAULT_CHECKSUM,
                       DEFAULT_METADATA_ARGS)
    out_fp.seek(0)
    raw_header = out_fp.read(32)
    header = decode_bloscpack_header(raw_header)
    raw_options = header['options']
    options = decode_options(raw_options)
def pack_unpack_fp(repeats, chunk_size=DEFAULT_CHUNK_SIZE,
                   progress=False, metadata=None):
    in_fp, out_fp, dcmp_fp = StringIO(), StringIO(), StringIO()
    if progress:
        print("Creating test array")
    create_array_fp(repeats, in_fp, progress=progress)
    in_fp_size = in_fp.tell()
    if progress:
        print("Compressing")
    in_fp.seek(0)
    nchunks, chunk_size, last_chunk_size = \
        calculate_nchunks(in_fp_size, chunk_size)
    bloscpack._pack_fp(in_fp, out_fp,
                       nchunks, chunk_size, last_chunk_size,
                       metadata=metadata)
    out_fp.seek(0)
    if progress:
        print("Decompressing")
    metadata = bloscpack._unpack_fp(out_fp, dcmp_fp)
    if progress:
        print("Verifying")
    cmp_fp(in_fp, dcmp_fp)
    if metadata:
        return metadata
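# Hedged usage sketch, not part of the original suite: the test name below is
# made up, and it assumes that the metadata returned by pack_unpack_fp (i.e. by
# bloscpack._unpack_fp) compares equal to the metadata that was packed. It only
# illustrates how the in-memory round-trip helper above might be driven.
def test_pack_unpack_fp_with_metadata():
    test_metadata = {'dtype': 'float64', 'shape': [1024], 'others': []}
    # pack_unpack_fp only returns a value when metadata was supplied
    received = pack_unpack_fp(1, metadata=test_metadata)
    nt.assert_equal(test_metadata, received)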