def test_typesize_is_set_correctly_with_custom_blosc_args():
    a = np.array([1, 2, 3], dtype='uint8')
    sink = CompressedMemorySink()
    input_args = BloscArgs(clevel=9)
    pack_ndarray(a, sink, blosc_args=input_args)
    expected_args = BloscArgs(clevel=9, typesize=1)
    nt.assert_equal(expected_args, sink.blosc_args)

def test_decode_blosc_header_deactivate_shuffle():
    array_ = np.ones(16000, dtype=np.uint8)
    blosc_args = BloscArgs()
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    header_slice = dict((k, header[k]) for k in expected.keys())
    nt.assert_equal(expected, header_slice)

def test_decode_blosc_header_uncompressible_data():
    array_ = np.asarray(np.random.randn(255), dtype=np.float32).tostring()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 1016,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 0x13,  # 0x1 shuffle | 0x2 non-compressed | 0x10 small blocksize
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)

def test_decode_blosc_header_uncompressible_data_dont_split_false():
    array_ = np.asarray(np.random.randn(256), dtype=np.float32).tostring()
    blosc_args = BloscArgs()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'blocksize': 1024,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'flags': 0x3,  # 0x1 shuffle | 0x2 non-compressed
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)

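# A minimal sketch of what decode_blosc_header presumably does with the
# 16-byte blosc header inspected by the tests above. The layout assumed
# here is the one documented for c-blosc: four single bytes (version,
# versionlz, flags, typesize) followed by three little-endian uint32
# values (nbytes, blocksize, ctbytes). _decode_blosc_header_sketch is a
# hypothetical helper for illustration only, not part of bloscpack.
def _decode_blosc_header_sketch(compressed):
    import struct  # stdlib; would normally live at module top
    (version, versionlz, flags, typesize,
     nbytes, blocksize, ctbytes) = struct.unpack('<BBBBIII', compressed[:16])
    return {'version': version, 'versionlz': versionlz,
            'flags': flags, 'typesize': typesize,
            'nbytes': nbytes, 'blocksize': blocksize,
            'ctbytes': ctbytes}
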
def test_invalid_format():
    # NOTE: for FormatVersionMismatch to be raised, the format version
    # written by pack must differ from the one the reader accepts; this
    # assumes the surrounding suite patches the module-level format
    # version before this test runs, otherwise unpacking the freshly
    # packed file would simply succeed.
    blosc_args = BloscArgs()
    with create_tmp_files() as (tdir, in_file, out_file, dcmp_file):
        create_array(1, in_file)
        pack_file_to_file(in_file, out_file, blosc_args=blosc_args)
        nt.assert_raises(FormatVersionMismatch, unpack_file_from_file,
                         out_file, dcmp_file)

def test_mixing_clevel():
    # the first set of chunks has max compression
    blosc_args = BloscArgs(clevel=9)
    orig, new, new_size, dcmp = prep_array_for_append(blosc_args=blosc_args)
    # get the original size
    orig.seek(0, 2)
    orig_size = orig.tell()
    orig.seek(0)
    # get a backup of the settings
    bloscpack_header, metadata, metadata_header, offsets = \
        reset_read_beginning(orig)
    # compressed size of the last chunk, including checksum
    last_chunk_compressed_size = orig_size - offsets[-1]

    # do append
    # use the typesize from the file and
    # make the second set of chunks have no compression
    blosc_args = BloscArgs(typesize=None, clevel=0)
    nchunks = append_fp(orig, new, new_size, blosc_args=blosc_args)

    # get the final size
    orig.seek(0, 2)
    final_size = orig.tell()
    orig.seek(0)

    # the original file minus the compressed size of the last chunk
    discounted_orig_size = orig_size - last_chunk_compressed_size
    # size of the appended data
    # * raw new size, since we have no compression
    # * uncompressed size of the last chunk
    # * nchunks + 1 times the blosc and checksum overhead
    appended_size = new_size + bloscpack_header['last_chunk'] + \
        (nchunks + 1) * (16 + 4)
    # final size should be original plus appended data
    nt.assert_equal(final_size, appended_size + discounted_orig_size)

    # check by unpacking
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()
    nt.assert_equal(len(dcmp_str), len(new_str * 2))
    nt.assert_equal(dcmp_str, new_str * 2)

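# The size bookkeeping in test_mixing_clevel assumes a fixed per-chunk
# overhead: a 16-byte blosc header plus a 4-byte checksum, i.e. the
# (16 + 4) term above. A hypothetical helper spelling that out, under
# the same assumptions:
def _expected_appended_size(raw_size, last_chunk_size, nchunks,
                            blosc_header_size=16, checksum_size=4):
    # clevel=0 stores the appended data uncompressed, so it contributes
    # its raw size; the rewritten last chunk contributes its uncompressed
    # size; each of the nchunks appended chunks, plus that rewritten last
    # chunk, carries the header-and-checksum overhead
    return (raw_size + last_chunk_size +
            (nchunks + 1) * (blosc_header_size + checksum_size))
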
def test_decode_blosc_header():
    array_ = np.linspace(0, 100, int(2e4)).tostring()
    # basic test case
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)

    # deactivate shuffle
    blosc_args.shuffle = False
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 160000,
                'ctbytes': len(compressed),
                'version': 2,
                'flags': 0,  # no shuffle flag
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)

    # uncompressible data
    array_ = np.asarray(np.random.randn(23), dtype=np.float32).tostring()
    blosc_args.shuffle = True
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'blocksize': 88,
                'ctbytes': len(array_) + 16,  # original + 16 header bytes
                'version': 2,
                'flags': 3,  # 0x1 shuffle | 0x2 non-compressed
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    nt.assert_equal(expected, header)

def test_append_mix_shuffle():
    orig, new, new_size, dcmp = prep_array_for_append()
    # use the typesize from the file
    # deactivate shuffle
    # crank up the clevel to ensure compression happens, otherwise the flags
    # will be screwed later on
    blosc_args = BloscArgs(typesize=None, shuffle=False, clevel=9)
    # need to create something that will be compressible even without shuffle,
    # the linspace used in 'new' doesn't work anymore as of python-blosc 1.6.1
    to_append = np.zeros(int(2e6))
    to_append_fp = StringIO()
    to_append_fp.write(to_append.tostring())
    to_append_fp_size = to_append_fp.tell()
    to_append_fp.seek(0)

    # now do the append
    reset_append_fp(orig, to_append_fp, to_append_fp_size,
                    blosc_args=blosc_args)

    # decompress 'orig' so that we can examine it
    source = CompressedFPSource(orig)
    sink = PlainFPSink(dcmp)
    unpack(source, sink)
    orig.seek(0)
    dcmp.seek(0)
    new.seek(0)
    new_str = new.read()
    dcmp_str = dcmp.read()
    # now sanity check the length and content of the decompressed
    nt.assert_equal(len(dcmp_str), len(new_str) + to_append_fp_size)
    nt.assert_equal(dcmp_str, new_str + to_append.tostring())

    # now get the first and the last chunk and check that the shuffle doesn't
    # match
    bloscpack_header, offsets = reset_read_beginning(orig)[0:4:3]
    orig.seek(offsets[0])
    checksum_impl = CHECKSUMS_LOOKUP[bloscpack_header['checksum']]
    compressed_zero, blosc_header_zero, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_zero = blosc.decompress(compressed_zero)
    orig.seek(offsets[-1])
    compressed_last, blosc_header_last, digest = \
        _read_compressed_chunk_fp(orig, checksum_impl)
    decompressed_last = blosc.decompress(compressed_last)
    # first chunk has shuffle active
    nt.assert_equal(blosc_header_zero['flags'], 1)
    # last chunk doesn't
    nt.assert_equal(blosc_header_last['flags'], 0)

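# Both flag assertions above inspect bit 0 of the blosc header flags,
# which c-blosc uses to record whether byte-shuffling was applied. A tiny
# illustrative predicate (hypothetical, not part of bloscpack):
def _shuffle_active(blosc_header):
    return bool(blosc_header['flags'] & 0x1)
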
def test_alternate_cname():
    for cname, int_id in [
            ('blosclz', 0),
            ('lz4', 1),
            ('lz4hc', 1),
            ('snappy', 2),
            ('zlib', 3),
            ]:
        blosc_args = BloscArgs(cname=cname)
        array_ = np.linspace(0, 1, int(2e6))
        sink = CompressedMemorySink()
        pack_ndarray(array_, sink, blosc_args=blosc_args)
        blosc_header = decode_blosc_header(sink.chunks[0])
        yield nt.assert_equal, blosc_header['flags'] >> 5, int_id

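# The `flags >> 5` in test_alternate_cname relies on c-blosc storing the
# compressor id in the top three bits (5-7) of the flags byte; note that
# lz4 and lz4hc share an id since they share a decompressor. A sketch of
# the extraction (hypothetical helper):
def _codec_id(flags):
    return (flags >> 5) & 0x7  # bits 5-7 hold the compressor code
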
def test_decode_blosc_header_basic():
    array_ = np.linspace(0, 100, int(2e4)).tostring()
    blosc_args = BloscArgs()
    compressed = blosc.compress(array_, **blosc_args)
    header = decode_blosc_header(compressed)
    expected = {'versionlz': 1,
                'version': 2,
                'flags': 1,
                'nbytes': len(array_),
                'typesize': blosc_args.typesize}
    header_slice = dict((k, header[k]) for k in expected.keys())
    nt.assert_equal(expected, header_slice)

def prep_array_for_append(blosc_args=BloscArgs(),
                          bloscpack_args=BloscpackArgs()):
    # NB: the default argument instances are created once, at definition
    # time, and shared across calls that don't pass their own args
    orig, new, dcmp = StringIO(), StringIO(), StringIO()
    create_array_fp(1, new)
    new_size = new.tell()
    new.seek(0)
    chunking = calculate_nchunks(new_size)
    source = PlainFPSource(new)
    sink = CompressedFPSink(orig)
    pack(source, sink, *chunking,
         blosc_args=blosc_args, bloscpack_args=bloscpack_args)
    orig.seek(0)
    new.seek(0)
    return orig, new, new_size, dcmp

def test_typesize_is_set_correctly_with_default_blosc_args():
    a = np.array([1, 2, 3], dtype='uint8')
    sink = CompressedMemorySink()
    pack_ndarray(a, sink)
    expected_args = BloscArgs(typesize=1)
    nt.assert_equal(expected_args, sink.blosc_args)

def test_init(self):
    blosc_args = BloscArgs()
    self.assertEqual(DEFAULT_TYPESIZE, blosc_args.typesize)
    self.assertEqual(DEFAULT_CLEVEL, blosc_args.clevel)
    self.assertEqual(DEFAULT_SHUFFLE, blosc_args.shuffle)
    self.assertEqual(DEFAULT_CNAME, blosc_args.cname)

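# BloscArgs acts as a mapping of keyword arguments for blosc.compress,
# which is why the tests above can write blosc.compress(array_,
# **blosc_args). A minimal usage sketch relying only on the defaults
# asserted in test_init:
def _compress_with_defaults(data):
    blosc_args = BloscArgs()  # default typesize, clevel, shuffle and cname
    return blosc.compress(data, **blosc_args)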