def test_buffer_with_segments_collection_input(self):
    """multi_compress_to_buffer() accepts a BufferWithSegmentsCollection.

    Five inputs are split across two BufferWithSegments instances and the
    frames produced for the collection must equal the frames produced by
    compressing each input individually with the same compressor.
    """
    cctx = zstd.ZstdCompressor(write_checksum=True)

    original = [
        b'foo1',
        b'foo2' * 2,
        b'foo3' * 3,
        b'foo4' * 4,
        b'foo5' * 5,
    ]
    expected_frames = [cctx.compress(chunk) for chunk in original]

    # Inputs 0-1 go into the first buffer; the packed table holds one
    # (start, length) pair of 64-bit values per input.
    first_data = original[0] + original[1]
    first_table = struct.pack(
        '=QQQQ',
        0, len(original[0]),
        len(original[0]), len(original[1]),
    )
    first = zstd.BufferWithSegments(first_data, first_table)

    # Inputs 2-4 go into the second buffer, starts restarting at zero.
    second_data = original[2] + original[3] + original[4]
    second_table = struct.pack(
        '=QQQQQQ',
        0, len(original[2]),
        len(original[2]), len(original[3]),
        len(original[2]) + len(original[3]), len(original[4]),
    )
    second = zstd.BufferWithSegments(second_data, second_table)

    collection = zstd.BufferWithSegmentsCollection(first, second)
    result = cctx.multi_compress_to_buffer(collection)

    self.assertEqual(len(result), len(expected_frames))
    for index, expected in enumerate(expected_frames):
        self.assertEqual(result[index].tobytes(), expected)
def test_argument_validation(self):
    """BufferWithSegmentsCollection rejects non-buffer and empty arguments.

    Expects TypeError when any positional argument is None and ValueError
    when built from a BufferWithSegments created from empty data.
    """
    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+.
    with self.assertRaisesRegex(TypeError,
                                'arguments must be BufferWithSegments'):
        zstd.BufferWithSegmentsCollection(None)

    with self.assertRaisesRegex(TypeError,
                                'arguments must be BufferWithSegments'):
        zstd.BufferWithSegmentsCollection(
            zstd.BufferWithSegments(b'foo', ss.pack(0, 3)), None)

    with self.assertRaisesRegex(ValueError,
                                'ZstdBufferWithSegments cannot be empty'):
        zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
def test_arguments(self):
    """BufferWithSegments validates its constructor arguments.

    Calling it with zero or one argument must raise TypeError, and a
    two-byte segments argument must raise ValueError.
    """
    with self.assertRaises(TypeError):
        zstd.BufferWithSegments()

    with self.assertRaises(TypeError):
        zstd.BufferWithSegments(b'foo')

    # Segments data should be a multiple of 16.
    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+.
    with self.assertRaisesRegex(
            ValueError, 'segments array size is not a multiple of 16'):
        zstd.BufferWithSegments(b'foo', b'\x00\x00')
def test_arguments(self):
    """Check constructor argument validation of BufferWithSegments.

    Too few positional arguments must raise TypeError; a two-byte
    segments argument must raise ValueError with the expected message.
    """
    # Zero arguments, then only the data argument.
    for too_few in ((), (b"foo",)):
        with self.assertRaises(TypeError):
            zstd.BufferWithSegments(*too_few)

    # Segments data should be a multiple of 16.
    size_error = "segments array size is not a multiple of 16"
    with self.assertRaisesRegex(ValueError, size_error):
        zstd.BufferWithSegments(b"foo", b"\x00\x00")
def test_buffer_with_segments_collection_input(self):
    """multi_decompress_to_buffer() accepts a BufferWithSegmentsCollection.

    Frames produced by multi_compress_to_buffer() are first decompressed
    directly, then regrouped by hand into two BufferWithSegments wrapped
    in a collection; both paths must return the original inputs.
    """
    cctx = zstd.ZstdCompressor()

    original = [
        b"foo0" * 2,
        b"foo1" * 3,
        b"foo2" * 4,
        b"foo3" * 5,
        b"foo4" * 6,
    ]

    frames = cctx.multi_compress_to_buffer(original)

    # Check round trip.
    dctx = zstd.ZstdDecompressor()
    decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)

    self.assertEqual(len(decompressed), len(original))
    for index, expected in enumerate(original):
        self.assertEqual(expected, decompressed[index].tobytes())

    # And a manual mode.
    # Frames 0-1 form the first buffer.
    first_data = frames[0].tobytes() + frames[1].tobytes()
    first_table = struct.pack(
        "=QQQQ",
        0, len(frames[0]),
        len(frames[0]), len(frames[1]),
    )
    first = zstd.BufferWithSegments(first_data, first_table)

    # Frames 2-4 form the second buffer, starts restarting at zero.
    second_data = (
        frames[2].tobytes() + frames[3].tobytes() + frames[4].tobytes()
    )
    second_table = struct.pack(
        "=QQQQQQ",
        0, len(frames[2]),
        len(frames[2]), len(frames[3]),
        len(frames[2]) + len(frames[3]), len(frames[4]),
    )
    second = zstd.BufferWithSegments(second_data, second_table)

    collection = zstd.BufferWithSegmentsCollection(first, second)

    dctx = zstd.ZstdDecompressor()
    decompressed = dctx.multi_decompress_to_buffer(collection)

    self.assertEqual(len(decompressed), 5)
    for index, expected in enumerate(original):
        self.assertEqual(decompressed[index].tobytes(), expected)
def test_arguments(self):
    """BufferWithSegments validates its constructor arguments.

    Skipped when the zstd module does not expose BufferWithSegments.
    Calling it with zero or one argument must raise TypeError, and a
    two-byte segments argument must raise ValueError.
    """
    if not hasattr(zstd, 'BufferWithSegments'):
        self.skipTest('BufferWithSegments not available')

    with self.assertRaises(TypeError):
        zstd.BufferWithSegments()

    with self.assertRaises(TypeError):
        zstd.BufferWithSegments(b'foo')

    # Segments data should be a multiple of 16.
    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+.
    with self.assertRaisesRegex(
            ValueError, 'segments array size is not a multiple of 16'):
        zstd.BufferWithSegments(b'foo', b'\x00\x00')
def test_argument_validation(self):
    """BufferWithSegmentsCollection rejects non-buffer and empty arguments.

    Skipped when the zstd module does not expose
    BufferWithSegmentsCollection. Expects TypeError when any positional
    argument is None and ValueError when built from a BufferWithSegments
    created from empty data.
    """
    if not hasattr(zstd, 'BufferWithSegmentsCollection'):
        self.skipTest('BufferWithSegmentsCollection not available')

    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+.
    with self.assertRaisesRegex(TypeError,
                                'arguments must be BufferWithSegments'):
        zstd.BufferWithSegmentsCollection(None)

    with self.assertRaisesRegex(TypeError,
                                'arguments must be BufferWithSegments'):
        zstd.BufferWithSegmentsCollection(
            zstd.BufferWithSegments(b'foo', ss.pack(0, 3)), None)

    with self.assertRaisesRegex(ValueError,
                                'ZstdBufferWithSegments cannot be empty'):
        zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
def test_arguments(self):
    """BufferWithSegments validates its constructor arguments.

    Skipped when the zstd module does not expose BufferWithSegments.
    Calling it with zero or one argument must raise TypeError, and a
    two-byte segments argument must raise ValueError.
    """
    if not hasattr(zstd, "BufferWithSegments"):
        self.skipTest("BufferWithSegments not available")

    with self.assertRaises(TypeError):
        zstd.BufferWithSegments()

    with self.assertRaises(TypeError):
        zstd.BufferWithSegments(b"foo")

    # Segments data should be a multiple of 16.
    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+.
    with self.assertRaisesRegex(
            ValueError, "segments array size is not a multiple of 16"):
        zstd.BufferWithSegments(b"foo", b"\x00\x00")
def test_argument_validation(self):
    """Check that BufferWithSegmentsCollection validates its arguments.

    A None argument (alone or after a valid buffer) must raise TypeError;
    a BufferWithSegments created from empty data must raise ValueError.
    """
    type_error = "arguments must be BufferWithSegments"
    empty_error = "ZstdBufferWithSegments cannot be empty"

    with self.assertRaisesRegex(TypeError, type_error):
        zstd.BufferWithSegmentsCollection(None)

    # Buffers are built inside the context managers so that any error
    # raised while constructing them is also subject to the assertion.
    with self.assertRaisesRegex(TypeError, type_error):
        valid = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
        zstd.BufferWithSegmentsCollection(valid, None)

    with self.assertRaisesRegex(ValueError, empty_error):
        empty = zstd.BufferWithSegments(b"", b"")
        zstd.BufferWithSegmentsCollection(empty)
def test_length(self):
    """len() and size() of a collection reflect the buffers it wraps.

    A one-segment buffer (3 bytes) and a two-segment buffer (6 bytes) are
    wrapped individually and together; the collection must report the
    combined segment count and byte size.
    """
    one_segment = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
    two_segments = zstd.BufferWithSegments(
        b'barbaz', ss.pack(0, 3) + ss.pack(3, 3))

    # (buffers passed to the collection, expected len(), expected size())
    scenarios = (
        ((one_segment,), 1, 3),
        ((two_segments,), 2, 6),
        ((one_segment, two_segments), 3, 9),
    )

    for buffers, expected_len, expected_size in scenarios:
        collection = zstd.BufferWithSegmentsCollection(*buffers)
        self.assertEqual(len(collection), expected_len)
        self.assertEqual(collection.size(), expected_size)
def bench_discrete_compression(chunks, zparams, cover=False, dict_data=None,
                               batch_threads=None):
    """Run every discrete compression benchmark over ``chunks``.

    Each function returned by ``get_benches("discrete", "compress")`` is
    invoked through ``timer`` and its results are handed to
    ``format_results``.

    chunks: sequence of bytes-like inputs; iterated more than once.
    zparams: passed through unchanged as the second benchmark argument.
    cover: only affects the reported prefix, and only if ``dict_data``
        is truthy.
    dict_data: only its truthiness is used here, to choose the prefix.
    batch_threads: forwarded as ``threads=`` to benchmarks whose
        ``threads_arg`` attribute is truthy.
    """
    total_size = sum(len(chunk) for chunk in chunks)

    if not dict_data:
        prefix = "compress discrete"
    elif cover:
        prefix = "compress discrete cover dict"
    else:
        prefix = "compress discrete dict"

    for fn in get_benches("discrete", "compress"):
        kwargs = {"threads": batch_threads} if fn.threads_arg else {}

        if fn.chunks_as_buffer:
            # Hand the inputs over as one contiguous buffer plus a table
            # of packed (offset, length) pairs instead of a list.
            pair = struct.Struct("=QQ")
            table = []
            position = 0
            for chunk in chunks:
                table.append(pair.pack(position, len(chunk)))
                position += len(chunk)

            chunks_arg = zstd.BufferWithSegments(b"".join(chunks),
                                                 b"".join(table))
        else:
            chunks_arg = chunks

        results = timer(lambda: fn(chunks_arg, zparams, **kwargs))
        format_results(results, fn.title, prefix, total_size)
def bench_discrete_compression(chunks, opts, cover=False, threads=None):
    """Run every discrete compression benchmark over ``chunks``.

    Each function returned by ``get_benches('discrete', 'compress')`` is
    invoked through ``timer`` and its results are handed to
    ``format_results``.

    chunks: sequence of bytes-like inputs; iterated more than once.
    opts: passed through unchanged as the second benchmark argument; the
        presence of a ``'dict_data'`` key selects the reported prefix.
    cover: only affects the prefix, and only if ``'dict_data'`` is in
        ``opts``.
    threads: forwarded as ``threads=`` to benchmarks whose
        ``threads_arg`` attribute is truthy.
    """
    total_size = sum(len(chunk) for chunk in chunks)

    if 'dict_data' not in opts:
        prefix = 'compress discrete'
    elif cover:
        prefix = 'compress discrete cover dict'
    else:
        prefix = 'compress discrete dict'

    for fn in get_benches('discrete', 'compress'):
        kwargs = {'threads': threads} if fn.threads_arg else {}

        if fn.chunks_as_buffer:
            # Hand the inputs over as one contiguous buffer plus a table
            # of packed (offset, length) pairs instead of a list.
            pair = struct.Struct('=QQ')
            table = []
            position = 0
            for chunk in chunks:
                table.append(pair.pack(position, len(chunk)))
                position += len(chunk)

            chunks_arg = zstd.BufferWithSegments(b''.join(chunks),
                                                 b''.join(table))
        else:
            chunks_arg = chunks

        results = timer(lambda: fn(chunks_arg, opts, **kwargs))
        format_results(results, fn.title, prefix, total_size)
def test_invalid_offset(self):
    """A segment entry that does not fit the data raises ValueError.

    Skipped when the zstd module does not expose BufferWithSegments.
    The entry ``ss.pack(0, 4)`` is paired with only three bytes of data.
    """
    if not hasattr(zstd, "BufferWithSegments"):
        self.skipTest("BufferWithSegments not available")

    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+.
    with self.assertRaisesRegex(
            ValueError, "offset within segments array references memory"):
        zstd.BufferWithSegments(b"foo", ss.pack(0, 4))
def test_getitem(self):
    """Indexing a collection spans the segments of all wrapped buffers.

    With a one-segment and a two-segment buffer, indexes 0-2 return the
    individual segments and indexes 3 and 4 raise IndexError.
    """
    b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
    b2 = zstd.BufferWithSegments(
        b'barbaz', b''.join([ss.pack(0, 3), ss.pack(3, 3)]))

    c = zstd.BufferWithSegmentsCollection(b1, b2)

    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+.
    with self.assertRaisesRegex(IndexError, 'offset must be less than 3'):
        c[3]

    with self.assertRaisesRegex(IndexError, 'offset must be less than 3'):
        c[4]

    self.assertEqual(c[0].tobytes(), b'foo')
    self.assertEqual(c[1].tobytes(), b'bar')
    self.assertEqual(c[2].tobytes(), b'baz')
def test_buffer_with_segments_sizes(self):
    """multi_decompress_to_buffer() honours ``decompressed_sizes``.

    Frames are written with ``write_content_size=False`` and the original
    lengths are supplied as packed 64-bit values; the result must match
    the original inputs in count, total size and content.
    """
    cctx = zstd.ZstdCompressor(write_content_size=False)

    original = [b"foo" * 4, b"bar" * 6, b"baz" * 8]
    frames = [cctx.compress(chunk) for chunk in original]

    # One 64-bit length per original input.
    lengths = [len(chunk) for chunk in original]
    sizes = struct.pack("=%dQ" % len(original), *lengths)

    dctx = zstd.ZstdDecompressor()

    # One (start, length) pair per frame within the joined data.
    table_values = []
    start = 0
    for frame in frames:
        table_values.extend((start, len(frame)))
        start += len(frame)
    segments = struct.pack("=QQQQQQ", *table_values)

    buf = zstd.BufferWithSegments(b"".join(frames), segments)
    result = dctx.multi_decompress_to_buffer(buf, decompressed_sizes=sizes)

    self.assertEqual(len(result), len(frames))
    self.assertEqual(result.size(), sum(len(chunk) for chunk in original))

    for index, expected in enumerate(original):
        self.assertEqual(result[index].tobytes(), expected)
def test_argument_validation(self):
    """BufferWithSegmentsCollection rejects non-buffer and empty arguments.

    Skipped when the zstd module does not expose
    BufferWithSegmentsCollection. Expects TypeError when any positional
    argument is None and ValueError when built from a BufferWithSegments
    created from empty data.
    """
    if not hasattr(zstd, "BufferWithSegmentsCollection"):
        self.skipTest("BufferWithSegmentsCollection not available")

    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+.
    with self.assertRaisesRegex(TypeError,
                                "arguments must be BufferWithSegments"):
        zstd.BufferWithSegmentsCollection(None)

    with self.assertRaisesRegex(TypeError,
                                "arguments must be BufferWithSegments"):
        zstd.BufferWithSegmentsCollection(
            zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None)

    with self.assertRaisesRegex(ValueError,
                                "ZstdBufferWithSegments cannot be empty"):
        zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(
            b"", b""))
def test_getitem(self):
    """Check indexing of a collection built from two buffers.

    Indexes 0-2 must yield the three wrapped segments in order; indexes 3
    and 4 must raise IndexError.
    """
    single = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
    double = zstd.BufferWithSegments(b"barbaz",
                                     ss.pack(0, 3) + ss.pack(3, 3))

    collection = zstd.BufferWithSegmentsCollection(single, double)

    for out_of_range in (3, 4):
        with self.assertRaisesRegex(IndexError,
                                    "offset must be less than 3"):
            collection[out_of_range]

    for index, expected in enumerate((b"foo", b"bar", b"baz")):
        self.assertEqual(collection[index].tobytes(), expected)
def test_single(self):
    """A buffer with one segment exposes that segment and its metadata."""
    payload = b'foo'
    buf = zstd.BufferWithSegments(payload, ss.pack(0, 3))

    # Whole-buffer view.
    self.assertEqual(len(buf), 1)
    self.assertEqual(buf.size, 3)
    self.assertEqual(buf.tobytes(), b'foo')

    # View of the only segment.
    self.assertEqual(len(buf[0]), 3)
    self.assertEqual(buf[0].offset, 0)
    self.assertEqual(buf[0].tobytes(), b'foo')
def test_length(self):
    """len() and size() of a collection reflect the buffers it wraps.

    Skipped when the zstd module does not expose
    BufferWithSegmentsCollection.
    """
    if not hasattr(zstd, 'BufferWithSegmentsCollection'):
        self.skipTest('BufferWithSegmentsCollection not available')

    one_segment = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
    two_segments = zstd.BufferWithSegments(
        b'barbaz', ss.pack(0, 3) + ss.pack(3, 3))

    # (buffers passed to the collection, expected len(), expected size())
    scenarios = (
        ((one_segment,), 1, 3),
        ((two_segments,), 2, 6),
        ((one_segment, two_segments), 3, 9),
    )

    for buffers, expected_len, expected_size in scenarios:
        collection = zstd.BufferWithSegmentsCollection(*buffers)
        self.assertEqual(len(collection), expected_len)
        self.assertEqual(collection.size(), expected_size)
def test_multiple(self):
    """A buffer with three segments exposes each segment by index."""
    payload = b'foofooxfooxy'
    layout = ((0, 3), (3, 4), (7, 5))
    table = b''.join([ss.pack(start, length) for start, length in layout])

    buf = zstd.BufferWithSegments(payload, table)

    self.assertEqual(len(buf), 3)
    self.assertEqual(buf.size, 12)
    self.assertEqual(buf.tobytes(), b'foofooxfooxy')

    for index, expected in enumerate((b'foo', b'foox', b'fooxy')):
        self.assertEqual(buf[index].tobytes(), expected)
def test_invalid_getitem(self):
    """Out-of-range indexes on a one-segment buffer raise IndexError.

    Index -10 must be reported as negative; indexes 1 and 2 must be
    reported as not less than the segment count.
    """
    b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+. The
    # subscripts are evaluated only for the exception they raise, so the
    # results are not bound to a name.
    with self.assertRaisesRegex(IndexError, 'offset must be non-negative'):
        b[-10]

    with self.assertRaisesRegex(IndexError, 'offset must be less than 1'):
        b[1]

    with self.assertRaisesRegex(IndexError, 'offset must be less than 1'):
        b[2]
def test_length(self):
    """Check len() and size() of collections built from one or two buffers.

    Skipped when the zstd module does not expose
    BufferWithSegmentsCollection.
    """
    if not hasattr(zstd, "BufferWithSegmentsCollection"):
        self.skipTest("BufferWithSegmentsCollection not available")

    def check(expected_len, expected_size, *buffers):
        # Wrap the given buffers and verify both reported measures.
        collection = zstd.BufferWithSegmentsCollection(*buffers)
        self.assertEqual(len(collection), expected_len)
        self.assertEqual(collection.size(), expected_size)

    small = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
    large = zstd.BufferWithSegments(
        b"barbaz", ss.pack(0, 3) + ss.pack(3, 3)
    )

    check(1, 3, small)
    check(2, 6, large)
    check(3, 9, small, large)
def test_single(self):
    """A buffer with one segment exposes that segment and its metadata.

    Skipped when the zstd module does not expose BufferWithSegments.
    """
    if not hasattr(zstd, "BufferWithSegments"):
        self.skipTest("BufferWithSegments not available")

    payload = b"foo"
    buf = zstd.BufferWithSegments(payload, ss.pack(0, 3))

    # Whole-buffer view.
    self.assertEqual(len(buf), 1)
    self.assertEqual(buf.size, 3)
    self.assertEqual(buf.tobytes(), b"foo")

    # View of the only segment.
    self.assertEqual(len(buf[0]), 3)
    self.assertEqual(buf[0].offset, 0)
    self.assertEqual(buf[0].tobytes(), b"foo")
def test_single(self):
    """Check a BufferWithSegments that holds exactly one segment.

    Skipped when the zstd module does not expose BufferWithSegments.
    """
    if not hasattr(zstd, 'BufferWithSegments'):
        self.skipTest('BufferWithSegments not available')

    table = ss.pack(0, 3)
    buf = zstd.BufferWithSegments(b'foo', table)

    # Properties of the buffer as a whole.
    self.assertEqual(len(buf), 1)
    self.assertEqual(buf.size, 3)
    self.assertEqual(buf.tobytes(), b'foo')

    # Properties of the segment at index 0.
    self.assertEqual(len(buf[0]), 3)
    self.assertEqual(buf[0].offset, 0)
    self.assertEqual(buf[0].tobytes(), b'foo')
def test_invalid_getitem(self):
    """Out-of-range indexes on a one-segment buffer raise IndexError.

    Skipped when the zstd module does not expose BufferWithSegments.
    """
    if not hasattr(zstd, "BufferWithSegments"):
        self.skipTest("BufferWithSegments not available")

    buf = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))

    # (index to try, message the IndexError must match)
    failures = (
        (-10, "offset must be non-negative"),
        (1, "offset must be less than 1"),
        (2, "offset must be less than 1"),
    )

    for index, message in failures:
        with self.assertRaisesRegex(IndexError, message):
            buf[index]
def test_multiple(self):
    """Check a BufferWithSegments holding three adjacent segments."""
    table = ss.pack(0, 3) + ss.pack(3, 4) + ss.pack(7, 5)
    buf = zstd.BufferWithSegments(b"foofooxfooxy", table)

    self.assertEqual(len(buf), 3)
    self.assertEqual(buf.size, 12)
    self.assertEqual(buf.tobytes(), b"foofooxfooxy")

    expected_segments = (b"foo", b"foox", b"fooxy")
    for index, expected in enumerate(expected_segments):
        self.assertEqual(buf[index].tobytes(), expected)
def test_invalid_getitem(self):
    """Out-of-range indexes on a one-segment buffer raise IndexError.

    Skipped when the zstd module does not expose BufferWithSegments.
    Index -10 must be reported as negative; indexes 1 and 2 must be
    reported as not less than the segment count.
    """
    if not hasattr(zstd, 'BufferWithSegments'):
        self.skipTest('BufferWithSegments not available')

    b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

    # assertRaisesRegex is used instead of the assertRaisesRegexp alias,
    # which is deprecated and no longer exists on Python 3.12+. The
    # subscripts are evaluated only for the exception they raise, so the
    # results are not bound to a name.
    with self.assertRaisesRegex(IndexError, 'offset must be non-negative'):
        b[-10]

    with self.assertRaisesRegex(IndexError, 'offset must be less than 1'):
        b[1]

    with self.assertRaisesRegex(IndexError, 'offset must be less than 1'):
        b[2]
def test_multiple(self):
    """A buffer with three segments exposes each segment by index.

    Skipped when the zstd module does not expose BufferWithSegments.
    """
    if not hasattr(zstd, 'BufferWithSegments'):
        self.skipTest('BufferWithSegments not available')

    payload = b'foofooxfooxy'
    layout = ((0, 3), (3, 4), (7, 5))
    table = b''.join([ss.pack(start, length) for start, length in layout])

    buf = zstd.BufferWithSegments(payload, table)

    self.assertEqual(len(buf), 3)
    self.assertEqual(buf.size, 12)
    self.assertEqual(buf.tobytes(), b'foofooxfooxy')

    for index, expected in enumerate((b'foo', b'foox', b'fooxy')):
        self.assertEqual(buf[index].tobytes(), expected)
def bench_discrete_decompression(
    orig_chunks,
    compressed_chunks,
    total_size,
    zparams,
    dict_data=None,
    batch_threads=None,
):
    """Run every discrete decompression benchmark over the given frames.

    Each function returned by ``get_benches("discrete", "decompress")``
    is invoked through ``timer`` and its results are handed to
    ``format_results``.

    orig_chunks: uncompressed inputs; only their lengths are used, for
        benchmarks whose ``decompressed_sizes_arg`` attribute is truthy.
    compressed_chunks: sequence of bytes-like compressed inputs.
    total_size: passed through unchanged to ``format_results``.
    zparams: its ``write_content_size`` attribute decides whether
        benchmarks with a truthy ``require_content_size`` are run.
    dict_data: when truthy, passed to each benchmark as
        ``{"dict_data": dict_data}`` and reflected in the prefix.
    batch_threads: forwarded as ``threads=`` to benchmarks whose
        ``threads_arg`` attribute is truthy.
    """
    if dict_data:
        dopts = {"dict_data": dict_data}
        prefix = "decompress discrete dict"
    else:
        dopts = {}
        prefix = "decompress discrete"

    for fn in get_benches("discrete", "decompress"):
        # Skip benchmarks that require a content size the frames lack.
        runnable = zparams.write_content_size or not fn.require_content_size
        if not runnable:
            continue

        kwargs = {"threads": batch_threads} if fn.threads_arg else {}

        # Pass compressed frames in a BufferWithSegments rather than a list
        # of bytes.
        if fn.chunks_as_buffer:
            pair = struct.Struct("=QQ")
            table = []
            position = 0
            for frame in compressed_chunks:
                table.append(pair.pack(position, len(frame)))
                position += len(frame)

            chunks_arg = zstd.BufferWithSegments(
                b"".join(compressed_chunks), b"".join(table)
            )
        else:
            chunks_arg = compressed_chunks

        if fn.decompressed_sizes_arg:
            # Ideally we'd use array.array here. But Python 2 doesn't
            # support the Q format.
            pack_size = struct.Struct("=Q").pack
            kwargs["decompressed_sizes"] = b"".join(
                [pack_size(len(chunk)) for chunk in orig_chunks]
            )

        results = timer(lambda: fn(chunks_arg, dopts, **kwargs))
        format_results(results, fn.title, prefix, total_size)
def test_multiple(self):
    """Check a BufferWithSegments holding three adjacent segments.

    Skipped when the zstd module does not expose BufferWithSegments.
    """
    if not hasattr(zstd, "BufferWithSegments"):
        self.skipTest("BufferWithSegments not available")

    table = ss.pack(0, 3) + ss.pack(3, 4) + ss.pack(7, 5)
    buf = zstd.BufferWithSegments(b"foofooxfooxy", table)

    self.assertEqual(len(buf), 3)
    self.assertEqual(buf.size, 12)
    self.assertEqual(buf.tobytes(), b"foofooxfooxy")

    expected_segments = (b"foo", b"foox", b"fooxy")
    for index, expected in enumerate(expected_segments):
        self.assertEqual(buf[index].tobytes(), expected)