Exemplo n.º 1
0
    def test_buffer_with_segments_collection_input(self):
        """Compress a ``BufferWithSegmentsCollection`` in one batch call.

        Five inputs are split across two ``BufferWithSegments`` objects and
        wrapped in a collection. Each item of the batch result must equal the
        frame produced by compressing the matching input on its own.
        """
        compressor = zstd.ZstdCompressor(write_checksum=True)

        # b'foo1', b'foo2' * 2, ..., b'foo5' * 5
        inputs = [('foo%d' % n).encode('ascii') * n for n in range(1, 6)]

        # Reference frames, one per input, produced individually.
        expected_frames = [compressor.compress(item) for item in inputs]

        sizes = [len(item) for item in inputs]

        # Segments are (offset, length) pairs of unsigned 64-bit integers.
        first = zstd.BufferWithSegments(
            b''.join(inputs[:2]),
            struct.pack('=QQQQ',
                        0, sizes[0],
                        sizes[0], sizes[1]))
        second = zstd.BufferWithSegments(
            b''.join(inputs[2:]),
            struct.pack('=QQQQQQ',
                        0, sizes[2],
                        sizes[2], sizes[3],
                        sizes[2] + sizes[3], sizes[4]))

        collection = zstd.BufferWithSegmentsCollection(first, second)

        compressed = compressor.multi_compress_to_buffer(collection)

        self.assertEqual(len(compressed), len(expected_frames))

        for index, expected in enumerate(expected_frames):
            self.assertEqual(compressed[index].tobytes(), expected)
Exemplo n.º 2
0
    def test_argument_validation(self):
        """Check that ``BufferWithSegmentsCollection`` rejects bad arguments.

        Expects ``TypeError`` when any argument is not a
        ``BufferWithSegments`` and ``ValueError`` for an empty buffer.
        """
        # ``assertRaisesRegex`` is used instead of the deprecated
        # ``assertRaisesRegexp`` alias, which was removed in Python 3.12.
        with self.assertRaisesRegex(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(None)

        # An invalid argument following a valid one must also be rejected.
        with self.assertRaisesRegex(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
                                              None)

        with self.assertRaisesRegex(ValueError, 'ZstdBufferWithSegments cannot be empty'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
Exemplo n.º 3
0
    def test_arguments(self):
        """Check ``BufferWithSegments`` constructor argument validation.

        Expects ``TypeError`` when called with zero or one argument and
        ``ValueError`` when the segments data has an invalid size.
        """
        with self.assertRaises(TypeError):
            zstd.BufferWithSegments()

        with self.assertRaises(TypeError):
            zstd.BufferWithSegments(b'foo')

        # Segments data should be a multiple of 16.
        # ``assertRaisesRegex`` replaces the deprecated ``assertRaisesRegexp``
        # alias, which was removed in Python 3.12.
        with self.assertRaisesRegex(ValueError, 'segments array size is not a multiple of 16'):
            zstd.BufferWithSegments(b'foo', b'\x00\x00')
Exemplo n.º 4
0
    def test_arguments(self):
        """Check ``BufferWithSegments`` constructor argument validation.

        Expects ``TypeError`` when called with zero or one argument and
        ``ValueError`` when the segments data has an invalid size.
        """
        # Calls with too few positional arguments.
        for incomplete_args in ((), (b"foo",)):
            with self.assertRaises(TypeError):
                zstd.BufferWithSegments(*incomplete_args)

        # Segments data should be a multiple of 16; two bytes is not.
        size_error = "segments array size is not a multiple of 16"
        with self.assertRaisesRegex(ValueError, size_error):
            zstd.BufferWithSegments(b"foo", b"\x00\x00")
    def test_buffer_with_segments_collection_input(self):
        """Decompress frames supplied as a ``BufferWithSegmentsCollection``.

        Five inputs are batch-compressed and round-tripped through
        ``multi_decompress_to_buffer``. The same frames are then repacked by
        hand into two ``BufferWithSegments`` objects, wrapped in a collection
        and decompressed again.
        """
        compressor = zstd.ZstdCompressor()

        # b"foo0" * 2, b"foo1" * 3, ..., b"foo4" * 6
        inputs = [("foo%d" % n).encode("ascii") * (n + 2) for n in range(5)]

        frames = compressor.multi_compress_to_buffer(inputs)

        # Check round trip.
        decompressor = zstd.ZstdDecompressor()
        round_trip = decompressor.multi_decompress_to_buffer(frames, threads=3)

        self.assertEqual(len(round_trip), len(inputs))

        for index, expected in enumerate(inputs):
            self.assertEqual(expected, round_trip[index].tobytes())

        # And a manual mode: rebuild the segment tables ourselves.
        frame_sizes = [len(frames[index]) for index in range(5)]
        frame_data = [frames[index].tobytes() for index in range(5)]

        first = zstd.BufferWithSegments(
            b"".join(frame_data[:2]),
            struct.pack(
                "=QQQQ",
                0, frame_sizes[0],
                frame_sizes[0], frame_sizes[1],
            ),
        )

        second = zstd.BufferWithSegments(
            b"".join(frame_data[2:]),
            struct.pack(
                "=QQQQQQ",
                0, frame_sizes[2],
                frame_sizes[2], frame_sizes[3],
                frame_sizes[2] + frame_sizes[3], frame_sizes[4],
            ),
        )

        collection = zstd.BufferWithSegmentsCollection(first, second)

        decompressor = zstd.ZstdDecompressor()
        decompressed = decompressor.multi_decompress_to_buffer(collection)

        self.assertEqual(len(decompressed), 5)
        for index, expected in enumerate(inputs):
            self.assertEqual(decompressed[index].tobytes(), expected)
Exemplo n.º 6
0
    def test_arguments(self):
        """Check ``BufferWithSegments`` constructor argument validation.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``. Expects ``TypeError`` when called with zero
        or one argument and ``ValueError`` for an invalid segments size.
        """
        if not hasattr(zstd, 'BufferWithSegments'):
            self.skipTest('BufferWithSegments not available')

        with self.assertRaises(TypeError):
            zstd.BufferWithSegments()

        with self.assertRaises(TypeError):
            zstd.BufferWithSegments(b'foo')

        # Segments data should be a multiple of 16.
        # ``assertRaisesRegex`` replaces the deprecated ``assertRaisesRegexp``
        # alias, which was removed in Python 3.12.
        with self.assertRaisesRegex(ValueError, 'segments array size is not a multiple of 16'):
            zstd.BufferWithSegments(b'foo', b'\x00\x00')
Exemplo n.º 7
0
    def test_argument_validation(self):
        """Check that ``BufferWithSegmentsCollection`` rejects bad arguments.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegmentsCollection``. Expects ``TypeError`` when any
        argument is not a ``BufferWithSegments`` and ``ValueError`` for an
        empty buffer.
        """
        if not hasattr(zstd, 'BufferWithSegmentsCollection'):
            self.skipTest('BufferWithSegmentsCollection not available')

        # ``assertRaisesRegex`` is used instead of the deprecated
        # ``assertRaisesRegexp`` alias, which was removed in Python 3.12.
        with self.assertRaisesRegex(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(None)

        # An invalid argument following a valid one must also be rejected.
        with self.assertRaisesRegex(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
                                              None)

        with self.assertRaisesRegex(ValueError, 'ZstdBufferWithSegments cannot be empty'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
    def test_arguments(self):
        """Check ``BufferWithSegments`` constructor argument validation.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``. Expects ``TypeError`` when called with zero
        or one argument and ``ValueError`` for an invalid segments size.
        """
        if not hasattr(zstd, "BufferWithSegments"):
            self.skipTest("BufferWithSegments not available")

        with self.assertRaises(TypeError):
            zstd.BufferWithSegments()

        with self.assertRaises(TypeError):
            zstd.BufferWithSegments(b"foo")

        # Segments data should be a multiple of 16.
        # ``assertRaisesRegex`` replaces the deprecated ``assertRaisesRegexp``
        # alias, which was removed in Python 3.12.
        with self.assertRaisesRegex(
                ValueError, "segments array size is not a multiple of 16"):
            zstd.BufferWithSegments(b"foo", b"\x00\x00")
Exemplo n.º 9
0
    def test_argument_validation(self):
        """Check that ``BufferWithSegmentsCollection`` rejects bad arguments.

        Expects ``TypeError`` when any argument is not a
        ``BufferWithSegments`` and ``ValueError`` for an empty buffer.
        """
        wrong_type = "arguments must be BufferWithSegments"

        with self.assertRaisesRegex(TypeError, wrong_type):
            zstd.BufferWithSegmentsCollection(None)

        # A valid buffer followed by an invalid argument.
        with self.assertRaisesRegex(TypeError, wrong_type):
            valid = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
            zstd.BufferWithSegmentsCollection(valid, None)

        # The buffer is built inside the context so that an error raised by
        # either constructor is checked against the pattern.
        with self.assertRaisesRegex(
                ValueError, "ZstdBufferWithSegments cannot be empty"):
            empty = zstd.BufferWithSegments(b"", b"")
            zstd.BufferWithSegmentsCollection(empty)
Exemplo n.º 10
0
    def test_length(self):
        """Check ``len()`` and ``size()`` of ``BufferWithSegmentsCollection``.

        A one-segment buffer and a two-segment buffer are wrapped alone and
        together; the collection must report the summed segment count and
        the summed byte size.
        """
        one_segment = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        two_segments = zstd.BufferWithSegments(
            b'barbaz', b''.join([ss.pack(0, 3), ss.pack(3, 3)]))

        # (buffers passed to the collection, expected len(), expected size())
        cases = (
            ((one_segment,), 1, 3),
            ((two_segments,), 2, 6),
            ((one_segment, two_segments), 3, 9),
        )

        for members, expected_len, expected_size in cases:
            collection = zstd.BufferWithSegmentsCollection(*members)
            self.assertEqual(len(collection), expected_len)
            self.assertEqual(collection.size(), expected_size)
Exemplo n.º 11
0
def bench_discrete_compression(chunks,
                               zparams,
                               cover=False,
                               dict_data=None,
                               batch_threads=None):
    """Time each discrete compression benchmark over ``chunks``.

    Iterates the benchmark functions returned by
    ``get_benches("discrete", "compress")``, runs each through ``timer`` and
    reports the outcome through ``format_results``.

    Args:
        chunks: Input chunks to compress; ``len()`` is taken of each one.
        zparams: Compression parameters, forwarded to every benchmark.
        cover: Only affects the report prefix when ``dict_data`` is truthy.
        dict_data: When truthy, the report prefix mentions a dictionary.
        batch_threads: Passed as ``threads`` to benchmarks whose
            ``threads_arg`` attribute is truthy.
    """
    total_size = sum(len(chunk) for chunk in chunks)

    if not dict_data:
        prefix = "compress discrete"
    elif cover:
        prefix = "compress discrete cover dict"
    else:
        prefix = "compress discrete dict"

    for fn in get_benches("discrete", "compress"):
        kwargs = {"threads": batch_threads} if fn.threads_arg else {}

        if fn.chunks_as_buffer:
            # Pack the chunks into one contiguous buffer plus a table of
            # (offset, length) pairs of unsigned 64-bit integers.
            segment = struct.Struct("=QQ")
            table = []
            position = 0
            for chunk in chunks:
                table.append(segment.pack(position, len(chunk)))
                position += len(chunk)

            chunks_arg = zstd.BufferWithSegments(b"".join(chunks),
                                                 b"".join(table))
        else:
            chunks_arg = chunks

        # The lambda is executed by ``timer`` before the next iteration
        # rebinds the loop variables it closes over.
        results = timer(lambda: fn(chunks_arg, zparams, **kwargs))
        format_results(results, fn.title, prefix, total_size)
Exemplo n.º 12
0
def bench_discrete_compression(chunks, opts, cover=False, threads=None):
    """Time each discrete compression benchmark over ``chunks``.

    Iterates the benchmark functions returned by
    ``get_benches('discrete', 'compress')``, runs each through ``timer`` and
    reports the outcome through ``format_results``.

    Args:
        chunks: Input chunks to compress; ``len()`` is taken of each one.
        opts: Compressor options, forwarded to every benchmark. The presence
            of a ``'dict_data'`` key changes the report prefix.
        cover: Only affects the report prefix when ``opts`` has
            ``'dict_data'``.
        threads: Passed as ``threads`` to benchmarks whose ``threads_arg``
            attribute is truthy.
    """
    total_size = sum(len(chunk) for chunk in chunks)

    if 'dict_data' not in opts:
        prefix = 'compress discrete'
    elif cover:
        prefix = 'compress discrete cover dict'
    else:
        prefix = 'compress discrete dict'

    for fn in get_benches('discrete', 'compress'):
        kwargs = {'threads': threads} if fn.threads_arg else {}

        if fn.chunks_as_buffer:
            # Pack the chunks into one contiguous buffer plus a table of
            # (offset, length) pairs of unsigned 64-bit integers.
            segment = struct.Struct('=QQ')
            table = []
            position = 0
            for chunk in chunks:
                table.append(segment.pack(position, len(chunk)))
                position += len(chunk)

            chunks_arg = zstd.BufferWithSegments(b''.join(chunks),
                                                 b''.join(table))
        else:
            chunks_arg = chunks

        # The lambda is executed by ``timer`` before the next iteration
        # rebinds the loop variables it closes over.
        results = timer(lambda: fn(chunks_arg, opts, **kwargs))
        format_results(results, fn.title, prefix, total_size)
    def test_invalid_offset(self):
        """Check that a segment extending past the data raises ``ValueError``.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``.
        """
        if not hasattr(zstd, "BufferWithSegments"):
            self.skipTest("BufferWithSegments not available")

        # The data is 3 bytes long but the segment packed here is (0, 4).
        # ``assertRaisesRegex`` replaces the deprecated ``assertRaisesRegexp``
        # alias, which was removed in Python 3.12.
        with self.assertRaisesRegex(
                ValueError, "offset within segments array references memory"):
            zstd.BufferWithSegments(b"foo", ss.pack(0, 4))
Exemplo n.º 14
0
    def test_getitem(self):
        """Check indexing into a ``BufferWithSegmentsCollection``.

        A one-segment and a two-segment buffer are combined. Indexes 0-2
        must yield the segments in order, and indexes 3 and 4 must raise
        ``IndexError``.
        """
        b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
                                                          ss.pack(3, 3)]))

        c = zstd.BufferWithSegmentsCollection(b1, b2)

        # ``assertRaisesRegex`` replaces the deprecated ``assertRaisesRegexp``
        # alias, which was removed in Python 3.12.
        with self.assertRaisesRegex(IndexError, 'offset must be less than 3'):
            c[3]

        with self.assertRaisesRegex(IndexError, 'offset must be less than 3'):
            c[4]

        self.assertEqual(c[0].tobytes(), b'foo')
        self.assertEqual(c[1].tobytes(), b'bar')
        self.assertEqual(c[2].tobytes(), b'baz')
    def test_buffer_with_segments_sizes(self):
        """Batch-decompress frames using an explicit ``decompressed_sizes``.

        The compressor is created with ``write_content_size=False`` and the
        original lengths are passed to ``multi_decompress_to_buffer`` as
        packed unsigned 64-bit integers.
        """
        compressor = zstd.ZstdCompressor(write_content_size=False)
        inputs = [b"foo" * 4, b"bar" * 6, b"baz" * 8]
        frames = [compressor.compress(item) for item in inputs]

        # One "Q" per input, native byte order with standard sizes.
        input_lengths = [len(item) for item in inputs]
        sizes = struct.pack("=%dQ" % len(inputs), *input_lengths)

        decompressor = zstd.ZstdDecompressor()

        # (offset, length) pairs locating each frame in the joined buffer.
        frame_lengths = [len(frame) for frame in frames]
        second_offset = frame_lengths[0]
        third_offset = second_offset + frame_lengths[1]
        segments = struct.pack(
            "=QQQQQQ",
            0, frame_lengths[0],
            second_offset, frame_lengths[1],
            third_offset, frame_lengths[2],
        )
        packed_frames = zstd.BufferWithSegments(b"".join(frames), segments)

        result = decompressor.multi_decompress_to_buffer(
            packed_frames, decompressed_sizes=sizes)

        self.assertEqual(len(result), len(frames))
        self.assertEqual(result.size(), sum(input_lengths))

        for index, expected in enumerate(inputs):
            self.assertEqual(result[index].tobytes(), expected)
    def test_argument_validation(self):
        """Check that ``BufferWithSegmentsCollection`` rejects bad arguments.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegmentsCollection``. Expects ``TypeError`` when any
        argument is not a ``BufferWithSegments`` and ``ValueError`` for an
        empty buffer.
        """
        if not hasattr(zstd, "BufferWithSegmentsCollection"):
            self.skipTest("BufferWithSegmentsCollection not available")

        # ``assertRaisesRegex`` is used instead of the deprecated
        # ``assertRaisesRegexp`` alias, which was removed in Python 3.12.
        with self.assertRaisesRegex(TypeError,
                                    "arguments must be BufferWithSegments"):
            zstd.BufferWithSegmentsCollection(None)

        # An invalid argument following a valid one must also be rejected.
        with self.assertRaisesRegex(TypeError,
                                    "arguments must be BufferWithSegments"):
            zstd.BufferWithSegmentsCollection(
                zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None)

        with self.assertRaisesRegex(ValueError,
                                    "ZstdBufferWithSegments cannot be empty"):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(
                b"", b""))
Exemplo n.º 17
0
    def test_getitem(self):
        """Check indexing into a ``BufferWithSegmentsCollection``.

        A one-segment and a two-segment buffer are combined. Indexes 0-2
        must yield the segments in order, and indexes 3 and 4 must raise
        ``IndexError``.
        """
        head = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
        tail = zstd.BufferWithSegments(
            b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)]))

        collection = zstd.BufferWithSegmentsCollection(head, tail)

        # Indexes at or beyond the total segment count.
        for out_of_range in (3, 4):
            with self.assertRaisesRegex(IndexError,
                                        "offset must be less than 3"):
                collection[out_of_range]

        for index, expected in enumerate((b"foo", b"bar", b"baz")):
            self.assertEqual(collection[index].tobytes(), expected)
Exemplo n.º 18
0
    def test_single(self):
        """Check a ``BufferWithSegments`` holding exactly one segment."""
        buf = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        # Properties of the buffer as a whole.
        whole_checks = (
            (len(buf), 1),
            (buf.size, 3),
            (buf.tobytes(), b'foo'),
        )
        for actual, expected in whole_checks:
            self.assertEqual(actual, expected)

        # Properties of the only segment, fetched anew for each check.
        self.assertEqual(len(buf[0]), 3)
        self.assertEqual(buf[0].offset, 0)
        self.assertEqual(buf[0].tobytes(), b'foo')
Exemplo n.º 19
0
    def test_length(self):
        """Check ``len()`` and ``size()`` of ``BufferWithSegmentsCollection``.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegmentsCollection``. A one-segment buffer and a
        two-segment buffer are wrapped alone and together; the collection
        must report the summed segment count and the summed byte size.
        """
        if not hasattr(zstd, 'BufferWithSegmentsCollection'):
            self.skipTest('BufferWithSegmentsCollection not available')

        one_segment = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        two_segments = zstd.BufferWithSegments(
            b'barbaz', b''.join([ss.pack(0, 3), ss.pack(3, 3)]))

        # (buffers passed to the collection, expected len(), expected size())
        cases = (
            ((one_segment,), 1, 3),
            ((two_segments,), 2, 6),
            ((one_segment, two_segments), 3, 9),
        )

        for members, expected_len, expected_size in cases:
            collection = zstd.BufferWithSegmentsCollection(*members)
            self.assertEqual(len(collection), expected_len)
            self.assertEqual(collection.size(), expected_size)
Exemplo n.º 20
0
    def test_multiple(self):
        """Check a ``BufferWithSegments`` holding three adjacent segments."""
        # Segments of length 3, 4 and 5 laid out back to back.
        segment_table = b''.join([
            ss.pack(0, 3),
            ss.pack(3, 4),
            ss.pack(7, 5),
        ])
        buf = zstd.BufferWithSegments(b'foofooxfooxy', segment_table)

        self.assertEqual(len(buf), 3)
        self.assertEqual(buf.size, 12)
        self.assertEqual(buf.tobytes(), b'foofooxfooxy')

        for index, expected in enumerate((b'foo', b'foox', b'fooxy')):
            self.assertEqual(buf[index].tobytes(), expected)
Exemplo n.º 21
0
    def test_invalid_getitem(self):
        """Check that out-of-range indexes on ``BufferWithSegments`` raise.

        A one-segment buffer is indexed with -10, 1 and 2; each access must
        raise ``IndexError`` with the matching message.
        """
        b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        # ``assertRaisesRegex`` replaces the deprecated ``assertRaisesRegexp``
        # alias, which was removed in Python 3.12. The subscripts are bare
        # expressions because the access is expected to raise, so a bound
        # name would never be used.
        with self.assertRaisesRegex(IndexError, 'offset must be non-negative'):
            b[-10]

        with self.assertRaisesRegex(IndexError, 'offset must be less than 1'):
            b[1]

        with self.assertRaisesRegex(IndexError, 'offset must be less than 1'):
            b[2]
Exemplo n.º 22
0
    def test_length(self):
        """Check ``len()`` and ``size()`` of ``BufferWithSegmentsCollection``.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegmentsCollection``. A one-segment buffer and a
        two-segment buffer are wrapped alone and together; the collection
        must report the summed segment count and the summed byte size.
        """
        if not hasattr(zstd, "BufferWithSegmentsCollection"):
            self.skipTest("BufferWithSegmentsCollection not available")

        one_segment = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
        two_segments = zstd.BufferWithSegments(
            b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)])
        )

        # (buffers passed to the collection, expected len(), expected size())
        cases = (
            ((one_segment,), 1, 3),
            ((two_segments,), 2, 6),
            ((one_segment, two_segments), 3, 9),
        )

        for members, expected_len, expected_size in cases:
            collection = zstd.BufferWithSegmentsCollection(*members)
            self.assertEqual(len(collection), expected_len)
            self.assertEqual(collection.size(), expected_size)
Exemplo n.º 23
0
    def test_single(self):
        """Check a ``BufferWithSegments`` holding exactly one segment.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``.
        """
        if not hasattr(zstd, "BufferWithSegments"):
            self.skipTest("BufferWithSegments not available")

        buf = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))

        # Properties of the buffer as a whole.
        whole_checks = (
            (len(buf), 1),
            (buf.size, 3),
            (buf.tobytes(), b"foo"),
        )
        for actual, expected in whole_checks:
            self.assertEqual(actual, expected)

        # Properties of the only segment, fetched anew for each check.
        self.assertEqual(len(buf[0]), 3)
        self.assertEqual(buf[0].offset, 0)
        self.assertEqual(buf[0].tobytes(), b"foo")
Exemplo n.º 24
0
    def test_single(self):
        """Check a ``BufferWithSegments`` holding exactly one segment.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``.
        """
        if not hasattr(zstd, 'BufferWithSegments'):
            self.skipTest('BufferWithSegments not available')

        buf = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        # Properties of the buffer as a whole.
        whole_checks = (
            (len(buf), 1),
            (buf.size, 3),
            (buf.tobytes(), b'foo'),
        )
        for actual, expected in whole_checks:
            self.assertEqual(actual, expected)

        # Properties of the only segment, fetched anew for each check.
        self.assertEqual(len(buf[0]), 3)
        self.assertEqual(buf[0].offset, 0)
        self.assertEqual(buf[0].tobytes(), b'foo')
Exemplo n.º 25
0
    def test_invalid_getitem(self):
        """Check that out-of-range indexes on ``BufferWithSegments`` raise.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``. A one-segment buffer is indexed with -10, 1
        and 2; each access must raise ``IndexError`` with the matching
        message.
        """
        if not hasattr(zstd, "BufferWithSegments"):
            self.skipTest("BufferWithSegments not available")

        buf = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))

        # (index to access, expected error message pattern)
        bad_accesses = (
            (-10, "offset must be non-negative"),
            (1, "offset must be less than 1"),
            (2, "offset must be less than 1"),
        )

        for index, message in bad_accesses:
            with self.assertRaisesRegex(IndexError, message):
                buf[index]
Exemplo n.º 26
0
    def test_multiple(self):
        """Check a ``BufferWithSegments`` holding three adjacent segments."""
        # Segments of length 3, 4 and 5 laid out back to back.
        segment_table = b"".join([
            ss.pack(0, 3),
            ss.pack(3, 4),
            ss.pack(7, 5),
        ])
        buf = zstd.BufferWithSegments(b"foofooxfooxy", segment_table)

        self.assertEqual(len(buf), 3)
        self.assertEqual(buf.size, 12)
        self.assertEqual(buf.tobytes(), b"foofooxfooxy")

        for index, expected in enumerate((b"foo", b"foox", b"fooxy")):
            self.assertEqual(buf[index].tobytes(), expected)
Exemplo n.º 27
0
    def test_invalid_getitem(self):
        """Check that out-of-range indexes on ``BufferWithSegments`` raise.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``. A one-segment buffer is indexed with -10, 1
        and 2; each access must raise ``IndexError`` with the matching
        message.
        """
        if not hasattr(zstd, 'BufferWithSegments'):
            self.skipTest('BufferWithSegments not available')

        b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        # ``assertRaisesRegex`` replaces the deprecated ``assertRaisesRegexp``
        # alias, which was removed in Python 3.12. The subscripts are bare
        # expressions because the access is expected to raise, so a bound
        # name would never be used.
        with self.assertRaisesRegex(IndexError, 'offset must be non-negative'):
            b[-10]

        with self.assertRaisesRegex(IndexError, 'offset must be less than 1'):
            b[1]

        with self.assertRaisesRegex(IndexError, 'offset must be less than 1'):
            b[2]
Exemplo n.º 28
0
    def test_multiple(self):
        """Check a ``BufferWithSegments`` holding three adjacent segments.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``.
        """
        if not hasattr(zstd, 'BufferWithSegments'):
            self.skipTest('BufferWithSegments not available')

        # Segments of length 3, 4 and 5 laid out back to back.
        segment_table = b''.join([
            ss.pack(0, 3),
            ss.pack(3, 4),
            ss.pack(7, 5),
        ])
        buf = zstd.BufferWithSegments(b'foofooxfooxy', segment_table)

        self.assertEqual(len(buf), 3)
        self.assertEqual(buf.size, 12)
        self.assertEqual(buf.tobytes(), b'foofooxfooxy')

        for index, expected in enumerate((b'foo', b'foox', b'fooxy')):
            self.assertEqual(buf[index].tobytes(), expected)
Exemplo n.º 29
0
def bench_discrete_decompression(
    orig_chunks,
    compressed_chunks,
    total_size,
    zparams,
    dict_data=None,
    batch_threads=None,
):
    """Time each discrete decompression benchmark over ``compressed_chunks``.

    Iterates the benchmark functions returned by
    ``get_benches("discrete", "decompress")``, runs each through ``timer``
    and reports the outcome through ``format_results``.

    Args:
        orig_chunks: Uncompressed chunks; only their lengths are used, to
            build the ``decompressed_sizes`` argument.
        compressed_chunks: Compressed frames handed to each benchmark.
        total_size: Size value forwarded to ``format_results``.
        zparams: Compression parameters; benchmarks that require a content
            size are skipped when ``write_content_size`` is falsy.
        dict_data: When truthy, passed to benchmarks as the ``dict_data``
            option and mentioned in the report prefix.
        batch_threads: Passed as ``threads`` to benchmarks whose
            ``threads_arg`` attribute is truthy.
    """
    if dict_data:
        dopts = {"dict_data": dict_data}
        prefix = "decompress discrete dict"
    else:
        dopts = {}
        prefix = "decompress discrete"

    for fn in get_benches("discrete", "decompress"):
        if not zparams.write_content_size and fn.require_content_size:
            continue

        kwargs = {}
        if fn.threads_arg:
            kwargs["threads"] = batch_threads

        # Pass compressed frames in a BufferWithSegments rather than a list
        # of bytes.
        if fn.chunks_as_buffer:
            segment = struct.Struct("=QQ")
            table = []
            position = 0
            for frame in compressed_chunks:
                table.append(segment.pack(position, len(frame)))
                position += len(frame)

            chunks_arg = zstd.BufferWithSegments(
                b"".join(compressed_chunks), b"".join(table)
            )
        else:
            chunks_arg = compressed_chunks

        if fn.decompressed_sizes_arg:
            # Ideally we'd use array.array here. But Python 2 doesn't support the
            # Q format.
            size_struct = struct.Struct("=Q")
            packed_sizes = [size_struct.pack(len(chunk)) for chunk in orig_chunks]
            kwargs["decompressed_sizes"] = b"".join(packed_sizes)

        # The lambda is executed by ``timer`` before the next iteration
        # rebinds the loop variables it closes over.
        results = timer(lambda: fn(chunks_arg, dopts, **kwargs))
        format_results(results, fn.title, prefix, total_size)
Exemplo n.º 30
0
    def test_multiple(self):
        """Check a ``BufferWithSegments`` holding three adjacent segments.

        Skipped when the ``zstd`` module does not expose
        ``BufferWithSegments``.
        """
        if not hasattr(zstd, "BufferWithSegments"):
            self.skipTest("BufferWithSegments not available")

        # Segments of length 3, 4 and 5 laid out back to back.
        segment_table = b"".join([
            ss.pack(0, 3),
            ss.pack(3, 4),
            ss.pack(7, 5),
        ])
        buf = zstd.BufferWithSegments(b"foofooxfooxy", segment_table)

        self.assertEqual(len(buf), 3)
        self.assertEqual(buf.size, 12)
        self.assertEqual(buf.tobytes(), b"foofooxfooxy")

        for index, expected in enumerate((b"foo", b"foox", b"fooxy")):
            self.assertEqual(buf[index].tobytes(), expected)