示例#1
0
    def test_invalid_type(self):
        """Check the exception raised when a non-bytes value is passed.

        ``None`` is always expected to raise ``TypeError``. For a unicode
        string the expected exception depends on the interpreter major
        version and, on Python 2, on ``zstd.backend``.
        """
        with self.assertRaises(TypeError):
            zstd.get_frame_parameters(None)

        # Python 3 doesn't appear to convert unicode to Py_buffer. On
        # Python 2, CPython will convert unicode to Py_buffer but CFFI won't,
        # so only that last combination expects ZstdError.
        if sys.version_info[0] >= 3 or zstd.backend == 'cffi':
            unicode_error = TypeError
        else:
            unicode_error = zstd.ZstdError

        with self.assertRaises(unicode_error):
            zstd.get_frame_parameters(u'foobarbaz')
    def test_multithreaded(self):
        """Compress with a 2-thread compressor and inspect the frame header.

        The header is expected to record the full input length, no
        dictionary id and no checksum, and the frame must decompress back
        to the original input.
        """
        half = multithreaded_chunk_size(1)
        payload = b'x' * half + b'y' * half

        frame = zstd.ZstdCompressor(level=1, threads=2).compress(payload)

        header = zstd.get_frame_parameters(frame)
        self.assertEqual(header.content_size, half * 2)
        self.assertEqual(header.dict_id, 0)
        self.assertFalse(header.has_checksum)

        restored = zstd.ZstdDecompressor().decompress(frame)
        self.assertEqual(restored, payload)
    def test_write_checksum(self):
        """Compare copy_stream() output with and without ``write_checksum``.

        The checksummed frame is expected to be exactly four bytes longer,
        and only its header should report ``has_checksum``.
        """
        raw = io.BytesIO(b'foobar')

        plain_out = io.BytesIO()
        zstd.ZstdCompressor(level=1).copy_stream(raw, plain_out)

        # Rewind so the second compressor sees the same input.
        raw.seek(0)
        checked_out = io.BytesIO()
        checksumming = zstd.ZstdCompressor(level=1, write_checksum=True)
        checksumming.copy_stream(raw, checked_out)

        plain_frame = plain_out.getvalue()
        checked_frame = checked_out.getvalue()
        self.assertEqual(len(checked_frame), len(plain_frame) + 4)

        plain_params = zstd.get_frame_parameters(plain_frame)
        checked_params = zstd.get_frame_parameters(checked_frame)
        self.assertEqual(plain_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(checked_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(plain_params.dict_id, 0)
        self.assertEqual(checked_params.dict_id, 0)
        self.assertFalse(plain_params.has_checksum)
        self.assertTrue(checked_params.has_checksum)
    def test_empty(self):
        """Stream an empty write and check the exact frame bytes and header."""
        sink = io.BytesIO()
        compressor_ctx = zstd.ZstdCompressor(level=1, write_content_size=False)
        with compressor_ctx.stream_writer(sink) as writer:
            writer.write(b'')

        frame = sink.getvalue()
        expected_frame = b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00'
        self.assertEqual(frame, expected_frame)

        header = zstd.get_frame_parameters(frame)
        self.assertEqual(header.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(header.window_size, 524288)
        self.assertEqual(header.dict_id, 0)
        self.assertFalse(header.has_checksum)
    def test_invalid_type(self):
        """Check the exception raised when a non-bytes value is passed.

        ``None`` is always expected to raise ``TypeError``. For a unicode
        string the expected exception depends on the interpreter major
        version and, on Python 2, on ``zstd.backend``.
        """
        with self.assertRaises(TypeError):
            zstd.get_frame_parameters(None)

        # Python 3 doesn't appear to convert unicode to Py_buffer. On
        # Python 2, CPython will convert unicode to Py_buffer but CFFI won't,
        # so only that last combination expects ZstdError.
        if sys.version_info[0] >= 3 or zstd.backend == 'cffi':
            unicode_error = TypeError
        else:
            unicode_error = zstd.ZstdError

        with self.assertRaises(unicode_error):
            zstd.get_frame_parameters(u'foobarbaz')
示例#6
0
    def test_write_content_size(self):
        """Check when stream_writer() records the content size in the header.

        Three frames are produced from the same payload: one with
        ``write_content_size=False``, one with the default compressor but no
        declared size, and one with the size declared via ``size=``.
        """
        payload = b"foobar" * 256

        no_size = io.BytesIO()
        unsized_ctx = zstd.ZstdCompressor(level=1, write_content_size=False)
        with unsized_ctx.stream_writer(no_size, closefd=False) as writer:
            self.assertEqual(writer.write(payload), len(payload))

        undeclared = io.BytesIO()
        sized_ctx = zstd.ZstdCompressor(level=1)
        with sized_ctx.stream_writer(undeclared, closefd=False) as writer:
            self.assertEqual(writer.write(payload), len(payload))

        # Source size is not known in streaming mode, so header not
        # written.
        self.assertEqual(len(undeclared.getvalue()), len(no_size.getvalue()))

        # Declaring size will write the header.
        declared = io.BytesIO()
        with sized_ctx.stream_writer(
            declared, size=len(payload), closefd=False
        ) as writer:
            self.assertEqual(writer.write(payload), len(payload))

        plain_frame = no_size.getvalue()
        sized_frame = declared.getvalue()

        plain_params = zstd.get_frame_parameters(plain_frame)
        sized_params = zstd.get_frame_parameters(sized_frame)
        self.assertEqual(plain_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(sized_params.content_size, 1536)
        self.assertEqual(plain_params.dict_id, 0)
        self.assertEqual(sized_params.dict_id, 0)
        self.assertFalse(plain_params.has_checksum)
        self.assertFalse(sized_params.has_checksum)

        # The declared-size frame is expected to be one byte longer.
        self.assertEqual(len(sized_frame), len(plain_frame) + 1)
    def test_read_large(self):
        """Drive read_to_iter() over an input one byte past the input size.

        Checks that reads from the source happen lazily, that exactly two
        chunks are produced, that the iterator stays exhausted, and that the
        streamed output equals one-shot compression of the same bytes.
        """
        cctx = zstd.ZstdCompressor(level=1)
        input_size = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE

        # One byte more than the recommended input size, positioned at 0.
        source = io.BytesIO(b'f' * input_size + b'o')
        total_size = len(source.getvalue())

        # Creating an iterator should not perform any compression until
        # first read.
        it = cctx.read_to_iter(source, size=total_size)
        self.assertEqual(source.tell(), 0)

        # We should have exactly 2 output chunks.
        first = next(it)
        self.assertIsNotNone(first)
        self.assertEqual(source.tell(), input_size)
        second = next(it)
        self.assertIsNotNone(second)

        self.assertEqual(source.tell(), total_size)

        # Exhausted now, and again for good measure.
        for _ in range(2):
            with self.assertRaises(StopIteration):
                next(it)

        # We should get the same output as the one-shot compression mechanism.
        streamed = b''.join((first, second))
        self.assertEqual(streamed, cctx.compress(source.getvalue()))

        header = zstd.get_frame_parameters(streamed)
        self.assertEqual(header.content_size, 0)
        self.assertEqual(header.window_size, 262144)
        self.assertEqual(header.dict_id, 0)
        self.assertFalse(header.has_checksum)

        # Now check the buffer protocol.
        buffered = list(cctx.read_to_iter(source.getvalue()))
        self.assertEqual(len(buffered), 2)
        self.assertEqual(b''.join(buffered), cctx.compress(source.getvalue()))
    def test_compress_empty(self):
        """Compress an empty input and check the frame bytes and header.

        Also checks that a compressor created with ``write_content_size=True``
        rejects an empty input with ``ValueError`` unless ``allow_empty=True``
        is passed.
        """
        cctx = zstd.ZstdCompressor(level=1)
        result = cctx.compress(b'')
        self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
        params = zstd.get_frame_parameters(result)
        self.assertEqual(params.content_size, 0)
        self.assertEqual(params.window_size, 524288)
        self.assertEqual(params.dict_id, 0)
        # assertFalse() takes (expr, msg); no second argument belongs here.
        self.assertFalse(params.has_checksum)

        # TODO should be temporary until https://github.com/facebook/zstd/issues/506
        # is fixed.
        cctx = zstd.ZstdCompressor(write_content_size=True)
        with self.assertRaises(ValueError):
            cctx.compress(b'')

        cctx.compress(b'', allow_empty=True)
示例#9
0
    def test_compressobj_large(self):
        """Feed 255 runs of 16384 identical bytes through compressobj().

        The resulting frame is checked for its length, its first four bytes
        and the parameters reported by get_frame_parameters().
        """
        pack_byte = struct.Struct('>B').pack
        payload = b''.join(pack_byte(value) * 16384 for value in range(255))

        compressor = zstd.ZstdCompressor(level=3).compressobj()

        frame = compressor.compress(payload) + compressor.flush()
        self.assertEqual(len(frame), 999)
        self.assertEqual(frame[0:4], b'\x28\xb5\x2f\xfd')

        header = zstd.get_frame_parameters(frame)
        self.assertEqual(header.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(header.window_size, 1048576)
        self.assertEqual(header.dict_id, 0)
        self.assertFalse(header.has_checksum)
示例#10
0
    def test_large_data(self):
        """Run copy_stream() over 255 runs of 16384 identical bytes.

        Checks the read/written byte counts returned by copy_stream() and
        the parameters of the frame that was written.
        """
        pack_byte = struct.Struct('>B').pack
        # A BytesIO built from initial bytes starts at position 0.
        source = io.BytesIO(
            b''.join(pack_byte(value) * 16384 for value in range(255))
        )

        dest = io.BytesIO()
        bytes_read, bytes_written = zstd.ZstdCompressor().copy_stream(
            source, dest
        )

        self.assertEqual(bytes_read, 255 * 16384)
        self.assertEqual(bytes_written, 999)

        header = zstd.get_frame_parameters(dest.getvalue())
        self.assertEqual(header.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(header.window_size, 1048576)
        self.assertEqual(header.dict_id, 0)
        self.assertFalse(header.has_checksum)
    def test_input_types(self):
        """get_frame_parameters() must accept several buffer-like inputs.

        The same header bytes are passed as a memoryview, as a bytearray
        built from the bytes, and as a pre-sized bytearray filled by slice
        assignment; each must yield the same parameters.
        """
        header_bytes = zstd.FRAME_HEADER + b'\x00\x00'

        prefilled = bytearray(len(header_bytes))
        prefilled[:] = header_bytes

        candidates = (
            memoryview(header_bytes),
            bytearray(header_bytes),
            prefilled,
        )
        for candidate in candidates:
            parsed = zstd.get_frame_parameters(candidate)
            self.assertEqual(parsed.content_size, zstd.CONTENTSIZE_UNKNOWN)
            self.assertEqual(parsed.window_size, 1024)
            self.assertEqual(parsed.dict_id, 0)
            self.assertFalse(parsed.has_checksum)
    def test_input_types(self):
        """get_frame_parameters() must accept several buffer-like inputs.

        The same header bytes are passed as a memoryview, as a bytearray
        built from the bytes, and as a pre-sized bytearray filled by slice
        assignment; each must yield the same parameters.
        """
        header_bytes = zstd.FRAME_HEADER + b"\x00\x00"

        prefilled = bytearray(len(header_bytes))
        prefilled[:] = header_bytes

        candidates = (
            memoryview(header_bytes),
            bytearray(header_bytes),
            prefilled,
        )
        for candidate in candidates:
            parsed = zstd.get_frame_parameters(candidate)
            self.assertEqual(parsed.content_size, zstd.CONTENTSIZE_UNKNOWN)
            self.assertEqual(parsed.window_size, 1024)
            self.assertEqual(parsed.dict_id, 0)
            self.assertFalse(parsed.has_checksum)
示例#13
0
    def test_multithreaded_dict(self):
        """Compress with a trained dictionary on a 2-thread compressor.

        The frame header must carry the input length and the dictionary's
        id, and the frame bytes must match the expected literal.
        """
        # Same three samples repeated 128 times, in the same order.
        samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128

        dictionary = zstd.train_dictionary(1024, samples)

        compressor = zstd.ZstdCompressor(dict_data=dictionary, threads=2)

        frame = compressor.compress(b'foo')
        header = zstd.get_frame_parameters(frame)
        self.assertEqual(header.content_size, 3)
        self.assertEqual(header.dict_id, dictionary.dict_id())

        expected_frame = (
            b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00'
            b'\x66\x6f\x6f'
        )
        self.assertEqual(frame, expected_frame)
示例#14
0
    def test_compression_params(self):
        """Stream through write_to() using explicit CompressionParameters.

        Each write() is expected to return 0; the finished frame is then
        checked through its header parameters and its SHA-1 digest.
        """
        cparams = zstd.CompressionParameters(
            20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST
        )

        sink = io.BytesIO()
        compressor_ctx = zstd.ZstdCompressor(compression_params=cparams)
        with compressor_ctx.write_to(sink) as writer:
            for piece in (b'foo', b'bar', b'foobar' * 16384):
                self.assertEqual(writer.write(piece), 0)

        frame = sink.getvalue()

        header = zstd.get_frame_parameters(frame)
        self.assertEqual(header.content_size, 0)
        self.assertEqual(header.window_size, 1048576)
        self.assertEqual(header.dict_id, 0)
        self.assertFalse(header.has_checksum)

        frame_digest = hashlib.sha1(frame).hexdigest()
        self.assertEqual(
            frame_digest, '1ae31f270ed7de14235221a604b31ecd517ebd99'
        )
    def test_attributes(self):
        """Parse hand-built headers and check every reported attribute.

        Each case lists the bytes appended to ``zstd.FRAME_HEADER`` and the
        ``content_size``, ``window_size``, ``dict_id`` and ``has_checksum``
        values expected from get_frame_parameters().
        """
        unknown = zstd.CONTENTSIZE_UNKNOWN
        cases = (
            (b"\x00\x00", unknown, 1024, 0, False),
            # Lowest 2 bits indicate a dictionary and length. Here, the dict
            # id is 1 byte.
            (b"\x01\x00\xff", unknown, 1024, 255, False),
            # Lowest 3rd bit indicates if checksum is present.
            (b"\x04\x00", unknown, 1024, 0, True),
            # Upper 2 bits indicate content size.
            (b"\x40\x00\xff\x00", 511, 1024, 0, False),
            # Window descriptor is 2nd byte after frame header.
            (b"\x00\x40", unknown, 262144, 0, False),
            # Set multiple things.
            (b"\x45\x40\x0f\x10\x00", 272, 262144, 15, True),
        )

        for tail, content_size, window_size, dict_id, has_checksum in cases:
            parsed = zstd.get_frame_parameters(zstd.FRAME_HEADER + tail)
            self.assertEqual(parsed.content_size, content_size)
            self.assertEqual(parsed.window_size, window_size)
            self.assertEqual(parsed.dict_id, dict_id)
            # Use the truthiness assertion matching the expected flag.
            check_flag = self.assertTrue if has_checksum else self.assertFalse
            check_flag(parsed.has_checksum)
示例#16
0
    def test_dictionary(self):
        """Stream-compress with a trained dictionary and round-trip the data.

        The dictionary and the compressed frame are both pinned by SHA-1
        digest; the frame header must report the dictionary's id, and the
        frame must decompress to the original bytes with the same dictionary.
        """
        # Same three samples repeated 128 times, in the same order.
        samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128

        d = zstd.train_dictionary(8192, samples)

        dict_digest = hashlib.sha1(d.as_bytes()).hexdigest()
        self.assertEqual(
            dict_digest, '2b3b6428da5bf2c9cc9d4bb58ba0bc5990dd0e79'
        )

        sink = io.BytesIO()
        compressor_ctx = zstd.ZstdCompressor(level=9, dict_data=d)
        with compressor_ctx.stream_writer(sink) as writer:
            for piece in (b'foo', b'bar', b'foo' * 16384):
                self.assertEqual(writer.write(piece), 0)

        frame = sink.getvalue()

        header = zstd.get_frame_parameters(frame)
        self.assertEqual(header.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(header.window_size, 2097152)
        self.assertEqual(header.dict_id, d.dict_id())
        self.assertFalse(header.has_checksum)

        frame_digest = hashlib.sha1(frame).hexdigest()
        self.assertEqual(
            frame_digest, 'd118cd7a008c7b8f416aa3a5f609eab4c629af95'
        )

        original = b'foo' + b'bar' + (b'foo' * 16384)

        decompressor = zstd.ZstdDecompressor(dict_data=d)
        restored = decompressor.decompress(
            frame, max_output_size=len(original)
        )
        self.assertEqual(restored, original)
示例#17
0
    def from_file(cls: Type[DB], path: Union[str, PathLike], create_new=False) -> DB:
        """Load a Database from a path.

        The file suffix selects how the file is decoded: ``.gz`` is read
        through ``gzip``, ``.zst`` is decompressed with ``zstd``, and any
        other suffix is read as-is. The resulting bytes are parsed with
        ``orjson`` and passed to ``cls.from_dict``.

        Args:
            path: Location of the database file.
            create_new: When true and ``path`` does not exist, log a warning
                and return ``cls()`` instead of reading a file.

        Returns:
            The object built by ``cls.from_dict``, or ``cls()`` in the
            ``create_new`` case described above.

        Raises:
            DatabaseException: If a ``.zst`` frame reports a content checksum
                and it does not match the decompressed data.
        """
        path = Path(path)
        if create_new and not path.exists():
            logging.getLogger(__name__).warning(
                "Database file does not exist. Starting with blank database."
            )
            return cls()

        if path.suffix == ".gz":
            with gzip.open(path, "rb") as f:
                s = f.read()
        elif path.suffix == ".zst":
            with open(path, "rb") as f:
                c = f.read()
            has_checksum = zstd.get_frame_parameters(c).has_checksum
            # NOTE(review): presumes the file holds a single frame whose
            # checksum is its final four bytes -- confirm against the writer.
            checksum = c[-4:]
            s = zstd.decompress(c)
            del c  # release the compressed copy before hashing/parsing
            if has_checksum:
                # The stored value is compared against the last four digest
                # bytes in reversed byte order.
                expected = xxhash.xxh64_digest(s)[-4:][::-1]
                if checksum != expected:
                    # Report the two 4-byte values that were actually compared.
                    raise DatabaseException(
                        "zstd content checksum verification failed: "
                        f"{checksum.hex()} != {expected.hex()}"
                    )
        else:
            with open(path, "rb") as f:
                s = f.read()

        db = orjson.loads(s)
        del s
        return cls.from_dict(db)
示例#18
0
    def test_dictionary(self):
        """Stream-compress with a trained dictionary and round-trip the data.

        The dictionary and the compressed frame are both pinned by SHA-1
        digest; the frame header must report the dictionary's id, and the
        frame must decompress to the original bytes with the same dictionary.
        """
        # Same three samples repeated 128 times, in the same order.
        samples = [b"foo" * 64, b"bar" * 64, b"foobar" * 64] * 128

        d = zstd.train_dictionary(8192, samples)

        dict_digest = hashlib.sha1(d.as_bytes()).hexdigest()
        self.assertEqual(
            dict_digest, "e739fb6cecd613386b8fffc777f756f5e6115e73"
        )

        sink = io.BytesIO()
        compressor_ctx = zstd.ZstdCompressor(level=9, dict_data=d)
        with compressor_ctx.stream_writer(sink, closefd=False) as writer:
            # Each write() is expected to report the number of bytes given.
            for piece in (b"foo", b"bar", b"foo" * 16384):
                self.assertEqual(writer.write(piece), len(piece))

        frame = sink.getvalue()

        header = zstd.get_frame_parameters(frame)
        self.assertEqual(header.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(header.window_size, 2097152)
        self.assertEqual(header.dict_id, d.dict_id())
        self.assertFalse(header.has_checksum)

        frame_digest = hashlib.sha1(frame).hexdigest()
        self.assertEqual(
            frame_digest, "8703b4316f274d26697ea5dd480f29c08e85d940"
        )

        original = b"foo" + b"bar" + (b"foo" * 16384)

        decompressor = zstd.ZstdDecompressor(dict_data=d)
        restored = decompressor.decompress(
            frame, max_output_size=len(original)
        )
        self.assertEqual(restored, original)
    def test_dictionary(self):
        """Stream-compress with a trained dictionary and round-trip the data.

        The dictionary and the compressed frame are both pinned by SHA-1
        digest; the frame header must report the dictionary's id, and the
        frame must decompress to the original bytes with the same dictionary.
        """
        # Same three samples repeated 128 times, in the same order.
        samples = [b"foo" * 64, b"bar" * 64, b"foobar" * 64] * 128

        d = zstd.train_dictionary(8192, samples)

        dict_digest = hashlib.sha1(d.as_bytes()).hexdigest()
        self.assertEqual(
            dict_digest, "a46d2f7a3bc3357c9d717d3dadf9a26fde23e93d"
        )

        sink = io.BytesIO()
        compressor_ctx = zstd.ZstdCompressor(level=9, dict_data=d)
        with compressor_ctx.stream_writer(sink, closefd=False) as writer:
            # Each write() is expected to report the number of bytes given.
            for piece in (b"foo", b"bar", b"foo" * 16384):
                self.assertEqual(writer.write(piece), len(piece))

        frame = sink.getvalue()

        header = zstd.get_frame_parameters(frame)
        self.assertEqual(header.content_size, zstd.CONTENTSIZE_UNKNOWN)
        self.assertEqual(header.window_size, 4194304)
        self.assertEqual(header.dict_id, d.dict_id())
        self.assertFalse(header.has_checksum)

        frame_digest = hashlib.sha1(frame).hexdigest()
        self.assertEqual(
            frame_digest, "f8ca6ebe269a822615e86d710c74d61cb4d4e3ca"
        )

        original = b"foo" + b"bar" + (b"foo" * 16384)

        decompressor = zstd.ZstdDecompressor(dict_data=d)
        restored = decompressor.decompress(
            frame, max_output_size=len(original)
        )
        self.assertEqual(restored, original)
    def test_attributes(self):
        """Parse hand-built headers and check every reported attribute.

        Each case lists the bytes appended to ``zstd.FRAME_HEADER`` and the
        ``content_size``, ``window_size``, ``dict_id`` and ``has_checksum``
        values expected from get_frame_parameters().
        """
        unknown = zstd.CONTENTSIZE_UNKNOWN
        cases = (
            (b'\x00\x00', unknown, 1024, 0, False),
            # Lowest 2 bits indicate a dictionary and length. Here, the dict
            # id is 1 byte.
            (b'\x01\x00\xff', unknown, 1024, 255, False),
            # Lowest 3rd bit indicates if checksum is present.
            (b'\x04\x00', unknown, 1024, 0, True),
            # Upper 2 bits indicate content size.
            (b'\x40\x00\xff\x00', 511, 1024, 0, False),
            # Window descriptor is 2nd byte after frame header.
            (b'\x00\x40', unknown, 262144, 0, False),
            # Set multiple things.
            (b'\x45\x40\x0f\x10\x00', 272, 262144, 15, True),
        )

        for tail, content_size, window_size, dict_id, has_checksum in cases:
            parsed = zstd.get_frame_parameters(zstd.FRAME_HEADER + tail)
            self.assertEqual(parsed.content_size, content_size)
            self.assertEqual(parsed.window_size, window_size)
            self.assertEqual(parsed.dict_id, dict_id)
            # Use the truthiness assertion matching the expected flag.
            check_flag = self.assertTrue if has_checksum else self.assertFalse
            check_flag(parsed.has_checksum)
    def test_invalid_input_sizes(self):
        """Too-short inputs must raise ZstdError.

        Both an empty input and a bare ``zstd.FRAME_HEADER`` are expected to
        raise ``zstd.ZstdError`` matching 'not enough data for frame'.
        """
        # assertRaisesRegexp is a deprecated alias that no longer exists on
        # Python 3.12+; prefer the current name and fall back only on
        # interpreters that lack it.
        assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
        if assert_raises_regex is None:
            assert_raises_regex = self.assertRaisesRegexp

        with assert_raises_regex(zstd.ZstdError, 'not enough data for frame'):
            zstd.get_frame_parameters(b'')

        with assert_raises_regex(zstd.ZstdError, 'not enough data for frame'):
            zstd.get_frame_parameters(zstd.FRAME_HEADER)
 def test_invalid_frame(self):
     """Bytes that are not a zstd frame must raise a matching ZstdError."""
     not_a_frame = b"foobarbaz"
     expected_message = "Unknown frame descriptor"
     with self.assertRaisesRegex(zstd.ZstdError, expected_message):
         zstd.get_frame_parameters(not_a_frame)
 def test_invalid_frame(self):
     """Bytes that are not a zstd frame must raise a matching ZstdError."""
     # assertRaisesRegexp is a deprecated alias that no longer exists on
     # Python 3.12+; prefer the current name and fall back only on
     # interpreters that lack it.
     assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
     if assert_raises_regex is None:
         assert_raises_regex = self.assertRaisesRegexp

     with assert_raises_regex(zstd.ZstdError, 'Unknown frame descriptor'):
         zstd.get_frame_parameters(b'foobarbaz')
    def test_invalid_type(self):
        """Neither ``None`` nor a text string is accepted as frame data."""
        for bad_input in (None, u"foobarbaz"):
            with self.assertRaises(TypeError):
                zstd.get_frame_parameters(bad_input)
示例#25
0
 def test_invalid_frame(self):
     """Bytes that are not a zstd frame must raise a matching ZstdError."""
     # assertRaisesRegexp is a deprecated alias that no longer exists on
     # Python 3.12+; prefer the current name and fall back only on
     # interpreters that lack it.
     assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
     if assert_raises_regex is None:
         assert_raises_regex = self.assertRaisesRegexp

     with assert_raises_regex(zstd.ZstdError,
                              'Unknown frame descriptor'):
         zstd.get_frame_parameters(b'foobarbaz')