Example #1
def decompress(zstd_compressed_data, compressiondict=None):
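    # A dictionary used at compression time must also be supplied to decompress the data.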
    if compressiondict is None:
        zstandard_decompressor = zstd.ZstdDecompressor()
    else:
        zstandard_decompressor = zstd.ZstdDecompressor(dict_data=compressiondict)
    uncompressed_data = zstandard_decompressor.decompress(zstd_compressed_data)
    return uncompressed_data
Example #2
    def test_large_output(self):
        source = io.BytesIO()
        source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
        source.write(b'o')
        source.seek(0)

        cctx = zstd.ZstdCompressor(level=1)
        compressed = io.BytesIO(cctx.compress(source.getvalue()))
        compressed.seek(0)

        dctx = zstd.ZstdDecompressor()
        it = dctx.read_from(compressed)

        chunks = []
        chunks.append(next(it))
        chunks.append(next(it))

        with self.assertRaises(StopIteration):
            next(it)

        decompressed = b''.join(chunks)
        self.assertEqual(decompressed, source.getvalue())

        # And again with buffer protocol.
        it = dctx.read_from(compressed.getvalue())
        chunks = []
        chunks.append(next(it))
        chunks.append(next(it))

        with self.assertRaises(StopIteration):
            next(it)

        decompressed = b''.join(chunks)
        self.assertEqual(decompressed, source.getvalue())
Example #3
    def test_skip_bytes(self):
        cctx = zstd.ZstdCompressor(write_content_size=False)
        compressed = cctx.compress(b'foobar')

        dctx = zstd.ZstdDecompressor()
        output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3))
        self.assertEqual(output, b'foobar')
Example #4
    def test_no_content_size_in_frame(self):
        cctx = zstd.ZstdCompressor(write_content_size=False)
        compressed = cctx.compress(b'foobar')

        dctx = zstd.ZstdDecompressor()
        with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'):
            dctx.decompress(compressed)
Example #5
    def parse(self):
        if self.reader is None:
            with open(self.filename, "rb") as f:
                data = f.read()
            self.reader = BinaryReader(data)

        magic = self.reader.read_bytes(4)
        if magic != b"LZC\0":
            raise ParserException("Unexpected magic number")
        self.reader.skip(4)
        file_count = self.reader.read_uint32()
        header_size = self.reader.read_uint32()

        self.reader.seek(header_size + 4)
        compressed_size = self.reader.read_uint32()
        zstd_data = self.reader.read_bytes(compressed_size)
        dctx = zstd.ZstdDecompressor()
        zstd_reader = BinaryReader(dctx.decompress(zstd_data))

        files = {}
        self.reader.seek(0x48)
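        # Each file-table entry gives a path, a size, and an offset into the decompressed payload.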
        for _ in range(file_count):
            path_len = self.reader.read_uint32()
            file_path = self.reader.read_string(path_len)
            self.reader.skip(4)
            file_size = self.reader.read_uint32()
            file_offset = self.reader.read_uint32()
            zstd_reader.seek(file_offset)
            files[file_path] = zstd_reader.read_bytes(file_size)

        return files
Example #6
def decompress_data_with_animecoin_zstd_func(
        animecoin_zstd_compressed_data, animecoin_zstd_compression_dictionary):
    zstandard_animecoin_decompressor = zstd.ZstdDecompressor(
        dict_data=animecoin_zstd_compression_dictionary)
    uncompressed_data = zstandard_animecoin_decompressor.decompress(
        animecoin_zstd_compressed_data)
    return uncompressed_data
Example #7
def unzip(path, platform):
    """
    Unzips and untars the file for the specified platform into the right place
    in the repository.
    """
    logger.info(f"Unzipping {path}.")
    click.echo(f"Unzipping {path}.")
    tarfilename = path.replace(".zst", "")
    filesize = os.path.getsize(path)
    # Unzip to tar file.
    with open(tarfilename, "wb") as tf:
        with open(path, "rb") as fh:
            dctx = zstd.ZstdDecompressor()
            with click.progressbar(dctx.read_to_iter(fh, read_size=16384),
                                   length=filesize) as bar:
                for chunk in bar:
                    tf.write(chunk)
                    bar.update(len(chunk))
    # Untar the file.
    logger.info(f"Untarring {tarfilename}.")
    click.echo(f"Untarring {tarfilename}.")
    output_dir = os.path.join(os.path.dirname(path), platform)
    with tarfile.open(tarfilename) as tar:
        tar.extractall(path=output_dir)
    # Copy the install directory from the package to the correct location in
    # the repository.
    source = os.path.join(output_dir, "python", "install")
    target = os.path.join(PYTHON_DIR, platform)
    logger.info(f"Copying Python from {source} to {target}.")
    click.echo(f"Copying Python from {source} to {target}.")
    shutil.rmtree(target)
    shutil.move(source, target)
Example #8
    def test_memory_size(self):
        dctx = zstd.ZstdDecompressor()
        buffer = io.BytesIO()
        with dctx.write_to(buffer) as decompressor:
            size = decompressor.memory_size()

        self.assertGreater(size, 100000)
Example #9
    def test_data_equivalence(self, original, threads, use_dict):
        kwargs = {}
        if use_dict:
            kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])

        cctx = zstd.ZstdCompressor(level=1,
                                   write_content_size=True,
                                   write_checksum=True,
                                   **kwargs)

        frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
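        # multi_compress_to_buffer() returns a BufferWithSegments holding one compressed frame per input.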

        dctx = zstd.ZstdDecompressor(**kwargs)

        result = dctx.multi_decompress_to_buffer(frames_buffer)

        self.assertEqual(len(result), len(original))
        for i, frame in enumerate(result):
            self.assertEqual(frame.tobytes(), original[i])

        frames_list = [f.tobytes() for f in frames_buffer]
        result = dctx.multi_decompress_to_buffer(frames_list)

        self.assertEqual(len(result), len(original))
        for i, frame in enumerate(result):
            self.assertEqual(frame.tobytes(), original[i])
Example #10
    def test_content_size_present(self):
        cctx = zstd.ZstdCompressor(write_content_size=True)
        compressed = cctx.compress(b'foobar')

        dctx = zstd.ZstdDecompressor()
        decompressed = dctx.decompress(compressed)
        self.assertEqual(decompressed, b'foobar')
Example #11
    def test_no_write(self):
        source = io.BytesIO()
        dest = object()

        dctx = zstd.ZstdDecompressor()
        with self.assertRaises(ValueError):
            dctx.copy_stream(source, dest)
Example #12
    def test_read_write_size(self):
        source = OpCountingBytesIO(
            zstd.ZstdCompressor().compress(b'foobarfoobar'))
        dctx = zstd.ZstdDecompressor()
        for chunk in dctx.read_from(source, read_size=1, write_size=1):
            self.assertEqual(len(chunk), 1)

        self.assertEqual(source._read_count, len(source.getvalue()))
Example #13
        def __init__(self, ct_stats):
            # CSV.ZSTD
            # CTID;ORG;IP;PRIV;PUB

            self.terms = {'-': 1}
            self.generic = {}
            self.tlds = {}
            self.cdns = {}
            self.freedns = {}

            with open(ct_stats, 'rb') as fh:
                dctx = zstd.ZstdDecompressor()
                with dctx.stream_reader(fh) as reader:
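                    # Wrap the binary zstd stream so lines can be read as UTF-8 text.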
                    wrap = io.TextIOWrapper(io.BufferedReader(reader),
                                            encoding='utf8')
                    while True:
                        line = wrap.readline().lower()
                        line = line.strip()

                        if not line:
                            break

                        try:
                            _, _, _, priv, pub = line.split(';')
                        except ValueError:
                            continue

                        if priv == '*' or all(x in 'abcdef1234567890'
                                              for x in priv):
                            priv = ''

                        if not all(x in DNS_ALPHABET + '*;' for x in priv):
                            continue

                        if not all(x in DNS_ALPHABET + '*;' for x in pub):
                            continue

                        for freedns in FREEDNS:
                            if pub.endswith('.' + freedns):
                                self._inc_freedns(freedns)
                                break

                        for tld in TLDS:
                            if pub.endswith('.' + tld):
                                self._inc_tld(tld)

                                non_tld = pub[:-(len(tld) + 1)]
                                generic = '.'.join(x for x in [priv, non_tld]
                                                   if x)

                                self._inc_generic(generic)
                                for term in non_tld.split('.'):
                                    self._inc_term(term)

                                for term in priv.split('.'):
                                    self._inc_term(term)

                                break
Example #14
    def test_buffer_with_segments_collection_input(self):
        cctx = zstd.ZstdCompressor(write_content_size=True)

        original = [
            b'foo0' * 2,
            b'foo1' * 3,
            b'foo2' * 4,
            b'foo3' * 5,
            b'foo4' * 6,
        ]

        frames = cctx.multi_compress_to_buffer(original)

        # Check round trip.
        dctx = zstd.ZstdDecompressor()
        decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)

        self.assertEqual(len(decompressed), len(original))

        for i, data in enumerate(original):
            self.assertEqual(data, decompressed[i].tobytes())

        # And a manual mode.
        b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
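        # BufferWithSegments takes the concatenated frames plus packed uint64 (offset, length) pairs describing each segment.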
        b1 = zstd.BufferWithSegments(
            b,
            struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]),
                        len(frames[1])))

        b = b''.join(
            [frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
        b2 = zstd.BufferWithSegments(
            b,
            struct.pack('=QQQQQQ', 0, len(frames[2]), len(frames[2]),
                        len(frames[3]),
                        len(frames[2]) + len(frames[3]), len(frames[4])))

        c = zstd.BufferWithSegmentsCollection(b1, b2)

        dctx = zstd.ZstdDecompressor()
        decompressed = dctx.multi_decompress_to_buffer(c)

        self.assertEqual(len(decompressed), 5)
        for i in range(5):
            self.assertEqual(decompressed[i].tobytes(), original[i])
Example #15
    def test_stupidly_large_output_buffer(self):
        cctx = zstd.ZstdCompressor(write_content_size=False)
        compressed = cctx.compress(b'foobar' * 256)
        dctx = zstd.ZstdDecompressor()

        # Will get OverflowError on some Python distributions that can't
        # handle really large integers.
        with self.assertRaises((MemoryError, OverflowError)):
            dctx.decompress(compressed, max_output_size=2**62)
Example #16
def get_cvis(request):
    file = get_cvis_file(request.GET.get('cvis', ''),
                         request.GET.get('f', 'trace.json'))
    if file is None:
        raise Http404("No such file")
    fh = open(file, 'rb')
    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(fh)
    return FileResponse(reader)
Example #17
        def extract(self):
                """
                Extracts our payload from the compressed DZ file using ZLIB.
                self function could be particularly memory-intensive when used
                with large chunks, as the entire compressed chunk is loaded
                into RAM and decompressed.

                A better way to do self would be to chunk the zlib compressed
                data and decompress it with zlib.decompressor() and a while
                loop.

                I'm lazy though, and y'all have fast computers, so self is good
                enough.

                Starting with G7 KDZs, LG switched to zstandard compression.
                To keep comparibility with older KDZs, we are going to compare
                the compression header to the standard zlib header. If there, we 
                use zlib .. if not, we use zstandard.
                """

                # Seek to the beginning of the compressed data in the specified partition
                self.dz.dzfile.seek(self.dataOffset, io.SEEK_SET)

                zlib_magic = {'zlib': bytes([0x78, 0x01])}
                cmp_header = self.dz.dzfile.read(2)

                # Reset to the beginning of the compressed data
                self.dz.dzfile.seek(self.dataOffset, io.SEEK_SET)

                # Read the whole compressed segment into RAM
                zdata = self.dz.dzfile.read(self.dataSize)

                if cmp_header.startswith(zlib_magic['zlib']):

                    # Decompress the data with zlib
                    buf = zlib.decompress(zdata)

                else:
                    # decompress with zstandard
                    dctx = zstd.ZstdDecompressor()
                    buf = dctx.decompress(zdata, max_output_size=200000000)

                crc = crc32(buf) & 0xFFFFFFFF

                # CRC32 check disabled:
                # if crc != self.crc32:
                #     print("[!] Error: CRC32 of data doesn't match header ({:08X} vs {:08X})".format(crc, self.crc32), file=sys.stderr)
                #     sys.exit(1)

                md5 = hashlib.md5()
                md5.update(buf)

                if md5.digest() != self.md5:
                        print("[!] Error: MD5 of data doesn't match header ({:32s} vs {:32s})".format(md5.hexdigest(), b2a_hex(self.md5).decode()), file=sys.stderr)
                        sys.exit(1)

                return buf
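The docstring above suggests a streaming alternative: feed the zlib-compressed segment to zlib.decompressobj() in fixed-size chunks instead of reading it all into RAM. Below is a minimal sketch of that idea, assuming a file-like object plus the dataOffset/dataSize values from the example; the helper name and chunk size are illustrative only.

import io
import zlib

def stream_decompress_zlib(dzfile, data_offset, data_size, chunk_size=1 << 20):
    # Decompress a zlib-compressed region incrementally instead of loading
    # the whole compressed segment into memory at once.
    dzfile.seek(data_offset, io.SEEK_SET)
    dobj = zlib.decompressobj()
    out = bytearray()
    remaining = data_size
    while remaining > 0:
        chunk = dzfile.read(min(chunk_size, remaining))
        if not chunk:
            break
        remaining -= len(chunk)
        out += dobj.decompress(chunk)
    out += dobj.flush()
    return bytes(out)

The zstandard branch could be streamed in the same spirit with ZstdDecompressor.stream_reader(), which would avoid guessing max_output_size up front.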
Example #18
    def test_reuse(self):
        data = zstd.ZstdCompressor(level=1).compress(b'foobar')

        dctx = zstd.ZstdDecompressor()
        dobj = dctx.decompressobj()
        dobj.decompress(data)

        with self.assertRaisesRegexp(zstd.ZstdError,
                                     'cannot use a decompressobj'):
            dobj.decompress(data)
Example #19
    def test_skip_bytes_too_large(self):
        dctx = zstd.ZstdDecompressor()

        with self.assertRaisesRegexp(
                ValueError, 'skip_bytes must be smaller than read_size'):
            b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1))

        with self.assertRaisesRegexp(
                ValueError, 'skip_bytes larger than first input chunk'):
            b''.join(dctx.read_from(b'foobar', skip_bytes=10))
Example #20
    def untransform(self, buf):
        for trans_id in self.__read_transforms:
            if trans_id == TRANSFORM.ZLIB:
                buf = zlib.decompress(buf)
            elif trans_id == TRANSFORM.SNAPPY:
                buf = snappy.decompress(buf)
            elif trans_id == TRANSFORM.ZSTD:
                buf = zstd.ZstdDecompressor().decompress(buf)
            if trans_id not in self.__write_transforms:
                self.__write_transforms.append(trans_id)
        return buf
Example #21
    def test_compress_write_to(self, data, level):
        """Random data from compress() roundtrips via write_to."""
        cctx = zstd.ZstdCompressor(level=level)
        compressed = cctx.compress(data)

        buffer = io.BytesIO()
        dctx = zstd.ZstdDecompressor()
        with dctx.write_to(buffer) as decompressor:
            decompressor.write(compressed)

        self.assertEqual(buffer.getvalue(), data)
Example #22
    def _parse(self, stream, ctx, path):
        need_read_size = ctx._._.header.packed_size - (16 if ctx.first_part else 0) - (16 if ctx.second_part else 0)
        # ugly: align read size to 4 bytes
        need_read_size = need_read_size // 4 * 4
        deobfs_compressed_data = (ctx.first_part if ctx.first_part else b'') + \
                                 stream.getvalue()[ctx.middle_data_offset:ctx.middle_data_offset + need_read_size] + \
                                 (ctx.second_part.data if ctx.second_part.data else b'') + \
                                 (ctx.align_tail if ctx.align_tail else b'')
        dctx = zstd.ZstdDecompressor()
        decompressed_data = dctx.decompress(deobfs_compressed_data, max_output_size=ctx._._.header.original_size)
        ctx.parsed_data = vromfs_not_packed_body.parse(decompressed_data)
Example #23
    def test_empty(self):
        source = io.BytesIO()
        dest = io.BytesIO()

        dctx = zstd.ZstdDecompressor()
        # TODO should this raise an error?
        r, w = dctx.copy_stream(source, dest)

        self.assertEqual(r, 0)
        self.assertEqual(w, 0)
        self.assertEqual(dest.getvalue(), b'')
Example #24
    def test_read_write_size(self):
        source = OpCountingBytesIO(
            zstd.ZstdCompressor().compress(b'foobarfoobar'))

        dest = OpCountingBytesIO()
        dctx = zstd.ZstdDecompressor()
        r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)

        self.assertEqual(r, len(source.getvalue()))
        self.assertEqual(w, len(b'foobarfoobar'))
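        # One extra read: the final empty read() that signals end of input.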
        self.assertEqual(source._read_count, len(source.getvalue()) + 1)
        self.assertEqual(dest._write_count, len(dest.getvalue()))
Example #25
    def test_read_write_size_variance(self, original, level, read_size,
                                      write_size):
        cctx = zstd.ZstdCompressor(level=level)
        frame = cctx.compress(original)

        source = io.BytesIO(frame)

        dctx = zstd.ZstdDecompressor()
        chunks = list(
            dctx.read_from(source, read_size=read_size, write_size=write_size))

        self.assertEqual(b''.join(chunks), original)
Example #26
    def test_type_validation(self):
        dctx = zstd.ZstdDecompressor()

        # Object with read() works.
        dctx.read_from(io.BytesIO())

        # Buffer protocol works.
        dctx.read_from(b'foobar')

        with self.assertRaisesRegexp(ValueError,
                                     'must pass an object with a read'):
            b''.join(dctx.read_from(True))
Example #27
    def test_empty_input(self):
        dctx = zstd.ZstdDecompressor()

        source = io.BytesIO()
        it = dctx.read_from(source)
        # TODO this is arguably wrong. Should get an error about the missing frame.
        with self.assertRaises(StopIteration):
            next(it)

        it = dctx.read_from(b'')
        with self.assertRaises(StopIteration):
            next(it)
Example #28
    def test_compressor_write_to_decompressor_write_to_larger(self, data):
        compress_buffer = io.BytesIO()
        decompressed_buffer = io.BytesIO()

        cctx = zstd.ZstdCompressor(level=5)
        with cctx.write_to(compress_buffer) as compressor:
            compressor.write(data)

        dctx = zstd.ZstdDecompressor()
        with dctx.write_to(decompressed_buffer) as decompressor:
            decompressor.write(compress_buffer.getvalue())

        self.assertEqual(decompressed_buffer.getvalue(), data)
Example #29
    def test_write_size(self):
        source = zstd.ZstdCompressor().compress(b'foobarfoobar')
        dest = OpCountingBytesIO()
        dctx = zstd.ZstdDecompressor()
        with dctx.write_to(dest, write_size=1) as decompressor:
            s = struct.Struct('>B')
            for c in source:
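                # Iterating bytes yields ints on Python 3, so re-pack each value as a single byte.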
                if not isinstance(c, str):
                    c = s.pack(c)
                decompressor.write(c)

        self.assertEqual(dest.getvalue(), b'foobarfoobar')
        self.assertEqual(dest._write_count, len(dest.getvalue()))
Example #30
    def test_invalid_input(self):
        dctx = zstd.ZstdDecompressor()

        source = io.BytesIO(b'foobar')
        it = dctx.read_from(source)
        with self.assertRaisesRegexp(zstd.ZstdError,
                                     'Unknown frame descriptor'):
            next(it)

        it = dctx.read_from(b'foobar')
        with self.assertRaisesRegexp(zstd.ZstdError,
                                     'Unknown frame descriptor'):
            next(it)