Exemplo n.º 1
0
    def unpack(self):
        """Decompress this RPM's payload and unpack the files inside it.

        Returns a list of FileResult objects, one per unpacked file
        (empty when the payload format is neither 'cpio' nor 'drpm').
        """
        unpacked_files = []
        # Only cpio and drpm payloads are handled here.
        if self.payload_format not in ['cpio', 'drpm']:
            return unpacked_files

        # Decompress the entire payload in memory.  The compressor name
        # presumably comes from the RPM header parsed earlier -- anything
        # unrecognised falls through to gzip.
        if self.compressor == 'bzip2':
            decompressor = bz2.BZ2Decompressor()
            payload = decompressor.decompress(self.data.payload)
        elif self.compressor == 'xz' or self.compressor == 'lzma':
            payload = lzma.decompress(self.data.payload)
        elif self.compressor == 'zstd':
            reader = zstandard.ZstdDecompressor().stream_reader(
                self.data.payload)
            payload = reader.read()
        else:
            payload = gzip.decompress(self.data.payload)

        if self.payload_format == 'drpm':
            # drpm: store the decompressed delta as a single file named
            # 'drpm' and report it; it is not unpacked further here.
            out_labels = []
            file_path = pathlib.Path('drpm')
            outfile_rel = self.rel_unpack_dir / file_path
            outfile_full = self.scan_environment.unpack_path(outfile_rel)
            os.makedirs(outfile_full.parent, exist_ok=True)
            outfile = open(outfile_full, 'wb')
            outfile.write(payload)
            outfile.close()
            fr = FileResult(self.fileresult, self.rel_unpack_dir / file_path,
                            set(out_labels))
            unpacked_files.append(fr)
        else:
            # write the payload to a temporary file first
            temporary_file = tempfile.mkstemp(
                dir=self.scan_environment.temporarydirectory)
            os.write(temporary_file[0], payload)
            # Closes the descriptor returned by mkstemp via a file object.
            os.fdopen(temporary_file[0]).close()

            payloadfile = temporary_file[1]
            # NOTE(review): payloadfile_full is computed but never used below.
            payloadfile_full = self.scan_environment.unpack_path(payloadfile)

            # create a file result object and pass it to the CPIO unpacker
            fr = FileResult(self.fileresult, payloadfile, set([]))
            fr.set_filesize(len(payload))

            # assuming that the CPIO data is always in "new ascii" format
            cpio_parser = cpio_unpack.CpioNewAsciiUnpackParser(
                fr, self.scan_environment, self.rel_unpack_dir, 0)
            try:
                cpio_parser.open()
                unpackresult = cpio_parser.parse_and_unpack()
            except UnpackParserException as e:
                # Re-raise so CPIO failures surface as this unpacker's error.
                raise UnpackParserException(e.args)
            finally:
                cpio_parser.close()

            # walk the results and add them.
            for i in unpackresult.unpacked_files:
                # Attribute every unpacked file to the RPM it came from.
                i.parent_path = self.fileresult.filename
                unpacked_files.append(i)

        return (unpacked_files)
Exemplo n.º 2
0
def download_bz2(link, dest):
    """Download the bz2-compressed file at *link* and decompress it into
    the local path *dest*.

    Streams in 100 KiB chunks so the download never has to fit in memory.
    (Python 2 only: uses the print statement and urllib2.)
    """
    print 'Downloading', link
    compressed = urllib2.urlopen(link)
    with open(dest, 'wb') as f:
        decompressor = bz2.BZ2Decompressor()
        # iter() with a sentinel stops cleanly on the first empty read (EOF).
        for data in iter(lambda: compressed.read(100 * 1024), b''):
            f.write(decompressor.decompress(data))
Exemplo n.º 3
0
 def decompress(self, dat):
     """Best-effort bzip2 decompression.

     When *dat* carries the bzip2 magic prefix 'BZ', try to inflate it
     and return the result; if decompression fails with IOError, or the
     prefix is absent, hand *dat* back unchanged.
     """
     if not dat.startswith('BZ'):
         return dat
     try:
         return bz2.BZ2Decompressor().decompress(dat)
     except IOError:
         # Not actually bzip2 (or corrupt) -- fall back to the raw data.
         return dat
Exemplo n.º 4
0
 def __init__(self, coders: List[Dict[str, Any]], size: int, crc: Optional[int]) -> None:
     """Build a decompressor for a 7z folder from its coder descriptions.

     :param coders: coder dicts from the archive header; each must have
         exactly one input and one output stream.
     :param size: total compressed input size in bytes.
     :param crc: expected CRC of the decompressed data, or None.
     :raises UnsupportedCompressionMethodError: when a coder is neither a
         known LZMA-family filter nor (as the sole coder) bzip2.
     """
     self.input_size = size
     self.consumed = 0  # type: int
     self.crc = crc
     self.digest = None  # type: Optional[int]
     filters = []  # type: List[Dict[str, Any]]
     try:
         for coder in coders:
             if coder['numinstreams'] != 1 or coder['numoutstreams'] != 1:
                 raise UnsupportedCompressionMethodError('Only a simple compression method is currently supported.')
             filter = self.lzma_methods_map.get(coder['method'], None)
             if filter is not None:
                 properties = coder.get('properties', None)
                 if properties is not None:
                     # Filters are prepended so they end up in decode order.
                     filters[:0] = [lzma._decode_filter_properties(filter, properties)]  # type: ignore
                 else:
                     filters[:0] = [{'id': filter}]
             else:
                 raise UnsupportedCompressionMethodError
     except UnsupportedCompressionMethodError as e:
         # Fallback: a single non-LZMA coder may still be bzip2.
         filter = self.alt_methods_map.get(coders[0]['method'], None)
         if len(coders) == 1 and filter is not None:
             if filter == self.FILTER_BZIP2:
                 self.decompressor = bz2.BZ2Decompressor()  # type: Union[bz2.BZ2Decompressor, lzma.LZMADecompressor]
             else:
                 raise e
             # bz2 cannot resume mid-buffer, so no partial decompression.
             self.can_partial_decompress = False
         else:
             raise e
     else:
         self.decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=filters)
         self.can_partial_decompress = True
     self.filters = filters
Exemplo n.º 5
0
def _get_decoder(compression):
    """Return a fresh streaming decompressor for *compression*.

    Supported tokens are 'zlib', 'bzp2' and 'lz4'; each branch imports its
    backing module lazily and raises an informative ImportError when it is
    missing.  Any other token raises ValueError.
    """
    if compression == 'zlib':
        try:
            import zlib
        except ImportError:
            raise ImportError(
                "Your Python does not have the zlib library, "
                "therefore the compressed block in this ASDF file "
                "can not be decompressed.")
        else:
            return zlib.decompressobj()

    if compression == 'bzp2':
        try:
            import bz2
        except ImportError:
            raise ImportError(
                "Your Python does not have the bz2 library, "
                "therefore the compressed block in this ASDF file "
                "can not be decompressed.")
        else:
            return bz2.BZ2Decompressor()

    if compression == 'lz4':
        try:
            import lz4.block
        except ImportError:
            raise ImportError(
                "lz4 library in not installed in your Python environment, "
                "therefore the compressed block in this ASDF file "
                "can not be decompressed.")
        else:
            return Lz4Decompressor(lz4.block)

    raise ValueError("Unknown compression type: '{0}'".format(compression))
Exemplo n.º 6
0
def extract(file):
    """Decompress the bzip2 archive at path *file* into 'sde.sqlite' in the
    current directory, streaming 100 KiB at a time.

    Always returns True.
    """
    bunzip = bz2.BZ2Decompressor()
    with open('sde.sqlite', 'wb') as sink, open(file, 'rb') as source:
        while True:
            chunk = source.read(100 * 1024)
            if not chunk:
                break
            # noinspection PyArgumentList
            sink.write(bunzip.decompress(chunk))
    return True
Exemplo n.º 7
0
    def _decompressor_stream(self):
        """Generator yielding a single ``(write, fname)`` pair.

        *write* streams bytes into *fname*, transparently inflating them
        when the 'compression' parameter names a supported scheme
        ('gz', 'bz2' or 'xz').  The output file is closed when the
        generator is finalized.
        """
        out_file = None
        compression = self.parameters.get('compression', False)
        fname, _ = self._url_to_fname_suffix(self.path, compression)

        decompressor = None
        if compression:
            factories = {
                'gz': lambda: zlib.decompressobj(16 + zlib.MAX_WBITS),
                'bz2': bz2.BZ2Decompressor,
                'xz': lzma.LZMADecompressor,  # pylint: disable=no-member
            }
            factory = factories.get(compression)
            if factory is not None:
                decompressor = factory()
            self.logger.debug("Using %s decompression" % compression)
        else:
            self.logger.debug("No compression specified.")

        def write(buff):
            # Inflate on the fly when a decompressor is configured.
            if decompressor:
                buff = decompressor.decompress(buff)
            out_file.write(buff)

        try:
            out_file = open(fname, 'wb')
            yield (write, fname)
        finally:
            if out_file:
                out_file.close()
Exemplo n.º 8
0
def _bunzip2_stream(fileobj, bufsize=1024):
    """Decompress bzip2-compressed data on the fly.

    :param fileobj: object supporting ``read()``
    :param bufsize: number of bytes to read from *fileobj* at a time.

    This yields decompressed chunks; it does *not* split on lines. To get
    lines, wrap this in :py:func:`buffer_iterator_to_line_iterator`.

    This will replace :py:func:`bunzip2_stream` in v0.5.0 as part of
    an effort to be less line-based (see #715).
    """
    # bz2 is imported conditionally at module level; give a clear error
    # rather than an AttributeError when it is unavailable.
    if bz2 is None:
        raise Exception(
            'bz2 module was not successfully imported (likely not installed).')

    d = bz2.BZ2Decompressor()

    while True:
        chunk = fileobj.read(bufsize)
        if not chunk:
            return

        # BUGFIX: decompress() returns a bytes object; the old code did
        # ``for part in parts: yield part``, which on Python 3 yields one
        # *integer per byte* instead of byte chunks.  Yield the chunk whole.
        part = d.decompress(chunk)
        if part:
            yield part
Exemplo n.º 9
0
def _decompressor_stream(url, imgdir, decompress):
    """Generator yielding a single ``(write, fname)`` pair for saving the
    content of *url* under *imgdir*.

    When *decompress* is true and the URL suffix is 'gz', 'bz2' or 'xz',
    *write* inflates the bytes before writing; otherwise the suffix is
    kept on the output filename and bytes pass through untouched.  The
    output file is closed when the generator is finalized.
    """
    out_fd = None
    decompressor = None

    fname, suffix = _url_to_fname_suffix(url, imgdir)

    if decompress and suffix == 'gz':
        decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    elif decompress and suffix == 'bz2':
        decompressor = bz2.BZ2Decompressor()
    elif decompress and suffix == 'xz':
        decompressor = lzma.LZMADecompressor()
    else:
        # don't remove the file's real suffix
        fname = '%s.%s' % (fname, suffix)

    def write(buff):
        if decompressor:
            buff = decompressor.decompress(buff)
        out_fd.write(buff)

    try:
        out_fd = open(fname, 'wb')
        yield (write, fname)
    finally:
        if out_fd:
            out_fd.close()
Exemplo n.º 10
0
 def _decomp(self):
     """Worker loop: pull ``(comp_type, comp_data)`` tasks off the task
     queue, decompress each blob and push the result to the output queue.

     Terminates when ``Const.QUEUE_SUCCESS_MESSAGE`` is received; per-blob
     timing is reported on the analysis queue and a final sentinel string
     is put on the command queue on exit.
     :raises CompressionError: for an unknown compression type.
     """
     is_proc_running = True
     input_list = [self.task_queue._reader.fileno()]
     while is_proc_running:
         # Block until the task queue's underlying pipe is readable.
         inready, outread, errready = select.select(input_list, [], [])
         if self.task_queue._reader.fileno() in inready:
             input_task = self.task_queue.get()
             if input_task == Const.QUEUE_SUCCESS_MESSAGE:
                 is_proc_running = False
                 break
             (comp_type, comp_data) = input_task
             start = time.time()
             comp_string = "Unknown Compression Algorithm!"
             if comp_type == Const.COMPRESSION_LZMA:
                 comp_string = "lzma"
                 decompressor = lzma.LZMADecompressor()
                 decomp_data = decompressor.decompress(comp_data)
                 decomp_data += decompressor.flush()
             elif comp_type == Const.COMPRESSION_BZIP2:
                 comp_string = "bzip2"
                 decompressor = bz2.BZ2Decompressor()
                 decomp_data = decompressor.decompress(comp_data)
             elif comp_type == Const.COMPRESSION_GZIP:
                 comp_string = "gzip"
                 # wbits = MAX_WBITS | 16 selects the gzip container.
                 decomp_data = zlib.decompress(comp_data,
                                               zlib.MAX_WBITS | 16)
             else:
                 raise CompressionError("Not valid compression option")
             # Report per-blob decompression latency.
             self.analysis_queue.put("B,D(%s),%5.3f" %
                                     (comp_string, time.time() - start))
             self.output_queue.put(decomp_data)
     self.command_queue.put("Compressed processed everything")
Exemplo n.º 11
0
 def from_header(cls, header: bytes):
     """Pick a decompressor based on a stream header.

     ``b'LZ00'`` selects LZMA and ``b'BZ00'`` selects bzip2 (each only
     when the matching module is available per the module-level
     ``support`` table); anything else falls back to zlib.
     """
     if header == b'LZ00' and support['lzma']:
         decompressor = lzma.LZMADecompressor()
     elif header == b'BZ00' and support['bz2']:
         decompressor = bz2.BZ2Decompressor()
     else:
         decompressor = zlib.decompressobj()
     return decompressor
Exemplo n.º 12
0
 def get(cls, algo: str):
     """Return a fresh decompressor for *algo*.

     'lzma' and 'bz2' are honoured only when the matching module is
     available (per the module-level ``support`` table); any other value
     falls back to a zlib decompressor.
     """
     if algo == 'lzma' and support['lzma']:
         return lzma.LZMADecompressor()
     if algo == 'bz2' and support['bz2']:
         return bz2.BZ2Decompressor()
     return zlib.decompressobj()
Exemplo n.º 13
0
    def extract(self, member, path=".", fileobj=None):
        """Extract and decompress `member`.

        When *fileobj* is given, the decompressed payload is written to it
        and nothing is returned.  Otherwise the member is written under
        *path* (default: current directory), its mode bits restored from
        ``member.flags``, and the destination path returned.
        """
        self.fileobj.seek(member._offset)
        decomp = bz2.BZ2Decompressor()

        # Direct-to-fileobj mode: one read of the whole member.
        if fileobj:
            fileobj.write(decomp.decompress(self.fileobj.read(member.size)))
            return

        dstpath = safe_join(path, member.name)
        dirname = os.path.dirname(dstpath)
        if not os.path.exists(dirname):
            os.makedirs(dirname)

        output = open(dstpath, "wb")
        remaining = member.size
        while True:
            # Read at most 128 KiB, and never past the member's extent.
            block = self.fileobj.read(min(128 * 1024, remaining))
            if not block:
                break
            remaining -= len(block)
            output.write(decomp.decompress(block))
        output.close()
        os.chmod(dstpath, member.flags)

        return dstpath
Exemplo n.º 14
0
class UrlZipReader:
    """Open a (possibly compressed) URL and prepare a matching decompressor.

    Python 2 only (urllib2, print statements).  Based on the URL's suffix
    a zlib/bz2 decompressor object is stored in ``self.zip`` (None when
    the URL is not recognised as compressed).
    """
    def __init__(self, _url):
        self._url = _url
        try:
            #self.fp = urllib2.urlopen(self._url)
            req = urllib2.Request(self._url)
            # Pretend to be a desktop browser; some servers reject the
            # default urllib2 user agent.
            req.add_header(
                "User-Agent",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.37 Safari/537.36"
            )
            #req.add_header("User-agent", "Mozilla/5.0 (X11; U; FreeBSD i386; en-US; rv:1.7.8) Gecko/20050609 Firefox/1.0.4")
            req.add_header("Connection", "Keep-Alive")
            print req.headers
            self.fp = urllib2.urlopen(req)
        except urllib2.HTTPError, e:
            # NOTE(review): on HTTP errors the instance is left without
            # self.fp / self.zip -- callers must not use it after failure.
            print e.code
            print e.msg
            return
        print self.fp.info()
        print self.fp.getcode()

        if self._url.endswith(".gz"):
            # wbits=31 -> expect a gzip container.
            self.zip = zlib.decompressobj(31)
        elif self._url.endswith(".bz2"):
            self.zip = bz2.BZ2Decompressor()
            #self.zip = bz2.BZ2File(self.fp)
        elif self._url.endswith(".zip"):
            # NOTE(review): wbits=15 is a raw zlib stream, not the ZIP
            # archive format -- this likely cannot unpack .zip files; verify.
            self.zip = zlib.decompressobj(15)
        else:
            self.zip = None
Exemplo n.º 15
0
def bunzip2_stream(fileobj, bufsize=1024):
    """Decompress bzip2-compressed data on the fly.

    :param fileobj: object supporting ``read()``
    :param bufsize: number of bytes to read from *fileobj* at a time.

    .. warning::

        This yields decompressed chunks; it does *not* split on lines. To get
        lines, wrap this in :py:func:`to_lines`.
    """
    # bz2 is imported conditionally at module level; give a clear error
    # rather than an AttributeError when it is unavailable.
    if bz2 is None:
        raise Exception(
            'bz2 module was not successfully imported (likely not installed).')

    d = bz2.BZ2Decompressor()

    while True:
        chunk = fileobj.read(bufsize)
        if not chunk:
            return

        # decompress() may buffer internally and return b'' for a chunk;
        # only yield non-empty output.
        part = d.decompress(chunk)
        if part:
            yield part
Exemplo n.º 16
0
def decomp_overlayzip(overlay_path, outfilename):
    """Decompress every blob of the VM overlay package at *overlay_path*
    and concatenate the results into *outfilename*.

    Each blob's compression type is taken from the package metadata
    (defaulting to LZMA).  Returns the unpacked metadata dict.
    :raises CompressionError: for an unknown compression type.
    """
    overlay_package = VMOverlayPackage(overlay_path)
    meta_raw = overlay_package.read_meta()
    meta_info = msgpack.unpackb(meta_raw)
    comp_overlay_files = meta_info[Const.META_OVERLAY_FILES]

    # "with" guarantees the output handle is closed even when a blob fails
    # to decompress (the previous version leaked the fd on error).
    with open(outfilename, "w+b") as out_fd:
        for blob_info in comp_overlay_files:
            comp_filename = blob_info[Const.META_OVERLAY_FILE_NAME]
            comp_type = blob_info.get(
                Const.META_OVERLAY_FILE_COMPRESSION,
                Const.COMPRESSION_LZMA)
            if comp_type == Const.COMPRESSION_LZMA:
                comp_data = overlay_package.read_blob(comp_filename)
                decompressor = lzma.LZMADecompressor()
                decomp_data = decompressor.decompress(comp_data)
                decomp_data += decompressor.flush()
                out_fd.write(decomp_data)
            elif comp_type == Const.COMPRESSION_BZIP2:
                comp_data = overlay_package.read_blob(comp_filename)
                decompressor = bz2.BZ2Decompressor()
                decomp_data = decompressor.decompress(comp_data)
                out_fd.write(decomp_data)
            elif comp_type == Const.COMPRESSION_GZIP:
                comp_data = overlay_package.read_blob(comp_filename)
                # wbits = MAX_WBITS | 16 selects the gzip container.
                decomp_data = zlib.decompress(comp_data, zlib.MAX_WBITS | 16)
                out_fd.write(decomp_data)
            else:
                raise CompressionError("Not valid compression option")

    return meta_info
Exemplo n.º 17
0
    def _decomp(self):
        """Worker loop: read ``(comp_type, comp_data)`` tasks from the task
        queue, decompress each blob and put the result on the output queue.

        Stops when ``Const.QUEUE_SUCCESS_MESSAGE`` arrives, then posts a
        sentinel string on the command queue.
        :raises CompressionError: for an unknown compression type.
        """
        is_proc_running = True
        input_list = [self.task_queue._reader.fileno()]
        while is_proc_running:
            # Block until the task queue's underlying pipe is readable.
            inready, outread, errready = select.select(input_list, [], [])
            if self.task_queue._reader.fileno() in inready:
                input_task = self.task_queue.get()
                if input_task == Const.QUEUE_SUCCESS_MESSAGE:
                    is_proc_running = False
                    break
                (comp_type, comp_data) = input_task

                if comp_type == Const.COMPRESSION_LZMA:
                    decompressor = lzma.LZMADecompressor()
                    decomp_data = decompressor.decompress(comp_data)
                    decomp_data += decompressor.flush()
                elif comp_type == Const.COMPRESSION_BZIP2:
                    decompressor = bz2.BZ2Decompressor()
                    decomp_data = decompressor.decompress(comp_data)
                elif comp_type == Const.COMPRESSION_GZIP:
                    # wbits = MAX_WBITS | 16 selects the gzip container.
                    decomp_data = zlib.decompress(
                        comp_data,
                        zlib.MAX_WBITS | 16)
                else:
                    raise CompressionError("Not valid compression option")
                LOG.debug("%f\tdecompress one blob" % (time.time()))
                self.output_queue.put(decomp_data)
        self.command_queue.put("Compressed processed everything")
Exemplo n.º 18
0
    def _decompressor_stream(self):  # pylint: disable=too-many-branches
        """Generator yielding a single ``(write, fname)`` pair for this
        download action.

        The 'compression' parameter is looked up per image key (ramdisks
        are deliberately left compressed).  *write* inflates gz/bz2/xz
        data on the fly and converts low-level decompression errors into
        JobError; the output file is managed by the surrounding ``with``.
        :raises JobError: when the target is a directory or writing fails.
        :raises InfrastructureError: when the output file cannot be opened.
        """
        dwnld_file = None
        compression = False
        if 'images' in self.parameters and self.key in self.parameters[
                'images']:
            compression = self.parameters['images'][self.key].get(
                'compression', False)
        else:
            if self.key == 'ramdisk':
                self.logger.debug(
                    "Not decompressing ramdisk as can be used compressed.")
            else:
                compression = self.parameters[self.key].get(
                    'compression', False)

        fname, _ = self._url_to_fname_suffix(self.path, compression)
        if os.path.isdir(fname):
            raise JobError("Download '%s' is a directory, not a file" % fname)
        # Start from a clean slate; a stale partial download would corrupt
        # the decompression stream.
        if os.path.exists(fname):
            os.remove(fname)

        decompressor = None
        if compression:
            if compression == 'gz':
                decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
            elif compression == 'bz2':
                decompressor = bz2.BZ2Decompressor()
            elif compression == 'xz':
                decompressor = lzma.LZMADecompressor()  # pylint: disable=no-member
            self.logger.debug("Using %s decompression" % compression)
        else:
            self.logger.debug("No compression specified.")

        def write(buff):
            if decompressor:
                try:
                    buff = decompressor.decompress(buff)
                except EOFError as eof_exc:
                    # EOFError can be raised when decompressing a bz2 archive
                    # generated by pbzip2. If there is something in unused_data
                    # try to continue decompression.
                    if compression == 'bz2' and decompressor.unused_data:
                        buff = decompressor.unused_data
                    else:
                        error_message = str(eof_exc)
                        self.logger.exception(error_message)
                        raise JobError(error_message)
                except (IOError, lzma.error, zlib.error) as exc:  # pylint: disable=no-member
                    error_message = str(exc)
                    self.logger.exception(error_message)
                    raise JobError(error_message)
            dwnld_file.write(buff)

        try:
            with open(fname, 'wb') as dwnld_file:
                yield (write, fname)
        except (IOError, OSError) as exc:
            msg = "Unable to open %s: %s" % (fname, exc.strerror)
            self.logger.error(msg)
            raise InfrastructureError(msg)
Exemplo n.º 19
0
Arquivo: main.py Projeto: sbobade/mtda
    def storage_write_bz2(self, data, session=None):
        """Feed *data* (a chunk of a bz2-compressed image) to the shared
        decompressor and write the output to the sdmux storage device.

        Returns -1 when no sdmux controller is attached; otherwise the
        last status from self._storage_write_bz2 (presumably 0 means
        "more input needed" -- TODO confirm against _storage_write_bz2).
        Loops for at most ~self.fbintvl seconds per call so the client
        gets periodic feedback, and transparently restarts decompression
        across multi-stream archives (e.g. produced by pbzip2).
        """
        self.mtda.debug(3, "main.storage_write_bz2()")

        self._check_expired(session)
        if self.sdmux_controller is None:
            result = -1
        else:
            # Create a bz2 decompressor when called for the first time
            if self.bz2dec is None:
                self.bz2dec = bz2.BZ2Decompressor()

            cont = True
            start = time.monotonic()
            result = -1

            while cont is True:
                # Decompress and write newly received data
                try:
                    # Uncompress and write data
                    result = self._storage_write_bz2(data)
                    if result != 0:
                        # Either got an error or needing more data; escape from
                        # this loop to provide feedback
                        cont = False
                    else:
                        # Check if this loop has been running for quite
                        # some time, in which case we would want to give our
                        # client an update
                        now = time.monotonic()
                        if (now - start) >= self.fbintvl:
                            cont = False
                        # If we should continue and do not need more data
                        # at this time, use an empty buffer for the next
                        # iteration
                        elif result == 0:
                            data = b''
                except EOFError:
                    # Handle multi-streams: create a new decompressor and
                    # we will start with data unused from the previous
                    # decompressor
                    data = self.bz2dec.unused_data
                    self.bz2dec = bz2.BZ2Decompressor()
                    cont = (len(data) > 0)  # loop only if we have unused data
                    result = 0  # we do not need more input data
Exemplo n.º 20
0
def data_for_op(op, out_file, old_file):
    """Apply a single update-engine operation *op* to *out_file*.

    Reads the operation's payload from the module-level ``args.payloadfile``
    and applies it according to ``op.type`` (REPLACE / REPLACE_XZ /
    REPLACE_BZ / SOURCE_COPY / SOURCE_BSDIFF / ZERO).  *old_file* is the
    source image for differential operations.  Returns the raw data read
    for the operation; exits the process on unsupported types.
    """
    args.payloadfile.seek(data_offset + op.data_offset)
    data = args.payloadfile.read(op.data_length)

    # assert hashlib.sha256(data).digest() == op.data_sha256_hash, 'operation data hash mismatch'

    if op.type == op.REPLACE_XZ:
        dec = lzma.LZMADecompressor()
        data = dec.decompress(data)
        out_file.seek(op.dst_extents[0].start_block * block_size)
        out_file.write(data)
    elif op.type == op.REPLACE_BZ:
        dec = bz2.BZ2Decompressor()
        data = dec.decompress(data)
        out_file.seek(op.dst_extents[0].start_block * block_size)
        out_file.write(data)
    elif op.type == op.REPLACE:
        out_file.seek(op.dst_extents[0].start_block * block_size)
        out_file.write(data)
    elif op.type == op.SOURCE_COPY:
        if not args.diff:
            print("SOURCE_COPY supported only for differential OTA")
            sys.exit(-2)
        out_file.seek(op.dst_extents[0].start_block * block_size)
        for ext in op.src_extents:
            old_file.seek(ext.start_block * block_size)
            data = old_file.read(ext.num_blocks * block_size)
            out_file.write(data)
    elif op.type == op.SOURCE_BSDIFF:
        if not args.diff:
            print("SOURCE_BSDIFF supported only for differential OTA")
            sys.exit(-3)
        out_file.seek(op.dst_extents[0].start_block * block_size)
        # Stitch the source extents together, apply the bsdiff patch, then
        # scatter the patched data over the destination extents.
        tmp_buff = io.BytesIO()
        for ext in op.src_extents:
            old_file.seek(ext.start_block * block_size)
            old_data = old_file.read(ext.num_blocks * block_size)
            tmp_buff.write(old_data)
        tmp_buff.seek(0)
        old_data = tmp_buff.read()
        tmp_buff.seek(0)
        tmp_buff.write(bsdiff4.patch(old_data, data))
        n = 0
        tmp_buff.seek(0)
        for ext in op.dst_extents:
            tmp_buff.seek(n * block_size)
            n += ext.num_blocks
            data = tmp_buff.read(ext.num_blocks * block_size)
            out_file.seek(ext.start_block * block_size)
            out_file.write(data)
    elif op.type == op.ZERO:
        for ext in op.dst_extents:
            out_file.seek(ext.start_block * block_size)
            # BUGFIX: out_file is opened in binary mode; writing a str
            # ('\0' * n) raises TypeError on Python 3.  Write zero *bytes*.
            out_file.write(b'\0' * ext.num_blocks * block_size)
    else:
        print("Unsupported type = %d" % op.type)
        sys.exit(-1)

    return data
Exemplo n.º 21
0
    def Parse(self, snbFile, metaOnly = False):
        """Parse an SNB container from the file object *snbFile*.

        Reads the 44-byte header, the zlib-compressed FAT, the 16-byte
        tail block and finally each file's body, bzip2-decompressing
        "plain" blocks on demand.  Python 2 only (print statements,
        str/bytes mixing).
        NOTE(review): *metaOnly* is accepted but never consulted here.
        :raises Exception: on size mismatches or unknown file attributes.
        """
        # Read header
        vmbr = snbFile.read(44)
        (self.magic, self.rev80, self.revA3, self.revZ1,
         self.fileCount, self.vfatSize, self.vfatCompressed,
         self.binStreamSize, self.plainStreamSizeUncompressed,
         self.revZ2) = struct.unpack('>8siiiiiiiii', vmbr)

        # Read FAT
        self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed))
        self.ParseFile(self.vfat, self.fileCount)

        # Read tail
        snbFile.seek(-16, os.SEEK_END)
        #plainStreamEnd = snbFile.tell()
        tailblock = snbFile.read(16)
        (self.tailSize, self.tailOffset, self.tailMagic) = struct.unpack('>ii8s', tailblock)
        snbFile.seek(self.tailOffset)
        self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize))
        self.tailSizeUncompressed = len(self.vTailUncompressed)
        self.ParseTail(self.vTailUncompressed, self.fileCount)

        # Uncompress file data
        # Read files
        binPos = 0
        plainPos = 0
        uncompressedData = None
        for f in self.files:
            if f.attr & 0x41000000 == 0x41000000:
                # Compressed Files
                # The plain stream is decompressed once, lazily, then每 file
                # is sliced out of it by offset.
                if uncompressedData == None:
                    uncompressedData = ""
                    for i in range(self.plainBlock):
                        # Fresh decompressor per block: each block is an
                        # independent bzip2 stream.
                        bzdc = bz2.BZ2Decompressor()
                        if (i < self.plainBlock - 1):
                            bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset
                        else:
                            bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset
                        snbFile.seek(self.blocks[self.binBlock + i].Offset)
                        try:
                            data = snbFile.read(bSize)
                            # NOTE(review): blocks of 32768+ bytes are taken
                            # as stored uncompressed -- confirm this matches
                            # the SNB format's block-size convention.
                            if len(data) < 32768:
                                uncompressedData += bzdc.decompress(data)
                            else:
                                uncompressedData += data
                        except Exception as e:
                            print e
                if len(uncompressedData) != self.plainStreamSizeUncompressed:
                    raise Exception()
                f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
                plainPos += f.fileSize
            elif f.attr & 0x01000000 == 0x01000000:
                # Binary Files
                snbFile.seek(44 + self.vfatCompressed + binPos)
                f.fileBody = snbFile.read(f.fileSize)
                binPos += f.fileSize
            else:
                print f.attr, f.fileName
                raise Exception("Invalid file")
Exemplo n.º 22
0
            def decompress(self, data):
                """ Decompress *data* with a one-shot bz2 decompressor.

                :return: data as a bytes object.
                """
                return bz2.BZ2Decompressor().decompress(data)
Exemplo n.º 23
0
 def iter_decode(fileobj):
     """Iterate through decoded fragments of the file.

     Feeds *fileobj* line by line into one bz2 decompressor and yields
     each decoded fragment, stopping quietly when the compressed stream
     ends (EOFError).
     """
     bunzip = bz2.BZ2Decompressor()
     for fragment in fileobj:
         try:
             decoded = bunzip.decompress(fragment)
         except EOFError:
             return
         yield decoded
Exemplo n.º 24
0
    def __init__(self, fileobj):
        """Wrap *fileobj* for line-oriented reading of a bz2 stream."""
        self._fileobj = fileobj
        self._bz2 = bz2.BZ2Decompressor()

        # Decoded-but-unconsumed lines; seeded with one empty fragment so
        # the first append always has something to extend.
        self._line_buffer = collections.deque([""])

        # Line currently being handed out and the read position within it.
        self._current_line = ""
        self._current_offset = 0
Exemplo n.º 25
0
 def _initialize_decompressor(self):
     """Set ``self._decompressor`` to match ``self._compression_type``.

     BZIP2 and DEFLATE are handled explicitly; anything else must be
     GZIP (enforced by assertion) and uses ``self._gzip_mask``.
     """
     ctype = self._compression_type
     if ctype == CompressionTypes.BZIP2:
         self._decompressor = bz2.BZ2Decompressor()
     elif ctype == CompressionTypes.DEFLATE:
         self._decompressor = zlib.decompressobj()
     else:
         assert ctype == CompressionTypes.GZIP
         self._decompressor = zlib.decompressobj(self._gzip_mask)
Exemplo n.º 26
0
    def get_content(self) -> Generator[str, None, None]:
        """Yield pieces of the fetched content, bz2-decompressed on the fly."""
        # https://docs.python.org/3.6/library/bz2.html#bz2.BZ2Decompressor
        bunzip = bz2.BZ2Decompressor()

        with self.fetch() as content:
            for piece in content:
                yield bunzip.decompress(piece)
Exemplo n.º 27
0
def bz2_stream(compressed, chunksize=100000):
    """Stream lines out of the bz2-compressed bytes *compressed*.

    Decompresses *chunksize* bytes at a time; every fragment produced by
    splitting on '\\n' is yielded with a trailing newline re-attached
    (including the final fragment, even when the input had none).
    """
    bunzip = bz2.BZ2Decompressor()
    for start in range(0, len(compressed), chunksize):
        text = bunzip.decompress(compressed[start:start + chunksize]).decode()
        for line in text.split('\n'):
            yield line + '\n'
Exemplo n.º 28
0
def _decompress_arxiv(arxiv):
    """Decompress the .bz2 archive at path *arxiv* into a sibling file with
    the '.bz2' suffix stripped, streaming 100 KiB at a time.
    """
    bunzip = bz2.BZ2Decompressor()
    logger.debug('Extracting archive {}'.format(arxiv))
    target_path = arxiv.rsplit('.bz2')[0]
    with open(arxiv, 'rb') as packed:
        with open(target_path, 'wb') as unpacked:
            while True:
                block = packed.read(100 * 1024)
                if not block:
                    break
                unpacked.write(bunzip.decompress(block))
Exemplo n.º 29
0
Arquivo: dq.py Projeto: umutbb/hubble
 def decompress(self, dat):
     """Best-effort bzip2 decompression.

     *dat* is first coerced to bytes; when it carries the bzip2 magic
     prefix b'BZ' an inflate is attempted, with IOError treated as
     "not really compressed".  The (possibly inflated) bytes are returned.
     """
     dat = encode_something_to_bytes(dat)
     if dat.startswith(b'BZ'):
         try:
             dat = bz2.BZ2Decompressor().decompress(dat)
         except IOError:
             # Corrupt or mislabelled input -- return the raw bytes.
             pass
     return dat
Exemplo n.º 30
0
 def _read_bzip(self, coder, input, level, num_coders):
     """Decode a bzip2 stream by handing a fresh BZ2Decompressor to the
     shared decompressor driver (bz2 cannot partially decompress).
     """
     return self._read_from_decompressor(coder,
                                         bz2.BZ2Decompressor(),
                                         input,
                                         level,
                                         num_coders,
                                         can_partial_decompress=False)