def testReadChunk10(self): # "Test lzma.LZMAFile.read() in chunks of 10 bytes" self.createTempFile() lzmaf = lzma.LZMAFile(self.filename) text = '' while 1: str = lzmaf.read(10) if not str: break text += str self.assertEqual(text, text) lzmaf.close()
def decompress(reader: io.RawIOBase,
               path: Optional[str] = None,
               encoding: Optional[str] = None) -> BinaryIO:
    """Wrap a binary handle so that compression and encoding are undone.

    The first buffered bytes are compared with the gzip, LZMA and bzip2
    magic numbers; on a match the stream is wrapped in the corresponding
    decompressor. The encoding is then taken from ``encoding`` when given,
    otherwise guessed with ``chardet`` from the first decompressed bytes.

    Arguments:
        reader: the raw binary stream to inspect.
        path: not used by this function.
        encoding: explicit source encoding; skips the reliance on the
            ``chardet`` guess when provided.

    Returns:
        An ``EncodedFile``-wrapped buffered reader when the encoding is
        known with confidence ``1.0``; otherwise the decompressed stream
        itself, after a ``UnicodeWarning`` has been emitted.
    """
    buffered = BufferedReader(reader)
    source = typing.cast(BinaryIO, buffered)

    def _rebuffer(raw) -> BufferedReader:
        # Give the decompressor its own buffer so it can be peeked again.
        return BufferedReader(typing.cast(io.RawIOBase, raw))

    # Step 1: undo compression, if any.
    if buffered.peek().startswith(MAGIC_GZIP):
        decompressed = _rebuffer(gzip.GzipFile(mode="rb", fileobj=source))
    elif buffered.peek().startswith(MAGIC_LZMA):
        decompressed = _rebuffer(lzma.LZMAFile(source, mode="rb"))
    elif buffered.peek().startswith(MAGIC_BZIP2):
        decompressed = _rebuffer(bz2.BZ2File(source, mode="rb"))
    else:
        decompressed = buffered

    # Step 2: settle on an encoding.
    det: Dict[str, Union[str, float]] = chardet.detect(decompressed.peek())
    if encoding is not None:
        det = dict(encoding=encoding, confidence=1.0)
    elif det["encoding"] == "ascii":
        # ASCII is a subset of UTF-8, so treat it as such.
        det["encoding"] = "UTF-8"

    if det["confidence"] != 1.0:
        warnings.warn("could not find encoding, assuming UTF-8",
                      UnicodeWarning,
                      stacklevel=3)
        return typing.cast(BinaryIO, decompressed)

    transcoded = EncodedFile(
        typing.cast(typing.BinaryIO, decompressed),
        "UTF-8",
        typing.cast(str, det["encoding"]),
    )
    return typing.cast(
        BinaryIO,
        BufferedReader(typing.cast(io.RawIOBase, transcoded)),
    )
def testWriteLines(self): # "Test lzma.LZMAFile.writelines()" lzmaf = lzma.LZMAFile(self.filename, "w") self.assertRaises(TypeError, lzmaf.writelines) sio = StringIO(self.TEXT) lzmaf.writelines(sio.readlines()) lzmaf.close() # patch #1535500 self.assertRaises(ValueError, lzmaf.writelines, ["a"]) f = open(self.filename, 'rb') self.assertEqual(lzma.decompress(f.read()), self.TEXT) f.close()
def _open_compressed_file(self, path):
    """Open ``path`` for binary writing using the codec in ``self.format``.

    ``'gz'``, ``'bz2'`` and ``'xz'`` are supported. Any other value is
    reported through ``self.module.fail_json``; if that call returns,
    ``None`` is returned.
    """
    if self.format == 'gz':
        return gzip.open(path, 'wb')
    if self.format == 'bz2':
        return bz2.BZ2File(path, 'wb')
    if self.format == 'xz':
        return lzma.LZMAFile(path, 'wb')

    self.module.fail_json(msg="%s is not a valid format" % self.format)
    return None
def extract_lzma_file(full_lzma_file_path, extract_to=None):
    """Decompress an LZMA/XZ file to disk.

    Args:
        full_lzma_file_path: path of the compressed input file.
        extract_to: destination path. When ``None`` the input path minus
            its last ``len('.xz')`` characters is used.

    Raises:
        lzma.LZMAError: if the input is not valid compressed data.
    """
    if extract_to is None:
        extract_to = full_lzma_file_path[:-len('.xz')]
    print('Exacting %s to %s' % (full_lzma_file_path, extract_to))
    # Both handles are context-managed so they are closed even when
    # decompression or writing fails part-way through.
    with lzma.LZMAFile(full_lzma_file_path, mode='rb') as l_file:
        with open(extract_to, 'wb') as f:
            while True:
                data = l_file.read(4096)
                if not data:
                    break
                f.write(data)
def _write_fileobject(filename, compress=("zlib", 3)):
    """Return the right compressor file object in write mode.

    ``compress`` is indexed as ``(method, level)``. ``"gzip"``, ``"bz2"``,
    ``"xz"`` and ``"lzma"`` select the matching writer (the last two only
    when the ``lzma`` module is available); anything else falls back to
    the zlib writer. The result is always passed through
    ``_buffered_write_file``.
    """
    compressmethod = compress[0]
    compresslevel = compress[1]
    have_lzma = lzma is not None

    if compressmethod == "gzip":
        raw = BinaryGzipFile(filename, 'wb', compresslevel=compresslevel)
    elif compressmethod == "bz2":
        raw = bz2.BZ2File(filename, 'wb', compresslevel=compresslevel)
    elif have_lzma and compressmethod == "xz":
        raw = lzma.LZMAFile(filename, 'wb',
                            check=lzma.CHECK_NONE,
                            preset=compresslevel)
    elif have_lzma and compressmethod == "lzma":
        raw = lzma.LZMAFile(filename, 'wb',
                            preset=compresslevel,
                            format=lzma.FORMAT_ALONE)
    else:
        # Unknown method, or lzma requested but unavailable.
        raw = BinaryZlibFile(filename, 'wb', compresslevel=compresslevel)

    return _buffered_write_file(raw)
def is_lzma_file(path):
    """Tell whether the file at ``path`` starts like LZMA/XZ data.

    One byte is decompressed as a probe: ``lzma.LZMAError`` or ``EOFError``
    means the content is not (complete) LZMA data and yields ``False``.
    """
    with lzma.LZMAFile(path, 'rb') as probe:
        try:
            probe.read(1)
        except (lzma.LZMAError, EOFError):
            return False
        else:
            return True
def test_fetch_tarball_xz(self):
    # Fetch an .xz upstream tarball through TarfileSource and check that
    # the result lands under the expected ``.orig.tar.xz`` name.
    self.requireFeature(LzmaFeature)
    self.requireFeature(XzFeature)
    import lzma
    os.mkdir("empty")
    make_new_upstream_tarball_xz("empty", "foo-1.0.tar.xz")
    source = TarfileSource("foo-1.0.tar.xz", "1.0")
    os.mkdir("bar")
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed from unittest.TestCase in Python 3.12.
    self.assertEqual(["bar/foo_1.0.orig.tar.xz"],
                     source.fetch_tarballs("foo", "1.0", "bar"))
    self.assertPathExists("bar/foo_1.0.orig.tar.xz")
    # Make sure the fetched file can be opened with LZMAFile.
    lzma.LZMAFile("bar/foo_1.0.orig.tar.xz").close()
def main(self):
    """Decode the header of ``self.args.file_in`` and copy out its payload.

    The first 10 bytes are handed to ``self._decode_rkpi_header``. A
    summary of the header goes to STDERR, then the rest of the input is
    copied to ``self.args.file_out`` -- verbatim when
    ``self.args.do_decompress`` is truthy or the header reports no
    compression, otherwise through the Zstandard, Brotli or LZMA
    decompressor selected by the header.
    """
    rkpihdr = self._decode_rkpi_header(self.args.file_in.read(10))
    if rkpihdr is None:
        print("Reekpie header not found or incomplete.")
        exit()

    # Dump information describing the PCM data to the TTY (it is required
    # for parsing). NOTE: it goes to STDERR so that it is not piped into a
    # program or a file by default.
    summary_template = (
        "Sampleformat: {sample_format}\n"
        "Compression: {compression}\n"
        f"Bytedepth: {rkpihdr['bytedepth']}\n"
        f"Samplerate: {rkpihdr['samplerate']}\n"
        "Channellayout: {channel_layout}\n"
        "Endianness: {endianness}\n"
        f"Channels: {rkpihdr['channels']}\n")
    layout_names = ('interleaved', 'planar')
    endian_names = ('little', 'big')
    format_names = ('unknown', 'unsigned', 'signed', 'float',
                    'adpcm', 'mu-law', 'a-law')
    codec_names = ('none', 'zstd', 'brotli', 'lzma')
    summary_fields = {
        'channel_layout': layout_names[rkpihdr['channellayout']],
        'endianness': endian_names[rkpihdr['endianness']],
        'sample_format': format_names[rkpihdr['sampleformat']],
        'compression': codec_names[rkpihdr['compression']],
    }
    stderr.write(summary_template.format_map(summary_fields))

    source = self.args.file_in
    sink = self.args.file_out

    # Plain copy: requested explicitly, or the payload is not compressed.
    if self.args.do_decompress or rkpihdr['compression'] == 0b00:
        if copyfile(source, sink) == 0:
            stderr.write("Can’t do IO.")
        return

    codec = rkpihdr['compression']
    data_copied = 0  # how many bytes were copied during decompression.
    if codec == 0b01:
        # Zstandard decompressor.
        data_copied = copyfile(
            zstandard.ZstdDecompressor().stream_reader(source), sink)
    elif codec == 0b10:
        # Brotli decompressor.
        data_copied = copyfilemap(source, sink,
                                  brotli.Decompressor().decompress)
    elif codec == 0b11:
        # LZMA decompressor.
        data_copied = copyfile(lzma.LZMAFile(source), sink)

    if data_copied == 0:
        stderr.write("Decompressor failed or can’t do IO.")
def load(filename=DEFAULT_GEOZONES_FILE, drop=False):
    '''
    Load a geozones archive from <filename>

    <filename> can be either a local path or a remote URL.

    The archive is opened as an LZMA-compressed tarball and extracted
    into a timestamped temporary directory; ``levels.msgpack`` and
    ``zones.msgpack`` from that directory are then loaded.

    :param filename: local path, or a URL starting with ``http`` which is
        downloaded first.
    :param drop: when true and the collection already holds documents,
        the data is loaded into a timestamped collection which is then
        renamed over the original one (``dropTarget=True``).
    '''
    # Timestamp without '-', ':' or fractional seconds; used for both the
    # extraction directory and the temporary collection names.
    ts = datetime.now().isoformat().replace('-', '').replace(':', '').split('.')[0]
    prefix = 'geozones-{0}'.format(ts)
    if filename.startswith('http'):
        log.info('Downloading GeoZones bundle: %s', filename)
        # Use tmp.open to make sure that the directory exists in FS
        with tmp.open(GEOZONE_FILENAME, 'wb') as newfile:
            newfile.write(requests.get(filename).content)
            # From here on work with the downloaded local copy.
            filename = tmp.path(GEOZONE_FILENAME)

    log.info('Extracting GeoZones bundle')
    with handle_error(prefix):
        with contextlib.closing(lzma.LZMAFile(filename)) as xz:
            with tarfile.open(fileobj=xz) as f:
                # NOTE(review): extractall() runs on an archive that may
                # have just been downloaded; the tarfile documentation
                # warns against extracting untrusted archives without
                # inspection -- confirm the source is trusted.
                f.extractall(tmp.path(prefix))

    log.info('Loading GeoZones levels')

    log.info('Loading levels.msgpack')
    levels_filepath = tmp.path(prefix + '/levels.msgpack')
    if drop and GeoLevel.objects.count():
        # Load into a timestamped collection, then rename it over the
        # existing one.
        name = '_'.join((GeoLevel._get_collection_name(), ts))
        target = GeoLevel._get_collection_name()
        with switch_collection(GeoLevel, name):
            with handle_error(prefix, GeoLevel):
                total = load_levels(GeoLevel, levels_filepath)
                GeoLevel.objects._collection.rename(target, dropTarget=True)
    else:
        with handle_error(prefix):
            total = load_levels(GeoLevel, levels_filepath)
    log.info('Loaded {total} levels'.format(total=total))

    log.info('Loading zones.msgpack')
    zones_filepath = tmp.path(prefix + '/zones.msgpack')
    if drop and GeoZone.objects.count():
        # Same load-then-rename sequence as for the levels above.
        name = '_'.join((GeoZone._get_collection_name(), ts))
        target = GeoZone._get_collection_name()
        with switch_collection(GeoZone, name):
            with handle_error(prefix, GeoZone):
                total = load_zones(GeoZone, zones_filepath)
                GeoZone.objects._collection.rename(target, dropTarget=True)
    else:
        with handle_error(prefix):
            total = load_zones(GeoZone, zones_filepath)
    log.info('Loaded {total} zones'.format(total=total))

    cleanup(prefix)
def _get_file_handle(
    filepath,
    mode,
    *,
    compression=None,
    encoding=None,
    errors=None,
    newline=None,
):
    """
    Open ``filepath`` in ``mode``, transparently handling compression.

    Without ``compression`` the path object's own ``open`` is used. With
    ``"gzip"``, ``"bz2"``, ``"xz"`` or ``"zip"`` the matching reader/writer
    is opened with the ``b``/``t`` flags stripped from ``mode``; when
    ``mode`` contains ``t`` the result is wrapped in a text layer using
    ``encoding``, ``errors`` and ``newline``. A zip archive must contain
    exactly one member.

    Raises ``ValueError`` for an unsupported ``compression`` value or a
    zip archive holding zero or several files.
    """
    # no compression, file is opened as usual
    if not compression:
        return filepath.open(mode=mode,
                             encoding=encoding,
                             errors=errors,
                             newline=newline)

    mode_ = mode.replace("b", "").replace("t", "")

    # Single-stream codecs share one calling convention.
    stream_opener = None
    for label, candidate in (
        ("gzip", gzip.GzipFile),
        ("bz2", bz2.BZ2File),
        ("xz", lzma.LZMAFile),
    ):
        if compression == label:
            stream_opener = candidate
            break

    if stream_opener is not None:
        handle = stream_opener(filepath, mode=mode_)
    elif compression == "zip":
        archive = zipfile.ZipFile(filepath, mode=mode_)
        members = archive.namelist()
        member_count = len(members)
        if member_count == 1:
            handle = archive.open(members[0])
        elif member_count == 0:
            raise ValueError(f"no files found in zip file '{filepath}'")
        else:
            raise ValueError(
                f"{len(members)} files found in zip file '{filepath}', "
                "but only one file is allowed")
    else:
        valid_values = [None, "infer"] + sorted(_ext_to_compression.values())
        raise ValueError(
            errors_.value_invalid_msg("compression", compression,
                                      valid_values))

    if "t" in mode:
        handle = io.TextIOWrapper(handle,
                                  encoding=encoding,
                                  errors=errors,
                                  newline=newline)
    return handle
def decompress_open(filename, mode='r'):
    """Open ``filename``, choosing the opener from its extension.

    ``.gz``, ``.bz2`` and ``.xz`` are opened with the gzip, bz2 and lzma
    modules; every other name goes to the builtin ``open``. ``mode`` is
    passed on unchanged.
    """
    if filename.endswith('.gz'):
        return gzip.open(filename, mode)
    if filename.endswith('.bz2'):
        return bz2.BZ2File(filename, mode)
    if filename.endswith('.xz'):
        # Imported lazily so lzma is only needed for .xz inputs.
        # pylint: disable=F0401
        import lzma
        return lzma.LZMAFile(filename, mode)
    return open(filename, mode)
def get_unzip_function(filename, content_type='', unzip=True, digest=False):
    """
    Pick an on-the-fly decompressor for a streaming web download.

    Used to uncompress simple compression schemes while downloading.

    Args:
        filename: name of the downloaded file.
        content_type: MIME type reported for the download.
        unzip: set to ``False`` to disable decompression.
        digest: when true, decompression is disabled as well.

    Returns:
        A ``(target_name, wrap)`` tuple. ``wrap`` takes the raw file object
        and returns a decompressing reader (or the object unchanged when
        no scheme applies). ``target_name`` is ``filename`` without its
        compression extension.
    """
    def _without_suffix(name, suffix):
        # Only strip the extension when it is really there: a match on
        # content type alone must not chop three arbitrary characters off
        # the name.
        return name[:-len(suffix)] if name.endswith(suffix) else name

    if unzip and not digest:
        if content_type == "application/gzip" or filename.endswith('.gz'):
            return (_without_suffix(filename, '.gz'),
                    lambda f: gzip.GzipFile(fileobj=f))
        elif content_type == "application/bz2" or filename.endswith('.bz'):
            return (_without_suffix(filename, '.bz'),
                    lambda f: bz2.BZ2File(f))
        elif content_type == "application/x-xz" or filename.endswith('.xz'):
            return (_without_suffix(filename, '.xz'),
                    lambda f: lzma.LZMAFile(f))
    return filename, lambda f: f
def open_file(filename, mode):
    """Open an XZ image file with whichever lzma module can be imported.

    When ``import_lzma()`` raises ``ImportError`` the condition is
    reported through ``tobiko.skip`` together with the file name, mode and
    Python version.
    """
    try:
        lzma = import_lzma()
    except ImportError:
        reason = (
            "Package lzma or backports.lzma is required to decompress "
            "{filename!r} (mode={mode!r}) XZ image file "
            "({python_version!r}).")
        tobiko.skip(reason,
                    filename=filename,
                    mode=mode,
                    python_version=sys.version)

    return lzma.LZMAFile(filename=filename, mode=mode)
def test_get_metadata_file_lzma(self): # create the test file source_file = os.path.join(self.working_dir, 'foo.xz') handle = lzma.LZMAFile(source_file, 'w') handle.write('apples') handle.close() self.metadata_files.metadata['foo'] = {'local_path': source_file} # validate it handle = self.metadata_files.get_metadata_file_handle('foo') data = handle.read() self.assertEquals(data, 'apples') handle.close()
def __init__(self, filename):
    """Parse ``filename`` and index its ``newpackage`` elements.

    The file is opened with gzip when its name ends in ``.gz``, with lzma
    when it ends in ``xz``, and as plain text otherwise. Every
    ``newpackage`` element becomes a ``NewPackage`` stored in
    ``self.deltainfo`` under its ``nevra()`` key.
    """
    self.deltainfo = {}

    if filename.endswith(".gz"):
        fo = gzip.open(filename)
    elif filename.endswith("xz"):
        fo = lzma.LZMAFile(filename, 'r')
    else:
        fo = open(filename, 'rt')

    # Close the input once parsing ends (or fails); previously the handle
    # was left open for the lifetime of the process.
    try:
        for _event, elem in iterparse(fo):
            if elem.tag == "newpackage":
                p = NewPackage(elem)
                self.deltainfo[p.nevra()] = p
    finally:
        fo.close()
def load_raw(self, buf):
    """Read the bundle header from ``buf`` and then the asset table.

    Header fields are read in file order; which optional fields are
    present depends on ``self.format_version``. The asset table is read
    from the underlying buffer, through an LZMA stream when
    ``self.compressed`` is set.
    """
    # Bundle File metadata:
    self.file_size = buf.read_uint()
    self.header_size = buf.read_int()
    self.total_chunk_count = buf.read_int()

    chunk_total = buf.read_int()
    chunks = []
    for _ in range(chunk_total):
        compressed_size = buf.read_uint()
        decompressed_size = buf.read_uint()
        chunks.append(self.ChunkInfo(compressed_size, decompressed_size))
    self.chunk_info = chunks

    # Only present from format version 2 on; older files reuse file_size.
    self.bundle_size = (
        buf.read_uint() if self.format_version >= 2 else self.file_size)

    # Size of uncompressed bundle metadata header (format version 3+).
    self.metadata_header_size = (
        buf.read_uint() if self.format_version >= 3 else None)

    _padding = buf.read_byte()
    assert buf.tell() == self.header_size

    # Packaged Bundle metadata:
    databuf = OffsetReader(buf.buf, endian=">")
    if self.compressed:
        databuf = BinaryReader(lzma.LZMAFile(filename=databuf), endian=">")
    self._databuf = databuf

    asset_total = databuf.read_int()
    entries = []
    for _ in range(asset_total):
        name = databuf.read_string()
        offset = databuf.read_uint()
        size = databuf.read_uint()
        entries.append((name, offset, size))

    # Assets are built only after the whole table has been read, as
    # Asset.from_bundle is handed the same ``databuf``.
    for name, offset, size in entries:
        self.assets.append(Asset.from_bundle(self, databuf, name, offset, size))

    if not self.metadata_header_size:
        # NB: This won't include any padding.
        self.metadata_header_size = databuf.tell()
def processElement(self, ctx: Context, f):
    """Return an XZ-compressed in-memory copy of ``f``.

    Files whose name already ends in ``.xz`` are returned unchanged.
    Otherwise the binary content of ``f`` is compressed in XZ format and
    wrapped in an ``InMemoryFile`` whose relative path gains the ``.xz``
    suffix.
    """
    already_compressed = f.fileName.endswith(".xz")
    if already_compressed:
        return f

    compressed = io.BytesIO()
    with lzma.LZMAFile(filename=compressed, mode="wb",
                       format=lzma.FORMAT_XZ) as xz_writer:
        xz_writer.write(f.readBinary())

    # The buffer is read only after the writer has been closed, so the
    # stream trailer is included.
    return InMemoryFile(
        f.relFilePath + ".xz",
        FileTypeInfo.guessFromFileName(".xz"),
        compressed.getvalue(),
    )
def _extract(archive, compression, cmd, format, verbosity, outdir):
    """Extract an LZMA or XZ archive with the lzma Python module.

    The output file name comes from ``util.get_single_outfile``. Data is
    copied in pieces of ``READ_SIZE_BYTES``. Any failure is re-raised as
    ``util.PatoolError``. Always returns ``None``.
    """
    targetname = util.get_single_outfile(outdir, archive)
    try:
        with lzma.LZMAFile(archive, format=format) as source:
            with open(targetname, 'wb') as sink:
                while True:
                    piece = source.read(READ_SIZE_BYTES)
                    if not piece:
                        break
                    sink.write(piece)
    except Exception as err:
        details = (archive, targetname, err)
        raise util.PatoolError("error extracting %s to %s: %s" % details)
    return None
def __init__(self, debfile):
    """Open the ``data.tar.xz`` member of a .deb archive.

    The resulting tar archive is kept in ``self.store`` and its member
    list is printed.
    """
    self.debfile = debfile
    self.fds = {}

    # .deb (ar archive) -> data.tar.xz member -> xz stream -> tar archive.
    # The intermediate handles are not closed here; ``self.store`` reads
    # through them.
    deb_handle = open(debfile, 'rb')
    archive = arfile.ArFile(deb_handle)
    data_member = archive.open('data.tar.xz')
    xz_stream = lzma.LZMAFile(data_member)
    self.store = tarfile.open(fileobj=xz_stream)
    print(self.store.getmembers())
def ftrace_open(filename):
    """Open a trace file, decompressing it based on its extension.

    ``.gz`` files are opened with gzip and ``.lzma`` files with lzma;
    anything else is opened as a regular file. All are opened with
    mode ``"r"``.

    Raises:
        Exception: if ``filename`` ends in ``.lzma`` but the lzma module
            cannot be imported.
    """
    if filename.endswith(".gz"):
        import gzip
        return gzip.open(filename, "r")
    elif filename.endswith(".lzma"):
        try:
            import lzma
        except ImportError:
            # Only a failed import means "module missing"; a bare except
            # would also have swallowed KeyboardInterrupt and SystemExit.
            raise Exception(
                "lzma module could not be imported. Please install python-lzma to seamlessly open lzma compressed file: http://pypi.python.org/pypi/pyliblzma"
            )
        return lzma.LZMAFile(filename, "r")
    else:
        return open(filename, "r")
def post(self, request):
    """Accept an uploaded update archive and record its metadata.

    ``UPDATE_FILE_DICT`` is reset first. The first uploaded file, when
    smaller than 100000000 bytes, is read as an LZMA-compressed tar
    archive; platform bits, the version found in ``constants.py`` and a
    SHA3-512 digest over the host-monitoring modules are extracted from
    it. ``UPDATE_FILE_DICT`` is only filled in when all of that succeeds.
    The response always carries the values gathered so far as JSON.
    """
    for key in ("data", "version", "checksum", "platform_bits"):
        UPDATE_FILE_DICT[key] = None

    version = checksum = bits = None

    upload = request.FILES[list(request.FILES.keys())[0]]
    if upload.size < 100000000:
        import tarfile
        import lzma
        import hashlib
        import io

        modules_prefix = "Lib/site-packages/initat/host_monitoring/modules/"
        try:
            data = upload.read()
            archive = tarfile.TarFile(
                fileobj=lzma.LZMAFile(filename=io.BytesIO(data)))

            if "hm_icsw_w64" in archive.getnames():
                bits = "64"
            elif "hm_icsw_w32" in archive.getnames():
                bits = "32"
            else:
                raise Exception("Invalid Update File")

            constants_member = archive.extractfile(
                "Lib/site-packages/initat/constants.py")
            constants_source = constants_member.read().decode()
            version_field = constants_source.split('WINDOWS_HM_VERSION = ')[1]
            version = version_field.strip().replace("\"", "")

            # Digest the module sources in sorted order so the checksum is
            # independent of the member order inside the archive.
            digester = hashlib.new("sha3_512")
            module_paths = sorted(
                member for member in archive.getnames()
                if member.startswith(modules_prefix)
                and member.endswith(".py"))
            for module_path in module_paths:
                digester.update(archive.extractfile(module_path).read())
            checksum = digester.hexdigest()
        except Exception:
            # Deliberately ignored: a failed upload leaves the stored
            # update untouched and reports whatever was parsed so far.
            pass
        else:
            UPDATE_FILE_DICT["data"] = data
            UPDATE_FILE_DICT["version"] = version
            UPDATE_FILE_DICT["checksum"] = checksum
            UPDATE_FILE_DICT["platform_bits"] = bits

    summary = {"version": version, "checksum": checksum, "platform_bits": bits}
    return HttpResponse(json.dumps(summary))
def merge_files(filenames, input_encoding, output_filename, output_encoding,
                extract_row):
    """Merge the rows of several input files into one XZ-compressed CSV.

    Rows come from ``read_files(filenames, extract_row)``. The CSV header
    is taken from the keys of the first row.

    Args:
        filenames: input files, passed to ``read_files``.
        input_encoding: not used by this function.
        output_filename: path of the ``.xz`` file to create.
        output_encoding: not used by this function; the output is always
            written as UTF-8.
        extract_row: row extraction callable, passed to ``read_files``.

    NOTE(review): ``input_encoding`` and ``output_encoding`` are accepted
    but ignored -- confirm whether the hard-coded 'utf-8' is intentional.
    """
    output_fobj = io.TextIOWrapper(
        lzma.LZMAFile(output_filename, mode='wb', format=lzma.FORMAT_XZ),
        encoding='utf-8',
        # The csv module emits its own line terminators; newline='' stops
        # the text layer from translating them a second time.
        newline='',
    )
    # Closing the wrapper flushes the text buffer and closes the LZMAFile,
    # which writes the XZ stream trailer. Previously the file was never
    # closed, so the archive could be left truncated.
    with output_fobj:
        data = read_files(filenames, extract_row)
        writer = None
        for row in tqdm(data):
            if writer is None:
                writer = csv.DictWriter(output_fobj,
                                        fieldnames=list(row.keys()))
                writer.writeheader()
            writer.writerow(row)
def open_file(file_name, mode='r'):
    """
    .. versionadded:: 0.1.12

    .. versionchanged:: 0.3.4
        using *io.open*, always in binary mode

    .. versionchanged:: 0.4.2
        when a file handle is detected, it is passed to
        :func:`compressed_handle` to detect if the handle is a compressed
        file

    Open a file, picking the module to use from the file extension
    (``.gz``, ``.bz2``, ``.xz``; anything else uses *io.open*). The file
    is always opened in binary mode: ``b`` is appended to *mode* when it
    is missing.

    .. note::

        Unicode makes for a slower `.translate` method in Python2, so it's
        best to use the `open` builtin.

    Arguments:
        file_name (str): file name, or an already opened stream
        mode (str): mode used to open the file

    Returns:
        file: file handle

    Raises:
        UnsupportedFormat: if the module to open the file is not available
    """
    # Checking against io.Base was not reliable (possibly differing Python
    # versions), so anything without ``endswith`` is treated as a stream.
    if getattr(file_name, 'endswith', None) is None:
        return compressed_handle(file_name)

    if 'b' not in mode:
        mode = mode + 'b'

    if file_name.endswith('.gz'):
        return gzip.GzipFile(file_name, mode)
    if file_name.endswith('.bz2'):
        return bz2.BZ2File(file_name, mode)
    if file_name.endswith('.xz'):
        if lzma is None:
            raise UnsupportedFormat("Cannot import lzma module")
        return lzma.LZMAFile(file_name, mode)
    return io.open(file_name, mode)
def data_file(self):
    """Return the uncompressed raw CPIO data of the RPM archive.

    The decompressing reader is created on first use and cached in
    ``self._data_file``. Archives whose ``archive_compression`` header is
    ``b"xz"`` are read with lzma, all others with gzip.

    Raises ``NoLZMAModuleError`` when an xz archive is encountered but
    this module has no usable ``lzma`` attribute.
    """
    if self._data_file is not None:
        return self._data_file

    fileobj = _SubFile(self._fileobj, self.data_offset)
    uses_xz = self.headers["archive_compression"] == b"xz"
    if uses_xz:
        this_module = sys.modules[__name__]
        if not getattr(this_module, 'lzma', False):
            raise NoLZMAModuleError('lzma module not present')
        self._data_file = lzma.LZMAFile(fileobj)
    else:
        self._data_file = gzip.GzipFile(fileobj=fileobj)
    return self._data_file
def save_xz(filename, URL):
    """
    1. Save an .xz file downloaded from an online database.
    2. Decompress it next to the download and delete the archive.

    The decompressed data is written to ``filename`` with its last three
    characters replaced by ``.txt``.

    Args:
        filename: path of the archive to create; should end with '.xz'.
        URL: callable invoked as ``URL(filename)``; it is expected to
            store the archive at ``filename``.
    """
    URL(filename)
    # Context managers close both handles promptly; the archive must be
    # closed before os.remove() on platforms that lock open files.
    with lzma.LZMAFile(filename) as archive:
        payload = archive.read()
    newfilepath = filename[:-3]
    with open(newfilepath + '.txt', 'wb') as target:
        target.write(payload)
    os.remove(filename)
def testWriteChunks10(self): # "Test lzma.LZMAFile.write() with chunks of 10 bytes" lzmaf = lzma.LZMAFile(self.filename, "w") n = 0 while 1: str = self.TEXT[n * 10:(n + 1) * 10] if not str: break lzmaf.write(str) n += 1 lzmaf.close() f = open(self.filename, 'rb') self.assertEqual(lzma.decompress(f.read()), self.TEXT) f.close()
def unpack_dir(self, target_dir):
    """Unpack LZMA archive to a given target directory(target_dir).

    The output file is named after ``self.file_path`` with the ``.lzma``
    (when ``self.type`` is ``"lzma"``) or ``.xz`` extension removed.
    """
    output_path = util.join_path(target_dir,
                                 os.path.basename(self.file_path))
    ext = ".lzma" if self.type == "lzma" else ".xz"
    if output_path.endswith(ext):
        output_path = output_path[:-len(ext)]

    # Copy the decompressed bytes verbatim. The payload used to be decoded
    # as UTF-8 and written in text mode, which raised UnicodeDecodeError
    # for binary content and could alter line endings or encoding.
    with lzma.LZMAFile(self.file_path) as lzma_file:
        with open(output_path, "wb") as output:
            while True:
                chunk = lzma_file.read(64 * 1024)
                if not chunk:
                    break
                output.write(chunk)
def test_csv_compress(bucket, compression):
    # Upload a CSV to S3 (compressed by hand for gzip/bz2/xz, through
    # wr.s3.to_csv otherwise) and check that wr.s3.read_csv reads it back,
    # both in one go and in chunks.
    path = f"s3://{bucket}/test_csv_compress_{compression}/"
    wr.s3.delete_objects(path=path)
    df = get_df_csv()

    # compression name -> (file extension, factory wrapping a BytesIO)
    compressors = {
        "gzip": ("gz", lambda raw: gzip.GzipFile(mode="w", fileobj=raw)),
        "bz2": ("bz2", lambda raw: bz2.BZ2File(mode="w", filename=raw)),
        "xz": ("xz", lambda raw: lzma.LZMAFile(mode="w", filename=raw)),
    }

    if compression in compressors:
        extension, open_compressed = compressors[compression]
        buffer = BytesIO()
        with open_compressed(buffer) as zipped_file:
            df.to_csv(TextIOWrapper(zipped_file, "utf8"),
                      index=False,
                      header=None)
        key = f"test_csv_compress_{compression}/test.csv.{extension}"
        s3_resource = boto3.resource("s3")
        s3_object = s3_resource.Object(bucket, key)
        s3_object.put(Body=buffer.getvalue())
        file_path = f"s3://{bucket}/{key}"
    else:
        file_path = f"s3://{bucket}/test_csv_compress_{compression}/test.csv"
        wr.s3.to_csv(df=df, path=file_path, index=False, header=None)

    wr.s3.wait_objects_exist(paths=[file_path])

    # Whole-file read.
    df2 = wr.s3.read_csv(path=[file_path], names=df.columns)
    assert len(df2.index) == 3
    assert len(df2.columns) == 10

    # Chunked read.
    dfs = wr.s3.read_csv(path=[file_path], names=df.columns, chunksize=1)
    for df3 in dfs:
        assert len(df3.columns) == 10

    wr.s3.delete_objects(path=path)
def file_open(cls, file_path, mode="rb", compression="infer"):
    """Open ``file_path``, handling S3 addresses and compressed files.

    String paths matching ``S3_ADDRESS_REGEX`` are opened through s3fs,
    first with credentials and then anonymously if none are available.
    Other string paths are opened according to ``compression`` (``"gzip"``,
    ``"bz2"``, ``"xz"`` or ``"zip"``). Everything else, including
    non-string ``file_path`` values, goes to the builtin ``open``.

    For ``"zip"`` in write mode the ``ZipFile`` itself is returned; in
    read mode the archive must hold exactly one file, otherwise
    ``ValueError`` is raised.
    """
    if not isinstance(file_path, str):
        return open(file_path, mode=mode)

    if S3_ADDRESS_REGEX.search(file_path) is not None:
        # Normalise an upper-case leading "S" in the scheme.
        if file_path[0] == "S":
            file_path = "{}{}".format("s", file_path[1:])
        import s3fs as S3FS
        from botocore.exceptions import NoCredentialsError

        s3fs = S3FS.S3FileSystem(anon=False)
        try:
            return s3fs.open(file_path)
        except NoCredentialsError:
            # Retry anonymously when no credentials are configured.
            s3fs = S3FS.S3FileSystem(anon=True)
            return s3fs.open(file_path)

    if compression == "gzip":
        import gzip

        return gzip.open(file_path, mode=mode)
    if compression == "bz2":
        import bz2

        return bz2.BZ2File(file_path, mode=mode)
    if compression == "xz":
        import lzma

        return lzma.LZMAFile(file_path, mode=mode)
    if compression == "zip":
        import zipfile

        zf = zipfile.ZipFile(file_path, mode=mode.replace("b", ""))
        if zf.mode == "w":
            return zf
        if zf.mode == "r":
            zip_names = zf.namelist()
            if len(zip_names) == 1:
                return zf.open(zip_names.pop())
            if len(zip_names) == 0:
                raise ValueError(
                    "Zero files found in ZIP file {}".format(file_path)
                )
            raise ValueError(
                "Multiple files found in ZIP file."
                " Only one file per ZIP: {}".format(zip_names)
            )

    return open(file_path, mode=mode)