def fetch_input_file(self, file):
    if not os.path.isdir(self.input_cache_dir):
        os.mkdir(self.input_cache_dir)
    #url = 'https://www.metanetx.org/cgi-bin/mnxget/mnxref/'
    url = 'ftp://ftp.vital-it.ch/databases/metanetx/MNXref/3.2/'
    # 3xCommon + rpReader
    if file in ['reac_xref.tsv', 'chem_xref.tsv', 'chem_prop.tsv', 'comp_xref.tsv']:
        urllib_request_urlretrieve(url + file, self.input_cache_dir + '/' + file)
    #TODO: need to add this file to the git or another location
    if file in ['rr_compounds.tsv', 'rxn_recipes.tsv']:
        urllib_request_urlretrieve(
            'https://retrorules.org/dl/this/is/not/a/secret/path/rr02',
            self.input_cache_dir + '/rr02_more_data.tar.gz')
        tar = tarfile_open(self.input_cache_dir + '/rr02_more_data.tar.gz', 'r:gz')
        tar.extractall(self.input_cache_dir)
        tar.close()
        shutil_move(self.input_cache_dir + '/rr02_more_data/compounds.tsv',
                    self.input_cache_dir + '/rr_compounds.tsv')
        shutil_move(self.input_cache_dir + '/rr02_more_data/rxn_recipes.tsv',
                    self.input_cache_dir)
        os.remove(self.input_cache_dir + '/rr02_more_data.tar.gz')
        shutil_rmtree(self.input_cache_dir + '/rr02_more_data')
    if file == 'rules_rall.tsv':
        urllib_request_urlretrieve(
            'https://retrorules.org/dl/preparsed/rr02/rp3/hs',
            self.input_cache_dir + '/retrorules_rr02_rp3_hs.tar.gz')
        tar = tarfile_open(self.input_cache_dir + '/retrorules_rr02_rp3_hs.tar.gz', 'r:gz')
        tar.extractall(self.input_cache_dir)
        tar.close()
        shutil_move(self.input_cache_dir + '/retrorules_rr02_rp3_hs/retrorules_rr02_flat_all.tsv',
                    self.input_cache_dir + '/rules_rall.tsv')
        os.remove(self.input_cache_dir + '/retrorules_rr02_rp3_hs.tar.gz')
        shutil_rmtree(self.input_cache_dir + '/retrorules_rr02_rp3_hs')
def write(self, path: str, mode: str):
    with lz4.LZ4FrameFile(path, mode=mode[0]) as lz4c:
        archive = tarfile_open(mode=mode, fileobj=lz4c, **self.lz4_kwargs)
        try:
            yield archive
        finally:
            archive.close()
def copy_csharp_parser():
    parser_path = 'kenja/lib/csharp/kenja-csharp-parser.exe'
    parser_digest = 'a3dfac7de0406e961d0c47f95d9bc522'
    parser_location = 'https://github.com/sdlab-naist/kenja-csharp-parser/releases/download/0.1/kenja-csharp-parser-0.1.tar.gz'
    parser_tar_digest = '583013bc78b3f1f158f094baa37b0808'
    confirm_text = None
    if not os.path.exists(parser_path):
        confirm_text = "{0} does not exist. Do you want to download it?[y/n]".format(parser_path)
    elif hashlib.md5(open(parser_path, 'rb').read()).hexdigest() != parser_digest:
        confirm_text = "{0} is different from designated parser script. Do you want to overwrite it?[y/n]".format(parser_path)
    if confirm_text is not None:
        print(confirm_text)
        choice = raw_input().lower()
        yes = set(['yes', 'y', 'ye'])
        no = set(['no', 'n'])
        if choice in yes:
            (filename, _) = urllib.urlretrieve(parser_location)
            digest = hashlib.md5(open(filename, 'rb').read()).hexdigest()
            if parser_tar_digest != digest:
                print("md5 hash of {0} is incorrect! remove it and try again.".format(filename))
                sys.exit(1)
            tarfile = tarfile_open(filename, 'r')
            tarfile.extractall('kenja/lib/csharp')
    if not os.path.exists(parser_path):
        print("csharp parser will not be installed.")
        print("You should disable csharp parser when you run kenja")
    else:
        data_files.append(("kenja/lib/csharp", glob.glob("kenja/lib/csharp/*")))
def open_write(self, path: str) -> IO[bytes]:
    mode = 'w|{}'.format(self.compression)
    with tarfile_open(path, mode) as archive:
        with NamedTemporaryFile() as buffer:
            yield buffer
            buffer.seek(0)
            archive.add(buffer.name, self.filename)
def logexplore(self, params):
    from os import walk
    from os.path import join as pjoin
    from tarfile import open as tarfile_open
    from contextlib import closing

    to_process = []
    for path, dirs, files in walk('.'):
        for filename in files:
            if "log" in filename and "tgz" in filename:
                to_process.append(pjoin(path, filename))

    bad_files = []
    for f in to_process:
        with closing(tarfile_open(f)) as tar:
            for member in tar.getmembers():
                if "stdout" not in member.path:
                    continue
                contents = tar.extractfile(member).read()
                if not ("ERROR" in contents or "EXCEPTION" in contents):
                    continue
                tar.extract(member, path="tmp")
                bad_files.append((f, member, contents))

    if not bad_files:
        print "No bad files found"
    else:
        print len(bad_files), "bad files found"
        for f, member, contents in bad_files:
            print f, member.path, len(contents)
def download_certbot_config(config_bucket: str, config_key: str, certbot_config_dir: str, certbot_work_dir: str) -> None:
    """
    Download the configuration tar file from S3 and extract it to the certbot
    config directory.
    """
    with TemporaryFile(prefix="config", suffix=".tar.gz", dir=certbot_work_dir) as fd:
        try:
            result = s3.get_object(Bucket=config_bucket, Key=config_key)
        except ClientError as e:
            if e.response["Error"]["Code"] == "NoSuchKey":
                return
            raise

        body = result["Body"]
        first_chunk = True

        while True:
            chunk = body.read(65536)
            if not chunk:
                break

            if first_chunk:
                if chunk[:4] == b'\x50\x4b\x03\x04':
                    # Legacy ZIP file -- don't use
                    return
                first_chunk = False

            fd.write(chunk)

        fd.seek(0)
        with tarfile_open(fileobj=fd, mode="r") as tf:
            tf.extractall(certbot_config_dir)
def read(self, path: str, mode: str):
    with lz4.LZ4FrameFile(path) as lz4d:
        archive = tarfile_open(mode=mode, fileobj=lz4d, **self.lz4_kwargs)
        try:
            yield archive
        finally:
            archive.close()
def compress(self, fobj: IO[bytes]) -> IO[bytes]:
    mode = 'w|{}'.format(self.compression)
    with tarfile_open(fileobj=fobj, mode=mode) as archive:
        with NamedTemporaryFile() as buffer:
            yield buffer
            buffer.seek(0)
            archive.add(buffer.name, self.filename)
def compress_tar(inpt: str):  ## {{{
    from os import path, chdir, listdir
    from tarfile import open as tarfile_open

    inpt = remove_trailing_slash(inpt)
    root, base = path.split(inpt)
    dest_dir = root
    dest_tar = f'{base}.tar'

    chdir(dest_dir)
    if path.isdir(inpt):
        with tarfile_open(dest_tar, 'w') as NEW_TAR:
            chdir(base)
            for i in listdir():
                NEW_TAR.add(i)
    else:
        with tarfile_open(dest_tar, 'w') as NEW_TAR:
            NEW_TAR.add(base)
def download_parser(self):
    (filename, _) = urllib.request.urlretrieve(self.parser_location)
    if not validate_md5sum(self.parser_tar_digest, filename):
        print("md5 hash of downloaded file is incorrect! try again.")
        sys.exit(1)
    tarfile = tarfile_open(filename, 'r')
    tarfile.extractall('kenja/lib/csharp')
def download_parser(self):
    (filename, _) = urllib.urlretrieve(self.parser_location)
    if not validate_md5sum(self.parser_tar_digest, filename):
        print("md5 hash of downloaded file is incorrect! try again.")
        sys.exit(1)
    tarfile = tarfile_open(filename, 'r')
    tarfile.extractall('kenja/lib/csharp')
def _open_archive(cls, path: str, mode: str) -> TarFile:
    extension_index = path.rfind('.')
    if extension_index > -1:
        compression = path[extension_index + 1:]
    else:
        compression = cls._compression
    mode = '{}|{}'.format(mode, compression)
    return tarfile_open(path, mode)
def download_python(ver) -> (str, str):
    """Download installer, extract it, return the installer dir."""
    ver, url = download_info(ver)
    source_path = PYTHONS + '/Python-' + ver
    print(f'{url=}')
    tar_file = tarfile_open(fileobj=BytesIO(urlopen(url).read()))
    tar_file.extractall(PYTHONS)
    return ver, source_path
def extract_tarball(url, data):
    with BytesIO(data) as bio:
        if '.tar.' in url:
            with tarfile_open(fileobj=bio) as tarf:
                for info in tarf.getmembers():
                    if info.isfile() and info.name.startswith('bin/ec-'):
                        return tarf.extractfile(info).read()
    raise AssertionError('unreachable `extract` function')
def _given_resource(self, resource_id: str, name: str, lines: bytes):
    client = self._storage._file_storage._client
    buffer = BytesIO()
    with tarfile_open(mode='w:gz', fileobj=buffer) as archive:
        tarinfo = TarInfo(name)
        tarinfo.size = len(lines)
        archive.addfile(tarinfo, BytesIO(lines))
    buffer.seek(0)
    client.upload_object_via_stream(buffer, resource_id)
def tar_cz_relative(*path):
    """tar_cz(*path) -> bytes

    Compress a sequence of files or directories in memory.
    The resulting string could be stored as a .tgz file."""
    file_out = io_BytesIO()
    tar = tarfile_open(mode="w:gz", fileobj=file_out)
    for p in path:
        tar.add(p, arcname='./')
    tar.close()
    return file_out.getvalue()
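# Hedged usage sketch for tar_cz_relative above: the helper returns the raw
# bytes of a gzip-compressed tar stream, so callers can persist the result as
# a .tgz file. The directory name and output path below are illustrative
# assumptions, not taken from the original source.
def example_store_tgz():
    payload = tar_cz_relative('some_dir')      # hypothetical input directory
    with open('some_dir.tgz', 'wb') as out:    # hypothetical output path
        out.write(payload)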
def xtract_tar(inpt: str):  ## {{{
    from os import path, mkdir
    from tarfile import open as tarfile_open

    inpt = remove_trailing_slash(inpt)
    root_base, ext = path.splitext(inpt)
    dest_dir = root_base
    mkdir(dest_dir)
    with tarfile_open(inpt) as CUR_TAR:
        CUR_TAR.extractall(dest_dir)
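# Hedged usage sketch for the compress_tar/xtract_tar pair above: compress_tar
# writes <name>.tar next to the input, while xtract_tar creates a directory
# named after the archive (it must not already exist) and extracts into it.
# The paths below are illustrative assumptions.
def example_tar_roundtrip():
    compress_tar('/tmp/project')    # creates /tmp/project.tar next to the input
    xtract_tar('/tmp/backup.tar')   # creates /tmp/backup/ and extracts into it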
def extractlayers(dc, args, layers, top_most_layer_id):
    target_path = args.target
    flags = O_WRONLY

    if target_path == _TARGET_STDOUT:
        target_fd = stdout.fileno()
    else:
        flags |= O_CREAT | O_TRUNC
        if not args.force:
            flags |= O_EXCL
        target_fd = logexception(
            _LOGGER, ERROR,
            'unable to open target file "{}": {{e}}'.format(target_path),
            os_open, target_path, flags, 0o666)

    with fdopen(target_fd, 'wb') as target_file:
        if hasattr(target_file, 'seekable'):
            seekable = target_file.seekable()
        else:
            try:
                seekable = not lseek(target_fd, 0, SEEK_CUR) < 0 \
                    and S_ISREG(fstat(target_fd).st_mode)
            except OSError as e:
                if errorcode.get(e.errno) != 'ESPIPE':
                    raise
                seekable = False

        open_args = {'fileobj': target_file}

        if args.compression is None:
            open_args['mode'] = 'w' if seekable else 'w|'
        else:
            if seekable:
                mode = 'w:{}'
                open_args['compresslevel'] = args.compress_level
                _, ext = ospath_splitext(target_path)
                if ext.lower() != '{}{}'.format(ospath_extsep, args.compression):
                    _LOGGER.warning(
                        'target name "%s" doesn\'t match compression type ("%s")',
                        target_path, args.compression)
            else:
                mode = 'w|{}'
                _LOGGER.warning(
                    'target "%s" is not seekable, ignoring compression level (%d)',
                    target_path, args.compress_level)

            open_args['mode'] = mode.format(args.compression)

        with tarfile_open(**open_args) as tar_file:
            dimgx_extractlayers(dc, layers, tar_file, top_most_layer_id)
def write(self, path: str, mode: str):
    with NamedTemporaryFile() as decompressed:
        archive = tarfile_open(decompressed.name, mode=mode)
        try:
            yield archive
        finally:
            archive.close()
        decompressed.seek(0)
        with open(path, 'wb') as compressed:
            zstd = zstandard.ZstdCompressor(**self.zstd_kwargs)
            zstd.copy_stream(decompressed, compressed)
def read(self, path: str, mode: str):
    with NamedTemporaryFile() as decompressed:
        with open(path, 'rb') as compressed:
            zstd = zstandard.ZstdDecompressor(**self.zstd_kwargs)
            zstd.copy_stream(compressed, decompressed)
        decompressed.seek(0)
        archive = tarfile_open(mode=mode, fileobj=decompressed)
        try:
            yield archive
        finally:
            archive.close()
def try_tarfile(filename, pattern):
    with closing(tarfile_open(filename)) as tar:
        for f in tar.getmembers():
            if not (".root" in f.path and (not pattern or pattern in f.path)):
                continue
            print " -", f.path
            tmpdir = mkdtemp()
            try:
                tar.extract(f.path, tmpdir)
                yield safe_root_open(tmpdir + "/" + f.path)
            finally:
                rmtree(tmpdir)
def tar_gz_decompress(self, destination):
    """
    Decompress a given file into the given destination.

    Argument:
        - destination: str
            The destination of the decompressed file.
    """

    if destination is not None and isinstance(destination, str):
        with tarfile_open(self.file) as thetar:
            thetar.extractall(path=destination)
def tar_gz_compress(self, destination):
    """
    Compress a file into a tar.gz.

    Argument:
        - destination: str
            The destination of the compressed file.
    """

    if destination is not None and isinstance(destination, str):
        with tarfile_open(destination, "w:gz") as tar:
            tar.add(self.file)
def tar_gz_decompress(self, destination):
    """
    Decompress the given file into the given destination.

    :param str destination: The destination of the decompression.
    """

    if destination is not None and isinstance(destination, str):
        with tarfile_open(self.file) as thetar:
            thetar.extractall(path=destination)
    else:
        raise ValueError("{0} expected. {1} given.".format(
            str, type(destination)))
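# Hedged usage sketch for the tar_gz_compress/tar_gz_decompress helpers above,
# assuming they are methods of a small wrapper whose `file` attribute holds the
# path to operate on; the wrapper name and the paths below are illustrative
# assumptions, not part of the original source.
def example_tar_gz_roundtrip():
    FileHelper('data/report.txt').tar_gz_compress('report.tar.gz')   # pack the file
    FileHelper('report.tar.gz').tar_gz_decompress('restored/')       # unpack it elsewhere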
def read(self, fileobj):
    """How to read LDEO ASEP files from an NODC accession."""
    def is_fname_ok(fname):
        if '.csv' not in fname:
            return False
        if fname.find('/') > -1:
            raise ValueError(
                u'CTD Exchange Zip files should not contain directories.')
        return True

    def reader(dfile, fileobj, retain_order, header_only):
        ctdex.read(dfile, fileobj, retain_order, header_only)
        dfile.globals['_FILENAME'] = fileobj.name

    dfiles = []
    datapath = None
    datadirname = '0-data'
    with tarfile_open(mode='r:gz', fileobj=fileobj) as fff:
        for member in fff.getmembers():
            if datapath is None:
                if datadirname in member.name:
                    datapath = member.name.split(
                        datadirname)[0] + datadirname + '/'
                    log.info('NODC accession data path: {0}'.format(datapath))
                else:
                    continue
            if not member.name.startswith(datapath):
                continue
            bname = os.path.basename(member.name)
            if bname.endswith('pdf'):
                continue
            if '_ros.' in bname:
                continue
            # don't want upcasts
            if '_ctd_U.' in bname:
                continue
            dfile = DataFile()
            ggg = fff.extractfile(member)
            if ggg is None:
                log.error(u'Unable to extract file {0!r}'.format(member))
            else:
                ldeo_asep.read(dfile, ggg)
                dfiles.append(dfile)
    self.files = sorted(
        dfiles, key=lambda dfile: lexico(dfile.globals['STNNBR']))
def xtarfile_open(path: str, mode: str, **kwargs):
    compression = get_compression(path, mode)
    if not compression or compression in _NATIVE_FORMATS:
        return tarfile_open(path, mode, **kwargs)

    handler_class = _HANDLERS.get(compression)
    if handler_class is not None:
        handler = handler_class(**kwargs)
        if mode.startswith('r'):
            return handler.read(path, mode[:2])
        elif mode.startswith('w'):
            return handler.write(path, mode[:2])

    raise NotImplementedError
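# Hedged usage sketch for the xtarfile_open dispatcher above: compressions that
# tarfile handles natively fall through to tarfile_open, while registered
# handlers in _HANDLERS (for example a zstd or lz4 handler, as in the other
# snippets here) take over for the remaining suffixes. The archive and member
# names below are illustrative assumptions.
def example_dispatch():
    with xtarfile_open('bundle.tar.zst', 'w') as archive:
        archive.add('payload.bin')            # hypothetical file to pack
    with xtarfile_open('bundle.tar.zst', 'r') as archive:
        archive.extractall('unpacked/')       # read back through the same dispatcher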
def extractlayers(dc, args, layers, top_most_layer_id):
    target_path = args.target
    flags = O_WRONLY

    if target_path == _TARGET_STDOUT:
        target_fd = stdout.fileno()
    else:
        flags |= O_CREAT | O_TRUNC
        if not args.force:
            flags |= O_EXCL
        target_fd = logexception(
            _LOGGER, ERROR,
            'unable to open target file "{}": {{e}}'.format(target_path),
            os_open, target_path, flags, 0o666)

    with fdopen(target_fd, 'wb') as target_file:
        if hasattr(target_file, 'seekable'):
            seekable = target_file.seekable()
        else:
            try:
                seekable = not lseek(target_fd, 0, SEEK_CUR) < 0 \
                    and S_ISREG(fstat(target_fd).st_mode)
            except OSError as e:
                if errorcode.get(e.errno) != 'ESPIPE':
                    raise
                seekable = False

        open_args = {'fileobj': target_file}

        if args.compression is None:
            open_args['mode'] = 'w' if seekable else 'w|'
        else:
            if seekable:
                mode = 'w:{}'
                open_args['compresslevel'] = args.compress_level
                _, ext = ospath_splitext(target_path)
                if ext.lower() != '{}{}'.format(ospath_extsep, args.compression):
                    _LOGGER.warning(
                        'target name "%s" doesn\'t match compression type ("%s")',
                        target_path, args.compression)
            else:
                mode = 'w|{}'
                _LOGGER.warning(
                    'target "%s" is not seekable, ignoring compression level (%d)',
                    target_path, args.compress_level)

            open_args['mode'] = mode.format(args.compression)

        with tarfile_open(**open_args) as tar_file:
            dimgx_extractlayers(dc, layers, tar_file, top_most_layer_id)
def create_config_tarfile(config_dir: str, config_tarfile: str) -> CertbotCertificate:
    """
    Create the configuration tar file for storage in S3 and return a
    CertbotCertificate containing the certificate, chain, full chain, and
    private key found in the configuration directory.
    """
    certificate = None
    chain = None
    full_chain = None
    private_key = None

    with tarfile_open(config_tarfile, "w:gz") as tf:
        for path, _, filenames in walk(config_dir):
            for filename in filenames:
                pathname = path + "/" + filename
                relpath_strip = len(config_dir) + 1
                relpath = pathname[relpath_strip:]
                print(f"Adding {relpath} to archive")
                tf.add(pathname, relpath, recursive=False)

                if fnmatch(relpath, CERT_FILENAME_PATTERN):
                    with open(pathname, "rb") as fd:
                        certificate = fd.read()
                elif fnmatch(relpath, CHAIN_FILENAME_PATTERN):
                    with open(pathname, "rb") as fd:
                        chain = fd.read()
                elif fnmatch(relpath, FULLCHAIN_FILENAME_PATTERN):
                    with open(pathname, "rb") as fd:
                        full_chain = fd.read()
                elif fnmatch(relpath, KEY_FILENAME_PATTERN):
                    with open(pathname, "rb") as fd:
                        private_key = fd.read()

    if certificate is None:
        raise ValueError(f"Did not find live certificate in {config_dir}")
    if chain is None:
        raise ValueError(f"Did not find intermediate certificate in {config_dir}")
    if full_chain is None:
        raise ValueError(f"Did not find full certificate chain in {config_dir}")
    if private_key is None:
        raise ValueError(f"Did not find private key in {config_dir}")

    return CertbotCertificate(certificate=certificate, chain=chain,
                              full_chain=full_chain, private_key=private_key)
def get_image(self, image):
    if not image:
        raise APIError(HTTPError('500 Server Error'), None,
                       explanation='Usage: image_export IMAGE [IMAGE...]')

    layers = []
    next_layer_id = image
    while next_layer_id:
        layer = normalizeimage(self._findlayer(next_layer_id), copy=True)
        layers.append(layer)
        next_layer_id = layers[-1][':parent_id']

    image_file = BytesIO()
    mtime = time()

    with tarfile_open(mode='w', fileobj=image_file) as image_tar_file:
        for layer in layers:
            ti_dir = TarInfo(layer[':id'])
            ti_dir.mtime = mtime
            ti_dir.mode = 0o755
            ti_dir.type = DIRTYPE
            image_tar_file.addfile(ti_dir)

            layer_tar_src_path = ospath_join(
                self._my_dir, 'data', layer[':short_id'], 'layer.tar')
            with open(layer_tar_src_path, 'rb') as layer_tar_src_file:
                layer_tar_dst_path = '{}/layer.tar'.format(layer[':id'])
                ti_layer = image_tar_file.gettarinfo(
                    layer_tar_src_path, layer_tar_dst_path)
                ti_layer.mtime = mtime
                ti_layer.mode = 0o644
                ti_layer.uid = ti_layer.gid = 0
                ti_layer.uname = ti_layer.gname = ''
                image_tar_file.addfile(ti_layer, fileobj=layer_tar_src_file)

    image_file.seek(0)
    return image_file
def __init__(self, name, path, sign_key=None, sudo=False):
    """
    :param name: The name of this repository.
    :param path: Path to this repository.
    :param sign_key: GPG key to sign. None means no signing.
    :param sudo: Whether to modify this repository using sudo(1) or not.
    """
    self.name = name
    self.directory = path
    self.sign_key = sign_key
    self.sign_parameters = ['-s', '-k', sign_key] if sign_key else []
    self.sudo = sudo
    self.db_path = join(path, name + '.db.tar.gz')
    if not exists(self.db_path):
        run(['repo-add', self.db_path], sudo=sudo, capture=False)
    packages = [PackageTinyInfo.from_repodb_directory_name(member.name)
                for member in tarfile_open(self.db_path).getmembers()
                if member.isdir()]
    self.packages = {package.name: package for package in packages}
def decompress(self, fobj: IO[bytes]) -> IO[bytes]:
    mode = 'r|{}'.format(self.compression)
    archive = tarfile_open(fileobj=fobj, mode=mode)
    try:
        fobj = None
        while True:
            member = archive.next()
            if member is None:
                break
            if member.name == self.filename:
                fobj = archive.extractfile(member)
                break
        if fobj is None:
            raise FileNotFoundError('{} not found'.format(self.filename))
        try:
            yield fobj
        finally:
            fobj.close()
    finally:
        archive.close()
def mergeall(self, params):
    from hmerge import merge_files
    from tarfile import open as tarfile_open
    from contextlib import closing

    print "Input:", params.files

    output_files = {}
    fileset = set()

    for f in params.files:
        with closing(tarfile_open(f)) as tar:
            for rootfile in tar.getmembers():
                output_files.setdefault(rootfile.path, set()).add(f)

    from multiprocessing import Pool, cpu_count
    pool = Pool(cpu_count())
    to_merge = [(output, sorted(inputs), output)
                for output, inputs in sorted(output_files.iteritems())]
    pool.map(mp_merge, to_merge)
def write(self, path: str, mode: str):
    try:
        with NamedTemporaryFile(delete=False) as decompressed:
            archive = tarfile_open(decompressed.name, mode=mode)
            try:
                yield archive
            finally:
                archive.close()
            decompressed.seek(0)
            with open(path, 'wb') as compressed:
                zstd = zstandard.ZstdCompressor(**self.zstd_kwargs)
                zstd.copy_stream(decompressed, compressed)
    finally:
        # We delete it manually because otherwise on Windows
        # it gets deleted before we move it to the output file location.
        # This is because on Windows, file handles with the O_TEMPORARY
        # flag (which is set if we pass `delete=True`) are deleted as
        # soon as they're closed.
        decompressed.close()
        os_remove(decompressed.name)
def read_correlate_copynumber_vs_mrnaseq(tar_gz_file_path, genes):
    with tarfile_open(tar_gz_file_path) as tar_gz_file:
        n = read_csv(
            tar_gz_file.extractfile(
                tuple(file for file in tar_gz_file if file.name.endswith("qa.txt"))[0]
            ),
            sep="\t",
            index_col=0,
        ).loc["sample", "comm"]

        df = read_csv(
            tar_gz_file.extractfile(
                tuple(file for file in tar_gz_file if file.name.endswith("cors.txt"))[0]
            ),
            sep="\t",
            index_col=1,
        )

    return n, df.loc[genes, "cor"].to_dict()
def open_read(self, path: str) -> IO[bytes]:
    mode = 'r|{}'.format(self.compression)
    archive = tarfile_open(path, mode)
    try:
        fobj = None
        while True:
            member = archive.next()
            if member is None:
                break
            if member.name == self.filename:
                fobj = archive.extractfile(member)
                break
        if fobj is None:
            raise FileNotFoundError('{} not found in {}'.format(
                self.filename, path))
        try:
            yield fobj
        finally:
            fobj.close()
    finally:
        archive.close()
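# Hedged usage sketch for the open_write/open_read pair above, which stage a
# single member named `self.filename` inside a compressed tar. Assuming both
# generator methods are wrapped with contextlib.contextmanager on a handler
# whose `compression` and `filename` attributes are set, a round trip might
# look like this; the handler variable and path are illustrative assumptions.
def example_single_member_roundtrip(handler):
    with handler.open_write('payload.tar.gz') as buffer:
        buffer.write(b'hello archive')           # staged via the temporary file
    with handler.open_read('payload.tar.gz') as fobj:
        assert fobj.read() == b'hello archive'   # member extracted by name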
def get_image(self, image):
    if not image:
        raise APIError(HTTPError('500 Server Error'), None,
                       explanation='Usage: image_export IMAGE [IMAGE...]')

    layers = []
    next_layer_id = image
    while next_layer_id:
        layer = normalizeimage(self._findlayer(next_layer_id), copy=True)
        layers.append(layer)
        next_layer_id = layers[-1][':parent_id']

    image_file = BytesIO()
    mtime = time()

    with tarfile_open(mode='w', fileobj=image_file) as image_tar_file:
        for layer in layers:
            ti_dir = TarInfo(layer[':id'])
            ti_dir.mtime = mtime
            ti_dir.mode = 0o755
            ti_dir.type = DIRTYPE
            image_tar_file.addfile(ti_dir)

            layer_tar_src_path = ospath_join(
                self._my_dir, 'data', layer[':short_id'], 'layer.tar')
            with open(layer_tar_src_path, 'rb') as layer_tar_src_file:
                layer_tar_dst_path = '{}/layer.tar'.format(layer[':id'])
                ti_layer = image_tar_file.gettarinfo(
                    layer_tar_src_path, layer_tar_dst_path)
                ti_layer.mtime = mtime
                ti_layer.mode = 0o644
                ti_layer.uid = ti_layer.gid = 0
                ti_layer.uname = ti_layer.gname = ''
                image_tar_file.addfile(ti_layer, fileobj=layer_tar_src_file)

    image_file.seek(0)
    return image_file
# Check to see if we have the codegen json file in this directory
if not exists(CODEGEN_JSON):

    # Retrieve the codegen archive
    print "Downloading codegen JSON file to %s." % CODEGEN_JSON
    handle = urlopen(CODEGEN_JSON_URL)
    bzip2_tarball = handle.read()

    # Write the file out to a temp file
    tempfile = NamedTemporaryFile(delete=False)
    tempfile.write(bzip2_tarball)
    tempfile.close()

    # Extract the CODEGEN_JSON file to this directory
    tarball = tarfile_open(tempfile.name, 'r:*')
    archived_file = 'rabbitmq-codegen-default/' + CODEGEN_JSON.split('/')[-1]
    json_data = tarball.extractfile(archived_file)

    # Write out the JSON file
    with open(CODEGEN_JSON, 'w') as handle:
        handle.write(json_data.read())

    # Remove the tempfile
    unlink(tempfile.name)

# Read in the codegen JSON file
with open(CODEGEN_JSON, 'r') as handle:
    amqp = load(handle)

# Check to see if we have the codegen xml file in this directory
# Check to see if we have the codegen json file in this directory
if not exists(CODEGEN_JSON):

    # Retrieve the codegen archive
    print("Downloading codegen JSON file to %s." % CODEGEN_JSON)
    handle = urlopen(CODEGEN_JSON_URL)
    bzip2_tarball = handle.read()

    # Write the file out to a temp file
    tempfile = NamedTemporaryFile(delete=False)
    tempfile.write(bzip2_tarball)
    tempfile.close()

    # Extract the CODEGEN_JSON file to this directory
    tarball = tarfile_open(tempfile.name, "r:*")
    archived_file = "rabbitmq-codegen-default/" + CODEGEN_JSON.split("/")[-1]
    json_data = tarball.extractfile(archived_file)

    # Write out the JSON file (extractfile returns bytes, so write in binary mode)
    with open(CODEGEN_JSON, "wb") as handle:
        handle.write(json_data.read())

    # Remove the tempfile
    unlink(tempfile.name)

# Read in the codegen JSON file
with open(CODEGEN_JSON, "r") as handle:
    amqp = load(handle)

# Check to see if we have the codegen xml file in this directory
def __init__(self, archpath):
    super(TARArchive, self).__init__(tarfile_open(archpath))
def __init__(self, archpath):
    ArchiveBase.__init__(self, tarfile_open(archpath))
def extractlayers(dc, layers, tar_file, top_most_layer=0):
    """
    :param dc: a |docker.Client|_

    :param layers: a sequence of inspection objects (likely retrieved with
        :func:`inspectlayers`) corresponding to the layers to extract and
        flatten in order of precedence

    :param tar_file: a :class:`~tarfile.TarFile` open for writing to which
        to write the flattened layer archive

    :param top_most_layer: an image ID or an index into :obj:`layers`
        indicating the most recent layer to retrieve (the default of ``0``
        references the first item in :obj:`layers`; see below)

    :raises docker.errors.APIError: on failure interacting with Docker
        (e.g., failed connection, Docker not running, etc.)

    :raises docker.errors.DockerException: on failure interacting with
        Docker (e.g., bad image ID, etc.)

    :raises UnsafeTarPath: - probably indicative of a bug in Docker

    Retrieves the layers corresponding to the :obj:`layers` parameter and
    extracts them into :obj:`tar_file`. Changes from layers corresponding to
    smaller indexes in :obj:`layers` will overwrite or block those from larger
    ones.

    Callers will need to set the :obj:`top_most_layer` parameter if
    :obj:`layers` is not in descending order. It is always safe to provide the
    same value as the :obj:`image_spec` parameter to :func:`inspectlayers`,
    but this may be inefficient if that layer does not appear in
    :obj:`layers`.
    """
    if not layers:
        _LOGGER.warning('nothing to extract')
        return

    image_spec = top_most_layer if not isinstance(top_most_layer, int) else layers[top_most_layer][':id']
    tmp_dir = path_realpath(mkdtemp())

    try:
        image = logexception(_LOGGER, ERROR, 'unable to retrieve image layers from "{}": {{e}}'.format(image_spec), dc.get_image, image_spec)

        with tarfile_open(mode='r|*', fileobj=image) as image_tar_file:
            next_info = image_tar_file.next()

            while next_info:
                next_path = path_realpath(path_join(tmp_dir, next_info.name))

                if not next_path.startswith(tmp_dir):
                    exc = UnsafeTarPath('unsafe path: "{}"'.format(next_info.name))
                    logexception(_LOGGER, ERROR, 'unable to retrieve entry from export of "{}": {{e}}'.format(image_spec), exc)

                image_tar_file.extract(next_info, tmp_dir)
                next_info = image_tar_file.next()

        seen = set()
        hides_subtrees = set()

        # Look through each layer's archive (newest to oldest)
        for layer in layers:
            layer_id = layer[':id']
            layer_tar_path = path_join(tmp_dir, layer_id, 'layer.tar')

            with tarfile_open(layer_tar_path) as layer_tar_file:
                next_info = layer_tar_file.next()

                while next_info:
                    next_dirname = posixpath_dirname(next_info.name)
                    next_basename = posixpath_basename(next_info.name)

                    if next_basename.startswith(_WHITEOUT_PFX):
                        removed_path = posixpath_join(next_dirname, next_basename[_WHITEOUT_PFX_LEN:])
                        hides_subtrees.add((removed_path, 'removal'))

                        if removed_path in seen:
                            _LOGGER.debug('skipping removal "%s"', removed_path)
                        else:
                            _LOGGER.debug('hiding "%s" as removed', removed_path)
                    elif next_info.name in seen:
                        _LOGGER.debug('skipping "%s" as overwritten', next_info.name)
                    else:
                        next_name_len = len(next_info.name)
                        hidden = None

                        for h, deverbal in hides_subtrees:  # https://en.wikipedia.org/wiki/deverbal
                            if len(h) > next_name_len:
                                continue

                            common_pfx = posixpath_commonprefix((h, next_info.name))
                            common_pfx_len = len(common_pfx)

                            if next_name_len == common_pfx_len \
                                    or next_info.name[common_pfx_len:].startswith(posixpath_sep):
                                hidden = deverbal, h
                                break

                        if hidden:
                            _LOGGER.debug('skipping "%s" hidden by %s of %s', next_info.name, *hidden)
                        else:
                            mtime = naturaltime(datetime.utcfromtimestamp(next_info.mtime).replace(tzinfo=TZ_UTC))
                            _LOGGER.info('writing "%s" from "%s" to archive (size: %s; mode: %o; mtime: %s)', next_info.name, layer_id, naturalsize(next_info.size), next_info.mode, mtime)

                            if next_info.linkname:
                                # TarFile.extractfile() tries to do something
                                # weird when its parameter represents a link
                                # (see the docs)
                                fileobj = None
                            else:
                                fileobj = layer_tar_file.extractfile(next_info)

                            tar_file.addfile(next_info, fileobj)
                            seen.add(next_info.name)

                            if not next_info.isdir():
                                hides_subtrees.add((next_info.name, 'presence'))

                    next_info = layer_tar_file.next()
    finally:
        rmtree(tmp_dir, ignore_errors=True)