def _createScriptExtensionTarArchive(self, sourceDirectory, scriptExtensionName):
    """ Creates a TAR archive for the given script extension. """
    tarFileName = scriptExtensionName + ".tar"
    tarFilePath = os.path.join(self.__buildConfiguration.distDirectory, tarFileName)
    tarFile = TarFile(tarFilePath, "w")

    for inputDirectory in ["lib", "src"]:
        baseDirectory = os.path.join(sourceDirectory, inputDirectory)
        if os.path.exists(baseDirectory):
            for packageDirName in os.listdir(baseDirectory):
                pythonModulesToAddList = list()
                packageDirectory = os.path.join(baseDirectory, packageDirName)
                if os.path.exists(packageDirectory):
                    for walkTuple in os.walk(packageDirectory):
                        directoryPath = walkTuple[0]
                        fileNameList = walkTuple[2]
                        for fileName in fileNameList:
                            if fileName.endswith(".py") or fileName == "SCRIPTS":
                                filePath = os.path.join(directoryPath, fileName)
                                pythonModulesToAddList.append(filePath)

                for pythonModule in pythonModulesToAddList:
                    startPosition = pythonModule.find(baseDirectory) + len(baseDirectory) + 1
                    archiveName = pythonModule[startPosition:]
                    tarFile.add(pythonModule, archiveName)

    tarFile.close()
    if self.verbose:
        print("Created tar archive '%s'." % tarFilePath)
def test_tar_experiment_download(self):
    self.assertTrue(all(df.verified for df in self.dfs))
    response = self.client.get(reverse(
        'tardis.tardis_portal.download.streaming_download_experiment',
        args=(self.exp.id, 'tar')))
    with NamedTemporaryFile('w') as tarfile:
        for c in response.streaming_content:
            tarfile.write(c)
        tarfile.flush()
        self.assertEqual(int(response['Content-Length']),
                         os.stat(tarfile.name).st_size)
        tf = TarFile(tarfile.name)
        if settings.EXP_SPACES_TO_UNDERSCORES:
            exp_title = self.exp.title.replace(' ', '_')
        else:
            exp_title = self.exp.title
        exp_title = quote(exp_title, safe=settings.SAFE_FILESYSTEM_CHARACTERS)
        for df in self.dfs:
            full_path = os.path.join(
                exp_title,
                quote(self.ds.description,
                      safe=settings.SAFE_FILESYSTEM_CHARACTERS),
                df.directory, df.filename)
            # docker has a file path limit of ~240 characters
            if os.environ.get('DOCKER_BUILD', 'false') != 'true':
                tf.extract(full_path, '/tmp')
                self.assertEqual(
                    os.stat(os.path.join('/tmp', full_path)).st_size,
                    int(df.size))
def install_packages(self):
    self._update_package_with_install_path()
    installed_packages = []
    for p in self._packages:
        if 'install_from' not in p:
            print("[dem] Could not find package: {}, version: {}".format(p['name'], p['version']))
        else:
            if not self._cache.is_package_installed(p['name'], p['version']):
                print('[dem] installing {}-{}'.format(p['name'], p['version']))
                if p['install_from_ext'] == 'zip':
                    with ZipFile(p['install_from'], 'r') as archive:
                        locations = self._extract(archive, p)
                elif p['install_from_ext'] == 'tar.gz':
                    with TarFile.open(p['install_from'], 'r:gz') as archive:
                        locations = self._extract(archive, p)
                elif p['install_from_ext'] == 'tar.bz2':
                    with TarFile.open(p['install_from'], 'r:bz2') as archive:
                        locations = self._extract(archive, p)
                elif p['install_from_ext'] == 'gz':
                    with gzip.open(p['install_from'], 'r') as archive:
                        locations = self._extract(archive, p)
                if 'pkg-config' in p:
                    PkgConfigProcessor.replace_prefix(locations, p['pkg-config'])
            else:
                print('[dem] {}-{} already installed'.format(p['name'], p['version']))
                locations = self._cache.install_locations(p['name'])
            package = dict()
            package[p['name']] = {'version': p['version'], 'type': 'local',
                                  'install_locations': locations}
            installed_packages.append(package)
    return installed_packages
def install_repo(self, repo):
    if repo in KNOWN_PUBLIC_REPOS:
        repo = KNOWN_PUBLIC_REPOS[repo]['path']  # replace it by the url
    git_path = which('git')
    if not git_path:
        return ('git command not found: You need to have git installed on '
                'your system to be able to install git based plugins.', )

    # TODO: Update download path of plugin.
    if repo.endswith('tar.gz'):
        tar = TarFile(fileobj=urlopen(repo))
        tar.extractall(path=self.plugin_dir)
        s = repo.split(':')[-1].split('/')[-2:]
        # strip the '.tar.gz' suffix (rstrip would strip characters, not the suffix)
        human_name = '/'.join(s)[:-len('.tar.gz')]
    else:
        human_name = human_name_for_git_url(repo)
        p = subprocess.Popen([git_path, 'clone', repo, human_name],
                             cwd=self.plugin_dir,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        feedback = p.stdout.read().decode('utf-8')
        error_feedback = p.stderr.read().decode('utf-8')
        if p.wait():
            return "Could not load this plugin: \n\n%s\n\n---\n\n%s" % (feedback, error_feedback),
    self.add_plugin_repo(human_name, repo)
    return self.update_dynamic_plugins()
def rebuild(filename, tag=None, format="gz", zonegroups=[], metadata=None): """Rebuild the internal timezone info in dateutil/zoneinfo/zoneinfo*tar* filename is the timezone tarball from ``ftp.iana.org/tz``. """ tmpdir = tempfile.mkdtemp() zonedir = os.path.join(tmpdir, "zoneinfo") moduledir = os.path.dirname(__file__) try: with TarFile.open(filename) as tf: for name in zonegroups: tf.extract(name, tmpdir) filepaths = [os.path.join(tmpdir, n) for n in zonegroups] try: check_call(["zic", "-d", zonedir] + filepaths) except OSError as e: _print_on_nosuchfile(e) raise # write metadata file with open(os.path.join(zonedir, METADATA_FN), 'w') as f: json.dump(metadata, f, indent=4, sort_keys=True) target = os.path.join(moduledir, ZONEFILENAME) with TarFile.open(target, "w:%s" % format) as tf: for entry in os.listdir(zonedir): entrypath = os.path.join(zonedir, entry) tf.add(entrypath, entry) finally: shutil.rmtree(tmpdir)
def install(self, mess, args):
    """ install a plugin repository from the given source or a known public repo (see !repos to find those).
    for example from a known repo : !install err-codebot
    for example a git url : [email protected]:gbin/plugin.git
    or an url towards a tar.gz archive : http://www.gootz.net/plugin-latest.tar.gz
    """
    if not args.strip():
        return "You should have an urls/git repo argument"
    if args in KNOWN_PUBLIC_REPOS:
        args = KNOWN_PUBLIC_REPOS[args][0]  # replace it by the url
    git_path = which('git')

    if not git_path:
        return 'git command not found: You need to have git installed on your system to be able to install git based plugins.'

    if args.endswith('tar.gz'):
        tar = TarFile(fileobj=urlopen(args))
        tar.extractall(path=PLUGIN_DIR)
        human_name = args.split('/')[-1][:-7]
    else:
        human_name = human_name_for_git_url(args)
        p = subprocess.Popen([git_path, 'clone', args, human_name],
                             cwd=PLUGIN_DIR,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        feedback = p.stdout.read()
        error_feedback = p.stderr.read()
        if p.wait():
            return "Could not load this plugin : \n%s\n---\n%s" % (feedback, error_feedback)
    self.add_plugin_repo(human_name, args)
    errors = self.update_dynamic_plugins()
    if errors:
        self.send(mess.getFrom(),
                  'Some plugins are generating errors:\n' + '\n'.join(errors),
                  message_type=mess.getType())
    else:
        self.send(mess.getFrom(),
                  "A new plugin repository named %s has been installed correctly from %s. Refreshing the plugins commands..." % (human_name, args),
                  message_type=mess.getType())
    self.activate_non_started_plugins()
    return "Plugin reload done."
def rebuild(filename, tag=None, format="gz"): import tempfile, shutil tmpdir = tempfile.mkdtemp() zonedir = os.path.join(tmpdir, "zoneinfo") moduledir = os.path.dirname(__file__) if tag: tag = "-"+tag targetname = "zoneinfo%s.tar.%s" % (tag, format) try: tf = TarFile.open(filename) for name in tf.getnames(): if not (name.endswith(".sh") or name.endswith(".tab") or name == "leapseconds"): tf.extract(name, tmpdir) filepath = os.path.join(tmpdir, name) os.system("zic -d %s %s" % (zonedir, filepath)) tf.close() target = os.path.join(moduledir, targetname) for entry in os.listdir(moduledir): if entry.startswith("zoneinfo") and ".tar." in entry: os.unlink(os.path.join(moduledir, entry)) tf = TarFile.open(target, "w:%s" % format) for entry in os.listdir(zonedir): entrypath = os.path.join(zonedir, entry) tf.add(entrypath, entry) tf.close() finally: shutil.rmtree(tmpdir)
def create_archive(self):
    (handle, path) = mkstemp(dir=self.temp_dir)
    os.close(handle)
    archive = TarFile(path, mode="w")
    archive.add(os.path.join(_common.RSRC, "full.mp3"), "full.mp3")
    archive.close()
    return path
def rebuild(filename, tag=None, format="gz"):
    import tempfile, shutil
    tmpdir = tempfile.mkdtemp()
    zonedir = os.path.join(tmpdir, "zoneinfo")
    moduledir = os.path.dirname(__file__)
    if tag:
        tag = "-" + tag
    targetname = "zoneinfo%s.tar.%s" % (tag, format)
    try:
        tf = TarFile.open(filename)
        # The "backwards" zone file contains links to other files, so it must
        # be processed last
        for name in sorted(tf.getnames(),
                           key=lambda k: k != "backward" and k or "z"):
            if not (name.endswith(".sh") or
                    name.endswith(".tab") or
                    name == "leapseconds"):
                tf.extract(name, tmpdir)
                filepath = os.path.join(tmpdir, name)
                os.system("zic -d %s %s" % (zonedir, filepath))
        tf.close()
        target = os.path.join(moduledir, targetname)
        for entry in os.listdir(moduledir):
            if entry.startswith("zoneinfo") and ".tar." in entry:
                os.unlink(os.path.join(moduledir, entry))
        tf = TarFile.open(target, "w:%s" % format)
        for entry in os.listdir(zonedir):
            entrypath = os.path.join(zonedir, entry)
            tf.add(entrypath, entry)
        tf.close()
    finally:
        shutil.rmtree(tmpdir)
def move_certs(self, paths):
    self.log.info("Staging internal ssl certs for %s", self._log_name)
    yield self.pull_image(self.move_certs_image)
    # create the volume
    volume_name = self.format_volume_name(self.certs_volume_name, self)
    # create volume passes even if it already exists
    self.log.info("Creating ssl volume %s for %s", volume_name, self._log_name)
    yield self.docker('create_volume', volume_name)

    # create a tar archive of the internal cert files
    # docker.put_archive takes a tarfile and a running container
    # and unpacks the archive into the container
    nb_paths = {}
    tar_buf = BytesIO()
    archive = TarFile(fileobj=tar_buf, mode='w')
    for key, hub_path in paths.items():
        fname = os.path.basename(hub_path)
        nb_paths[key] = '/certs/' + fname
        with open(hub_path, 'rb') as f:
            content = f.read()
        tarinfo = TarInfo(name=fname)
        tarinfo.size = len(content)
        tarinfo.mtime = os.stat(hub_path).st_mtime
        tarinfo.mode = 0o644
        archive.addfile(tarinfo, BytesIO(content))
    archive.close()
    tar_buf.seek(0)

    # run a container to stage the certs,
    # mounting the volume at /certs/
    host_config = self.client.create_host_config(
        binds={
            volume_name: {"bind": "/certs", "mode": "rw"},
        },
    )
    container = yield self.docker(
        'create_container',
        self.move_certs_image,
        volumes=["/certs"],
        host_config=host_config,
    )
    container_id = container['Id']
    self.log.debug(
        "Container %s is creating ssl certs for %s",
        container_id[:12],
        self._log_name,
    )
    # start the container
    yield self.docker('start', container_id)
    # stage the archive to the container
    try:
        yield self.docker(
            'put_archive',
            container=container_id,
            path='/certs',
            data=tar_buf,
        )
    finally:
        yield self.docker('remove_container', container_id)
    return nb_paths
def main(argv):
    import getopt
    def usage():
        print('usage: %s [-b basedir] cmd [arg ...]' % argv[0])
        return 100
    try:
        (opts, args) = getopt.getopt(argv[1:], 'db:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    basedir = 'tar'
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-b':
            basedir = v
    tardb = TarDB(basedir)
    if not args:
        return usage()
    cmd = args.pop(0)
    if cmd == 'create':
        tardb.create()
    elif cmd == 'import':
        tardb.open()
        for path in args:
            tar = TarFile(path)
            while True:
                info = tar.next()
                if info is None:
                    break
                fp = tar.fileobj
                fp.seek(info.offset + BLOCKSIZE)
                data = fp.read(info.size)
                tardb.add_record(info, data)
            tardb.flush()
        tardb.close()
    elif cmd == 'add':
        tardb.open()
        for path in args:
            name = os.path.basename(path)
            info = TarInfo(name)
            with open(path, 'rb') as fp:
                data = fp.read()
            recno = tardb.add_record(info, data)
            print(recno)
        tardb.close()
    elif cmd == 'get':
        tardb.open()
        for recno in args:
            recno = int(recno)
            (_, data) = tardb.get_recinfo(recno, True)
            sys.stdout.buffer.write(data)
        tardb.close()
    elif cmd == 'getinfo':
        tardb.open()
        for recno in args:
            recno = int(recno)
            (info, _) = tardb.get_recinfo(recno, False)
            print(info)
        tardb.close()
    else:
        return usage()
    return 0
def generate_tar(entries):
    tar_buf = BytesIO()
    tar_file = TarFile(mode="w", fileobj=tar_buf)
    for path, contents in entries.items():
        tar_info = TarInfo(name=path)
        tar_info.size = len(contents)
        tar_file.addfile(tar_info, fileobj=BytesIO(contents))
    # close to flush the end-of-archive blocks before snapshotting the buffer
    tar_file.close()
    return BytesIO(tar_buf.getvalue())
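# Hedged usage sketch for generate_tar above (file names invented): build a
# two-file archive in memory and read it back with the stdlib to check the
# round trip.
import tarfile

buf = generate_tar({"a.txt": b"hello", "dir/b.txt": b"world"})
with tarfile.open(fileobj=buf, mode="r:") as tf:
    assert sorted(tf.getnames()) == ["a.txt", "dir/b.txt"]
    assert tf.extractfile("a.txt").read() == b"hello"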
def parse_backup_label(self, basebackup_path):
    tar = TarFile(basebackup_path)
    content = tar.extractfile("backup_label").read()  # pylint: disable=no-member
    for line in content.split(b"\n"):
        if line.startswith(b"START WAL LOCATION"):
            start_wal_segment = line.split(b" ")[5].strip(b")").decode("utf8")
    self.log.debug("Found: %r as starting wal segment", start_wal_segment)
    return start_wal_segment
def read_file_from_image(img: tarfile.TarFile, file_path: str, autoclose=False) -> bytes:
    if autoclose:
        with closing(img.extractfile(file_path)) as fd:
            return fd.read()
    else:
        return img.extractfile(file_path).read()
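# Hedged usage sketch for read_file_from_image (archive and member names
# invented; the member path must match the tar entry exactly, which in docker
# image layers often carries a leading "./").
import tarfile

with tarfile.open("image.tar") as img:
    data = read_file_from_image(img, "etc/os-release", autoclose=True)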
def reader(self):
    """Package up filesystem contents as a tarball."""
    result = BytesIO()
    tarball = TarFile(fileobj=result, mode="w")
    for child in self.path.children():
        tarball.add(child.path, arcname=child.basename(), recursive=True)
    tarball.close()
    result.seek(0, 0)
    yield result
def download(self):
    """
    Performs a download
    """
    self.init2()  # set up the base class

    simulation = self.request.POST.get("simulation", False)

    self._setup_path()

    if simulation:
        self.request.echo("<h1>Download Simulation!</h1><pre>")
        self.request.echo("request path: %s\n" % self.request_path)
        log_typ = "download simulation start"
    else:
        log_typ = "download start"

    self.db.log(log_typ, self.context['request_path'])

    artist = self.request.POST.get("artist", "")
    album = self.request.POST.get("album", "")

    files, _ = self._read_dir()

    args = {"prefix": "PyDown_%s_" % self.request.environ["REMOTE_USER"]}
    if self.request.cfg["temp"]:
        args["dir"] = self.request.cfg["temp"]
    temp = NamedTemporaryFile(**args)

    tar = TarFile(mode="w", fileobj=temp)

    if simulation:
        self.request.write("-"*80)
        self.request.write("\n")

    for file_info in files:
        filename = file_info[0]
        abs_path = posixpath.join(self.request_path, filename)
        arcname = posixpath.join(artist, album, filename)
        if simulation:
            #~ self.request.write("absolute path..: %s\n" % abs_path)
            self.request.write("<strong>%s</strong>\n" % arcname)
        try:
            tar.add(abs_path, arcname)
        except IOError, e:
            self.request.write("<h1>Error</h1><h2>Can't create archive: %s</h2>" % e)
            try:
                tar.close()
            except:
                pass
            try:
                temp.close()
            except:
                pass
            return
class TarFileWrapper(ArchiveFileWrapper):
    def __init__(self, fh, *args, **kwargs):
        self.archive = TarFile(fileobj=fh)
        super(TarFileWrapper, self).__init__(*args, **kwargs)

    def extract_file(self, *args, **kwarg):
        return self.archive.extractfile(*args, **kwarg)

    def names(self):
        return self.archive.getnames()
def download(self, src, dest, extract_here=False):
    client = connect()
    with SpooledTemporaryFile() as file:
        file.write(client.copy(self.container_id, src).read())
        file.seek(0)
        tfile = TarFile(fileobj=file)
        if extract_here:
            base = len(os.path.basename(src)) + 1
            for member in tfile.getmembers():
                member.name = member.name[base:]
        tfile.extractall(path=dest)
def write_tar(filename):
    from tarfile import TarFile
    tf = None
    try:
        tf = TarFile(filename, 'w')
        logger.debug('Writing tar archive to %s' % filename)
        _write_files_to_archive(tf.add, files)
        tf.close()
        logger.debug('Completed tar archive size is %i' % os.stat(filename).st_size)
    except IOError as ex:
        logger.warn("I/O error({0}) while writing tar archive: {1}".format(ex.errno, ex.strerror))
        os.unlink(filename)
    finally:
        if tf is not None:
            tf.close()
def __enter__(self):
    if self.tf is not None:
        raise ValueError('Cannot re-enter')
    if '://' in self.web_archive:
        info('Downloading from {0}'.format(self.web_archive))
        dl = requests.get(self.web_archive)
        self.tf = TarFile.open(path(self.web_archive).basename(), 'r:*',
                               fileobj=io.BytesIO(dl.content))
    else:
        self.tf = TarFile.open(self.web_archive)
    return self.tf
def extract_package(package_name, path, logger):
    try:
        if ".tar" in package_name:
            TarFile.open(package_name).extractall(path)
        elif ".zip" in package_name:
            ZipFile(package_name).extractall(path)
        else:
            raise FileTypeError("It's not a TAR or ZIP archive.")
    except Exception as err:
        register_exception(alert_admin=True,
                           prefix="Elsevier error extracting package.")
        logger.error("Error extraction package file: %s %s" % (path, err))
def replace_or_append_file_to_layer(file_to_replace: str,
                                    content_or_path: bytes,
                                    img: tarfile.TarFile):
    # Is content or path?
    if not os.path.exists(content_or_path):
        # Is a content
        t = tarfile.TarInfo(file_to_replace)
        t.size = len(content_or_path)
        img.addfile(t, io.BytesIO(content_or_path))
    else:
        # Is a path
        img.add(content_or_path, file_to_replace)
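# Hedged usage sketch for replace_or_append_file_to_layer (file names
# invented): append an in-memory payload to a layer tar opened for writing.
import tarfile

with tarfile.open("layer.tar", "w") as layer:
    replace_or_append_file_to_layer("etc/motd", b"patched by test\n", layer)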
def main(argv):
    import getopt
    def usage():
        print('usage: %s [-b basedir] cmd [arg ...]' % argv[0])
        return 100
    try:
        (opts, args) = getopt.getopt(argv[1:], 'db:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    basedir = 'msg'
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-b':
            basedir = v
    if not args:
        return usage()
    cmd = args.pop(0)
    msgdb = MessageDB(basedir)
    if cmd == 'create':
        msgdb.create()
    elif cmd == 'import':
        msgdb.open()
        for path in args:
            tar = TarFile(path)
            while True:
                info = tar.next()
                if info is None:
                    break
                fp = tar.fileobj
                fp.seek(info.offset + BLOCKSIZE)
                data = fp.read(info.size)
                recno = msgdb.add_file(gzip2bytes(data))
                print(recno)
            msgdb.flush()
        msgdb.close()
    elif cmd == 'add':
        msgdb.open()
        for path in args:
            with open(path, 'r') as fp:
                data = fp.read()
            recno = msgdb.add_file(data)
            print(recno)
        msgdb.close()
    elif cmd == 'search':
        msgdb.open()
        for data in msgdb.search_text(args):
            print(rmsp(data)[:80])
        msgdb.close()
    else:
        return usage()
    return 0
def writer(self):
    """Expect written bytes to be a tarball."""
    result = BytesIO()
    yield result
    result.seek(0, 0)
    try:
        tarball = TarFile(fileobj=result, mode="r")
        if self.path.exists():
            self.path.remove()
        self.path.createDirectory()
        tarball.extractall(self.path.path)
    except:
        # This should really be dealt with, e.g. logged:
        # https://clusterhq.atlassian.net/browse/FLOC-122
        pass
def writer(self):
    """Expect written bytes to be a tarball."""
    result = BytesIO()
    yield result
    result.seek(0, 0)
    try:
        tarball = TarFile(fileobj=result, mode="r")
        if self.path.exists():
            self.path.remove()
        self.path.createDirectory()
        tarball.extractall(self.path.path)
    except:
        # This should really be dealt with, e.g. logged:
        # https://github.com/ClusterHQ/flocker/issues/122
        pass
def _verifyTarballEntryXML( self, fileish, entry_name, data ):
    fileish.seek( 0L )
    tarfile = TarFile.open( 'foo.tar.gz', fileobj=fileish, mode='r:gz' )
    extract = tarfile.extractfile( entry_name )
    found = extract.read()
    self._compareDOM( found, data )
def untar( data, dir = '.' ):
    if not exists( dir ):
        makedirs( dir, 0700 )
    else:
        chmod( dir, 0700 )
    f = BytesIO( decodestring( data ) )
    with TarFile.open( mode = 'r', fileobj = f ) as tf:
        members = tf.getmembers()
        dirs = []
        files = []
        for m in members:
            if m.isdir():
                dirs.append( m )
            if m.isfile():
                files.append( m )
        dirs.sort( key = attrgetter( 'name' ) )
        for d in dirs:
            dp = join( dir, d.name )
            if not exists( dp ):
                mkdir( dp, 0700 )
            else:
                chmod( dp, 0700 )
        for f in files:
            fp = join( dir, f.name )
            if exists( fp ):
                chmod( fp, 0700 )
            tf.extract( f, dir )
        dirs.reverse()
        for d in dirs:
            tf.extract( d, dir )
def get_local_file_deps(self, fname):
    from os import mkdir, system
    from os.path import exists
    from tarfile import TarFile
    from time import asctime, localtime

    if exists("/tmp/gtkpacman"):
        system("rm -rf /tmp/gtkpacman")
    mkdir("/tmp/gtkpacman", 0755)

    archive = TarFile.gzopen(fname)
    for member in archive.getmembers():
        archive.extract(member, "/tmp/gtkpacman")
        continue

    info_file = file("/tmp/gtkpacman/.PKGINFO")
    infos = info_file.read()
    info_file.close()

    infos_lines = infos.splitlines()
    deps = []
    conflicts = []
    for line in infos_lines:
        sides = line.split(" = ")
        if sides[0] == "depend":
            deps.append(sides[1])
        elif sides[0] == "conflict":
            conflicts.append(sides[1])
        continue

    system("rm -rf /tmp/gtkpacman")
    return deps, conflicts
def put(self, content, filename="", file_hash=""):
    """ Store file information in hashed tree """
    if not filename and not file_hash:
        raise ValueError('Filename or FileHash is mandatory')
    if filename:
        # File accessibility
        if not os.path.exists(filename):
            raise IOError('Inaccessible file %s', filename)
        # Calc hash
        file_hash = self._get_hash(filename)
    if not file_hash:
        raise ValueError('Hash of file is mandatory')
    # Get file path for hash
    path, tarfile, hashed_filename = self._get_path(file_hash)
    # Create file path
    try:
        os.makedirs(path)
    except WindowsError:
        pass
    except OSError:
        pass
    # Open tarfile
    if self.external_compressor:
        # External compressor is not suited for adding files.
        raise ValueError('You cannot use external compressor for write files')
    with TarFile.open(name=os.path.join(path, tarfile), mode='a') as tar:
        with FileLock(os.path.join(path, tarfile)) as lock:
            # Test if file already exists into tarfile
            try:
                tar.getmember(hashed_filename)
                raise ValueError('Member already exists')
            except KeyError:
                pass
            except:
                raise
            data = self.encoder.encode(content)
            if self.internal_compressor:
                data = self.internal_compressor.compress(data)
            data_file = StringIO(data)
            mtime = time.time()
            ti = TarInfo(hashed_filename)
            ti.size = data_file.len
            ti.mtime = mtime
            tar.addfile(tarinfo=ti, fileobj=data_file)
            tar.close()
    return file_hash
def tar( dir = '.', glob = '.*', verbose = True ):
    if not isdir( dir ):
        raise ValueError( '{0} is not a directory'.format( dir ) )
    dir = abspath( dir )
    offset = len( dir ) + 1
    glob = recompile( glob )
    buf = BytesIO()
    with TarFile.open( mode = 'w', fileobj = buf, dereference = True ) as tf:
        num_files = 0
        nonempty_dirs = set()
        for base, dirs, files in walk( dir, followlinks = True ):
            if num_files > MAX_NUM_FILES:
                break
            for fpath in files:
                path = join( base, fpath )
                rpath = path[ offset: ]
                if glob.search( rpath ) and stat( path ).st_size < MAX_FILESIZE:
                    num_files += 1
                    if num_files > MAX_NUM_FILES:
                        break
                    if verbose:
                        sys.stderr.write( rpath + '\n' )
                    with open( path, 'rb' ) as f:
                        ti = tf.gettarinfo( arcname = rpath, fileobj = f )
                        ti.mtime = 1
                        nonempty_dirs.add( dirname( path ) )
                        tf.addfile( ti, fileobj = f )
        for path in nonempty_dirs:
            rpath = path[ offset: ]
            if not rpath:
                continue
            ti = tf.gettarinfo( name = path, arcname = rpath )
            ti.mtime = 1
            tf.addfile( ti )
    return encodestring( buf.getvalue() )
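# Hedged round-trip sketch pairing tar() with the untar() helper above; both
# operate on base64-encoded tar blobs (directory names invented, and the
# module-level MAX_NUM_FILES / MAX_FILESIZE limits are assumed to be defined).
blob = tar( '/tmp/project', glob = r'\.py$' )
untar( blob, '/tmp/project-copy' )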
def load_section(tf: TarFile, info: TarInfo) -> Table:
    with tf.extractfile('./ReadMe') as readme:
        col_names = ['Bmag', 'Vmag', 'e_Bmag', 'e_Vmag', 'd3',
                     'TYC1', 'TYC2', 'TYC3',
                     'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag', 'Kmag', 'e_Kmag',
                     'SpType']
        reader = io_ascii.get_reader(io_ascii.Cds,
                                     readme=readme,
                                     include_names=col_names)
        reader.data.table_name = 'cc*.dat'

    print(' Loading ' + os.path.basename(info.name))
    with tf.extractfile(info) as gzf, gzip.open(gzf, 'rb') as f:
        section = reader.read(f)

    section = section[section['TYC1'] != 0]
    parse_tyc_cols(section)

    convert_cols = ['Bmag', 'Vmag', 'e_Bmag', 'e_Vmag',
                    'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag', 'Kmag', 'e_Kmag']
    for col in convert_cols:
        section[col] = section[col].astype(np.float64)
        section[col].convert_unit_to(u.mag)
        section[col].format = '.3f'

    return section
def test_tar_experiment_download(self):
    self.assertTrue(all(df.verified for df in self.dfs))
    response = self.client.get(
        reverse('tardis.tardis_portal.download.streaming_download_experiment',
                args=(self.exp.id, 'tar')))
    with NamedTemporaryFile('w') as tarfile:
        for c in response.streaming_content:
            tarfile.write(c)
        tarfile.flush()
        self.assertEqual(int(response['Content-Length']),
                         os.stat(tarfile.name).st_size)
        tf = TarFile(tarfile.name)
        for df in self.dfs:
            full_path = os.path.join(
                self.exp.title.replace(' ', '_'),
                quote(self.ds.description, safe=''),
                df.directory, df.filename)
            # docker has a file path limit of ~240 characters
            if os.environ.get('DOCKER_BUILD', 'false') != 'true':
                tf.extract(full_path, '/tmp')
                self.assertEqual(
                    os.stat(os.path.join('/tmp', full_path)).st_size,
                    int(df.size))
def _verifyTarballContents(self, fileish, toc_list, when=None):
    fileish.seek(0)
    tarfile = TarFile.open('foo.tar.gz', fileobj=fileish, mode='r:gz')
    items = sorted(tarfile.getnames())
    toc_list.sort()
    self.assertEqual(len(items), len(toc_list))
    for i in range(len(items)):
        self.assertEqual(items[i].rstrip('/'), toc_list[i])
    if when is not None:
        for tarinfo in tarfile:
            self.failIf(tarinfo.mtime < when)
def create(cls, path: StrPath) -> "TarGzArchiveAdapter":
    # GzipFile has to be created manually if we want reproducibility...
    # <https://bugs.python.org/issue31526>
    fileobj = gzip.GzipFile(path, "wb", mtime=0)
    try:
        t: Any = TarFile(path, "w", fileobj, format=tarfile.GNU_FORMAT)
        # This causes the underlying GzipFile to be closed when the TarFile
        # is closed. The field name probably deciphers to "external file object".
        t._extfileobj = False
        return cls(t)
    except BaseException:
        # Safeguard, taken from the `TarFile.gzopen` function.
        fileobj.close()
        raise
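# A minimal self-contained sketch of the same reproducibility trick using
# only the stdlib (names invented): gzip's mtime=0 keeps the compressed
# stream byte-identical across runs, as the comment above explains.
import gzip
import tarfile

def reproducible_targz(path, files):
    fileobj = gzip.GzipFile(path, "wb", mtime=0)
    try:
        with tarfile.TarFile(path, "w", fileobj, format=tarfile.GNU_FORMAT) as tar:
            for f in files:
                tar.add(f)
    finally:
        fileobj.close()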
def _untar_layers(dir, layers):
    output = {}
    # Untar layer filesystem bundle
    for layer in layers:
        tarfile = TarFile(dir + "/" + layer)
        for member in tarfile.getmembers():
            output[member.name] = member
        for member_name in output:
            try:
                tarfile.extract(output[member_name], path=dir, set_attrs=False)
            except (ValueError, ReadError) as ex:
                if InternalServer.is_debug_logging_enabled():
                    message = "Unexpected exception of type {0} occurred while untarring the docker image: {1!r}" \
                        .format(type(ex).__name__,
                                ex.get_message() if type(ex).__name__ == 'DagdaError' else ex.args)
                    DagdaLogger.get_logger().debug(message)
            except PermissionError as ex:
                message = "Unexpected error occurred while untarring the docker image: " + \
                          "Operation not permitted on {0!r}".format(member_name)
                DagdaLogger.get_logger().warn(message)

    # Clean up
    for layer in layers:
        clean_up(dir + "/" + layer[:-10])
def _add_to_tar(tar: tarfile.TarFile, filename: str, filecontent: pd.DataFrame) -> None:
    """
    Write file contents to a given filename and add the file to a
    specified tar archive.

    Parameters
    ----------
    tar: tarfile.TarFile
        Tar archive handle
    filename: str
        Name of file to add to the archive
    filecontent: pandas.DataFrame
        DataFrame containing data to write to filename
    """
    content = filecontent.encode()
    with TemporaryFile() as tmp:
        tmp.write(content)
        tmp.seek(0)
        info = tarfile.TarInfo(name=filename)
        info.size = len(content)
        tar.addfile(tarinfo=info, fileobj=tmp)
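# Hedged usage sketch for _add_to_tar (archive name invented). Note that
# despite the pd.DataFrame annotation the body calls .encode(), so what
# actually works is a str, e.g. the output of DataFrame.to_csv().
import tarfile

with tarfile.open("frames.tar", "w") as tar:
    _add_to_tar(tar, "data.csv", "a,b\n1,2\n")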
def _prepareFormTarball(self):
    """we could use our @@export-easyform view, but we want a more atomic
    test, so we make a tarfile for our test. the approach to making a
    tarfile is a bit strange, but does the job
    """
    in_fname = 'test_form_1_easyform.tar.gz'
    test_dir = os.path.dirname(__file__)

    def _add_form_structure_to_archive(archive):
        form_relative_path = os.path.join('profiles', 'testing', 'structure',
                                          'Members', 'test_user_1_',
                                          'test_form_1_easyform')
        abs_path = os.path.join(test_dir, form_relative_path)

        # add structure folder
        os.chdir(os.path.join(test_dir, 'profiles', 'testing'))
        archive.add('structure', recursive=False)
        for f in os.listdir(abs_path):
            os.chdir(abs_path)
            # add form data w/o full directory tree
            archive.add(f, arcname=os.path.join('structure', f))

    # Capture the current working directory for later when we need to
    # clean up the environment.
    working_directory = os.path.abspath(os.curdir)

    # make me a tarfile in the current dir
    os.chdir(test_dir)
    archive = TarFile.open(name=in_fname, mode='w:gz')
    _add_form_structure_to_archive(archive)
    archive.close()

    # Change back to the working directory in case something tries to
    # write files (e.g. collective.xmltestreport).
    os.chdir(working_directory)

    # get it and upload
    in_file = open(os.path.join(test_dir, in_fname))
    env = {'REQUEST_METHOD': 'PUT'}
    headers = {
        'content-type': 'text/html',
        'content-length': len(in_file.read()),
        'content-disposition': 'attachment; filename={0}'.format(in_file.name),
    }
    in_file.seek(0)
    fs = FieldStorage(fp=in_file, environ=env, headers=headers)
    return FileUpload(fs)
def __init__(self, path):
    if path.endswith('.gz'):
        self.fd = TarFile.gzopen(path)
    elif path.endswith('.xz'):
        self.fd = TarFile.open(fileobj=LZMAFile(path))
    else:
        raise Exception('Unsupported file type %s' % path)

    self.pkg_info = defaultdict(list)
    self.members = []

    # Extract most used information
    if self.parse_pkginfo():
        self.parse_contents()

    self.name = self.pkg_info.get('pkgname')
    self.desc = self.pkg_info.get('pkgdesc')[0]
    self.depends = self.pkg_info.get('depend') or []
    self.groups = self.pkg_info.get('group') or []
    if isinstance(self.name, (list, tuple)) and len(self.name) == 1:
        self.name = self.name[0]
def member_is_package(tar: tarfile.TarFile, member: tarfile.TarInfo) -> bool:
    """Checks if the given member of the provided tar object contains a
    __init__.py file.

    Parameters
    ----------
    tar: tarfile.TarFile
        A tar object containing member.
    member: tarfile.TarInfo
        The member of the tar object to verify.

    Returns
    -------
    bool
        Whether the given member is a package or not.
    """
    try:
        tar.getmember(f"{member.name}/__init__.py")
        return True
    except KeyError:
        return False
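# Hedged usage sketch for member_is_package (archive name invented): list
# the directories in an sdist-style tarball that are importable packages.
import tarfile

with tarfile.open("pkg-1.0.tar.gz", "r:gz") as tar:
    packages = [m.name for m in tar.getmembers()
                if m.isdir() and member_is_package(tar, m)]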
def download(self):
    """
    Performs a download
    """
    self.init2()  # set up the base class

    simulation = self.request.POST.get("simulation", False)

    self._setup_path()

    if simulation:
        self.request.echo("<h1>Download Simulation!</h1><pre>")
        self.request.echo("request path: %s\n" % self.request_path)
        log_typ = "download simulation start"
    else:
        log_typ = "download start"

    self.db.log(log_typ, self.context['request_path'])

    artist = self.request.POST.get("artist", "")
    album = self.request.POST.get("album", "")

    files, _ = self._read_dir()

    args = {"prefix": "PyDown_%s_" % self.request.environ["REMOTE_USER"]}
    if self.request.cfg["temp"]:
        args["dir"] = self.request.cfg["temp"]
    temp = NamedTemporaryFile(**args)

    tar = TarFile(mode="w", fileobj=temp)

    if simulation:
        self.request.write("-" * 80)
        self.request.write("\n")

    for file_info in files:
        filename = file_info[0]
        abs_path = posixpath.join(self.request_path, filename)
        arcname = posixpath.join(artist, album, filename)
        if simulation:
            #~ self.request.write("absolute path..: %s\n" % abs_path)
            self.request.write("<strong>%s</strong>\n" % arcname)
        try:
            tar.add(abs_path, arcname)
        except IOError, e:
            self.request.write(
                "<h1>Error</h1><h2>Can't create archive: %s</h2>" % e)
            try:
                tar.close()
            except:
                pass
            try:
                temp.close()
            except:
                pass
            return
def extract_tar(file, tar_mode, dest_dir, skip_first_sub_dir):
    class ProgressWrapper(io.BufferedReader):
        def __init__(self, file, *args, **kwargs):
            io.BufferedReader.__init__(self, raw=file, *args, **kwargs)
            self.next_time = time.time()
            # Get the file size
            self.seek(0, os.SEEK_END)
            self.size = self.tell()
            self.seek(0)

        def read(self, size):
            if time.time() >= self.next_time:
                progress = int((self.tell() / self.size) * 20)
                sys.stdout.write("\rExtracting [{}{}]".format(
                    "#" * progress, " " * (20 - progress)))
                self.next_time += 0.5
            elif self.tell() + size >= self.size:
                sys.stdout.write("\rExtracting [{}]".format("#" * 20))
            return io.BufferedReader.read(self, size)

    parent_dir = os.path.realpath(os.path.join(dest_dir, '..'))
    os.makedirs(parent_dir, exist_ok=True)
    archive = None
    temp_dir = None
    try:
        archive = TarFile.open(fileobj=ProgressWrapper(file), mode=tar_mode)
        if skip_first_sub_dir:
            # We can't use the same trick as for the zip file since then symbolic links
            # in the tar file will break. Instead we extract to a temporary directory
            # and then move the subdirectory to dest_dir
            temp_dir = tempfile.TemporaryDirectory()
            archive.extractall(temp_dir.name)
            subdirs = glob(f"{temp_dir.name}/*/")
            if len(subdirs) != 1:
                raise Exception(
                    "Unexpected subdirectory count - can't skip first sub directory"
                )
            shutil.move(subdirs[0], dest_dir)
        else:
            archive.extractall(dest_dir)
    finally:
        if archive != None:
            archive.close()
        if temp_dir != None:
            temp_dir.cleanup()
    print("\nDone")
def dump(self, reqs, path: Path, project: RootDependency) -> None:
    project_name = project.raw_name.replace('-', '_')
    release_name = '{name}-{version}'.format(
        name=project.raw_name,
        version=project.pep_version,
    )
    subdir = release_name + '/' if self.subdir else ''

    if isinstance(path, str):
        path = Path(path)
    if not path.name.endswith('.tar.gz'):
        path /= release_name + '.tar.gz'
    path.parent.mkdir(exist_ok=True, parents=True)
    if path.exists():
        path.unlink()

    converter = EggInfoConverter()
    info = converter.make_info(reqs=reqs, project=project, with_requires=False)
    getters = {
        'dependency_links.txt': lambda: converter.make_dependency_links(reqs=reqs),
        'entry_points.txt': lambda: converter.make_entrypoints(project=project),
        'PKG-INFO': lambda: info,
        'requires.txt': lambda: converter.make_requires(reqs=reqs),
        'SOURCES.txt': lambda: converter.make_sources(project=project),
        'top_level.txt': lambda: converter.make_top_level(project=project),
    }

    with TarFile.open(str(path), mode='w:gz') as tar:
        # write metafiles
        self._write_content(tar=tar, path=subdir + 'PKG-INFO', content=info)
        for file_name, getter in getters.items():
            fpath = '{subdir}{project}.egg-info/{file}'.format(
                subdir=subdir,
                project=project_name,
                file=file_name,
            )
            self._write_content(tar=tar, path=fpath, content=getter())

        # write packages
        for package in chain(project.package.packages, project.package.data):
            for full_path in package:
                fpath = '{subdir}{module}'.format(
                    subdir=subdir,
                    module='/'.join(full_path.relative_to(project.package.path).parts),
                )
                tar.add(name=str(full_path), arcname=fpath, filter=self._set_uid_gid)

        self._write_additional_files(tar=tar, project=project, subdir=subdir)
def test_ignore(contents, tmpdir, git_env, monkeypatch):
    """
    Ensure that GitArchiver respects export-ignore.
    """
    for name, value in git_env.items():
        monkeypatch.setenv(name, value)

    repo_path = os.path.join(str(tmpdir), 'repo')
    repo = Repo(repo_path)
    repo.init()
    repo.add_dir('.', contents)
    repo.commit('init')

    repo_tar_path = os.path.join(str(tmpdir), 'repo.tar')
    repo.archive(repo_tar_path)
    repo_tar = TarFile(repo_tar_path, format=PAX_FORMAT, encoding='utf-8')

    def make_expected(contents):
        e = {}
        for k, v in contents.items():
            if v.kind == 'file' and not v.excluded:
                e[k] = v.contents
            elif v.kind in ('dir', 'submodule') and not v.excluded:
                for nested_k, nested_v in make_expected(v.contents).items():
                    e[as_posix(os.path.join(k, nested_k))] = nested_v
        return e

    def make_actual(tar_file):
        a = {}
        for m in tar_file.getmembers():
            if m.isfile():
                name = m.name
                if sys.version_info < (3, ):
                    name = m.name.decode('utf-8')
                a[name] = tar_file.extractfile(m).read().decode()
            else:
                raise NotImplementedError
        return a

    expected = make_expected(contents)
    actual = make_actual(repo_tar)

    assert actual == expected
def tarfile_extract(
    tf: tarfile.TarFile,
    *,
    tarfile_base_path: str,
    extract_base_path: str,
    limit_files: Optional[List[str]],
) -> List[str]:
    tarfile_base_path = os.path.normpath(tarfile_base_path)
    extract_base_path = os.path.normpath(extract_base_path)
    extracted_files = []
    for member_info in tf.getmembers():
        if not member_info.isfile():
            continue

        member = member_info.name
        member = os.path.normpath(member)
        if os.path.commonpath([member, tarfile_base_path]) != tarfile_base_path:
            continue

        extract_fname = os.path.relpath(member, tarfile_base_path)
        if limit_files is not None:
            found = False
            for limit_fname in limit_files:
                limit_fname = os.path.normpath(limit_fname) + (
                    "/" if limit_fname.endswith("/") else "")
                if limit_fname.endswith("/"):
                    if extract_fname.startswith(limit_fname):
                        found = True
                        continue
                elif limit_fname == extract_fname:
                    found = True
                    continue
            if not found:
                continue

        extract_path = os.path.join(extract_base_path, extract_fname)
        tarfile_extract_single_file(tf, member, extract_path)
        extracted_files.append(member)
    return extracted_files
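# Hedged usage sketch for tarfile_extract (paths invented); note the actual
# write is delegated to a tarfile_extract_single_file() helper defined
# elsewhere in the same project.
import tarfile

with tarfile.open("snapshot.tar.gz", "r:gz") as tf:
    extracted = tarfile_extract(
        tf,
        tarfile_base_path="snapshot/data",
        extract_base_path="/tmp/restore",
        limit_files=["configs/", "state.json"],
    )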
def run(self):
    """
    Interesting magic to get a source dist and run trial on it.

    NOTE: there is magic going on here! If you know a better way feel
          free to update it.
    """
    # Clean out dist/
    if os.path.exists('dist'):
        for root, dirs, files in os.walk('dist', topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
    # Import setup making it as if we ran setup.py with the sdist arg
    sys.argv.append('sdist')
    import setup  #@Reimport @UnresolvedImport @UnusedImport

    try:
        # attempt to extract the sdist data
        from gzip import GzipFile
        from tarfile import TarFile
        # We open up the gzip as well as using the first item as the sdist
        gz = GzipFile(os.path.join('dist', os.listdir('dist')[0]))
        tf = TarFile(fileobj=gz)

        # Make the output dir and generate the extract path
        os.mkdir(os.path.join('dist', 'sdist_test'))
        ex_path = os.path.join('dist', 'sdist_test',
                               tf.getmembers()[0].name, 'buildbot', 'test')

        # Extract the data and run tests
        print "Extracting to %s" % ex_path
        tf.extractall(os.path.join('dist', 'sdist_test'))
        print "Executing tests ..."
        self._run(os.path.normpath(os.path.abspath(ex_path)))
    except IndexError, ie:
        # We get called twice and the IndexError is OK
        pass
def create(self, basedir, outdir, name, prefix=None, dereference=True):
    """
    :API: public
    """
    basedir = ensure_text(basedir)
    tarpath = os.path.join(outdir,
                           "{}.{}".format(ensure_text(name), self.extension))
    with closing(TarFile.open(tarpath, self.mode, dereference=dereference,
                              errorlevel=1)) as tar:
        tar.add(basedir, arcname=prefix or ".")
    return tarpath
def download_and_extract(ctx: BackupContext, src: str, dst: path_like_obj) -> bool:
    ctx.logger.info('Restoring from %s to %s', src, dst)
    with NamedTemporaryFile(mode='rb') as download_dst:
        if not ctx.download_file_if_exists(src, download_dst.name):
            ctx.logger.info('No backup is found')
            return False
        ctx.logger.debug('Extracting to %s', dst)
        os.makedirs(dst, exist_ok=True)
        with TarFile.open(fileobj=download_dst, mode='r:*') as tar:
            tar.extractall(dst, numeric_owner=True)
    return True
def generate_dataset(config):
    destination = config.absolute(File.COMPRESSED_DATASET)
    csv_lines = [REPO_LIST_HEADERS]
    for lang, ext in config.extensions.items():
        for pos in range(REPO_PER_LANG):
            full_name = f'lang_{ext}/repo_{pos:02}'
            csv_lines.append(REPO_LINE.format(full_name=full_name, lang=lang))
    csv_bytes = '\n'.join(csv_lines).encode()
    with TarFile.open(destination, 'w:gz') as tar_file:
        tar_info = TarInfo(DATASET_FILENAME)
        tar_info.size = len(csv_bytes)
        tar_file.addfile(tar_info, BytesIO(csv_bytes))
    return True, 200
def addencrypt(self, names, arcnames=None, archivename=None):
    if arcnames is None:
        arcnames = names
    if archivename is None:
        archivename = names[0]

    # Create temporary "archivename.tar.zip"
    with tempfile.SpooledTemporaryFile() as zfobj:
        with AESZipFile(zfobj, "w",
                        compression=pyzipper.ZIP_DEFLATED,
                        encryption=pyzipper.WZ_AES) as zf:
            zf.setpassword(self._aes_key_b64)

            # Create temporary "archivename.tar"
            with tempfile.SpooledTemporaryFile() as tfobj:
                with TarFile(fileobj=tfobj, mode="w") as tf:
                    # Add files to "archivename.tar"
                    for (n, an) in zip(names, arcnames):
                        tf.add(n, arcname=an, recursive=True)
                # Now "archivename.tar" is completely written,
                # seek to beginning of file and write to "archivename.tar.zip"
                tfobj.seek(0)
                zf.writestr(archivename + ".tar", tfobj.read())

        # Now "archivename.tar.zip" is completely written,
        # find out how large this file is by the current position using tell(),
        # then seek to beginning of file and addfile to the root tar
        ti = TarFile.tarinfo(archivename + ".tar.zip")
        ti.size = zfobj.tell()
        ti.mtime = round(time.time())
        zfobj.seek(0)
        self.addfile(ti, zfobj)
        self.manifest["encrypted_files"].append(ti.name)
def prepare_tarball(url, app):
    ''' Prepare a tarball with app.json from the source URL.
    '''
    got = get(url, allow_redirects=True)
    raw = GzipFile(fileobj=StringIO(got.content))
    tar = TarFile(fileobj=raw)

    try:
        dirpath = mkdtemp(prefix='display-screen-')
        rootdir = join(dirpath, commonprefix(tar.getnames()))
        tar.extractall(dirpath)

        if not isdir(rootdir):
            raise Exception('"{0}" is not a directory'.format(rootdir))

        with open(join(rootdir, 'app.json'), 'w') as out:
            json.dump(app, out)

        tarpath = make_archive(dirpath, 'gztar', rootdir, '.')
    finally:
        rmtree(dirpath)

    return tarpath
def extract_and_process(pathto, extraction_type, tosearch, log, gui_window=None):
    if extraction_type != "fs":
        search_func = search_archive
    else:
        search_func = search
    if extraction_type == "tar":
        pathto = TarFile(pathto)
    if extraction_type == "zip":
        pathto = ZipFile(pathto)
    for key, val in tosearch.items():
        filefound = search_func(pathto, val)
        process_file_found(filefound, key, val, log, gui_window)
    if extraction_type == "zip":
        pathto.close()
    log.close()
def extract_filesystem_bundle(docker_driver, container_id=None, image_name=None):
    temporary_dir = tempfile.mkdtemp()
    # Get and save filesystem bundle
    if container_id is not None:
        image = docker_driver.get_docker_client().export(container=container_id)
        name = container_id
    else:
        image = docker_driver.get_docker_client().get_image(image=image_name)
        name = image_name.replace('/', '_').replace(':', '_')
    with open(temporary_dir + "/" + name + ".tar", "wb") as file:
        for chunk in image:
            file.write(chunk)
    # Untar filesystem bundle
    tarfile = TarFile(temporary_dir + "/" + name + ".tar")
    tarfile.extractall(temporary_dir)
    os.remove(temporary_dir + "/" + name + ".tar")
    if image_name is not None:
        layers = _get_layers_from_manifest(temporary_dir)
        _untar_layers(temporary_dir, layers)
    # Return
    return temporary_dir
async def _db_file_member_as_model(
    db_file: tarfile.TarFile, regex: str = "(/desc|/files)$"
) -> AsyncIterator[models.RepoDbMemberData]:
    """Iterate over the members of a database file, represented by an
    instance of tarfile.TarFile and yield the members as instances of
    models.RepoDbMemberData

    The method filters the list of evaluated members using a regular
    expression. Depending on member name one of defaults.RepoDbMemberType
    is chosen.

    Parameters
    ----------
    db_file: tarfile.TarFile
        An instance of TarFile representing a repository database
    regex: str
        A regular expression used to filter the names of the members
        contained in db_file (defaults to '(/desc|/files)$')
    """

    for name in [name for name in db_file.getnames() if re.search(regex, name)]:
        file_type = defaults.RepoDbMemberType.UNKNOWN
        if re.search("(/desc)$", name):
            file_type = defaults.RepoDbMemberType.DESC
        if re.search("(/files)$", name):
            file_type = defaults.RepoDbMemberType.FILES

        yield models.RepoDbMemberData(
            member_type=file_type,
            name=await _extract_db_member_package_name(name=name),
            data=io.StringIO(
                io.BytesIO(
                    db_file.extractfile(name).read(),  # type: ignore
                ).read().decode("utf-8"),
            ),
        )
def run_csv2rdf(csv_filename: str, metadata_filename: str,
                csv_io: TextIO, metadata_io: TextIO):
    client = docker.from_env()
    csv2rdf = client.containers.create(
        'gsscogs/csv2rdf',
        command=f'csv2rdf -m annotated -o /tmp/output.ttl -t /tmp/{csv_filename} -u /tmp/{metadata_filename}'
    )
    archive = BytesIO()
    metadata_io.seek(0, SEEK_END)
    metadata_size = metadata_io.tell()
    metadata_io.seek(0)
    csv_io.seek(0, SEEK_END)
    csv_size = csv_io.tell()
    csv_io.seek(0)
    with TarFile(fileobj=archive, mode='w') as t:
        tis = TarInfo(str(metadata_filename))
        tis.size = metadata_size
        tis.mtime = time.time()
        t.addfile(tis, BytesIO(metadata_io.read().encode('utf-8')))
        tic = TarInfo(str(csv_filename))
        tic.size = csv_size
        tic.mtime = time.time()
        t.addfile(tic, BytesIO(csv_io.read().encode('utf-8')))
    archive.seek(0)
    csv2rdf.put_archive('/tmp/', archive)
    csv2rdf.start()
    response = csv2rdf.wait()
    sys.stdout.write(csv2rdf.logs().decode('utf-8'))
    assert_equal(response['StatusCode'], 0)
    output_stream, output_stat = csv2rdf.get_archive('/tmp/output.ttl')
    output_archive = BytesIO()
    for line in output_stream:
        output_archive.write(line)
    output_archive.seek(0)
    with TarFile(fileobj=output_archive, mode='r') as t:
        output_ttl = t.extractfile('output.ttl')
        return output_ttl.read()
def __extract_prediction_module(self, model_obj, model_id, model_version):
    prediction_module_path = self.___prediction_module_path(model_id, model_version)
    if not os.path.exists(prediction_module_path):
        os.makedirs(prediction_module_path)
    prediction_module_tar_contents = model_obj['model_predict_module']
    with TarFile.open(fileobj=BytesIO(prediction_module_tar_contents),
                      mode='r:bz2') as tar:
        tar.extractall(prediction_module_path)
    os.rename(
        prediction_module_path + '/model.py',
        prediction_module_path + '/' +
        self.__prediction_module.format(
            model_id=model_id,
            model_version=model_version.replace('.', '_')) + '.py')
def install(self, library, define=None):
    if not define:
        define = library
    meta = self.get_package_json(library)
    tarball_url = meta['dist']['tarball']
    tarball = TarFile.open(
        fileobj=BytesIO(urllib.request.urlopen(tarball_url).read()))
    main = self._find_main(meta, tarball)
    filepath = os.path.join(self.rootdir, '%s.js' % define)
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    file = open(filepath, 'w')
    file.write('// %s@%s\n' % (library, meta['version']))
    content = str(main.read(), 'UTF-8')
    file.write(content)
    return Library(self, library, define, meta['version'])
def analyze():
    try:
        fn = 'temp/{}.tar'.format(md5(request.remote_addr.encode()).hexdigest())
        if request.method == 'POST':
            fp = request.files['file']
            fp.save(fn)
            if not is_tarfile(fn):
                return '<script>alert("Uploaded file is not \'tar\' file.");history.back(-1);</script>'
            tf = TarFile(fn)
            tf.extractall(fn.split('.')[0])
            bd1 = fn.split('/')[1].split('.')[0]
            bd2 = fn.split('/')[1]
            return render_template('analyze', path=bd1, fn=bd2, files=tf.getnames())
    except Exception as e:
        return response('Error', 500)
def extract_file(tar: tarfile.TarFile, name: str) -> IO[bytes]:
    """
    Helper for getting a file handle to the database file in the tar
    archive. This is needed because we don't necessarily know the name of
    its containing folder.

    :raises: TarError if the tar archive does not contain the database file
    """
    mmdb = next(
        (m for m in tar.getmembers() if m.name.endswith(name) and m.isfile()),
        None)
    if mmdb is None:
        # Because we verified the checksum earlier, this should only be
        # possible if maxmind actually served us a bad file
        raise tarfile.TarError(
            "Tar archive did not contain the database file!")
    f = tar.extractfile(mmdb)
    if f is None:
        raise tarfile.TarError(
            "Tar archive did not contain the database file!")
    return f
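# Hedged usage sketch for extract_file (archive name invented): grab the
# .mmdb member from a MaxMind tarball without knowing its folder name.
import tarfile

with tarfile.open("GeoLite2-City.tar.gz", "r:gz") as tar:
    with extract_file(tar, "GeoLite2-City.mmdb") as f:
        mmdb_bytes = f.read()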
def batch(opts: Namespace) -> int:
    opts = getenv(opts, 'GITHUB_API_TOKEN')
    github = Github(opts.GITHUB_API_TOKEN)

    projects = {}
    logger.info('Parsing project list...')
    # <project>:<tag>:<architectures>
    for p in opts.project_list:
        delim = p.count(':')
        if delim == 2:
            project, tag, architectures = p.split(':', 2)
            architectures = architectures.split(',')
        elif delim == 1:
            project, tag = p.split(':', 1)
            architectures = ['amd64']
        else:
            project = p
            tag = '?'
            architectures = ['amd64']
        # resolve() is assumed to return the tarball URL for the job, since
        # the download loop below iterates projects as {project: tarball_url}
        projects[project] = Buildjob(project=project, tag=tag,
                                     architectures=architectures).resolve(github)

    logger.info('Downloading tarballs...')
    local_jobs = {}
    session = requests.Session()
    session.mount('https://', requests.adapters.HTTPAdapter(pool_connections=4))
    for project, tarball_url in projects.items():
        project_repo = project.split('/', 1)[-1]
        version_part = tarball_url.split('/')[-1]
        output_filename = f'{project_repo}_{version_part}.tgz'
        output_path = opts.output_dir / output_filename
        with open(output_path, 'wb') as FILE:
            req = session.get(tarball_url, stream=True)
            req.raw.decode_content = True  # gunzip
            shutil.copyfileobj(req.raw, FILE)
        logger.info(f'DOWNLOAD OK: {project} => {output_path}')
        local_jobs[project] = output_path

    logger.info('Building projects...')
    for project, tarball_path in local_jobs.items():
        with TemporaryDirectory() as DIR:
            with TarFile(tarball_path) as TAR:
                checktar(TAR)
                TAR.extractall(path=DIR)

    return 0