def decompress_file(archive, dir_):
    """Extract the archive file `archive` into the directory `dir_`.

    Parameters
    ----------
    archive: str
    dir_: str
    """
    with swallow_outputs() as cmo:
        # patool's own sanity checks want the paths handed over as bytes
        archive = ensure_bytes(archive)
        dir_ = ensure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Use the protected extraction API so no existence checks are run on
        # the unixified paths; hand unicode to patool to avoid b'' leaking
        # into the constructed paths.
        outdir = ensure_unicode(unixify_path(dir_))
        archive = ensure_unicode(archive)
        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Our .gz "stream archive" support relies on 7z honoring a
            # possibly present filename header field while extracting; see
            # https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: handle such stream archives properly without relying on
            # any filename stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                        "to handle .gz files extraction 'correctly'")
        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)
    # Note: (ben) Extracted tarballs have been seen lacking the execution bit
    # on directories, which prevents deleting them even with write
    # permission.  No situation comes to mind where failing on such a mess
    # would be desired, so make sure the bit is set.
    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | os.path.stat.S_IEXEC)
        for root, dirs, _ in os.walk(dir_, followlinks=False):
            for name in dirs:
                subdir = opj(root, name)
                os.chmod(subdir, os.stat(subdir).st_mode | os.path.stat.S_IEXEC)
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
      Path to the archive file to extract.
    dir_: str
      Target directory; created if it does not exist yet.
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under
      which all content is stored, all the content will be moved one
      directory up and that leading directory will be removed.

    Raises
    ------
    NotImplementedError
      If `leading_directories` is neither 'strip' nor None.
    """
    if not exists(dir_):
        # lazy %-args: the message is only rendered if DEBUG is enabled
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)
    with swallow_outputs() as cmo:
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        patoolib._extract_archive(unixify_path(archive),
                                  outdir=unixify_path(dir_),
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)
    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.
    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | os.path.stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                os.chmod(subdir, os.stat(subdir).st_mode | os.path.stat.S_IEXEC)

    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content within %s upstairs", widow_dir)
            subdir, subdirs_, files_ = next(os.walk(opj(dir_, dirs[0])))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            os.rmdir(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
      Path to the archive file to extract.
    dir_: str
      Target directory; created if it does not exist yet.
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under
      which all content is stored, all the content will be moved one
      directory up and that leading directory will be removed.

    Raises
    ------
    NotImplementedError
      If `leading_directories` is neither 'strip' nor None.
    """
    if not exists(dir_):
        # lazy %-args: the message is only rendered if DEBUG is enabled
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)
    with swallow_outputs() as cmo:
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        patoolib._extract_archive(unixify_path(archive),
                                  outdir=unixify_path(dir_),
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)
    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content within %s upstairs", widow_dir)
            subdir, subdirs_, files_ = next(os.walk(opj(dir_, dirs[0])))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            os.rmdir(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
      Path to the archive file to extract.
    dir_: str
      Target directory; created if it does not exist yet.
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under
      which all content is stored, all the content will be moved one
      directory up and that leading directory will be removed.

    Raises
    ------
    NotImplementedError
      If `leading_directories` is neither 'strip' nor None.
    """
    if not exists(dir_):
        # lazy %-args: the message is only rendered if DEBUG is enabled
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)
    with swallow_outputs() as cmo:
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        patoolib._extract_archive(unixify_path(archive),
                                  outdir=unixify_path(dir_),
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)
    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content within %s upstairs", widow_dir)
            subdir, subdirs_, files_ = next(os.walk(opj(dir_, dirs[0])))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            os.rmdir(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
      Path to the archive file to extract.
    dir_: str
      Target directory; created if it does not exist yet.
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under
      which all content is stored, all the content will be moved one
      directory up and that leading directory will be removed.

    Raises
    ------
    NotImplementedError
      If `leading_directories` is neither 'strip' nor None.
    MissingExternalDependency
      If a .gz archive is to be extracted but 7z is not available.
    """
    if not exists(dir_):
        # lazy %-args: the message is only rendered if DEBUG is enabled
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)
    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)
        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM out support for .gz relies on behavior of 7z
            # while extracting them and respecting possibly present .gz
            # filename header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename been stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                        "to handle .gz files extraction 'correctly'")
        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)
    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.
    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | os.path.stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                os.chmod(subdir, os.stat(subdir).st_mode | os.path.stat.S_IEXEC)

    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content within %s upstairs", widow_dir)
            subdir, subdirs_, files_ = next(os.walk(opj(dir_, dirs[0])))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            rmdir(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
      Path to the archive file to extract.
    dir_: str
      Target directory; created if it does not exist yet.
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under
      which all content is stored, all the content will be moved one
      directory up and that leading directory will be removed.

    Raises
    ------
    NotImplementedError
      If `leading_directories` is neither 'strip' nor None.
    MissingExternalDependency
      If a .gz archive is to be extracted but 7z is not available.
    """
    if not exists(dir_):
        # lazy %-args: the message is only rendered if DEBUG is enabled
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)
    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)
        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz .xz etc "stream archivers" formats in handling
            # of archives. ATM out support for .gz relies on behavior of 7z
            # while extracting them and respecting possibly present .gz
            # filename header field.
            # See more https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename been stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(
                    "cmd:7z",
                    msg="(Not) Funny enough but ATM we need p7zip installation "
                        "to handle .gz files extraction 'correctly'"
                )
        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)
    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.
    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | os.path.stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                os.chmod(subdir, os.stat(subdir).st_mode | os.path.stat.S_IEXEC)

    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content within %s upstairs", widow_dir)
            subdir, subdirs_, files_ = next(os.walk(opj(dir_, dirs[0])))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            rmdir(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)
def prepare(self): assert (self.state == TransactionState.EXPANDED) # Preload all the fetches into a set of files to get, by doing so we # can have a global download progress bar. fetches = set() for t in self.installs: print("Collecting sources from {}".format(t.name)) for s in t.sources: fetches.add((s.uri, s.filename)) # Actually do the downloads files = self.downloader.fetch(fetches) # Lets extract the archives into hashed folder names as well. This will # require us to translate the paths coming out of the scripts, but so # be it for uri, (archive_path, org_filename) in files.items(): if uri in self.source_map: continue with self.source_map.atomic_add(uri) as unpack_dir: print("Extracting " "{Style.BRIGHT}{Fore.MAGENTA}{}{Style.RESET_ALL}".format( uri, Style=Style, Fore=Fore)) patoolib.util.check_existing_filename(archive_path) mime, encoding = patoolib.util.guess_mime_mimedb(org_filename) if mime in patoolib.ArchiveMimetypes: format_ = patoolib.ArchiveMimetypes[mime] if format_ == encoding: encoding = None patoolib._extract_archive( archive_path, outdir=unpack_dir, interactive=False, verbosity=-1, format=format_, compression=encoding, ) # Populate the package_files list with the operations to complete for t in self.installs: vfs = VirtualFS() # Create the source name lookup table for s in t.sources: filename_noext = s.get_name() p = self.source_map.get(s.uri) vfs.remap(filename_noext, p) t.package(vfs) # @COMPLETE package_files should probably be checked for problems # BEFORE we allow a commit # @COMPLETE We should check if there's space on the disk for a copy of # package_files before we allow a commit. Currently a lack of space # breaks a package install. This can cause various issues. # @COMPLETE we should check if we can remove the packages first if it's # an upgrade self.state = TransactionState.PREPARED
def decompress_file(archive, dir_, leading_directories='strip'):
    """Decompress `archive` into a directory `dir_`

    Parameters
    ----------
    archive: str
      Path to the archive file to extract.
    dir_: str
      Target directory; created if it does not exist yet.
    leading_directories: {'strip', None}
      If `strip`, and archive contains a single leading directory under
      which all content is stored, all the content will be moved one
      directory up and that leading directory will be removed.

    Raises
    ------
    NotImplementedError
      If `leading_directories` is neither 'strip' nor None.
    """
    if not exists(dir_):
        # lazy %-args: the message is only rendered if DEBUG is enabled
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)
    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call protected one to avoid the checks on existence on unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)
        patoolib._extract_archive(unixify_path(archive),
                                  outdir=outdir,
                                  verbosity=100)
        if cmo.out:
            lgr.debug("patool gave stdout:\n%s", cmo.out)
        if cmo.err:
            lgr.debug("patool gave stderr:\n%s", cmo.err)
    # Note: (ben) Experienced issue, where extracted tarball
    # lacked execution bit of directories, leading to not being
    # able to delete them while having write permission.
    # Can't imagine a situation, where we would want to fail on
    # that kind of mess. So, to be sure set it.
    if not on_windows:
        os.chmod(dir_, os.stat(dir_).st_mode | os.path.stat.S_IEXEC)
        for root, dirs, files in os.walk(dir_, followlinks=False):
            for d in dirs:
                subdir = opj(root, d)
                os.chmod(subdir, os.stat(subdir).st_mode | os.path.stat.S_IEXEC)

    if leading_directories == 'strip':
        _, dirs, files = next(os.walk(dir_))
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            widow_dir = opj(dir_, dirs[0])
            lgr.debug("Moving content within %s upstairs", widow_dir)
            subdir, subdirs_, files_ = next(os.walk(opj(dir_, dirs[0])))
            for f in subdirs_ + files_:
                os.rename(opj(subdir, f), opj(dir_, f))
            rmdir(widow_dir)
    elif leading_directories is None:
        pass  # really do nothing
    else:
        raise NotImplementedError("Not supported %s" % leading_directories)