def compile_inputs_outputs(runs, inputs, outputs):
    """Gives names to input/output files and creates InputOutputFile objects.
    """
    # {path: (run_nb, arg_nb) or None}
    runs_with_file = {}
    # run_nb: number_of_file_arguments
    nb_file_args = []
    # {path: [runs]}
    readers = {}
    writers = {}

    for run_nb, run, in_files, out_files in izip(count(), runs,
                                                 inputs, outputs):
        # List which runs read or write each file
        for p in in_files:
            readers.setdefault(p, []).append(run_nb)
        for p in out_files:
            writers.setdefault(p, []).append(run_nb)

        # Locate files that appear on a run's command line
        files_set = set(in_files) | set(out_files)
        nb_files = 0
        for arg_nb, arg in enumerate(run['argv']):
            p = Path(run['workingdir'], arg).resolve()
            if p in files_set:
                nb_files += 1
                if p not in runs_with_file:
                    runs_with_file[p] = run_nb, arg_nb
                elif runs_with_file[p] is not None:
                    runs_with_file[p] = None
        nb_file_args.append(nb_files)

    file_names = {}
    make_unique = UniqueNames()

    for fi in flatten(2, (inputs, outputs)):
        if fi in file_names:
            continue

        # If it appears in at least one of the command-lines
        if fi in runs_with_file:
            # If it only appears once in the command-lines
            if runs_with_file[fi] is not None:
                run_nb, arg_nb = runs_with_file[fi]
                parts = []
                # Run number, if there is more than one run
                if len(runs) > 1:
                    parts.append(run_nb)
                # Argument number, if there is more than one file argument
                if nb_file_args[run_nb] > 1:
                    parts.append(arg_nb)
                file_names[fi] = make_unique(
                    'arg%s' % '_'.join('%s' % s for s in parts))
            else:
                file_names[fi] = make_unique('arg_%s' % fi.unicodename)
        else:
            file_names[fi] = make_unique(fi.unicodename)

    return dict((n, InputOutputFile(p, readers.get(p, []), writers.get(p, [])))
                for p, n in iteritems(file_names))
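# compile_inputs_outputs() leans on two helpers that are not shown above:
# flatten(), which iterates over a nested sequence a fixed number of levels
# deep, and UniqueNames, a callable that de-duplicates the generated names.
# Below is a minimal sketch of what such helpers could look like, written
# only to illustrate the assumed behavior; it is not the project's actual
# implementation.
from itertools import chain


def flatten(level, iterable):
    # Chain the nested iterable `level` times, yielding the innermost items
    for _ in range(level):
        iterable = chain.from_iterable(iterable)
    return iterable


class UniqueNames(object):
    """Callable returning its argument, suffixed with a counter if seen."""
    def __init__(self):
        self.names = set()

    def __call__(self, name):
        nb = 1
        candidate = name
        while candidate in self.names:
            nb += 1
            candidate = '%s_%d' % (name, nb)
        self.names.add(candidate)
        return candidate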
def search_for_files(self, files):
    nb_pkg_files = 0

    for f in self.filter_files(files):
        pkgnames = self._get_packages_for_file(f.path)

        # Stores the file
        if not pkgnames:
            self.unknown_files.add(f)
        else:
            pkgs = []
            for pkgname in pkgnames:
                if pkgname in self.packages:
                    pkgs.append(self.packages[pkgname])
                else:
                    pkg = self._create_package(pkgname)
                    if pkg is not None:
                        self.packages[pkgname] = pkg
                        pkgs.append(self.packages[pkgname])
            if len(pkgs) == 1:
                pkgs[0].add_file(f)
                nb_pkg_files += 1
            else:
                self.unknown_files.add(f)

    # Filter out packages with no files
    self.packages = {
        pkgname: pkg
        for pkgname, pkg in iteritems(self.packages)
        if pkg.files
    }

    logging.info("%d packages with %d files, and %d other files",
                 len(self.packages), nb_pkg_files, len(self.unknown_files))
def python(files, input_files, **kwargs):
    remove = []
    add = []
    for path, fi in iteritems(files):
        if path.ext == '.pyc':
            pyfile = path.parent / path.stem + '.py'
            if pyfile.is_file():
                logging.info("Removing %s", path)
                remove.append(path)
                if pyfile not in files:
                    logging.info("Adding %s", pyfile)
                    add.append(TracedFile(pyfile))

    for path in remove:
        files.pop(path, None)
    for fi in add:
        files[fi.path] = fi

    for i in irange(len(input_files)):
        lst = []
        for path in input_files[i]:
            if path.ext in ('.py', '.pyc'):
                logging.info("Removing input %s", path)
            else:
                lst.append(path)
        input_files[i] = lst
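# The filter above maps a compiled module back to its source by replacing the
# '.pyc' extension: for '/usr/lib/python2.7/os.pyc', `path.parent / path.stem
# + '.py'` yields '/usr/lib/python2.7/os.py'. This matches the Python 2
# layout, where the .pyc sits next to the .py; a hypothetical, standard-
# library-only check written just to illustrate that assumption:
import os.path


def source_for_pyc(pyc_path):
    # Replace the extension and keep the result only if the source exists
    py_path = os.path.splitext(pyc_path)[0] + '.py'
    return py_path if os.path.isfile(py_path) else None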
def search_for_files(self, files):
    for f in self.filter_files(files):
        pkgnames = self._get_packages_for_file(f.path)

        # Stores the file
        if not pkgnames:
            self.unknown_files.add(f)
        else:
            pkgs = []
            for pkgname in pkgnames:
                if pkgname in self.packages:
                    pkgs.append(self.packages[pkgname])
                else:
                    pkg = self._create_package(pkgname)
                    if pkg is not None:
                        self.packages[pkgname] = pkg
                        pkgs.append(self.packages[pkgname])
            if len(pkgs) == 1:
                pkgs[0].add_file(f)
            else:
                self.unknown_files.add(f)

    # Filter out packages with no files
    self.packages = {pkgname: pkg
                     for pkgname, pkg in iteritems(self.packages)
                     if pkg.files}
def search_for_files(self, files):
    nb_pkg_files = 0

    for f in self.filter_files(files):
        pkgnames = self._get_packages_for_file(f.path)

        # Stores the file
        if not pkgnames:
            self.unknown_files.add(f)
        else:
            pkgs = []
            for pkgname in pkgnames:
                if pkgname in self.packages:
                    pkgs.append(self.packages[pkgname])
                else:
                    pkg = self._create_package(pkgname)
                    if pkg is not None:
                        self.packages[pkgname] = pkg
                        pkgs.append(self.packages[pkgname])
            if len(pkgs) == 1:
                pkgs[0].add_file(f)
                nb_pkg_files += 1
            else:
                self.unknown_files.add(f)

    # Filter out packages with no files
    self.packages = {pkgname: pkg
                     for pkgname, pkg in iteritems(self.packages)
                     if pkg.files}

    logger.info("%d packages with %d files, and %d other files",
                len(self.packages), nb_pkg_files, len(self.unknown_files))
def search_for_files(self, files):
    # Make a set of all the requested files
    requested = dict((f.path, f) for f in self.filter_files(files))

    found = {}  # {path: pkgname}

    # Request a few files at a time so we don't hit the command-line size
    # limit
    iter_batch = iter(requested)
    while True:
        batch = list(itertools.islice(iter_batch, MAX_ARGV))
        if not batch:
            break

        proc = subprocess.Popen(['dpkg-query', '-S'] +
                                [path.path for path in batch],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = proc.communicate()
        for l in out.splitlines():
            pkgname, path = l.split(b': ', 1)
            path = Path(path.strip())
            # 8-bit safe encoding, because this might be a localized error
            # message (that we don't care about)
            pkgname = pkgname.decode('iso-8859-1')
            if ', ' in pkgname:
                # Multiple packages
                found[path] = None
                continue
            pkgname = pkgname.split(':', 1)[0]  # Remove :arch
            if path in requested:
                if ' ' not in pkgname:
                    # If we had assigned it to a package already, undo
                    if path in found:
                        found[path] = None
                    # Else assign to the package
                    else:
                        found[path] = pkgname

    # Remaining files are not from packages
    self.unknown_files.update(
        f for f in files
        if f.path in requested and found.get(f.path) is None)

    nb_pkg_files = 0

    for path, pkgname in iteritems(found):
        if pkgname is None:
            continue
        if pkgname in self.packages:
            package = self.packages[pkgname]
        else:
            package = self._create_package(pkgname)
            self.packages[pkgname] = package
        package.add_file(requested.pop(path))
        nb_pkg_files += 1

    logger.info("%d packages with %d files, and %d other files",
                len(self.packages), nb_pkg_files, len(self.unknown_files))
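# The loop above parses `dpkg-query -S` output, which reports the package
# owning each path, one entry per line, e.g. b'coreutils: /bin/cat' (with a
# comma-separated list when several packages own the path). A standalone
# sketch of the same parsing rules, for illustration only; the helper name is
# made up for this example:
def parse_dpkg_query_line(line):
    pkgname, path = line.split(b': ', 1)
    # Decode as latin-1 so localized error messages don't raise
    pkgname = pkgname.decode('iso-8859-1')
    if ', ' in pkgname:
        return path.strip(), None       # owned by multiple packages
    pkgname = pkgname.split(':', 1)[0]  # drop the ':arch' qualifier
    if ' ' in pkgname:
        return path.strip(), None       # not a package name (error text)
    return path.strip(), pkgname


assert parse_dpkg_query_line(b'coreutils: /bin/cat') == (b'/bin/cat',
                                                         'coreutils')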
def search_for_files(self, files):
    # Make a set of all the requested files
    requested = dict((f.path, f) for f in self.filter_files(files))

    found = {}  # {path: pkgname}

    # Process /var/lib/dpkg/info/*.list
    for listfile in Path('/var/lib/dpkg/info').listdir():
        if not listfile.unicodename.endswith('.list'):
            continue
        pkgname = listfile.unicodename[:-5]  # Strips the '.list' extension
        # Removes :arch
        pkgname = pkgname.split(':', 1)[0]
        with listfile.open('rb') as fp:
            # Read paths from the file
            l = fp.readline()
            while l:
                if l[-1:] == b'\n':
                    l = l[:-1]
                path = Path(l)
                # If it's one of the requested paths
                if path in requested:
                    # If we had assigned it to a package already, undo
                    if path in found:
                        found[path] = None
                    # Else assign to the package
                    else:
                        found[path] = pkgname
                l = fp.readline()

    # Remaining files are not from packages
    self.unknown_files.update(
        f for f in files
        if f.path in requested and found.get(f.path) is None)

    nb_pkg_files = 0

    for path, pkgname in iteritems(found):
        if pkgname is None:
            continue
        if pkgname in self.packages:
            package = self.packages[pkgname]
        else:
            package = self._create_package(pkgname)
            self.packages[pkgname] = package
        package.add_file(requested.pop(path))
        nb_pkg_files += 1

    logging.info("%d packages with %d files, and %d other files",
                 len(self.packages), nb_pkg_files, len(self.unknown_files))
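# Each /var/lib/dpkg/info/<package>[:<arch>].list file simply lists every
# path installed by that package, one absolute path per line; for example
# coreutils.list contains lines such as /bin, /bin/cat, /bin/ls. A tiny
# standard-library sketch of the reverse lookup the loop above performs,
# written only as an illustration (the helper name is made up):
import os


def dpkg_owner(path, info_dir='/var/lib/dpkg/info'):
    """Return the name of the package whose .list file mentions `path`."""
    for name in os.listdir(info_dir):
        if not name.endswith('.list'):
            continue
        with open(os.path.join(info_dir, name), 'rb') as fp:
            if path.encode() in (l.rstrip(b'\n') for l in fp):
                return name[:-5].split(':', 1)[0]
    return None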
def search_for_files(self, files):
    # Make a set of all the requested files
    requested = dict((f.path, f) for f in self.filter_files(files))

    found = {}  # {path: pkgname}

    # Process /var/lib/dpkg/info/*.list
    for listfile in Path('/var/lib/dpkg/info').listdir():
        if not listfile.unicodename.endswith('.list'):
            continue
        pkgname = listfile.unicodename[:-5]  # Strips the '.list' extension
        # Removes :arch
        pkgname = pkgname.split(':', 1)[0]
        with listfile.open('rb') as fp:
            # Read paths from the file
            l = fp.readline()
            while l:
                if l[-1:] == b'\n':
                    l = l[:-1]
                path = Path(l)
                # If it's one of the requested paths
                if path in requested:
                    # If we had assigned it to a package already, undo
                    if path in found:
                        found[path] = None
                    # Else assign to the package
                    else:
                        found[path] = pkgname
                l = fp.readline()

    # Remaining files are not from packages (or couldn't be assigned to a
    # single package)
    self.unknown_files.update(
        f for f in files
        if f.path in requested and found.get(f.path) is None)

    for path, pkgname in iteritems(found):
        if pkgname is None:
            continue
        if pkgname in self.packages:
            package = self.packages[pkgname]
        else:
            package = self._create_package(pkgname)
            self.packages[pkgname] = package
        package.add_file(requested.pop(path))
def python(files, input_files, **kwargs):
    add = []
    for path, fi in iteritems(files):
        if path.ext == b'.pyc':
            pyfile = path.parent / path.stem + '.py'
            if pyfile.is_file():
                if pyfile not in files:
                    logger.info("Adding %s", pyfile)
                    add.append(TracedFile(pyfile))

    for fi in add:
        files[fi.path] = fi

    for i in irange(len(input_files)):
        lst = []
        for path in input_files[i]:
            if path.ext in (b'.py', b'.pyc'):
                logger.info("Removing input %s", path)
            else:
                lst.append(path)
        input_files[i] = lst
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.
    """
    if target.exists():
        # Don't overwrite packs...
        logger.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)
    runs, packages, other_files = config = load_config(
        configfile, canonical=False)
    additional_patterns = config.additional_patterns
    inputs_outputs = config.inputs_outputs

    # Validate run ids
    run_chars = ('0123456789_-@() .:%'
                 'abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    for i, run in enumerate(runs):
        if (any(c not in run_chars for c in run['id']) or
                all(c in string.digits for c in run['id'])):
            logger.critical("Illegal run id: %r (run number %d)",
                            run['id'], i)
            sys.exit(1)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    packages, other_files = canonicalize_config(
        packages, other_files, additional_patterns, sort_packages)

    logger.info("Creating pack %s...", target)
    tar = tarfile.open(str(target), 'w:')

    fd, tmp = Path.tempfile()
    os.close(fd)
    try:
        datatar = PackBuilder(tmp)
        # Add the files from the packages
        for pkg in packages:
            if pkg.packfiles:
                logger.info("Adding files from package %s...", pkg.name)
                files = []
                for f in pkg.files:
                    if not Path(f.path).exists():
                        logger.warning("Missing file %s from package %s",
                                       f.path, pkg.name)
                    else:
                        datatar.add_data(f.path)
                        files.append(f)
                pkg.files = files
            else:
                logger.info("NOT adding files from package %s", pkg.name)

        # Add the rest of the files
        logger.info("Adding other files...")
        files = set()
        for f in other_files:
            if not Path(f.path).exists():
                logger.warning("Missing file %s", f.path)
            else:
                datatar.add_data(f.path)
                files.add(f)
        other_files = files
        datatar.close()

        tar.add(str(tmp), 'DATA.tar.gz')
    finally:
        tmp.remove()

    logger.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 2\n')
        tar.add(str(manifest), 'METADATA/version')
    finally:
        manifest.remove()

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if not trace.is_file():
        logger.critical("trace.sqlite3 is gone! Aborting")
        sys.exit(1)
    tar.add(str(trace), 'METADATA/trace.sqlite3')

    # Checks that input files are packed
    for name, f in iteritems(inputs_outputs):
        if f.read_runs and not Path(f.path).exists():
            logger.warning("File is designated as input (name %s) but is not "
                           "to be packed: %s", name, f.path)

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version, inputs_outputs, canonical=True,
                    pack_id=pack_id)

        tar.add(str(can_configfile), 'METADATA/config.yml')
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files, inputs_outputs,
                         pack_id)
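# A hedged example of calling pack() directly; the argument values mirror the
# `reprozip pack` command-line defaults and are assumptions made for
# illustration, not a documented API (Path is assumed to be rpaths.Path, as
# elsewhere in the code):
from rpaths import Path

pack(Path('experiment.rpz'),    # target .rpz file to create
     Path('.reprozip-trace'),   # directory written by `reprozip trace`
     sort_packages=True)        # look up which distro packages own the files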
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.
    """
    if target.exists():
        # Don't overwrite packs...
        logging.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = config = load_config(configfile,
                                                       canonical=False)
    additional_patterns = config.additional_patterns
    inputs_outputs = config.inputs_outputs

    # Validate run ids
    run_chars = ('0123456789_-@() .:%'
                 'abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    for i, run in enumerate(runs):
        if (any(c not in run_chars for c in run['id']) or
                all(c in string.digits for c in run['id'])):
            logging.critical("Illegal run id: %r (run number %d)",
                             run['id'], i)
            sys.exit(1)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    packages, other_files = canonicalize_config(packages, other_files,
                                                additional_patterns,
                                                sort_packages)

    logging.info("Creating pack %s...", target)
    tar = tarfile.open(str(target), 'w:')

    fd, tmp = Path.tempfile()
    os.close(fd)
    try:
        datatar = PackBuilder(tmp)
        # Add the files from the packages
        for pkg in packages:
            if pkg.packfiles:
                logging.info("Adding files from package %s...", pkg.name)
                files = []
                for f in pkg.files:
                    if not Path(f.path).exists():
                        logging.warning("Missing file %s from package %s",
                                        f.path, pkg.name)
                    else:
                        datatar.add_data(f.path)
                        files.append(f)
                pkg.files = files
            else:
                logging.info("NOT adding files from package %s", pkg.name)

        # Add the rest of the files
        logging.info("Adding other files...")
        files = set()
        for f in other_files:
            if not Path(f.path).exists():
                logging.warning("Missing file %s", f.path)
            else:
                datatar.add_data(f.path)
                files.add(f)
        other_files = files
        datatar.close()

        tar.add(str(tmp), 'DATA.tar.gz')
    finally:
        tmp.remove()

    logging.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 2\n')
        tar.add(str(manifest), 'METADATA/version')
    finally:
        manifest.remove()

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if not trace.is_file():
        logging.critical("trace.sqlite3 is gone! Aborting")
        sys.exit(1)
    tar.add(str(trace), 'METADATA/trace.sqlite3')

    # Checks that input files are packed
    for name, f in iteritems(inputs_outputs):
        if f.read_runs and not Path(f.path).exists():
            logging.warning(
                "File is designated as input (name %s) but is not "
                "to be packed: %s", name, f.path)

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version, inputs_outputs, canonical=True,
                    pack_id=pack_id)

        tar.add(str(can_configfile), 'METADATA/config.yml')
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files, inputs_outputs,
                         pack_id)