示例#1
0
def compile_inputs_outputs(runs, inputs, outputs):
    """Gives names to input/output files and creates InputOutputFile objects.
    """
    # {path: (run_nb, arg_nb) or None}
    runs_with_file = {}
    # run_nb: number_of_file_arguments
    nb_file_args = []
    # {path: [runs]}
    readers = {}
    writers = {}

    for run_nb, run, in_files, out_files in izip(count(), runs,
                                                 inputs, outputs):
        # List which runs read or write each file
        for p in in_files:
            readers.setdefault(p, []).append(run_nb)
        for p in out_files:
            writers.setdefault(p, []).append(run_nb)

        # Locate files that appear on a run's command line
        files_set = set(in_files) | set(out_files)
        nb_files = 0
        for arg_nb, arg in enumerate(run['argv']):
            p = Path(run['workingdir'], arg).resolve()
            if p in files_set:
                nb_files += 1
                if p not in runs_with_file:
                    runs_with_file[p] = run_nb, arg_nb
                elif runs_with_file[p] is not None:
                    runs_with_file[p] = None
        nb_file_args.append(nb_files)

    file_names = {}
    make_unique = UniqueNames()

    for fi in flatten(2, (inputs, outputs)):
        if fi in file_names:
            continue

        # If it appears in at least one of the command-lines
        if fi in runs_with_file:
            # If it only appears once in the command-lines
            if runs_with_file[fi] is not None:
                run_nb, arg_nb = runs_with_file[fi]
                parts = []
                # Run number, if there are more than one runs
                if len(runs) > 1:
                    parts.append(run_nb)
                # Argument number, if there are more than one file arguments
                if nb_file_args[run_nb] > 1:
                    parts.append(arg_nb)
                file_names[fi] = make_unique(
                    'arg%s' % '_'.join('%s' % s for s in parts))
            else:
                file_names[fi] = make_unique('arg_%s' % fi.unicodename)
        else:
            file_names[fi] = make_unique(fi.unicodename)

    return dict((n, InputOutputFile(p, readers.get(p, []), writers.get(p, [])))
                for p, n in iteritems(file_names))
示例#2
0
    def search_for_files(self, files):
        nb_pkg_files = 0

        for f in self.filter_files(files):
            pkgnames = self._get_packages_for_file(f.path)

            # Stores the file
            if not pkgnames:
                self.unknown_files.add(f)
            else:
                pkgs = []
                for pkgname in pkgnames:
                    if pkgname in self.packages:
                        pkgs.append(self.packages[pkgname])
                    else:
                        pkg = self._create_package(pkgname)
                        if pkg is not None:
                            self.packages[pkgname] = pkg
                            pkgs.append(self.packages[pkgname])
                if len(pkgs) == 1:
                    pkgs[0].add_file(f)
                    nb_pkg_files += 1
                else:
                    self.unknown_files.add(f)

        # Filter out packages with no files
        self.packages = {
            pkgname: pkg
            for pkgname, pkg in iteritems(self.packages) if pkg.files
        }

        logging.info("%d packages with %d files, and %d other files",
                     len(self.packages), nb_pkg_files, len(self.unknown_files))
示例#3
0
def python(files, input_files, **kwargs):
    remove = []
    add = []
    for path, fi in iteritems(files):
        if path.ext == '.pyc':
            pyfile = path.parent / path.stem + '.py'
            if pyfile.is_file():
                logging.info("Removing %s", path)
                remove.append(path)
                pyfile = path.parent / path.stem + '.py'
                if pyfile not in files:
                    logging.info("Adding %s", pyfile)
                    add.append(TracedFile(pyfile))

    for path in remove:
        files.pop(path, None)

    for fi in add:
        files[fi.path] = fi

    for i in irange(len(input_files)):
        lst = []
        for path in input_files[i]:
            if path.ext in ('.py', '.pyc'):
                logging.info("Removing input %s", path)
            else:
                lst.append(path)

        input_files[i] = lst
示例#4
0
def compile_inputs_outputs(runs, inputs, outputs):
    """Gives names to input/output files and creates InputOutputFile objects.
    """
    # {path: (run_nb, arg_nb) or None}
    runs_with_file = {}
    # run_nb: number_of_file_arguments
    nb_file_args = []
    # {path: [runs]}
    readers = {}
    writers = {}

    for run_nb, run, in_files, out_files in izip(count(), runs,
                                                 inputs, outputs):
        # List which runs read or write each file
        for p in in_files:
            readers.setdefault(p, []).append(run_nb)
        for p in out_files:
            writers.setdefault(p, []).append(run_nb)

        # Locate files that appear on a run's command line
        files_set = set(in_files) | set(out_files)
        nb_files = 0
        for arg_nb, arg in enumerate(run['argv']):
            p = Path(run['workingdir'], arg).resolve()
            if p in files_set:
                nb_files += 1
                if p not in runs_with_file:
                    runs_with_file[p] = run_nb, arg_nb
                elif runs_with_file[p] is not None:
                    runs_with_file[p] = None
        nb_file_args.append(nb_files)

    file_names = {}
    make_unique = UniqueNames()

    for fi in flatten(2, (inputs, outputs)):
        if fi in file_names:
            continue

        # If it appears in at least one of the command-lines
        if fi in runs_with_file:
            # If it only appears once in the command-lines
            if runs_with_file[fi] is not None:
                run_nb, arg_nb = runs_with_file[fi]
                parts = []
                # Run number, if there are more than one runs
                if len(runs) > 1:
                    parts.append(run_nb)
                # Argument number, if there are more than one file arguments
                if nb_file_args[run_nb] > 1:
                    parts.append(arg_nb)
                file_names[fi] = make_unique(
                    'arg%s' % '_'.join('%s' % s for s in parts))
            else:
                file_names[fi] = make_unique('arg_%s' % fi.unicodename)
        else:
            file_names[fi] = make_unique(fi.unicodename)

    return dict((n, InputOutputFile(p, readers.get(p, []), writers.get(p, [])))
                for p, n in iteritems(file_names))
示例#5
0
    def search_for_files(self, files):
        for f in self.filter_files(files):
            pkgnames = self._get_packages_for_file(f.path)

            # Stores the file
            if not pkgnames:
                self.unknown_files.add(f)
            else:
                pkgs = []
                for pkgname in pkgnames:
                    if pkgname in self.packages:
                        pkgs.append(self.packages[pkgname])
                    else:
                        pkg = self._create_package(pkgname)
                        if pkg is not None:
                            self.packages[pkgname] = pkg
                            pkgs.append(self.packages[pkgname])
                if len(pkgs) == 1:
                    pkgs[0].add_file(f)
                else:
                    self.unknown_files.add(f)

        # Filter out packages with no files
        self.packages = {pkgname: pkg
                         for pkgname, pkg in iteritems(self.packages)
                         if pkg.files}
示例#6
0
    def search_for_files(self, files):
        nb_pkg_files = 0

        for f in self.filter_files(files):
            pkgnames = self._get_packages_for_file(f.path)

            # Stores the file
            if not pkgnames:
                self.unknown_files.add(f)
            else:
                pkgs = []
                for pkgname in pkgnames:
                    if pkgname in self.packages:
                        pkgs.append(self.packages[pkgname])
                    else:
                        pkg = self._create_package(pkgname)
                        if pkg is not None:
                            self.packages[pkgname] = pkg
                            pkgs.append(self.packages[pkgname])
                if len(pkgs) == 1:
                    pkgs[0].add_file(f)
                    nb_pkg_files += 1
                else:
                    self.unknown_files.add(f)

        # Filter out packages with no files
        self.packages = {pkgname: pkg
                         for pkgname, pkg in iteritems(self.packages)
                         if pkg.files}

        logger.info("%d packages with %d files, and %d other files",
                    len(self.packages),
                    nb_pkg_files,
                    len(self.unknown_files))
示例#7
0
    def search_for_files(self, files):
        # Make a set of all the requested files
        requested = dict((f.path, f) for f in self.filter_files(files))
        found = {}  # {path: pkgname}

        # Request a few files at a time so we don't hit the command-line size
        # limit
        iter_batch = iter(requested)
        while True:
            batch = list(itertools.islice(iter_batch, MAX_ARGV))
            if not batch:
                break

            proc = subprocess.Popen(['dpkg-query', '-S'] +
                                    [path.path for path in batch],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out, err = proc.communicate()
            for l in out.splitlines():
                pkgname, path = l.split(b': ', 1)
                path = Path(path.strip())
                # 8-bit safe encoding, because this might be a localized error
                # message (that we don't care about)
                pkgname = pkgname.decode('iso-8859-1')
                if ', ' in pkgname:  # Multiple packages
                    found[path] = None
                    continue
                pkgname = pkgname.split(':', 1)[0]  # Remove :arch
                if path in requested:
                    if ' ' not in pkgname:
                        # If we had assigned it to a package already, undo
                        if path in found:
                            found[path] = None
                        # Else assign to the package
                        else:
                            found[path] = pkgname

        # Remaining files are not from packages
        self.unknown_files.update(
            f for f in files
            if f.path in requested and found.get(f.path) is None)

        nb_pkg_files = 0

        for path, pkgname in iteritems(found):
            if pkgname is None:
                continue
            if pkgname in self.packages:
                package = self.packages[pkgname]
            else:
                package = self._create_package(pkgname)
                self.packages[pkgname] = package
            package.add_file(requested.pop(path))
            nb_pkg_files += 1

        logger.info("%d packages with %d files, and %d other files",
                    len(self.packages),
                    nb_pkg_files,
                    len(self.unknown_files))
示例#8
0
    def search_for_files(self, files):
        # Make a set of all the requested files
        requested = dict((f.path, f) for f in self.filter_files(files))
        found = {}  # {path: pkgname}

        # Request a few files at a time so we don't hit the command-line size
        # limit
        iter_batch = iter(requested)
        while True:
            batch = list(itertools.islice(iter_batch, MAX_ARGV))
            if not batch:
                break

            proc = subprocess.Popen(['dpkg-query', '-S'] +
                                    [path.path for path in batch],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            out, err = proc.communicate()
            for l in out.splitlines():
                pkgname, path = l.split(b': ', 1)
                path = Path(path.strip())
                # 8-bit safe encoding, because this might be a localized error
                # message (that we don't care about)
                pkgname = pkgname.decode('iso-8859-1')
                if ', ' in pkgname:  # Multiple packages
                    found[path] = None
                    continue
                pkgname = pkgname.split(':', 1)[0]  # Remove :arch
                if path in requested:
                    if ' ' not in pkgname:
                        # If we had assigned it to a package already, undo
                        if path in found:
                            found[path] = None
                        # Else assign to the package
                        else:
                            found[path] = pkgname

        # Remaining files are not from packages
        self.unknown_files.update(
            f for f in files
            if f.path in requested and found.get(f.path) is None)

        nb_pkg_files = 0

        for path, pkgname in iteritems(found):
            if pkgname is None:
                continue
            if pkgname in self.packages:
                package = self.packages[pkgname]
            else:
                package = self._create_package(pkgname)
                self.packages[pkgname] = package
            package.add_file(requested.pop(path))
            nb_pkg_files += 1

        logger.info("%d packages with %d files, and %d other files",
                    len(self.packages),
                    nb_pkg_files,
                    len(self.unknown_files))
示例#9
0
    def search_for_files(self, files):
        # Make a set of all the requested files
        requested = dict((f.path, f) for f in self.filter_files(files))
        found = {}  # {path: pkgname}

        # Process /var/lib/dpkg/info/*.list
        for listfile in Path('/var/lib/dpkg/info').listdir():
            pkgname = listfile.unicodename[:-5]
            # Removes :arch
            pkgname = pkgname.split(':', 1)[0]

            if not listfile.unicodename.endswith('.list'):
                continue
            with listfile.open('rb') as fp:
                # Read paths from the file
                l = fp.readline()
                while l:
                    if l[-1:] == b'\n':
                        l = l[:-1]
                    path = Path(l)
                    # If it's one of the requested paths
                    if path in requested:
                        # If we had assigned it to a package already, undo
                        if path in found:
                            found[path] = None
                        # Else assign to the package
                        else:
                            found[path] = pkgname
                    l = fp.readline()

        # Remaining files are not from packages
        self.unknown_files.update(
            f for f in files
            if f.path in requested and found.get(f.path) is None)

        nb_pkg_files = 0

        for path, pkgname in iteritems(found):
            if pkgname is None:
                continue
            if pkgname in self.packages:
                package = self.packages[pkgname]
            else:
                package = self._create_package(pkgname)
                self.packages[pkgname] = package
            package.add_file(requested.pop(path))
            nb_pkg_files += 1

        logging.info("%d packages with %d files, and %d other files",
                     len(self.packages),
                     nb_pkg_files,
                     len(self.unknown_files))
示例#10
0
    def search_for_files(self, files):
        # Make a set of all the requested files
        requested = dict((f.path, f) for f in self.filter_files(files))
        found = {}  # {path: pkgname}

        # Process /var/lib/dpkg/info/*.list
        for listfile in Path('/var/lib/dpkg/info').listdir():
            pkgname = listfile.unicodename[:-5]
            # Removes :arch
            pkgname = pkgname.split(':', 1)[0]

            if not listfile.unicodename.endswith('.list'):
                continue
            with listfile.open('rb') as fp:
                # Read paths from the file
                l = fp.readline()
                while l:
                    if l[-1:] == b'\n':
                        l = l[:-1]
                    path = Path(l)
                    # If it's one of the requested paths
                    if path in requested:
                        # If we had assigned it to a package already, undo
                        if path in found:
                            found[path] = None
                        # Else assign to the package
                        else:
                            found[path] = pkgname
                    l = fp.readline()

        # Remaining files are not from packages
        self.unknown_files.update(
            f for f in files
            if f.path in requested and f.path not in found)

        for path, pkgname in iteritems(found):
            if pkgname in self.packages:
                package = self.packages[pkgname]
            else:
                package = self._create_package(pkgname)
                self.packages[pkgname] = package
            package.add_file(requested.pop(path))
示例#11
0
def python(files, input_files, **kwargs):
    add = []
    for path, fi in iteritems(files):
        if path.ext == b'.pyc':
            pyfile = path.parent / path.stem + '.py'
            if pyfile.is_file():
                if pyfile not in files:
                    logger.info("Adding %s", pyfile)
                    add.append(TracedFile(pyfile))

    for fi in add:
        files[fi.path] = fi

    for i in irange(len(input_files)):
        lst = []
        for path in input_files[i]:
            if path.ext in (b'.py', b'.pyc'):
                logger.info("Removing input %s", path)
            else:
                lst.append(path)

        input_files[i] = lst
示例#12
0
文件: pack.py 项目: ViDA-NYU/reprozip
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.
    """
    if target.exists():
        # Don't overwrite packs...
        logger.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logger.critical("Configuration file does not exist!\n"
                        "Did you forget to run 'reprozip trace'?\n"
                        "If not, you might want to use --dir to specify an "
                        "alternate location.")
        sys.exit(1)
    runs, packages, other_files = config = load_config(
        configfile,
        canonical=False)
    additional_patterns = config.additional_patterns
    inputs_outputs = config.inputs_outputs

    # Validate run ids
    run_chars = ('0123456789_-@() .:%'
                 'abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    for i, run in enumerate(runs):
        if (any(c not in run_chars for c in run['id']) or
                all(c in string.digits for c in run['id'])):
            logger.critical("Illegal run id: %r (run number %d)",
                            run['id'], i)
            sys.exit(1)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    packages, other_files = canonicalize_config(
        packages, other_files, additional_patterns, sort_packages)

    logger.info("Creating pack %s...", target)
    tar = tarfile.open(str(target), 'w:')

    fd, tmp = Path.tempfile()
    os.close(fd)
    try:
        datatar = PackBuilder(tmp)
        # Add the files from the packages
        for pkg in packages:
            if pkg.packfiles:
                logger.info("Adding files from package %s...", pkg.name)
                files = []
                for f in pkg.files:
                    if not Path(f.path).exists():
                        logger.warning("Missing file %s from package %s",
                                       f.path, pkg.name)
                    else:
                        datatar.add_data(f.path)
                        files.append(f)
                pkg.files = files
            else:
                logger.info("NOT adding files from package %s", pkg.name)

        # Add the rest of the files
        logger.info("Adding other files...")
        files = set()
        for f in other_files:
            if not Path(f.path).exists():
                logger.warning("Missing file %s", f.path)
            else:
                datatar.add_data(f.path)
                files.add(f)
        other_files = files
        datatar.close()

        tar.add(str(tmp), 'DATA.tar.gz')
    finally:
        tmp.remove()

    logger.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 2\n')
        tar.add(str(manifest), 'METADATA/version')
    finally:
        manifest.remove()

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if not trace.is_file():
        logger.critical("trace.sqlite3 is gone! Aborting")
        sys.exit(1)
    tar.add(str(trace), 'METADATA/trace.sqlite3')

    # Checks that input files are packed
    for name, f in iteritems(inputs_outputs):
        if f.read_runs and not Path(f.path).exists():
            logger.warning("File is designated as input (name %s) but is not "
                           "to be packed: %s", name, f.path)

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile, runs, packages, other_files,
                    reprozip_version,
                    inputs_outputs, canonical=True,
                    pack_id=pack_id)

        tar.add(str(can_configfile), 'METADATA/config.yml')
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files,
                         inputs_outputs,
                         pack_id)
示例#13
0
def pack(target, directory, sort_packages):
    """Main function for the pack subcommand.
    """
    if target.exists():
        # Don't overwrite packs...
        logging.critical("Target file exists!")
        sys.exit(1)

    # Reads configuration
    configfile = directory / 'config.yml'
    if not configfile.is_file():
        logging.critical("Configuration file does not exist!\n"
                         "Did you forget to run 'reprozip trace'?\n"
                         "If not, you might want to use --dir to specify an "
                         "alternate location.")
        sys.exit(1)
    runs, packages, other_files = config = load_config(configfile,
                                                       canonical=False)
    additional_patterns = config.additional_patterns
    inputs_outputs = config.inputs_outputs

    # Validate run ids
    run_chars = ('0123456789_-@() .:%'
                 'abcdefghijklmnopqrstuvwxyz'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    for i, run in enumerate(runs):
        if (any(c not in run_chars for c in run['id'])
                or all(c in string.digits for c in run['id'])):
            logging.critical("Illegal run id: %r (run number %d)", run['id'],
                             i)
            sys.exit(1)

    # Canonicalize config (re-sort, expand 'additional_files' patterns)
    packages, other_files = canonicalize_config(packages, other_files,
                                                additional_patterns,
                                                sort_packages)

    logging.info("Creating pack %s...", target)
    tar = tarfile.open(str(target), 'w:')

    fd, tmp = Path.tempfile()
    os.close(fd)
    try:
        datatar = PackBuilder(tmp)
        # Add the files from the packages
        for pkg in packages:
            if pkg.packfiles:
                logging.info("Adding files from package %s...", pkg.name)
                files = []
                for f in pkg.files:
                    if not Path(f.path).exists():
                        logging.warning("Missing file %s from package %s",
                                        f.path, pkg.name)
                    else:
                        datatar.add_data(f.path)
                        files.append(f)
                pkg.files = files
            else:
                logging.info("NOT adding files from package %s", pkg.name)

        # Add the rest of the files
        logging.info("Adding other files...")
        files = set()
        for f in other_files:
            if not Path(f.path).exists():
                logging.warning("Missing file %s", f.path)
            else:
                datatar.add_data(f.path)
                files.add(f)
        other_files = files
        datatar.close()

        tar.add(str(tmp), 'DATA.tar.gz')
    finally:
        tmp.remove()

    logging.info("Adding metadata...")
    # Stores pack version
    fd, manifest = Path.tempfile(prefix='reprozip_', suffix='.txt')
    os.close(fd)
    try:
        with manifest.open('wb') as fp:
            fp.write(b'REPROZIP VERSION 2\n')
        tar.add(str(manifest), 'METADATA/version')
    finally:
        manifest.remove()

    # Stores the original trace
    trace = directory / 'trace.sqlite3'
    if not trace.is_file():
        logging.critical("trace.sqlite3 is gone! Aborting")
        sys.exit(1)
    tar.add(str(trace), 'METADATA/trace.sqlite3')

    # Checks that input files are packed
    for name, f in iteritems(inputs_outputs):
        if f.read_runs and not Path(f.path).exists():
            logging.warning(
                "File is designated as input (name %s) but is not "
                "to be packed: %s", name, f.path)

    # Generates a unique identifier for the pack (for usage reports purposes)
    pack_id = str(uuid.uuid4())

    # Stores canonical config
    fd, can_configfile = Path.tempfile(suffix='.yml', prefix='rpz_config_')
    os.close(fd)
    try:
        save_config(can_configfile,
                    runs,
                    packages,
                    other_files,
                    reprozip_version,
                    inputs_outputs,
                    canonical=True,
                    pack_id=pack_id)

        tar.add(str(can_configfile), 'METADATA/config.yml')
    finally:
        can_configfile.remove()

    tar.close()

    # Record some info to the usage report
    record_usage_package(runs, packages, other_files, inputs_outputs, pack_id)