Пример #1
0
    def siblings(self, node):
        """Collect the siblings of ``node``; the node itself is included.

        Since the node is always a member of the result, it only has real
        siblings when the returned set holds more than one element.

        * An entity without a parent, a process or a run is its own only
          sibling.
        * An entity with a parent gets the members of that parent together
          with the siblings of the parent (the parent itself excluded).
        * For a generation the activity of the node is looked at, for a
          usage the activity registered for the commit of the node; the
          ``generated`` items of that activity are returned.

        Raises ``errors.InvalidOutputPath`` when no ``ProcessRun`` could be
        associated with the node.
        """
        if isinstance(node, Entity):
            container = node.parent
            if not container:
                return {node}
            inherited = self.siblings(container) - {container}
            return set(container.members) | inherited

        activity = None
        if isinstance(node, Generation):
            activity = node.activity
        elif isinstance(node, Usage):
            activity = self.activities[node.commit]
        elif isinstance(node, (Process, Run)):
            return {node}

        if activity is not None and isinstance(activity, ProcessRun):
            return set(activity.generated)

        message = (
            'The file "{0}" was not created by a renku command. \n\n'
            'Check the file history using: git log --follow "{0}"'
        ).format(node.path)
        raise errors.InvalidOutputPath(message)
Пример #2
0
    def find_explicit_outputs(self, starting_output_id):
        """Yield explicit output and changed command input parameter.

        For every path in ``self.explicit_outputs`` a tuple
        ``(output, changed_input, output_path)`` is produced. Output ids are
        numbered consecutively, starting at ``starting_output_id``. When the
        path is also the default of a path-typed input, that input is
        evolved to type ``'string'`` and the glob of the output refers to
        it; otherwise ``changed_input`` is ``None``.

        Raises ``errors.InvalidOutputPath`` if ``self.file_candidate``
        returns ``None`` for an explicit output.
        """
        # Path-typed inputs keyed by their default path relative to the
        # working directory.
        path_inputs = {}
        for parameter in self.inputs:
            if parameter.type in PATH_OBJECTS:
                default_path = parameter.default.path
                key = str(default_path.relative_to(self.working_dir))
                path_inputs[key] = parameter

        next_id = starting_output_id
        for path in self.explicit_outputs:
            if self.file_candidate(path) is None:
                raise errors.InvalidOutputPath(
                    'The output file or directory does not exist.'
                    '\n\n\t' + click.style(str(path), fg='yellow') + '\n\n'
                )

            relative = str(path.relative_to(self.working_dir))
            kind = 'Directory' if path.is_dir() else 'File'
            output_name = 'output_{0}'.format(next_id)
            next_id += 1

            if relative not in path_inputs:
                yield (
                    CommandOutputParameter(
                        id=output_name,
                        type=kind,
                        outputBinding={'glob': relative},
                    ), None, relative
                )
                continue

            # change input type to note that it is also an output
            changed = attr.evolve(
                path_inputs[relative], type='string', default=relative
            )
            binding = '$(inputs.{0})'.format(changed.id)
            yield (
                CommandOutputParameter(
                    id=output_name,
                    type=kind,
                    outputBinding={'glob': binding},
                ), changed, relative
            )
Пример #3
0
    def guess_outputs(self, paths):
        """Yield detected output and changed command input parameter.

        Every item of ``paths`` is treated as a candidate output and is
        looked up relative to ``self.working_dir``. For each candidate that
        is not one of the paths stored in ``self.stdout``/``self.stderr`` a
        tuple ``(output, changed_input, glob)`` is yielded:

        * ``output`` is a ``CommandOutputParameter`` whose id is built from
          the position of the candidate in the filtered candidate list,
        * ``changed_input`` is the matching input evolved to type
          ``'string'`` with ``glob`` as its default, or ``None`` when no
          input refers to the same path,
        * ``glob`` is the candidate path relative to ``self.working_dir``.

        Candidates listed in ``self.explicit_outputs`` are skipped.

        Raises ``errors.InvalidOutputPath`` when an input directory that
        contains detected paths (and is not an explicit output) also holds
        other files, and ``ValueError`` when ``self.file_candidate``
        returns ``None`` for a candidate.
        """
        # TODO what to do with duplicate paths & inputs with same defaults
        paths = list(paths)
        tree = DirectoryTree.from_list(paths)

        # Inputs that are turned into string parameters as soon as an output
        # with the same path is detected.
        input_candidates = {}
        # Path-typed inputs; an output with the same path means the command
        # rewrites one of its own inputs.
        conflicting_paths = {}

        for param in self.inputs:
            # Convert input defaults to paths relative to working directory.
            if param.type not in PATH_OBJECTS:
                try:
                    path = self.directory / str(param.default)
                    input_path = path.resolve().relative_to(self.working_dir)
                except FileNotFoundError:
                    continue
            else:
                input_path = param.default.path.relative_to(self.working_dir)

            if input_path.is_dir() and tree.get(input_path):
                # The directory might exist before running the script
                subpaths = {
                    str(input_path / name)
                    for name in tree.get(input_path, default=[])
                }
                absolute_path = os.path.abspath(input_path)
                if Path(absolute_path) not in self.explicit_outputs:
                    # Files found in the directory (``.gitkeep`` ignored)
                    # that are not among the detected paths.
                    content = {
                        str(item)
                        for item in input_path.rglob('*')
                        if not item.is_dir() and item.name != '.gitkeep'
                    }
                    extra_paths = content - subpaths
                    if extra_paths:
                        raise errors.InvalidOutputPath(
                            'The output directory "{0}" is not empty. \n\n'
                            'Delete existing files before running the '
                            'command:'
                            '\n  (use "git rm <file>..." to remove them '
                            'first)'
                            '\n\n'.format(input_path) + '\n'.join(
                                '\t' + click.style(extra, fg='yellow')
                                for extra in extra_paths
                            ) + '\n\n'
                            'Once you have removed files that should be used '
                            'as outputs,\n'
                            'you can safely rerun the previous command.'
                        )

                # Remove files from the input directory
                paths = [item for item in paths if item not in subpaths]
                # Include input path in the paths to check
                paths.append(str(input_path))

                input_candidates[str(input_path)] = param
            elif param.type not in PATH_OBJECTS:
                # Input need to be changed if an output is detected
                input_candidates[str(input_path)] = param
            else:
                # Names that can not be outputs because they are already inputs
                conflicting_paths[str(input_path)] = param

        streams = {
            stream
            for stream in (
                getattr(self, name) for name in ('stdout', 'stderr')
            )
            if stream is not None
        }

        # TODO group by a common prefix

        # Drop the candidates listed in the explicit outputs. A new list is
        # built on purpose: deleting entries from a list while enumerating
        # it skips the element that follows every deletion.
        explicit_outputs = self.explicit_outputs
        paths = [
            item for item in paths
            if Path(os.path.abspath(item)) not in explicit_outputs
        ]

        for position, path in enumerate(paths):
            candidate = self.file_candidate(self.working_dir / path)

            if candidate is None:
                raise ValueError('Path "{0}" does not exist.'.format(path))

            glob = str(candidate.relative_to(self.working_dir))

            if glob in streams:
                continue

            new_input = None

            if glob in conflicting_paths:
                # it means that it is rewriting a file
                new_input = attr.evolve(
                    conflicting_paths.pop(glob), type='string', default=glob
                )
                input_candidates[glob] = new_input
                # TODO add warning ('Output already exists in inputs.')

            candidate_type = 'Directory' if candidate.is_dir() else 'File'

            if glob in input_candidates:
                matched = input_candidates[glob]

                if new_input is None:
                    new_input = input_candidates[glob] = attr.evolve(
                        matched, type='string', default=glob
                    )

                yield (
                    CommandOutputParameter(
                        id='output_{0}'.format(position),
                        type=candidate_type,
                        outputBinding=dict(
                            glob='$(inputs.{0})'.format(matched.id),
                        ),
                    ), new_input, glob
                )
            else:
                yield (
                    CommandOutputParameter(
                        id='output_{0}'.format(position),
                        type=candidate_type,
                        outputBinding=dict(glob=glob, ),
                    ), None, glob
                )
Пример #4
0
    def guess_outputs(self, candidates):
        """Yield detected output and changed command input parameter.

        Each entry of ``candidates`` is looked up relative to
        ``self.working_dir``. For every entry that is not one of the paths
        stored in ``self.stdout``/``self.stderr`` a tuple
        ``(output, changed_input, glob)`` is yielded, where ``output`` is a
        ``CommandOutputParameter``, ``changed_input`` is the matching input
        evolved to type ``"string"`` with ``glob`` as default (``None`` if
        no input refers to the path) and ``glob`` is the path relative to
        ``self.working_dir``.

        Inputs that are not path objects are ignored when
        ``self.no_input_detection`` is set.

        Raises ``errors.InvalidOutputPath`` when an input directory that
        contains detected paths (and is not an explicit output) also holds
        other files, and ``errors.UsageError`` when
        ``self.is_existing_path`` returns ``None`` for a candidate.
        """
        # TODO what to do with duplicate paths & inputs with same defaults
        candidates = list(candidates)
        tree = DirectoryTree.from_list(candidates)

        # Inputs to be rewritten as strings once a matching output shows up.
        input_candidates = {}
        # Path-typed inputs that are not expected to be outputs.
        conflicting_paths = {}

        for param in self.inputs:
            is_path_object = param.type in PATH_OBJECTS

            # Convert input defaults to paths relative to working directory.
            if is_path_object:
                input_path = param.default.path.relative_to(self.working_dir)
            else:
                if self.no_input_detection:
                    continue
                try:
                    path = self.directory / str(param.default)
                    input_path = Path(os.path.abspath(path)).relative_to(
                        self.working_dir)
                except FileNotFoundError:
                    continue

            key = str(input_path)

            if not (input_path.is_dir() and tree.get(input_path)):
                if is_path_object:
                    # Names that can not be outputs because they are
                    # already inputs
                    conflicting_paths[key] = param
                else:
                    # Input need to be changed if an output is detected
                    input_candidates[key] = param
                continue

            # The directory might exist before running the script
            subpaths = {
                str(input_path / name)
                for name in tree.get(input_path, default=[])
            }
            if Path(os.path.abspath(input_path)) not in self.explicit_outputs:
                content = {
                    str(item)
                    for item in input_path.rglob("*")
                    if not (item.is_dir() or item.name == ".gitkeep")
                }
                preexisting_paths = content - subpaths
                if preexisting_paths:
                    header = (
                        'The output directory "{0}" is not empty. \n\n'
                        "Delete existing files before running the "
                        "command:"
                        '\n  (use "git rm <file>..." to remove them '
                        "first)"
                        "\n\n"
                    ).format(input_path)
                    listing = "\n".join(
                        "\t" + click.style(item, fg="yellow")
                        for item in preexisting_paths
                    )
                    footer = (
                        "\n\n"
                        "Once you have removed files that should be used "
                        "as outputs,\n"
                        "you can safely rerun the previous command."
                    )
                    raise errors.InvalidOutputPath(header + listing + footer)

            # Remove files from the input directory
            candidates[:] = [
                item for item in candidates if item not in subpaths
            ]
            # Include input path in the candidates to check
            candidates.append(key)

            input_candidates[key] = param

        streams = set()
        for name in ("stdout", "stderr"):
            stream = getattr(self, name)
            if stream is not None:
                streams.add(stream)

        # TODO group by a common prefix

        for position, path in enumerate(candidates):
            existing = self.is_existing_path(self.working_dir / path)

            if existing is None:
                raise errors.UsageError(
                    'Path "{0}" does not exist.'.format(path))

            glob = str(existing.relative_to(self.working_dir))

            if glob in streams:
                continue

            changed_input = None

            if glob in conflicting_paths:
                # it means that it is rewriting a file
                changed_input = attr.evolve(
                    conflicting_paths.pop(glob), type="string", default=glob)
                input_candidates[glob] = changed_input
                # TODO add warning ('Output already exists in inputs.')

            output_type = "Directory" if existing.is_dir() else "File"
            output_id = "output_{0}".format(position)

            if glob not in input_candidates:
                yield (
                    CommandOutputParameter(
                        id=output_id,
                        type=output_type,
                        outputBinding={"glob": glob},
                    ),
                    None,
                    glob,
                )
                continue

            known_input = input_candidates[glob]

            if changed_input is None:
                changed_input = attr.evolve(
                    known_input, type="string", default=glob)
                input_candidates[glob] = changed_input

            binding = "$(inputs.{0})".format(known_input.id)
            yield (
                CommandOutputParameter(
                    id=output_id,
                    type=output_type,
                    outputBinding={"glob": binding},
                ),
                changed_input,
                glob,
            )