Пример #1
0
def update(client, revision, no_output, siblings, paths):
    """Rerun the workflows behind outdated files and record the new run.

    A ``Graph`` is built for ``client`` and reduced to the nodes that
    ``graph.need_update`` reports as stale.  When nothing is stale a message
    is printed and the process exits with status 0.  Otherwise the stale
    nodes are passed through ``siblings``, turned into a workflow, converted
    with ``CWLConverter`` and executed.  The paths produced by the workflow
    are staged and, if the repository is dirty, committed.  Finally the run
    is serialised to a ``<uuid>_update.yaml`` file under
    ``client.workflow_path`` and added to the activity index.

    :param client: project client giving access to the repository, the
        workflow directory and the external storage helpers.
    :param revision: forwarded to ``graph.build``.
    :param no_output: forwarded to ``graph.build`` as ``can_be_cwl``.
    :param siblings: callable invoked as ``siblings(graph, outputs)``; its
        return value replaces the set of outputs.
    :param paths: forwarded to ``graph.build``.
    """
    graph = Graph(client)
    built_nodes = graph.build(
        revision=revision, can_be_cwl=no_output, paths=paths
    )
    stale_nodes = {node for node in built_nodes if graph.need_update(node)}
    if not stale_nodes:
        click.secho("All files were generated from the latest inputs.", fg="green")
        sys.exit(0)

    # Let the caller-supplied strategy check or extend the sibling outputs.
    stale_nodes = siblings(graph, stale_nodes)
    output_paths = set()
    for node in stale_nodes:
        if _safe_path(node.path):
            output_paths.add(node.path)

    # Every other path known to the graph is treated as a clean input.
    every_path = {node.path for node in graph.nodes}
    input_paths = every_path - output_paths

    # Workflow describing how the stale paths are regenerated.
    workflow = graph.as_workflow(
        input_paths=input_paths,
        output_paths=output_paths,
        outputs=stale_nodes,
    )

    # Only the location of the converted workflow is needed below.
    _, converted_path = CWLConverter.convert(workflow, client)

    # Inputs are pulled only when external storage is in use.
    if client.check_external_storage():
        consumed = (item.consumes.path for item in workflow.inputs)
        client.pull_paths_from_storage(*consumed)

    execute(client, converted_path, output_paths=output_paths)

    produced = [item.produces.path for item in workflow.outputs]
    client.repo.git.add(*produced)

    if client.repo.is_dirty():
        message = "renku update: committing {} newly added files".format(len(produced))
        author = Actor("renku {0}".format(__version__), version_url)
        client.repo.index.commit(message, committer=author, skip_hooks=True)

    run_file_name = "{0}_update.yaml".format(uuid.uuid4().hex)
    run_file = client.workflow_path / run_file_name

    workflow.update_id_and_label_from_commit_path(
        client, client.repo.head.commit, run_file
    )

    with with_reference(run_file):
        # A workflow with subprocesses is stored as a WorkflowRun,
        # anything else as a ProcessRun.
        if workflow.subprocesses:
            run_cls = WorkflowRun
        else:
            run_cls = ProcessRun
        run = run_cls.from_run(
            run=workflow, client=client, path=run_file, update_commits=True
        )
        run.to_yaml()
        client.add_to_activity_index(run)
Пример #2
0
def update(client, revision, no_output, siblings, paths):
    """Rerun the workflows behind outdated files through a CWL description.

    A ``Graph`` is built for ``client`` and reduced to the nodes that
    ``graph.need_update`` reports as stale.  When nothing is stale a message
    is printed and the process exits with status 0.  Otherwise the stale
    nodes are passed through ``siblings``, converted with ``graph.ascwl``,
    dumped as YAML into a new ``<uuid>.cwl`` file under
    ``client.workflow_path`` and handed to ``execute``.

    :param client: project client giving access to the workflow directory
        and the external storage helpers.
    :param revision: forwarded to ``graph.build``.
    :param no_output: forwarded to ``graph.build`` as ``can_be_cwl``.
    :param siblings: callable invoked as ``siblings(graph, outputs)``; its
        return value replaces the set of outputs.
    :param paths: forwarded to ``graph.build``.
    """
    graph = Graph(client)
    built_nodes = graph.build(
        revision=revision, can_be_cwl=no_output, paths=paths
    )
    stale_nodes = {node for node in built_nodes if graph.need_update(node)}

    if not stale_nodes:
        click.secho(
            'All files were generated from the latest inputs.', fg='green'
        )
        sys.exit(0)

    # Let the caller-supplied strategy check or extend the sibling outputs.
    stale_nodes = siblings(graph, stale_nodes)
    output_paths = set()
    for node in stale_nodes:
        if _safe_path(node.path):
            output_paths.add(node.path)

    # Every other path known to the graph is treated as a clean input.
    every_path = {node.path for node in graph.nodes}
    input_paths = every_path - output_paths

    # The serialiser is imported only once there is something to write.
    import yaml

    cwl_name = '{0}.cwl'.format(uuid.uuid4().hex)
    output_file = client.workflow_path / cwl_name
    workflow = graph.ascwl(
        input_paths=input_paths,
        output_paths=output_paths,
        outputs=stale_nodes,
    )

    # Inputs are pulled only when external storage is enabled.
    if client.has_external_storage:
        input_files = workflow.iter_input_files(client.workflow_path)
        client.pull_paths_from_storage(
            *(file_path for _, file_path in input_files)
        )

    # The file is opened before serialising, as in the original flow.
    with output_file.open('w') as stream:
        document = ascwl(
            workflow,
            filter=lambda _, value: value is not None,
            basedir=client.workflow_path,
        )
        serialised = yaml.dump(document, default_flow_style=False)
        stream.write(serialised)

    execute(client, output_file, output_paths=output_paths)
Пример #3
0
    def watch(self, client, no_output=False):
        """Yield the generated tool, then detect the outputs of the run.

        Generator body: a tool is created with ``self.generate_tool()`` and
        passed to the ``pre_run`` plugin hook before being yielded.  After
        the generator is resumed, and only if ``client.repo`` is truthy,
        untracked and modified files are collected as output candidates,
        matched through ``self.guess_outputs`` and
        ``self.find_explicit_outputs`` and stored on the tool together with
        the resulting inputs.  Requirements and plugin annotations are
        attached to the tool at the end in every case.

        :param client: object providing ``repo``, ``path``,
            ``has_external_storage`` and ``track_paths_in_storage``.
        :param no_output: when true, finding no output paths is not an error.
        :raises RuntimeError: if a detected output refers to an input id
            that is not among ``self.inputs``.
        :raises errors.UnmodifiedOutputs: if a stdout/stderr target is
            neither a candidate nor an explicit output.
        :raises errors.OutputsNotFound: if no paths were detected and
            ``no_output`` is false.
        """
        tool = self.generate_tool()
        repo = client.repo

        # Drop any list of indirect files before the run starts.
        self.delete_indirect_files_list()

        # NOTE consider to use git index instead
        existing_directories = set()
        for directory in client.path.glob('**/'):
            existing_directories.add(str(directory.relative_to(client.path)))

        from renku.core.plugins.pluginmanager import get_plugin_manager
        plugin_manager = get_plugin_manager()
        plugin_manager.hook.pre_run(tool=tool)

        yield tool

        if repo:
            # Indirect inputs and outputs are merged in before detection,
            # then the list file is removed again.
            self.add_indirect_inputs()
            self.add_indirect_outputs()
            self.delete_indirect_files_list()

            # Paths of everything recognised as an output.
            detected_paths = []

            known_inputs = {item.id: item for item in self.inputs}
            detected_outputs = list(tool.outputs)

            # stdout/stderr targets that were not touched by the run.
            untouched_streams = set()

            # Newly created files plus modified (not deleted) tracked files.
            changed = set(repo.untracked_files)
            changed.update(
                diff.a_path for diff in repo.index.diff(None)
                if not diff.deleted_file
            )

            from renku.core.commands.graph import _safe_path
            candidates = {path for path in changed if _safe_path(path)}

            for output, matched, path in self.guess_outputs(candidates):
                detected_outputs.append(output)
                detected_paths.append(path)

                if matched is None:
                    continue
                if matched.id not in known_inputs:  # pragma: no cover
                    raise RuntimeError('Inconsistent input name.')
                known_inputs[matched.id] = matched

            for stream_name in ('stdout', 'stderr'):
                stream = getattr(self, stream_name)
                if not stream:
                    continue
                if (
                    stream in candidates or
                    Path(os.path.abspath(stream)) in self.explicit_outputs
                ):
                    detected_paths.append(stream)
                else:
                    untouched_streams.add(stream)

            if self.explicit_outputs:
                # Explicit outputs are numbered after those found so far.
                explicit = self.find_explicit_outputs(len(detected_outputs))
                for output, matched, path in explicit:
                    detected_outputs.append(output)
                    detected_paths.append(path)

                    if matched is None:
                        continue
                    if matched.id not in known_inputs:  # pragma: no cover
                        raise RuntimeError('Inconsistent input name.')
                    known_inputs[matched.id] = matched

            if untouched_streams:
                raise errors.UnmodifiedOutputs(repo, untouched_streams)

            if not (no_output or detected_paths):
                raise errors.OutputsNotFound(repo, known_inputs.values())

            if client.has_external_storage:
                client.track_paths_in_storage(*detected_paths)

            tool.inputs = list(known_inputs.values())
            tool.outputs = detected_outputs

        # Requirement detection can be done anytime.
        from .process_requirements import (
            InitialWorkDirRequirement,
            InlineJavascriptRequirement,
        )
        work_dir_requirement = InitialWorkDirRequirement.from_tool(
            tool,
            existing_directories=existing_directories,
            working_dir=self.working_dir,
        )
        if work_dir_requirement:
            tool.requirements.extend(
                [InlineJavascriptRequirement(), work_dir_requirement]
            )

        annotation_groups = plugin_manager.hook.cmdline_tool_annotations(
            tool=tool
        )
        tool.annotations = [
            annotation
            for group in annotation_groups
            for annotation in group
        ]
    def watch(self, client, no_output=False):
        """Watch a Renku repository for changes to detect outputs.

        Generator body: the ``pre_run`` plugin hook is called and ``self`` is
        yielded once.  After the generator is resumed, and only if
        ``client.repo`` is truthy, untracked and modified files are collected
        as output candidates, matched through ``self.guess_outputs`` and
        ``self.find_explicit_outputs``, staged and (if the repository is
        dirty) committed.  The resulting inputs and outputs are stored back
        on ``self``; plugin annotations are attached at the end in every
        case.

        :param client: object providing ``repo``, ``path``,
            ``check_external_storage``, ``track_paths_in_storage`` and
            ``get_value``.
        :param no_output: when true, finding no output paths is not an error.
        :raises RuntimeError: if a detected output refers to an input id
            that is not among ``self.inputs``.
        :raises errors.UnmodifiedOutputs: if a stdout/stderr target is
            neither a candidate nor an explicit output.
        :raises errors.OutputsNotFound: if no paths were detected and
            ``no_output`` is false.
        """
        client.check_external_storage()

        repo = client.repo

        # Remove indirect files list if any
        self.delete_indirect_files_list()

        from renku.core.plugins.pluginmanager import get_plugin_manager
        pm = get_plugin_manager()
        pm.hook.pre_run(tool=self)
        # Snapshot of the directories that exist before the yield.
        self.existing_directories = {
            str(p.relative_to(client.path))
            for p in client.path.glob('**/')
        }

        yield self

        if repo:
            # Include indirect inputs and outputs before further processing
            self.add_indirect_inputs()
            self.add_indirect_outputs()
            # Remove indirect files list if any
            self.delete_indirect_files_list()

            # List of all output paths.
            paths = []

            inputs = {input.id: input for input in self.inputs}
            outputs = list(self.outputs)

            # Keep track of unmodified output files.
            unmodified = set()

            # Calculate possible output paths.
            # Capture newly created files through redirects.
            candidates = {file_ for file_ in repo.untracked_files}

            # Capture modified files through redirects.
            candidates |= {
                o.a_path
                for o in repo.index.diff(None) if not o.deleted_file
            }

            from renku.core.commands.graph import _safe_path
            candidates = {path for path in candidates if _safe_path(path)}

            for output, input, path in self.guess_outputs(candidates):
                outputs.append(output)
                paths.append(path)

                if input is not None:
                    if input.id not in inputs:  # pragma: no cover
                        raise RuntimeError('Inconsistent input name.')

                    inputs[input.id] = input

            for stream_name in ('stdout', 'stderr'):
                stream = getattr(self, stream_name)
                if (
                    stream and stream not in candidates and
                    Path(os.path.abspath(stream)) not in self.explicit_outputs
                ):
                    unmodified.add(stream)
                elif stream:
                    paths.append(stream)

            if self.explicit_outputs:
                last_output_id = len(outputs)

                for output, input, path in self.find_explicit_outputs(
                    last_output_id
                ):
                    outputs.append(output)
                    paths.append(path)

                    if input is not None:
                        if input.id not in inputs:  # pragma: no cover
                            raise RuntimeError('Inconsistent input name.')

                        inputs[input.id] = input
                    # remove outputs covered by explicit outputs
                    output_glob = output.outputBinding.glob
                    # A distinct loop name keeps the unpacked ``input`` of
                    # the enclosing loop from being rebound here.
                    for input_id, candidate_input in inputs.items():
                        if (
                            candidate_input.type == 'string' and
                            candidate_input.default.startswith(output_glob)
                        ):
                            input_glob = '$(inputs.{})'.format(input_id)
                            # The ``None`` default matters: without it a
                            # missing match raises StopIteration, which a
                            # generator function turns into RuntimeError.
                            existing_output = next(
                                (
                                    o for o in outputs
                                    if o.outputBinding.glob == input_glob
                                ),
                                None,
                            )
                            if existing_output:
                                outputs.remove(existing_output)

            if unmodified:
                raise errors.UnmodifiedOutputs(repo, unmodified)

            if not no_output and not paths:
                raise errors.OutputsNotFound(repo, inputs.values())

            if client.check_external_storage():
                lfs_paths = client.track_paths_in_storage(*paths)

                # The notice is shown unless the config value is set to
                # something other than 'True'.
                show_message = client.get_value('renku', 'show_lfs_message')
                if (
                    lfs_paths and
                    (show_message is None or show_message == 'True')
                ):
                    self.messages = (
                        INFO + 'Adding these files to Git LFS:\n' +
                        '\t{}'.format('\n\t'.join(lfs_paths)) +
                        '\nTo disable this message in the future, run:' +
                        '\n\trenku config show_lfs_message False'
                    )

            repo.git.add(*paths)

            if repo.is_dirty():
                commit_msg = ('renku run: '
                              'committing {} newly added files').format(
                                  len(paths)
                              )

                committer = Actor('renku {0}'.format(__version__), version_url)

                repo.index.commit(
                    commit_msg,
                    committer=committer,
                    skip_hooks=True,
                )

                # Records that this run produced a commit.
                self._had_changes = True

            self.inputs = list(inputs.values())
            self.outputs = outputs

        results = pm.hook.cmdline_tool_annotations(tool=self)
        # Flatten the per-plugin result lists into one list of annotations.
        self.annotations = [a for r in results for a in r]
Пример #5
0
    def watch(self, client, no_output=False):
        """Yield ``self``, then detect, stage and commit the run's outputs.

        Generator body: the ``pre_run`` plugin hook is called and ``self`` is
        yielded once.  After the generator is resumed, and only if
        ``client.repo`` is truthy, output candidates are gathered.  Unless
        ``self.no_output_detection`` is set these are the untracked and
        modified files; explicit outputs are always included.  Candidates are
        matched through ``self.guess_outputs``, staged and (if the repository
        is dirty) committed.  The resulting inputs and outputs are stored
        back on ``self``; plugin annotations are attached at the end in every
        case.

        :param client: object providing ``repo``, ``path``,
            ``check_external_storage``, ``track_paths_in_storage`` and
            ``get_value``.
        :param no_output: when true, finding no output paths is not an error.
        :raises RuntimeError: if a detected output refers to an input id
            that is not among ``self.inputs``.
        :raises errors.UnmodifiedOutputs: if a stdout/stderr target is
            neither a candidate nor an explicit output.
        :raises errors.OutputsNotFound: if no paths were detected and
            ``no_output`` is false.
        """
        client.check_external_storage()

        repo = client.repo

        # Drop any list of indirect files before the run starts.
        self.delete_indirect_files_list()

        from renku.core.plugins.pluginmanager import get_plugin_manager

        plugin_manager = get_plugin_manager()
        plugin_manager.hook.pre_run(tool=self)
        # Snapshot of the directories that exist before the yield.
        self.existing_directories = {
            str(directory.relative_to(client.path))
            for directory in client.path.glob("**/")
        }

        yield self

        if repo:
            # Indirect inputs and outputs are merged in before detection,
            # then the list file is removed again.
            self.add_indirect_inputs()
            self.add_indirect_outputs()
            self.delete_indirect_files_list()

            # Paths of everything recognised as an output.
            output_paths = []

            known_inputs = {item.id: item for item in self.inputs}
            detected_outputs = list(self.outputs)

            # stdout/stderr targets that were not touched by the run.
            untouched_streams = set()

            candidates = set()

            if not self.no_output_detection:
                # Newly created files.
                candidates.update(repo.untracked_files)

                # Modified (not deleted) tracked files.
                candidates.update(
                    change.a_path for change in repo.index.diff(None)
                    if not change.deleted_file
                )

            # Explicit outputs always take part, relative to the working dir.
            candidates.update(
                str(explicit.relative_to(self.working_dir))
                for explicit in self.explicit_outputs
            )

            from renku.core.commands.graph import _safe_path

            candidates = {
                candidate for candidate in candidates if _safe_path(candidate)
            }

            for output, matched, path in self.guess_outputs(candidates):
                detected_outputs.append(output)
                output_paths.append(path)

                if matched is None:
                    continue
                if matched.id not in known_inputs:  # pragma: no cover
                    raise RuntimeError("Inconsistent input name.")
                known_inputs[matched.id] = matched

            for stream_name in ("stdout", "stderr"):
                stream = getattr(self, stream_name)
                if not stream:
                    continue
                if (
                    stream in candidates or
                    Path(os.path.abspath(stream)) in self.explicit_outputs
                ):
                    output_paths.append(stream)
                else:
                    untouched_streams.add(stream)

            if untouched_streams:
                raise errors.UnmodifiedOutputs(repo, untouched_streams)

            if not (no_output or output_paths):
                raise errors.OutputsNotFound(repo, known_inputs.values())

            if client.check_external_storage():
                lfs_paths = client.track_paths_in_storage(*output_paths)

                # The notice is shown unless the config value is set to
                # something other than "True".
                show_message = client.get_value("renku", "show_lfs_message")
                message_enabled = (
                    show_message is None or show_message == "True"
                )
                if lfs_paths and message_enabled:
                    listing = "\t{}".format("\n\t".join(lfs_paths))
                    self.messages = (
                        INFO + "Adding these files to Git LFS:\n" +
                        listing +
                        "\nTo disable this message in the future, run:" +
                        "\n\trenku config show_lfs_message False"
                    )

            repo.git.add(*output_paths)

            if repo.is_dirty():
                commit_msg = (
                    "renku run: committing {} newly added files"
                ).format(len(output_paths))
                committer = Actor("renku {0}".format(__version__), version_url)

                repo.index.commit(
                    commit_msg, committer=committer, skip_hooks=True
                )

                # Records that this run produced a commit.
                self._had_changes = True

            self.inputs = list(known_inputs.values())
            self.outputs = detected_outputs

        annotation_groups = plugin_manager.hook.cmdline_tool_annotations(
            tool=self
        )
        # Flatten the per-plugin result lists into one list of annotations.
        self.annotations = [
            annotation
            for group in annotation_groups
            for annotation in group
        ]