Пример #1
0
def inputs(ctx, client, revision, paths):
    r"""Show inputs files in the repository.

    <PATHS>    Files to show. If no files are given all input files are shown.
    """
    from renku.core.models.provenance.activities import ProcessRun

    graph = Graph(client)
    requested = set(paths)
    nodes = graph.build(revision=revision)

    commits = {node.commit for node in nodes}
    # (commit, path) pairs that may be reported; every node qualifies when
    # no explicit paths were requested.
    candidates = {
        (node.commit, node.path)
        for node in nodes
        if not requested or node.path in requested
    }

    input_paths = set()

    for commit in commits:
        activity = graph.activities[commit]
        if not isinstance(activity, ProcessRun):
            continue

        for usage in activity.qualified_usage:
            for entity in usage.entity.entities:
                # Re-express the entity path relative to this client's root.
                absolute = usage.client.path / entity.path
                relative = str(absolute.relative_to(client.path))

                if (entity.commit, entity.path) in candidates:
                    input_paths.add(relative)

    listing = '\n'.join(graph._format_path(path) for path in input_paths)
    click.echo(listing)

    # Exit code 0 when nothing was requested or when the number of inputs
    # found equals the number of requested paths; 1 otherwise.
    matched = not requested or len(input_paths) == len(requested)
    ctx.exit(0 if matched else 1)
Пример #2
0
def siblings(client, revision, paths):
    """Show siblings for given paths."""
    graph = Graph(client)
    nodes = graph.build(paths=paths, revision=revision)

    # Start from the built nodes themselves and merge in the siblings the
    # graph reports for each of them.
    seeds = set(nodes)
    related = seeds.union(*(graph.siblings(node) for node in nodes))

    # Collapse to unique paths before printing, one per line.
    for path in {member.path for member in related}:
        click.echo(graph._format_path(path))
Пример #3
0
def create(client, output_file, revision, paths):
    """Create a workflow description for a file."""
    graph = Graph(client)
    outputs = graph.build(paths=paths, revision=revision)
    workflow = graph.ascwl(outputs=outputs)

    def keep(_, value):
        # Predicate handed to ``ascwl``: true for values that are neither
        # None nor an empty list.
        return value is not None and value != []

    # Directory of the target file; objects without a ``name`` attribute
    # and names without a directory part both resolve to '.'.
    basedir = os.path.dirname(getattr(output_file, 'name', '.')) or '.'

    document = yaml.dump(
        ascwl(workflow, filter=keep, basedir=basedir),
        default_flow_style=False,
    )
    output_file.write(document)
Пример #4
0
def rerun(client, revision, roots, siblings, inputs, paths):
    """Recreate files generated by a sequence of ``run`` commands."""
    graph = Graph(client)
    selected = graph.build(paths=paths, revision=revision)

    # ``siblings`` is a caller-supplied callable that checks or extends the
    # selected outputs.
    outputs = siblings(graph, selected)
    output_paths = {node.path for node in outputs}

    # Starting points must be normalized and must not overlap the outputs.
    roots = {graph.normalize_path(root) for root in roots}
    assert not roots & output_paths, '--from colides with output paths'

    # ``inputs`` is a caller-supplied callable applied to the generated
    # workflow.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.ascwl(
            input_paths=roots, output_paths=output_paths, outputs=outputs
        ),
    )

    # Only enumerate input files when external storage is enabled.
    if client.has_external_storage:
        input_files = (
            location
            for _, location in workflow.iter_input_files(client.workflow_path)
        )
        client.pull_paths_from_storage(*input_files)

    import yaml

    # Persist the workflow under a random name in the workflow directory.
    output_file = client.workflow_path / (uuid.uuid4().hex + '.cwl')
    with output_file.open('w') as stream:
        serialized = ascwl(
            workflow,
            filter=lambda _, x: x is not None,
            basedir=client.workflow_path,
        )
        stream.write(yaml.dump(serialized, default_flow_style=False))

    # Execute the workflow and relocate all output files.
    # FIXME get new output paths for edited tools
    # output_paths = {path for _, path in workflow.iter_output_files()}
    execute(client, output_file, output_paths=output_paths)
Пример #5
0
def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow."""
    graph = Graph(client)
    built = graph.build(revision=revision, can_be_cwl=no_output, paths=paths)
    stale = {node for node in built if graph.need_update(node)}
    if not stale:
        click.secho("All files were generated from the latest inputs.", fg="green")
        sys.exit(0)

    # ``siblings`` is a caller-supplied callable that checks or extends the
    # set of outputs.
    outputs = siblings(graph, stale)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Every graph node path that is not an output is treated as an input.
    input_paths = {node.path for node in graph.nodes}.difference(output_paths)

    workflow = graph.as_workflow(
        input_paths=input_paths, output_paths=output_paths, outputs=outputs
    )

    _, cwl_path = CWLConverter.convert(workflow, client)

    # Only enumerate input paths when the external storage check passes.
    if client.check_external_storage():
        consumed = (item.consumes.path for item in workflow.inputs)
        client.pull_paths_from_storage(*consumed)

    execute(client, cwl_path, output_paths=output_paths)

    # Stage everything the workflow declares as produced.
    generated = [item.produces.path for item in workflow.outputs]
    client.repo.git.add(*generated)

    if client.repo.is_dirty():
        committer = Actor("renku {0}".format(__version__), version_url)
        commit_msg = "renku update: committing {} newly added files".format(
            len(generated)
        )
        client.repo.index.commit(commit_msg, committer=committer, skip_hooks=True)

    run_path = client.workflow_path / "{0}_update.yaml".format(uuid.uuid4().hex)

    workflow.update_id_and_label_from_commit_path(
        client, client.repo.head.commit, run_path
    )

    with with_reference(run_path):
        # Workflows with subprocesses are recorded as WorkflowRun, the rest
        # as ProcessRun.
        if workflow.subprocesses:
            run_cls = WorkflowRun
        else:
            run_cls = ProcessRun
        run = run_cls.from_run(
            run=workflow, client=client, path=run_path, update_commits=True
        )
        run.to_yaml()
        client.add_to_activity_index(run)
Пример #6
0
def create(client, output_file, revision, paths):
    """Create a workflow description for a file."""
    graph = Graph(client)
    workflow = graph.as_workflow(
        outputs=graph.build(paths=paths, revision=revision),
    )

    # A falsy ``output_file`` is passed through unchanged; anything else is
    # wrapped in a Path.
    target = Path(output_file) if output_file else output_file

    converted, _ = CWLConverter.convert(workflow, client, path=target)

    # Without a target file the exported workflow is printed instead.
    if not target:
        click.echo(converted.export_string())
Пример #7
0
def update(client, revision, no_output, siblings, paths):
    """Update existing files by rerunning their outdated workflow."""
    graph = Graph(client)
    built = graph.build(revision=revision, can_be_cwl=no_output, paths=paths)
    stale = {node for node in built if graph.need_update(node)}

    if not stale:
        click.secho(
            'All files were generated from the latest inputs.', fg='green'
        )
        sys.exit(0)

    # ``siblings`` is a caller-supplied callable that checks or extends the
    # set of outputs.
    outputs = siblings(graph, stale)
    output_paths = {node.path for node in outputs if _safe_path(node.path)}

    # Every graph node path that is not an output is treated as an input.
    input_paths = {node.path for node in graph.nodes}.difference(output_paths)

    import yaml

    # The generated workflow is stored under a random name.
    output_file = client.workflow_path / (uuid.uuid4().hex + '.cwl')
    workflow = graph.ascwl(
        input_paths=input_paths, output_paths=output_paths, outputs=outputs
    )

    # Only enumerate input files when external storage is enabled.
    if client.has_external_storage:
        input_files = (
            location
            for _, location in workflow.iter_input_files(client.workflow_path)
        )
        client.pull_paths_from_storage(*input_files)

    with output_file.open('w') as stream:
        serialized = ascwl(
            workflow,
            filter=lambda _, x: x is not None,
            basedir=client.workflow_path,
        )
        stream.write(yaml.dump(serialized, default_flow_style=False))

    execute(client, output_file, output_paths=output_paths)
Пример #8
0
def log(client, revision, format, no_output, strict, paths):
    """Show logs for a file."""
    graph = Graph(client)

    if not paths:
        # No explicit paths: use the files changed by the commit that the
        # revision (or the end of the revision range) resolves to.
        start, separator, stop = revision.partition('..')
        # "A" -> A, "A.." -> HEAD, "A..B" -> B
        target = (stop or 'HEAD') if separator else start

        commit = client.repo.rev_parse(target)
        changes = commit.diff(commit.parents or NULL_TREE)
        # NOTE: entries for deleted files are not filtered out.
        paths = (str(client.path / change.a_path) for change in changes)

    # NOTE shall we warn when "not no_output and not paths"?
    graph.build(paths=paths, revision=revision, can_be_cwl=no_output)

    render = FORMATS[format]
    render(graph, strict=strict)
Пример #9
0
def _graph(client, revision, paths):
    """Return the graph object for the given revision and paths.

    When ``PG_AVAILABLE`` is set, the provenance graph stored at
    ``client.provenance_graph_path`` is loaded and returned; otherwise a
    graph is built from ``paths``/``revision`` and converted with
    ``_conjunctive_graph``.
    """
    if not PG_AVAILABLE:
        renku_graph = Graph(client)
        renku_graph.build(paths=paths, revision=revision)
        cg = _conjunctive_graph(renku_graph)

        # Register namespace prefixes in a fixed order.
        prefixes = (
            ("mls", "http://www.w3.org/ns/mls#"),
            ("prov", "http://www.w3.org/ns/prov#"),
            ("oa", "http://www.w3.org/ns/oa#"),
            ("schema", "http://schema.org/"),
            ("xsd", "http://www.w3.org/2001/XMLSchema#"),
        )
        for prefix, namespace in prefixes:
            cg.bind(prefix, namespace)
        return cg

    provenance_graph = ProvenanceGraph.from_json(client.provenance_graph_path)
    provenance_graph.custom_bindings = [
        ("mls", "http://www.w3.org/ns/mls#"),
        ("oa", "http://www.w3.org/ns/oa#"),
        ("xsd", "http://www.w3.org/2001/XMLSchema#"),
    ]
    return provenance_graph
Пример #10
0
def outputs(ctx, client, revision, paths):
    r"""Show output files in the repository.

    <PATHS>    Files to show. If no files are given all output files are shown.
    """
    graph = Graph(client)
    selected = graph.build(paths=paths, revision=revision)
    output_paths = graph.output_paths

    listing = '\n'.join(graph._format_path(path) for path in output_paths)
    click.echo(listing)

    # The exit-code checks below only apply when paths were given.
    if not paths:
        return

    if not output_paths:
        ctx.exit(1)

    from renku.core.models.datastructures import DirectoryTree
    tree = DirectoryTree.from_list(item.path for item in selected)

    # Exit with 1 as soon as one output path is missing from the tree
    # built from the selected nodes.
    if any(tree.get(output) is None for output in output_paths):
        ctx.exit(1)
Пример #11
0
def outputs(ctx, client, revision, verbose, paths):
    r"""Show output files in the repository.

    <PATHS>    Files to show. If no files are given all output files are shown.
    """
    graph = Graph(client)
    selected = graph.build(paths=paths, revision=revision)

    # Maps an output path to its Result record; later activities overwrite
    # earlier entries for the same path.
    output_paths = {}

    for activity in graph.activities.values():
        if not isinstance(activity, ProcessRun):
            continue

        for entity in activity.generated:
            if entity.path in graph.output_paths:
                output_paths[entity.path] = Result(
                    path=entity.path,
                    commit=entity.commit,
                    time=activity.ended_at_time,
                    workflow=activity.path,
                )

    if verbose:
        # Tabular output ordered by the first field of each record.
        records = sorted(output_paths.values(), key=lambda record: record[0])
        HEADERS["time"] = "generation time"
        click.echo(tabulate(collection=records, headers=HEADERS))
    else:
        click.echo("\n".join(graph._format_path(path) for path in output_paths))

    # The exit-code checks below only apply when paths were given.
    if not paths:
        return

    if not output_paths:
        ctx.exit(1)

    from renku.core.models.datastructures import DirectoryTree

    tree = DirectoryTree.from_list(item.path for item in selected)

    # Exit with 1 as soon as one output path is missing from the tree
    # built from the selected nodes.
    if any(tree.get(output) is None for output in output_paths):
        ctx.exit(1)
Пример #12
0
def inputs(ctx, client, revision, verbose, paths):
    r"""Show inputs files in the repository.

    <PATHS>    Files to show. If no files are given all input files are shown.
    """
    graph = Graph(client)
    requested = set(paths)
    nodes = graph.build(revision=revision)

    # Prefer the commit of a node's activity when the node has one.
    commits = {
        node.activity.commit if hasattr(node, "activity") else node.commit
        for node in nodes
    }
    # NOTE(review): as written this adds only values the comprehension above
    # already produced; kept to leave the resulting set untouched.
    commits |= {
        node.activity.commit for node in nodes if hasattr(node, "activity")
    }

    # (commit, path) pairs that may be reported; every node qualifies when
    # no explicit paths were requested.
    candidates = {
        (node.commit, node.path)
        for node in nodes
        if not requested or node.path in requested
    }

    # Maps a relative input path to its Result record; the first match wins.
    input_paths = {}

    for commit in commits:
        activity = graph.activities.get(commit)
        if not activity or not isinstance(activity, ProcessRun):
            continue

        for usage in activity.qualified_usage:
            for entity in usage.entity.entities:
                # Re-express the entity path relative to this client's root.
                absolute = usage.client.path / entity.path
                relative = str(absolute.relative_to(client.path))

                if relative in input_paths:
                    continue
                if (entity.commit, entity.path) not in candidates:
                    continue

                input_paths[relative] = Result(
                    path=relative,
                    commit=entity.commit,
                    time=activity.started_at_time,
                    workflow=activity.path,
                )

    if verbose:
        # Tabular output ordered by the first field of each record.
        records = sorted(input_paths.values(), key=lambda record: record[0])
        HEADERS["time"] = "usage time"
        click.echo(tabulate(collection=records, headers=HEADERS))
    else:
        click.echo("\n".join(graph._format_path(path) for path in input_paths))

    # Exit code 0 when nothing was requested or when the number of inputs
    # found equals the number of requested paths; 1 otherwise.
    matched = not requested or len(input_paths) == len(requested)
    ctx.exit(0 if matched else 1)
Пример #13
0
def siblings(client, revision, flat, verbose, paths):
    """Show siblings for given paths."""
    graph = Graph(client)
    built = graph.build(paths=paths, revision=revision)
    # Drop entities that have no parent; everything else is kept.
    nodes = [
        node for node in built
        if not (isinstance(node, Entity) and not node.parent)
    ]

    # Every node starts in a group of its own.
    sibling_sets = {frozenset([node]) for node in set(nodes)}
    for node in nodes:
        try:
            sibling_sets.add(frozenset(graph.siblings(node)))
        except errors.InvalidOutputPath:
            # ignore nodes that aren't outputs if no path was supplied
            if not paths:
                sibling_sets.discard({node})
                continue
            raise

    # Merge overlapping groups. The group being placed grows while the
    # already merged groups are scanned, so the scan stays sequential.
    merged = []
    for group in sibling_sets:
        untouched = []
        for existing in merged:
            if group & existing:
                group = group | existing
            else:
                untouched.append(existing)
        untouched.append(group)
        merged = untouched

    named = [
        [sibling_name(graph, node, verbose) for node in group]
        for group in merged
    ]

    if flat:
        # One unique name per line, without group separators.
        text = '\n'.join({name for group in named for name in group})
    else:
        text = '\n---\n'.join('\n'.join(group) for group in named)
    click.echo(text)
Пример #14
0
def rerun(client, revision, roots, siblings, inputs, paths):
    """Recreate files generated by a sequence of ``run`` commands."""
    graph = Graph(client)
    selected = graph.build(paths=paths, revision=revision)

    # ``siblings`` is a caller-supplied callable that checks or extends the
    # selected outputs.
    outputs = siblings(graph, selected)
    output_paths = {node.path for node in outputs}

    # Starting points are normalized and then excluded from the outputs.
    roots = {graph.normalize_path(root) for root in roots}
    output_paths = output_paths - roots
    outputs = [node for node in outputs if node.path not in roots]

    # ``inputs`` is a caller-supplied callable applied to the generated
    # workflow.
    # NOTE The workflow creation is done before opening a new file.
    workflow = inputs(
        client,
        graph.as_workflow(
            input_paths=roots, output_paths=output_paths, outputs=outputs
        ),
    )

    _, cwl_path = CWLConverter.convert(workflow, client)

    # Only enumerate input paths when the external storage check passes.
    if client.check_external_storage():
        consumed = (item.consumes.path for item in workflow.inputs)
        client.pull_paths_from_storage(*consumed)

    # Execute the workflow and relocate all output files.
    # FIXME get new output paths for edited tools
    # output_paths = {path for _, path in workflow.iter_output_files()}
    execute(client, cwl_path, output_paths=output_paths)

    # Stage everything the workflow declares as produced.
    generated = [item.produces.path for item in workflow.outputs]
    client.repo.git.add(*generated)

    if client.repo.is_dirty():
        committer = Actor('renku {0}'.format(__version__), version_url)
        commit_msg = 'renku rerun: committing {} newly added files'.format(
            len(generated)
        )
        client.repo.index.commit(
            commit_msg, committer=committer, skip_hooks=True
        )

    run_path = client.workflow_path / '{0}_rerun.yaml'.format(uuid.uuid4().hex)

    workflow.update_id_and_label_from_commit_path(
        client, client.repo.head.commit, run_path
    )

    with with_reference(run_path):
        run = WorkflowRun.from_run(workflow, client, run_path)
        run.to_yaml()
        client.add_to_activity_index(run)
Пример #15
0
def status(ctx, client, revision, no_output, path):
    """Show a status of the repository."""
    graph = Graph(client)
    # TODO filter only paths = {graph.normalize_path(p) for p in path}
    report = graph.build_status(revision=revision, can_be_cwl=no_output)

    def styled(filepath, colour):
        # Formatted path rendered in bold with the given foreground colour.
        return click.style(graph._format_path(filepath), fg=colour, bold=True)

    if client.has_external_files():
        click.echo(
            'Changes in external files are not detected automatically. To '
            'update external files run "renku dataset update -e".')

    try:
        click.echo('On branch {0}'.format(client.repo.active_branch))
    except TypeError:
        # A TypeError here is reported to the user as a detached HEAD.
        click.echo('Git HEAD is detached!\n'
                   ' Please move back to your working branch to use renku\n')

    outdated = report['outdated']
    if not outdated:
        click.secho('All files were generated from the latest inputs.',
                    fg='green')
    else:
        click.echo('Files generated from newer inputs:\n'
                   '  (use "renku log [<file>...]" to see the full lineage)\n'
                   '  (use "renku update [<file>...]" to '
                   'generate the file from its latest inputs)\n')

        for filepath, sources in sorted(outdated.items()):
            # List only sources that have a path and are not themselves
            # outdated.
            described = [
                '{0}#{1}'.format(
                    styled(node.path, 'blue'), _format_sha1(graph, node)
                )
                for node in sources
                if node.path and node.path not in outdated
            ]
            click.echo('\t{0}: {1}'.format(
                styled(filepath, 'red'), ', '.join(described)))

        click.echo()

    multiple_versions = report['multiple-versions']
    if multiple_versions:
        click.echo(
            'Input files used in different versions:\n'
            '  (use "renku log --revision <sha1> <file>" to see a lineage '
            'for the given revision)\n')

        for filepath, versions in sorted(multiple_versions.items()):
            # Do not show duplicated commits!  (see #387)
            unique_shas = {_format_sha1(graph, key) for key in versions}
            # Sort the commit hashes alphanumerically to have a
            # predictable output.
            sha_list = ', '.join(sorted(unique_shas))
            click.echo('\t{0}: {1}'.format(
                styled(filepath, 'blue'), sha_list))

        click.echo()

    deleted = report['deleted']
    if deleted:
        click.echo('Deleted files used to generate outputs:\n'
                   '  (use "git show <sha1>:<file>" to see the file content '
                   'for the given revision)\n')

        for filepath, node in deleted.items():
            click.echo('\t{0}: {1}'.format(
                styled(filepath, 'blue'), _format_sha1(graph, node)))

        click.echo()

    # Exit code 1 signals that at least one file is outdated.
    ctx.exit(1 if outdated else 0)