Exemplo n.º 1
0
class CommitMixin:
    """Mixin holding the commit, client and path of a repository object.

    The JSON-LD identifier, label and location get their default values
    from the methods decorated with ``@<attribute>.default`` below.
    """

    # Plain attributes; every one of them is keyword-only.
    commit = attr.ib(kw_only=True)
    client = attr.ib(kw_only=True)
    submodules = attr.ib(kw_only=True, default=attr.Factory(list))
    path = attr.ib(kw_only=True, default=None)

    # JSON-LD attributes; ``_location`` is not accepted by ``__init__``.
    _id = jsonld.ib(kw_only=True, context='@id')
    _label = jsonld.ib(kw_only=True, context='rdfs:label')
    _location = jsonld.ib(
        kw_only=True,
        init=False,
        context='prov:atLocation',
    )

    @_id.default
    def default_id(self):
        """Build the identifier from the commit hash and the path."""
        template = 'blob/{self.commit.hexsha}/{self.path}'
        return template.format(self=self)

    @_label.default
    def default_label(self):
        """Build the label as ``<path>@<commit hash>``."""
        template = '{self.path}@{self.commit.hexsha}'
        return template.format(self=self)

    @_location.default
    def default_location(self):
        """Use the project of the client as the location."""
        client = self.client
        return client.project
Exemplo n.º 2
0
class Person:
    """Represent a person identified by an e-mail address."""

    name = jsonld.ib(context='rdfs:label')
    email = jsonld.ib(
        context={
            '@type': '@id',
            '@id': 'schema:email',
        },
    )

    # Not accepted by ``__init__``; always derived from ``email``.
    _id = jsonld.ib(kw_only=True, init=False, context='@id')

    @_id.default
    def default_id(self):
        """Use the e-mail value as the identifier."""
        return self.email

    @email.validator
    def check_email(self, attribute, value):
        """Reject values that are not strings of the form ``x@y``.

        :raises ValueError: if ``value`` is not a string or does not match
            the ``[^@]+@[^@]+`` pattern at its start.
        """
        looks_valid = isinstance(value, str) and re.match(
            r'[^@]+@[^@]+', value
        )
        if looks_valid:
            return
        raise ValueError('Email address "{0}" is invalid.'.format(value))

    @classmethod
    def from_commit(cls, commit):
        """Build a person from the author of a Git commit.

        The e-mail is stored with a ``mailto:`` prefix.
        """
        author = commit.author
        return cls(
            name=author.name,
            email='mailto:{0}'.format(author.email),
        )
Exemplo n.º 3
0
class Generation(EntityProxyMixin):
    """Describe how an activity generated an entity."""

    entity = jsonld.ib(
        context={
            '@reverse': 'prov:qualifiedGeneration',
        },
    )
    role = jsonld.ib(default=None, context='prov:hadRole')

    # The activity is kept as a weak reference (or ``None`` when absent).
    _activity = attr.ib(
        kw_only=True,
        default=None,
        converter=lambda value: None
        if value is None else weakref.ref(value),
    )
    _id = jsonld.ib(kw_only=True, context='@id')

    @property
    def activity(self):
        """Dereference the stored weak reference to the activity.

        Returns ``None`` when no activity was given.
        """
        reference = self._activity
        if reference is None:
            return None
        return reference()

    @_id.default
    def default_id(self):
        """Derive the identifier from the activity identifier.

        A truthy role gives ``<activity id>/outputs/<role>``; otherwise the
        entity path is used: ``<activity id>/tree/<entity path>``.
        """
        if self.role:
            template = '{self.activity._id}/outputs/{self.role}'
        else:
            template = '{self.activity._id}/tree/{self.entity.path}'
        return template.format(self=self)
Exemplo n.º 4
0
class Association:
    """Link an activity to its plan and to the responsible agent."""

    plan = jsonld.ib(context='prov:hadPlan')
    agent = jsonld.ib(default=None, context='prov:agent')

    _id = jsonld.ib(kw_only=True, context='@id')

    @classmethod
    def from_activity(cls, activity):
        """Build the association for the given activity.

        The agent comes from the activity commit, the plan is an instance
        of ``activity.__association_cls__`` and the identifier is the
        activity identifier followed by ``/association``.
        """
        from .agents import SoftwareAgent

        agent = SoftwareAgent.from_commit(activity.commit)
        plan = activity.__association_cls__(
            commit=activity.commit,
            client=activity.client,
            submodules=activity.submodules,
            path=activity.path,
            activity=activity,
        )
        # add plan and agent
        return cls(plan=plan, agent=agent, id=activity._id + '/association')
Exemplo n.º 5
0
class Usage(EntityProxyMixin):
    """Describe the use of an entity (a path the activity depends on)."""

    entity = jsonld.ib(kw_only=True, context='prov:entity')
    role = jsonld.ib(kw_only=True, default=None, context='prov:hadRole')

    _id = jsonld.ib(kw_only=True, default=None, context='@id')

    @classmethod
    def from_revision(cls,
                      client,
                      path,
                      submodules=None,
                      revision='HEAD',
                      **kwargs):
        """Create a usage of ``path`` as of ``revision``.

        The entity commit is looked up with
        ``client.find_previous_commit``; remaining keyword arguments are
        forwarded to the constructor.
        """
        from .entities import Entity

        previous_commit = client.find_previous_commit(path, revision=revision)
        entity = Entity(
            client=client,
            commit=previous_commit,
            submodules=submodules or [],
            path=path,
        )
        return cls(entity=entity, **kwargs)

    @property
    def parents(self):
        """Return parent nodes (currently always an empty list)."""
        # TODO connect files to an input directory
        return []  # pragma: no cover
Exemplo n.º 6
0
class CommitMixin:
    """Mixin holding the commit, client and path of a repository object.

    Both ``commit`` and ``client`` are optional; a missing commit is
    rendered as ``UNCOMMITTED`` in the identifier and the label.
    """

    commit = attr.ib(kw_only=True, default=None)
    client = attr.ib(kw_only=True, default=None)
    path = jsonld.ib(
        kw_only=True,
        default=None,
        converter=_str_or_none,
        context='prov:atLocation',
    )

    _id = jsonld.ib(kw_only=True, context='@id')
    _label = jsonld.ib(kw_only=True, context='rdfs:label')
    _project = jsonld.ib(kw_only=True, context='schema:isPartOf')

    @property
    def submodules(self):
        """Return the submodules of the client."""
        return self.client.submodules

    @_id.default
    def default_id(self):
        """Build the identifier as ``blob/<hexsha>/<path>``."""
        hexsha = self.commit.hexsha if self.commit else 'UNCOMMITTED'
        return 'blob/{hexsha}/{self.path}'.format(hexsha=hexsha, self=self)

    @_label.default
    def default_label(self):
        """Build the label: ``<path>@<hexsha>``, or only the hexsha."""
        hexsha = self.commit.hexsha if self.commit else 'UNCOMMITTED'
        if not self.path:
            return '{hexsha}'.format(hexsha=hexsha, self=self)
        return '{self.path}@{hexsha}'.format(hexsha=hexsha, self=self)

    @_project.default
    def default_project(self):
        """Return the client project, or ``None`` without a client."""
        if not self.client:
            return None
        return self.client.project

    def __attrs_post_init__(self):
        """Rewrite an absolute path relative to the client path."""
        if not self.path:
            return

        candidate = Path(self.path)
        if candidate.is_absolute():
            self.path = str(candidate.relative_to(self.client.path))
Exemplo n.º 7
0
class Usage(EntityProxyMixin):
    """Describe the use of an entity (a path the activity depends on)."""

    entity = jsonld.ib(kw_only=True, context='prov:entity')
    role = jsonld.ib(kw_only=True, default=None, context='prov:hadRole')

    _id = jsonld.ib(kw_only=True, default=None, context='@id')

    @classmethod
    def from_revision(cls, client, path, revision='HEAD', **kwargs):
        """Create a usage of ``path`` as of ``revision``.

        The entity is built by ``Entity.from_revision``; remaining keyword
        arguments are forwarded to the constructor.
        """
        from .entities import Entity

        entity = Entity.from_revision(client, path, revision)
        return cls(entity=entity, **kwargs)
Exemplo n.º 8
0
class Collection(Entity):
    """Represent a directory together with its member entities."""

    members = jsonld.ib(kw_only=True, context='prov:hadMember')

    @members.default
    def default_members(self):
        """Create one member per entry of the directory at ``path``.

        Sub-directories become ``Collection`` instances, everything else
        an ``Entity``; entries named ``.gitkeep`` are left out.
        """
        dir_path = self.client.path / self.path
        assert dir_path.is_dir()

        def _as_member(child):
            """Wrap a directory entry in the matching entity class."""
            member_cls = Collection if child.is_dir() else Entity
            return member_cls(
                commit=self.commit,
                client=self.client,
                path=str(child.relative_to(self.client.path)),
                parent=self,
            )

        return [
            _as_member(child) for child in dir_path.iterdir()
            # ignore empty directories in Git repository
            if child.name != '.gitkeep'
        ]

    @property
    def entities(self):
        """Yield the entities of every member, then this collection."""
        for child in self.members:
            yield from child.entities
        yield self
Exemplo n.º 9
0
class Workflow(Process):
    """Represent a workflow made of subprocesses."""

    subprocesses = jsonld.ib(kw_only=True, context='wfdesc:hasSubProcess')

    @subprocesses.default
    def default_subprocesses(self):
        """Collect the plan of every subprocess run of the activity."""
        runs = self.activity.subprocesses.values()
        return [run.association.plan for run in runs]
Exemplo n.º 10
0
class Process(CommitMixin):
    """Represent a process attached to an activity."""

    # The activity is stored as a weak reference.
    _activity = jsonld.ib(
        kw_only=True,
        converter=weakref.ref,
        context='prov:activity',
    )

    @property
    def activity(self):
        """Dereference the stored weak reference to the activity."""
        reference = self._activity
        return reference()
Exemplo n.º 11
0
class SoftwareAgent:
    """Represent a software agent, optionally started by a person."""

    label = jsonld.ib(kw_only=True, context='rdfs:label')
    was_started_by = jsonld.ib(
        kw_only=True,
        default=None,
        context='prov:wasStartedBy',
    )

    _id = jsonld.ib(kw_only=True, context='@id')

    @classmethod
    def from_commit(cls, commit):
        """Create the agent responsible for a Git commit.

        When author and committer differ, a ``SoftwareAgent`` labelled
        with the committer name and started by the author is returned;
        otherwise the author ``Person`` itself is returned.
        """
        author = Person.from_commit(commit)
        if commit.author != commit.committer:
            committer = commit.committer
            return cls(
                label=committer.name,
                id=committer.email,
                was_started_by=author,
            )
        return author
Exemplo n.º 12
0
class CommitMixin:
    """Mixin holding the commit, client and path of a repository object."""

    commit = attr.ib(kw_only=True, default=None)
    client = attr.ib(kw_only=True, default=None)
    path = jsonld.ib(
        kw_only=True,
        default=None,
        converter=_str_or_none,
        context='prov:atLocation',
    )

    _id = jsonld.ib(kw_only=True, context='@id')
    _label = jsonld.ib(kw_only=True, context='rdfs:label')
    _project = jsonld.ib(kw_only=True, context='dcterms:isPartOf')

    @property
    def submodules(self):
        """Return the submodules of the client."""
        return self.client.submodules

    @_id.default
    def default_id(self):
        """Build the identifier from the commit hash and the path."""
        template = 'blob/{self.commit.hexsha}/{self.path}'
        return template.format(self=self)

    @_label.default
    def default_label(self):
        """Build the label: ``<path>@<hexsha>``, or only the hexsha."""
        if self.path:
            template = '{self.path}@{self.commit.hexsha}'
        else:
            template = '{self.commit.hexsha}'
        return template.format(self=self)

    @_project.default
    def default_project(self):
        """Use the project of the client."""
        client = self.client
        return client.project
Exemplo n.º 13
0
class WorkflowRun(ProcessRun):
    """A workflow run typically contains several subprocesses.

    The defaults of ``children``, ``subprocesses``, ``outputs`` and
    ``generated`` are computed by the decorated methods below; several of
    them read each other's results through ``self``.
    """

    __association_cls__ = Workflow

    # @reverse wfprov:wasPartOfWorkflowRun

    # Mapping of step id to the loaded step definition.
    children = attr.ib(kw_only=True)

    # Runs created by ``default_subprocesses`` are appended to this list.
    _processes = jsonld.ib(
        context={
            '@reverse': 'wfprov:wasPartOfWorkflowRun',
        },
        default=attr.Factory(list),
        kw_only=True,
    )
    # Mapping of step id to the run created for that step.
    subprocesses = attr.ib(kw_only=True)

    outputs = attr.ib(kw_only=True)
    generated = jsonld.ib(
        context={
            '@reverse': 'prov:activity',
        },
        kw_only=True,
        hash=False,
    )

    @children.default
    def default_children(self):
        """Load the definition of every step of the process.

        :returns: dictionary mapping ``step.id`` to the step definition.
        """
        import yaml

        basedir = os.path.dirname(self.path) if self.path is not None else None

        def _load(step):
            """Return the definition referenced by ``step.run``."""
            # Already one of the accepted run types: use it as it is.
            if isinstance(step.run, WORKFLOW_STEP_RUN_TYPES):
                return step.run

            if self.commit:
                # Read the file content from the tree of the commit.
                data = (self.commit.tree / basedir /
                        step.run).data_stream.read()
            else:
                with step.run.open('r') as f:
                    data = f.read()
            # NOTE(review): ``yaml.load`` is called without an explicit
            # Loader; confirm that the data is trusted and that the
            # installed PyYAML version still accepts this call form.
            return CWLClass.from_cwl(yaml.load(data))

        return {step.id: _load(step) for step in self.process.steps}

    @subprocesses.default
    def default_subprocesses(self):
        """Create a run for every step of the process.

        Steps are visited in reversed ``topological_steps`` order. Every
        created run is stored in the returned dictionary under its step id
        and also appended to ``self._processes``.
        """
        basedir = os.path.dirname(self.path)
        revision = '{0}^'.format(self.commit)

        # Workflow inputs that are ``Usage`` instances, keyed by role.
        ins = {
            dependency.role: dependency
            for path, dependency in self.inputs.items()
            if isinstance(dependency, Usage)
        }

        # entities: path -> entity generated by an already processed step.
        # outs: '<step id>/<output id>' -> path of that output.
        entities = {}
        outs = {}
        subprocesses = {}

        for step in reversed(self.process.topological_steps):
            if isinstance(step.run, WORKFLOW_STEP_RUN_TYPES):
                path = None
                process = step.run
            else:
                path = os.path.join(basedir, step.run)
                process = self.children[step.id]

            subprocess_id = self._id + '/steps/' + step.id

            inputs = {}
            for alias, source in step.in_.items():
                usage_id = subprocess_id + '/inputs/' + alias
                if source in ins:
                    # The source is a workflow input: copy its usage with
                    # the role and identifier of this step.
                    dependency = ins[source]
                    inputs[dependency.path] = attr.evolve(
                        dependency,
                        role=alias,
                        id=usage_id,
                    )
                elif source in outs:
                    # The source is an output of an already processed step.
                    input_path = outs[source]
                    inputs[input_path] = Usage(
                        entity=entities[input_path],
                        role=alias,
                        id=usage_id,
                    )
                else:
                    # TODO check that it is not Path or Directory
                    pass

            # NOTE(review): ``path`` is ``None`` here when ``step.run`` is
            # one of WORKFLOW_STEP_RUN_TYPES; confirm that
            # ``find_previous_commit`` accepts that.
            subprocess_entity_commit = self.client.find_previous_commit(
                path, revision=revision)
            subprocess = process.create_run(
                commit=self.commit,
                client=self.client,
                part_of=self,
                process=process,
                path=path,
                inputs=inputs,
                id=subprocess_id,
            )

            subprocess.association = Association.from_activity(
                subprocess,
                commit=subprocess_entity_commit,
            )

            # Record the outputs of this step; an existing key is kept.
            for output_path, source in subprocess.outputs.items():
                outs.setdefault(step.id + '/' + source, output_path)

            # Record the generated entities (and the members of generated
            # collections) by path.
            for generation in subprocess.generated:
                entity = generation.entity
                entities[entity.path] = entity

                if isinstance(entity, Collection):
                    entities.update(
                        **{member.path: member
                           for member in entity.members})

            subprocesses[step.id] = subprocess
            self._processes.append(subprocess)

        return subprocesses

    def iter_output_files(self, commit=None):
        """Yield tuples with output id and path.

        Only outputs whose type is in ``PATH_OBJECTS`` are considered.
        """
        # NOTE(review): ``commit`` is not used after this assignment.
        commit = commit or self.commit

        # Reload the step definitions and store them on the instance.
        tools = self.default_children()
        setattr(self, 'children', tools)

        for output in self.process.outputs:
            if output.type not in PATH_OBJECTS:
                continue

            if output.outputSource:
                # ``outputSource`` has the form '<step id>/<source>'.
                step_id, _, source = output.outputSource.partition('/')
                subprocess = self.subprocesses[step_id]
                for glob, output_id in subprocess.outputs.items():
                    if output.id == output_id:
                        yield output.id, glob
                        break
            elif output.outputBinding:
                glob = output.outputBinding.glob
                # TODO better support for Expression
                if glob.startswith('$(inputs.'):
                    # Extract the input id from '$(inputs.<id>)'.
                    input_id = glob[len('$(inputs.'):-1]
                    # NOTE(review): ``self.inputs`` is used as a mapping in
                    # ``default_subprocesses`` (``.items()``), so iterating
                    # it here would yield its keys; confirm that the items
                    # really have ``id`` and ``default`` attributes.
                    for input_ in self.inputs:
                        if input_.id == input_id:
                            yield output.id, input_.default
                else:
                    yield output.id, glob

    @outputs.default
    def default_outputs(self):
        """Guess default outputs from a workflow (delegates to the parent)."""
        return super().default_outputs()

    @generated.default
    def default_generated(self):
        """Derive the generations of the workflow from its subprocesses.

        For every process output, the generation of the referenced step
        whose role equals the source name is copied with the workflow
        output id as role and this run as activity.

        :raises KeyError: if no generation of the step has a matching role.
        """
        results = []
        for output in self.process.outputs:
            step_id, _, source = output.outputSource.partition('/')
            assert step_id in self.children

            for generated in self.subprocesses[step_id].generated:
                if generated.role == source:
                    results.append(
                        attr.evolve(
                            generated,
                            role=output.id,
                            activity=self,
                        ))
                    break
            else:
                # The loop ended without a matching generation.
                raise KeyError(output)

        return results

    @property
    def nodes(self):
        """Yield the nodes of all subprocesses that have a path.

        Subprocesses are visited in the reverse of the order in which they
        were appended to ``self._processes``.
        """
        for subprocess in reversed(self._processes):
            if subprocess.path is None:
                # skip nodes connecting directory to file
                continue
            yield from subprocess.nodes
Exemplo n.º 14
0
class ProcessRun(Activity):
    """A process run is a particular execution of a Process description.

    The defaults of ``inputs``, ``outputs``, ``generated`` and
    ``qualified_usage`` are computed by the decorated methods below.
    """

    __association_cls__ = Process

    # inputs: mapping of input path to its ``Usage``.
    # outputs: mapping of output path to the output id.
    inputs = attr.ib(kw_only=True)
    outputs = attr.ib(kw_only=True)

    generated = jsonld.ib(
        context={
            '@reverse': 'prov:activity',
        },
        kw_only=True,
        hash=False,
    )

    # Not accepted by ``__init__``; assigned in ``__attrs_post_init__``.
    association = jsonld.ib(
        context='prov:qualifiedAssociation',
        init=False,
        kw_only=True,
    )

    qualified_usage = jsonld.ib(context='prov:qualifiedUsage', kw_only=True)

    @generated.default
    def default_generated(self):
        """Create one ``Generation`` per entry of ``self.outputs``.

        Entities already generated by ``self.part_of`` are reused when
        their path matches an output path.
        """
        if self.part_of is not None:
            entities = {
                generation.entity.path: generation.entity
                for generation in self.part_of.generated
            }
        else:
            entities = {}

        # NOTE: the fallback ``Entity`` passed to ``entities.get`` is
        # constructed for every path, even when the path is found.
        return [
            Generation(
                activity=self,
                entity=entities.get(
                    path,
                    Entity(
                        commit=self.commit,
                        client=self.client,
                        submodules=self.submodules,
                        path=path,
                        parent=self,
                    ),
                ),
                role=role,
            ) for path, role in self.outputs.items()
        ]

    def __attrs_post_init__(self):
        """Create the association of this run."""
        self.association = Association.from_activity(self)

    @inputs.default
    def default_inputs(self):
        """Guess default inputs from a process.

        :returns: dictionary mapping every input path reported by
            ``process.iter_input_files`` to a ``Usage``.
        """
        basedir = os.path.dirname(self.path)
        commit = self.commit
        client = self.client
        process = self.process
        hierarchy = self.submodules

        inputs = {}
        # Inputs are looked up at the parent of the commit.
        revision = '{0}^'.format(commit)

        try:
            from git import Submodule

            submodules = [
                submodule
                for submodule in Submodule.iter_items(client.git,
                                                      parent_commit=commit)
            ]
        except (RuntimeError, ValueError):
            # There are no submodules associated with the given commit.
            submodules = []

        # One client per submodule, rooted at the resolved submodule path.
        subclients = {
            submodule: LocalClient(
                path=(client.path / submodule.path).resolve(),
                parent=client,
            )
            for submodule in submodules
        }

        def resolve_submodules(file_, **kwargs):
            """Return a ``Usage`` if ``file_`` resolves into a submodule.

            Returns ``None`` (implicitly) when the file is neither a
            symlink nor under ``.renku/vendors``, or when its resolved
            path is not inside any submodule client path.
            """
            original_path = client.path / file_
            if original_path.is_symlink() or file_.startswith(
                    '.renku/vendors'):
                original_path = original_path.resolve()
                for submodule, subclient in subclients.items():
                    try:
                        # ``relative_to`` raises ValueError when the path is
                        # not located under this submodule.
                        subpath = original_path.relative_to(subclient.path)
                        return Usage.from_revision(client=subclient,
                                                   path=str(subpath),
                                                   revision=submodule.hexsha,
                                                   submodules=hierarchy +
                                                   [submodule.name],
                                                   **kwargs)
                    except ValueError:
                        pass

        for input_id, input_path in process.iter_input_files(basedir):
            # NOTE(review): a KeyError raised anywhere in this body makes
            # the input be skipped silently; confirm that this is intended.
            try:
                usage_id = self._id + '/inputs/' + input_id
                dependency = resolve_submodules(
                    input_path,
                    role=input_id,
                    id=usage_id,
                )
                if dependency is None:
                    # Not a submodule file: use the main client.
                    dependency = Usage.from_revision(
                        client=client,
                        path=input_path,
                        role=input_id,
                        revision=revision,
                        id=usage_id,
                    )
                inputs[input_path] = dependency
            except KeyError:
                continue

        return inputs

    @qualified_usage.default
    def default_qualified_usage(self):
        """Generate list of used artifacts (the values of ``inputs``)."""
        return list(self.inputs.values())

    @outputs.default
    def default_outputs(self):
        """Guess default outputs from a process.

        :returns: dictionary mapping output paths to output ids.
        """
        basedir = os.path.dirname(self.path)
        # Tree of the parent's default output paths, without the paths
        # for which ``client.is_cwl`` is true.
        tree = DirectoryTree.from_list((path
                                        for path in super().default_outputs()
                                        if not self.client.is_cwl(path)))
        outputs = {}

        for output_id, output_path in self.process.iter_output_files(
                basedir, commit=self.commit):
            outputs[output_path] = output_id

            # Expand directory entries.
            for subpath in tree.get(output_path, []):
                outputs.setdefault(os.path.join(output_path, subpath),
                                   output_id)

        return outputs

    @property
    def nodes(self):
        """Yield the parent's nodes, this run, then directory inputs."""
        yield from super().nodes
        yield self
        for node in self.inputs.values():
            # Only inputs whose path is a directory are yielded.
            if (node.client.path / node.path).is_dir():
                yield node

    @property
    def parents(self):
        """Return the qualified usages as parents."""
        return self.qualified_usage
Exemplo n.º 15
0
class WorkflowRun(ProcessRun):
    """A run of a workflow, composed of one run per workflow step."""

    __association_cls__ = Workflow

    # @reverse wfprov:wasPartOfWorkflowRun

    # Neither ``children`` nor ``subprocesses`` is accepted by ``__init__``.
    children = attr.ib(kw_only=True, init=False)

    _processes = jsonld.ib(
        kw_only=True,
        default=attr.Factory(list),
        context={
            '@reverse': 'wfprov:wasPartOfWorkflowRun',
        },
    )
    subprocesses = attr.ib(kw_only=True, init=False)

    @children.default
    def default_children(self):
        """Use the tools stored on the process as children."""
        return self.process._tools

    @subprocesses.default
    def default_subprocesses(self):
        """Create a run for every step of the process.

        Steps are visited in reversed ``topological_steps`` order. The
        result maps the path of each step to a ``(step, run)`` tuple, and
        every run is also appended to ``self._processes``.
        """
        basedir = os.path.dirname(self.path)
        revision = '{0}^'.format(self.commit)

        # Usages of the workflow keyed by role.
        ins = {usage.role: usage for usage in self.inputs.values()}
        # Generated entities keyed by path.
        entities = {
            generation.entity.path: generation.entity
            for generation in self.generated
        }
        # Output source ('<step id>/<output id>') -> output path.
        path_by_output_id = {id_: path_ for path_, id_ in self.outputs.items()}
        outs = {
            output.outputSource: path_by_output_id[output.id]
            for output in self.process.outputs
        }

        runs = {}

        for step in reversed(self.process.topological_steps):
            step_path = os.path.join(basedir, step.run)
            step_process = self.children[step.id]
            run_id = self._id + '/steps/' + step.id

            step_inputs = {}
            for alias, source in step.in_.items():
                usage_id = run_id + '/inputs/' + alias
                if source in ins:
                    # A workflow input: copy its usage for this step.
                    dependency = ins[source]
                    step_inputs[dependency.path] = attr.evolve(
                        dependency,
                        role=alias,
                        id=usage_id,
                    )
                    continue
                if source in outs:
                    # A known output path: wrap its entity in a new usage.
                    input_path = outs[source]
                    step_inputs[input_path] = Usage(
                        entity=entities[input_path],
                        role=alias,
                        id=usage_id,
                    )
                # TODO check that it is not Path or Directory

            step_outputs = {}
            for source in step.out:
                output_path = outs.get(step.id + '/' + source)
                if output_path:
                    step_outputs[output_path] = source

            step_commit = self.client.find_previous_commit(
                step_path,
                revision=revision,
            )
            run = step_process.create_run(
                commit=step_commit,
                client=self.client,
                part_of=self,
                process=step_process,
                path=step_path,
                inputs=step_inputs,
                outputs=step_outputs,
                id=run_id,
                submodules=self.submodules,
            )

            runs[step_path] = (step, run)
            self._processes.append(run)

        return runs

    @property
    def nodes(self):
        """Yield the nodes of every subprocess, last created first."""
        for run in reversed(self._processes):
            yield from run.nodes
Exemplo n.º 16
0
class Activity(CommitMixin):
    """Represent an activity in the repository.

    An activity is built around a Git commit: its identifier, message,
    parent links, outputs and timestamps all default to values read from
    ``self.commit``.
    """

    _id = jsonld.ib(context='@id', kw_only=True)
    _message = jsonld.ib(context='rdfs:comment', kw_only=True)
    _was_informed_by = jsonld.ib(
        context='prov:wasInformedBy',
        kw_only=True,
    )

    part_of = attr.ib(default=None, kw_only=True)

    process = attr.ib(default=None, kw_only=True)
    # Mapping of output path to role (``None`` for the default outputs).
    outputs = attr.ib(kw_only=True)

    # Collections created by ``default_generated``, keyed by path string.
    _collections = attr.ib(default=attr.Factory(OrderedDict),
                           init=False,
                           kw_only=True)
    generated = jsonld.ib(context={
        '@reverse': 'prov:activity',
    },
                          kw_only=True,
                          hash=False)
    influenced = jsonld.ib(
        context='prov:influenced',
        kw_only=True,
    )

    started_at_time = jsonld.ib(
        context={
            '@id': 'prov:startedAtTime',
            '@type': 'http://www.w3.org/2001/XMLSchema#dateTime',
        },
        kw_only=True,
    )

    ended_at_time = jsonld.ib(
        context={
            '@id': 'prov:endedAtTime',
            '@type': 'http://www.w3.org/2001/XMLSchema#dateTime',
        },
        kw_only=True,
    )

    @generated.default
    def default_generated(self):
        """Create one ``Generation`` per entry of ``self.outputs``.

        For every output the chain of parent directories is materialised
        as ``Collection`` objects (shared through ``self._collections``)
        and the output entity is appended to its direct parent collection.
        """
        results = []
        for path, role in self.outputs.items():
            client, commit, path = self.client.resolve_in_submodules(
                self.commit,
                path,
            )

            output_path = client.path / path
            parents = list(output_path.relative_to(client.path).parents)

            collection = None
            members = []
            # ``parents[:-1]`` drops the last (outermost) parent; the
            # remaining ones are walked from the outermost to the innermost.
            for parent in reversed(parents[:-1]):
                if str(parent) in self._collections:
                    collection = self._collections[str(parent)]
                else:
                    collection = Collection(
                        client=client,
                        commit=commit,
                        path=str(parent),
                        members=[],
                        parent=collection,
                    )
                    # Register the new collection in the member list of
                    # the collection one level above it.
                    members.append(collection)
                    self._collections[str(parent)] = collection

                members = collection.members

            entity_cls = Entity
            # NOTE(review): this check uses ``self.client`` although
            # ``path`` was resolved against ``client`` above; confirm the
            # behaviour for paths that live in a submodule.
            if (self.client.path / path).is_dir():
                entity_cls = Collection

            entity = entity_cls(
                commit=commit,
                client=client,
                path=str(path),
                parent=collection,
            )

            if collection:
                collection.members.append(entity)

            results.append(
                Generation(
                    activity=self,
                    entity=entity,
                    role=role,
                ))
        return results

    @influenced.default
    def default_influenced(self):
        """Return the collections registered by ``default_generated``."""
        return list(self._collections.values())

    @property
    def parents(self):
        """Return parent commits."""
        return list(self.commit.parents)

    @property
    def paths(self):
        """Return all paths in the commit.

        The paths are the ``a_path`` values of the diff against the commit
        parents, or against ``NULL_TREE`` when there are no parents.
        """
        return {
            item.a_path
            for item in self.commit.diff(self.commit.parents or NULL_TREE)
            # if not item.deleted_file
        }

    @classmethod
    def generate_id(cls, commit):
        """Calculate the activity ID: ``commit/<hexsha>``."""
        return 'commit/{commit.hexsha}'.format(commit=commit)

    @_id.default
    def default_id(self):
        """Configure calculated ID."""
        return self.generate_id(self.commit)

    @_message.default
    def default_message(self):
        """Use the commit message as the default message."""
        return self.commit.message

    @_was_informed_by.default
    def default_was_informed_by(self):
        """List references (``@id`` only) to the parent activities."""
        return [{
            '@id': self.generate_id(parent),
        } for parent in self.commit.parents]

    @outputs.default
    def default_outputs(self):
        """Guess default outputs from a commit: every path, without role."""
        return {path: None for path in self.paths}

    @started_at_time.default
    def default_started_at_time(self):
        """Use the authored date of the commit in ISO format."""
        return self.commit.authored_datetime.isoformat()

    @ended_at_time.default
    def default_ended_at_time(self):
        """Use the committed date of the commit in ISO format."""
        return self.commit.committed_datetime.isoformat()

    @property
    def nodes(self):
        """Return topologically sorted nodes.

        The nodes of every generation are yielded first, followed by the
        parent collections of the generated entities in reverse order of
        their first registration.
        """
        collections = OrderedDict()

        def _parents(node):
            """Yield the ancestors of ``node``, outermost first."""
            if node.parent:
                yield from _parents(node.parent)
                yield node.parent

        for output in self.generated:
            for parent in _parents(output.entity):
                collections[parent.path] = parent

            yield from _nodes(output)

        yield from reversed(collections.values())

    @staticmethod
    def from_git_commit(commit, client, path=None):
        """Populate information from the given Git commit.

        Delegates to the module-level ``from_git_commit`` function and
        forwards ``path`` to it.
        """
        # The ``path`` argument used to be dropped here (a literal
        # ``path=None`` was passed); it is now forwarded to the helper.
        return from_git_commit(commit, client, path=path)
Exemplo n.º 17
0
class Activity(CommitMixin):
    """Represent an activity in the repository.

    An activity is built around a single commit (``self.commit``).  Its
    identifier, message, parent references, outputs and timestamps all
    default to values read from that commit.
    """

    _id = jsonld.ib(context='@id', kw_only=True)
    _message = jsonld.ib(context='rdfs:comment', init=False, kw_only=True)
    _was_informed_by = jsonld.ib(
        context='prov:wasInformedBy',
        init=False,
        kw_only=True,
    )

    part_of = attr.ib(default=None, kw_only=True)

    process = attr.ib(default=None, kw_only=True)
    # Mapping of output path to role; by default every changed path maps to
    # ``None`` (see ``default_outputs``).
    outputs = attr.ib(kw_only=True)

    generated = jsonld.ib(
        context={
            '@reverse': 'prov:activity',
        },
        kw_only=True,
        hash=False,
    )

    started_at_time = jsonld.ib(
        context={
            '@id': 'prov:startedAtTime',
            '@type': 'http://www.w3.org/2001/XMLSchema#dateTime',
        },
        kw_only=True,
    )

    ended_at_time = jsonld.ib(
        context={
            '@id': 'prov:endedAtTime',
            '@type': 'http://www.w3.org/2001/XMLSchema#dateTime',
        },
        kw_only=True,
    )

    @generated.default
    def default_generated(self):
        """Create one ``Generation`` per entry of ``self.outputs``.

        Each generation links this activity to an ``Entity`` built from the
        activity's commit, client and submodules for the given output path,
        using the role stored for that path.
        """
        return [
            Generation(
                activity=self,
                entity=Entity(
                    commit=self.commit,
                    client=self.client,
                    submodules=self.submodules,
                    path=path,
                    parent=self,
                ),
                role=role,
            ) for path, role in self.outputs.items()
        ]

    @property
    def paths(self):
        """Return all paths in the commit.

        The result is the set of ``a_path`` values of the diff between the
        commit and its parents (or ``NULL_TREE`` when it has no parents).
        """
        # NOTE: no filtering is applied, so diff entries for deleted files
        # contribute their path as well.
        return {
            item.a_path
            for item in self.commit.diff(self.commit.parents or NULL_TREE)
        }

    @classmethod
    def generate_id(cls, commit):
        """Calculate the action ID from the ``hexsha`` of ``commit``."""
        return 'commit/{commit.hexsha}'.format(commit=commit)

    @_id.default
    def default_id(self):
        """Configure calculated ID from the activity's own commit."""
        return self.generate_id(self.commit)

    @_message.default
    def default_message(self):
        """Generate a default message from the commit message."""
        return self.commit.message

    @_was_informed_by.default
    def default_was_informed_by(self):
        """List parent actions as ``{'@id': ...}`` references."""
        return [{
            '@id': self.generate_id(parent),
        } for parent in self.commit.parents]

    @outputs.default
    def default_outputs(self):
        """Guess default outputs from a commit.

        Every path in ``self.paths`` becomes an output with role ``None``.
        """
        return {path: None for path in self.paths}

    @started_at_time.default
    def default_started_at_time(self):
        """Return the commit's authored datetime in ISO format."""
        return self.commit.authored_datetime.isoformat()

    @ended_at_time.default
    def default_ended_at_time(self):
        """Return the commit's committed datetime in ISO format."""
        return self.commit.committed_datetime.isoformat()

    @property
    def nodes(self):
        """Return the nodes of this activity, i.e. ``self.generated``."""
        return self.generated

    @property
    def parents(self):
        """Return a list of parents; a plain activity reports none."""
        return []

    @staticmethod
    def from_git_commit(commit, client, path=None, submodules=None):
        """Populate information from the given Git commit.

        Thin wrapper that delegates to the module-level ``from_git_commit``
        function.

        :param commit: Git commit to build the activity from.
        :param client: client passed through to the module-level function.
        :param path: optional path forwarded to the module-level function.
            It was previously discarded (``None`` was always passed), so an
            explicitly supplied path had no effect.
        :param submodules: optional submodules forwarded unchanged.
        :returns: whatever the module-level ``from_git_commit`` returns.
        """
        return from_git_commit(
            commit,
            client,
            path=path,
            submodules=submodules,
        )
Exemplo n.º 18
0
class ProcessRun(Activity):
    """A process run is a particular execution of a Process description."""

    __association_cls__ = Process

    inputs = attr.ib(kw_only=True)
    outputs = attr.ib(kw_only=True)

    generated = jsonld.ib(
        context={'@reverse': 'prov:activity'},
        kw_only=True,
        hash=False,
    )

    association = jsonld.ib(
        context='prov:qualifiedAssociation',
        default=None,
        kw_only=True,
    )

    qualified_usage = jsonld.ib(context='prov:qualifiedUsage', kw_only=True)

    @generated.default
    def default_generated(self):
        """Reuse the parent class computation of generated entities."""
        return super().default_generated()

    def __attrs_post_init__(self):
        """Fill in the association and, for path-less runs, one output."""
        if self.association is None:
            self.association = Association.from_activity(self)

        if self.path is not None:
            return

        # FIXME only works for linking directory to file
        known_ids = set(self.outputs.values())
        for output_id, output_path in self.iter_output_files():
            if output_id in known_ids:
                continue

            # Only the first output id that is not registered yet is added.
            directory = next(
                path for path, usage in self.inputs.items()
                if usage.role == 'input_directory'
            )
            self.outputs[os.path.join(directory, output_path)] = output_id
            break

    @inputs.default
    def default_inputs(self):
        """Collect the usages of the process input files.

        Input files are resolved relative to the directory of ``self.path``
        at the revision preceding the commit; inputs for which a ``KeyError``
        is raised are skipped.
        """
        basedir = os.path.dirname(self.path)

        commit = self.commit
        client = self.client
        process = self.process

        parent_revision = '{0}^'.format(commit)
        usages = {}

        for input_id, input_path in process.iter_input_files(basedir):
            try:
                usages[input_path] = Usage.from_revision(
                    client=client,
                    path=input_path,
                    role=input_id,
                    revision=parent_revision,
                    id=self._id + '/inputs/' + input_id,
                )
            except KeyError:
                continue

        return usages

    @qualified_usage.default
    def default_qualified_usage(self):
        """Return the usages stored in ``self.inputs`` as a list."""
        return [usage for usage in self.inputs.values()]

    def iter_output_files(self, commit=None):
        """Yield ``(output id, path)`` tuples for the process outputs."""
        process = self.process
        reference_prefix = '$(inputs.'

        for output in process.outputs:
            kind = output.type

            if kind in {'stdout', 'stderr'}:
                stream = getattr(process, kind)
                if stream:
                    yield output.id, stream
                continue

            if kind not in PATH_OBJECTS:
                continue

            glob = output.outputBinding.glob
            # TODO better support for Expression
            if not glob.startswith(reference_prefix):
                yield output.id, glob
                continue

            # The glob refers to an input; report that input's default.
            input_id = glob[len(reference_prefix):-1]
            for input_ in process.inputs:
                if input_.id == input_id:
                    yield output.id, input_.default
                    break  # out from process.inputs

    @outputs.default
    def default_outputs(self):
        """Map output paths to output ids; empty when no path is set."""
        if self.path is None:
            return {}

        by_path = {}
        for output_id, output_path in self.iter_output_files():
            by_path[output_path] = output_id
        return by_path

    @property
    def parents(self):
        """Return the commits of all used entities plus inherited parents."""
        commits = []
        for usage in self.qualified_usage:
            for member in usage.entity.entities:
                commits.append(member.commit)
        return commits + super().parents

    @property
    def nodes(self):
        """Yield the inherited nodes followed by the association plan."""
        # Outputs go first
        for node in super().nodes:
            yield node

        # Activity itself
        yield self.association.plan
Exemplo n.º 19
0
class Project(object):
    """Represent a project."""

    _id = jsonld.ib(context='@id', kw_only=True)