Exemplo n.º 1
0
    def replacement_for_connection(self, connection, is_data=True):
        """Return the upstream value that should feed ``connection``.

        The value is looked up in ``self.outputs`` under the id of
        ``connection.output_step`` and then under ``connection.output_name``.

        :param connection: connection whose ``output_step`` and
            ``output_name`` identify the upstream output to fetch.
        :param is_data: when false, a dataset or collection replacement must
            also be finished and OK before it is returned.
        :returns: the recorded step output; for a ``parameter_input`` step
            with no recorded output of that name, whatever
            ``self.inputs_by_step_id`` holds for the step (possibly ``None``).
        :raises Exception: if the step has no entry in ``self.outputs`` or
            the named output cannot be resolved.
        :raises modules.DelayedWorkflowEvaluation: if the upstream step,
            collection or datasets are not ready yet.
        :raises modules.CancelWorkflowEvaluation: if an upstream collection
            is unpopulated and not waiting for elements, or a dataset is
            neither pending nor OK.
        """
        output_step_id = connection.output_step.id
        if output_step_id not in self.outputs:
            template = "No outputs found for step id %s, outputs are %s"
            message = template % (output_step_id, self.outputs)
            raise Exception(message)
        step_outputs = self.outputs[output_step_id]
        if step_outputs is STEP_OUTPUT_DELAYED:
            delayed_why = "dependent step [%s] delayed, so this step must be delayed" % output_step_id
            raise modules.DelayedWorkflowEvaluation(why=delayed_why)
        output_name = connection.output_name
        try:
            replacement = step_outputs[output_name]
        except KeyError:
            replacement = self.inputs_by_step_id.get(output_step_id)
            if connection.output_step.type == 'parameter_input' and output_step_id is not None:
                # FIXME: parameter_input step outputs should be properly recorded as step outputs, but for now we can
                # short-circuit and just pick the input value
                pass
            else:
                # Must resolve.
                template = "Workflow evaluation problem - failed to find output_name %s in step_outputs %s"
                message = template % (output_name, step_outputs)
                raise Exception(message)
        if isinstance(replacement, model.HistoryDatasetCollectionAssociation):
            if not replacement.collection.populated:
                if not replacement.collection.waiting_for_elements:
                    # If we are not waiting for elements, there was some
                    # problem creating the collection. Collection will never
                    # be populated.
                    # TODO: consider distinguish between cancelled and failed?
                    raise modules.CancelWorkflowEvaluation()

                delayed_why = "dependent collection [%s] not yet populated with datasets" % replacement.id
                raise modules.DelayedWorkflowEvaluation(why=delayed_why)

        data_inputs = (model.HistoryDatasetAssociation,
                       model.HistoryDatasetCollectionAssociation,
                       model.DatasetCollection)
        if not is_data and isinstance(replacement, data_inputs):
            # A non-data consumer needs the finished contents, so every
            # dataset involved has to be complete and OK first.
            if isinstance(replacement, model.HistoryDatasetAssociation):
                if replacement.is_pending:
                    delayed_why = "dependent dataset is still pending, so its value cannot be used yet"
                    raise modules.DelayedWorkflowEvaluation(why=delayed_why)
                if not replacement.is_ok:
                    raise modules.CancelWorkflowEvaluation()
            else:
                # NOTE(review): model.DatasetCollection passes the isinstance
                # check above but is read here through ``.collection`` like an
                # HDCA - confirm that attribute exists on DatasetCollection.
                if not replacement.collection.populated:
                    delayed_why = "dependent collection not yet populated with datasets"
                    raise modules.DelayedWorkflowEvaluation(why=delayed_why)
                # Scan every element before delaying: a failed dataset must
                # cancel the evaluation even if an earlier one is pending.
                pending = False
                for dataset_instance in replacement.dataset_instances:
                    if dataset_instance.is_pending:
                        pending = True
                    elif not dataset_instance.is_ok:
                        raise modules.CancelWorkflowEvaluation()
                if pending:
                    delayed_why = "dependent collection contains datasets that are still pending"
                    raise modules.DelayedWorkflowEvaluation(why=delayed_why)

        return replacement
Exemplo n.º 2
0
    def replacement_for_connection(self, connection, is_data=True):
        """Return the upstream value that should feed ``connection``.

        The value is looked up in ``self.outputs`` under the id of
        ``connection.output_step`` and then under ``connection.output_name``.

        :param connection: connection whose ``output_step`` and
            ``output_name`` identify the upstream output to fetch.
        :param is_data: when false, a dataset or collection replacement must
            also be finished and OK before it is returned.
        :returns: the recorded step output.
        :raises Exception: if the step has no entry in ``self.outputs`` or
            the named output is missing from the step's outputs.
        :raises NotImplementedError: if the replacement is a bare
            ``model.DatasetCollection``.
        :raises modules.DelayedWorkflowEvaluation: if the upstream step,
            collection or datasets are not ready yet.
        :raises modules.CancelWorkflowEvaluation: if an upstream collection
            is unpopulated and not waiting for elements, or a dataset is
            neither pending nor OK.
        """
        output_step_id = connection.output_step.id
        if output_step_id not in self.outputs:
            message = f"No outputs found for step id {output_step_id}, outputs are {self.outputs}"
            raise Exception(message)
        step_outputs = self.outputs[output_step_id]
        if step_outputs is STEP_OUTPUT_DELAYED:
            delayed_why = f"dependent step [{output_step_id}] delayed, so this step must be delayed"
            raise modules.DelayedWorkflowEvaluation(why=delayed_why)
        output_name = connection.output_name
        try:
            replacement = step_outputs[output_name]
        except KeyError:
            # Must resolve.
            message = (
                f"Workflow evaluation problem - failed to find output_name {output_name} "
                f"in step_outputs {step_outputs}"
            )
            raise Exception(message)
        if isinstance(replacement, model.HistoryDatasetCollectionAssociation):
            if not replacement.collection.populated:
                if not replacement.waiting_for_elements:
                    # If we are not waiting for elements, there was some
                    # problem creating the collection. Collection will never
                    # be populated.
                    # TODO: consider distinguish between cancelled and failed?
                    raise modules.CancelWorkflowEvaluation()

                delayed_why = f"dependent collection [{replacement.id}] not yet populated with datasets"
                raise modules.DelayedWorkflowEvaluation(why=delayed_why)

        if isinstance(replacement, model.DatasetCollection):
            raise NotImplementedError
        if not is_data and isinstance(
                replacement, (model.HistoryDatasetAssociation,
                              model.HistoryDatasetCollectionAssociation)):
            # A non-data consumer needs the finished contents, so every
            # dataset involved has to be complete and OK first.
            if isinstance(replacement, model.HistoryDatasetAssociation):
                if replacement.is_pending:
                    delayed_why = "dependent dataset is still pending, so its value cannot be used yet"
                    raise modules.DelayedWorkflowEvaluation(why=delayed_why)
                if not replacement.is_ok:
                    raise modules.CancelWorkflowEvaluation()
            else:
                if not replacement.collection.populated:
                    delayed_why = f"dependent collection [{replacement.id}] not yet populated with datasets"
                    raise modules.DelayedWorkflowEvaluation(why=delayed_why)
                # Scan every element before delaying: a failed dataset must
                # cancel the evaluation even if an earlier one is pending.
                pending = False
                for dataset_instance in replacement.dataset_instances:
                    if dataset_instance.is_pending:
                        pending = True
                    elif not dataset_instance.is_ok:
                        raise modules.CancelWorkflowEvaluation()
                if pending:
                    delayed_why = f"dependent collection [{replacement.id}] contains datasets that are still pending"
                    raise modules.DelayedWorkflowEvaluation(why=delayed_why)

        return replacement
Exemplo n.º 3
0
    def __check_implicitly_dependent_step(self, output_id):
        """Verify that the step ``output_id`` has been scheduled and its jobs succeeded.

        :param output_id: id of the step this step depends on.
        :raises modules.DelayedWorkflowEvaluation: if the step has no
            invocation yet, is not in the ``'scheduled'`` state, or has a job
            that is not finished.
        :raises modules.CancelWorkflowEvaluation: if a finished job's state
            is not ``job.states.OK``.
        """
        step_invocation = self.workflow_invocation.step_invocation_for_step_id(
            output_id)

        # No steps created yet - have to delay evaluation.
        if not step_invocation:
            delayed_why = "depends on step [%s] but that step has not been invoked yet" % output_id
            raise modules.DelayedWorkflowEvaluation(why=delayed_why)

        if step_invocation.state != 'scheduled':
            delayed_why = "depends on step [%s] job has not finished scheduling yet" % output_id
            # Pass the reason by keyword like the other delays in this method.
            raise modules.DelayedWorkflowEvaluation(why=delayed_why)

        for job_assoc in step_invocation.jobs:
            job = job_assoc.job
            if not job:
                # TODO: Handle implicit dependency on stuff like
                # pause steps.
                continue

            # At least one job is incomplete.
            if not job.finished:
                delayed_why = "depends on step [%s] but one or more jobs created from that step have not finished yet" % output_id
                raise modules.DelayedWorkflowEvaluation(why=delayed_why)

            if job.state != job.states.OK:
                raise modules.CancelWorkflowEvaluation()
Exemplo n.º 4
0
    def replacement_for_connection(self, connection, is_data=True):
        """Look up the upstream output that ``connection`` refers to.

        :param connection: connection whose ``output_step.id`` and
            ``output_name`` select the entry in ``self.outputs``.
        :param is_data: when true a missing output name is an error; when
            false ``modules.NO_REPLACEMENT`` is returned instead.
        :returns: the recorded output or ``modules.NO_REPLACEMENT``.
        :raises Exception: if the step has no recorded outputs, or the
            output name is missing and ``is_data`` is true.
        :raises modules.DelayedWorkflowEvaluation: if the upstream step is
            delayed or its collection is still waiting for elements.
        :raises modules.CancelWorkflowEvaluation: if the collection is
            unpopulated and not waiting for elements.
        """
        source_step_id = connection.output_step.id
        if source_step_id not in self.outputs:
            raise Exception(
                "No outputs found for step id %s, outputs are %s" % (source_step_id, self.outputs)
            )

        produced = self.outputs[source_step_id]
        if produced is STEP_OUTPUT_DELAYED:
            raise modules.DelayedWorkflowEvaluation(
                why="dependent step [%s] delayed, so this step must be delayed" % source_step_id
            )

        wanted_name = connection.output_name
        try:
            resolved = produced[wanted_name]
        except KeyError:
            if not is_data:
                resolved = modules.NO_REPLACEMENT
            else:
                # Data connections must resolve to a concrete output.
                raise Exception(
                    "Workflow evaluation problem - failed to find output_name %s in step_outputs %s"
                    % (wanted_name, produced)
                )

        # Only collection associations need the population check below.
        if not isinstance(resolved, model.HistoryDatasetCollectionAssociation):
            return resolved
        if resolved.collection.populated:
            return resolved

        if resolved.collection.waiting_for_elements:
            raise modules.DelayedWorkflowEvaluation(
                why="dependent collection [%s] not yet populated with datasets" % resolved.id
            )

        # Not waiting for elements means creating the collection went wrong
        # and it will never be populated.
        # TODO: consider distinguish between cancelled and failed?
        raise modules.CancelWorkflowEvaluation()
Exemplo n.º 5
0
    def replacement_for_connection(self, connection):
        """Look up the upstream output that ``connection`` refers to.

        :param connection: connection whose ``output_step.id`` and
            ``output_name`` select the entry in ``self.outputs``.
        :returns: the recorded output.
        :raises modules.DelayedWorkflowEvaluation: if the upstream step is
            delayed or its collection is still waiting for elements.
        :raises modules.CancelWorkflowEvaluation: if the collection is
            unpopulated and not waiting for elements.
        """
        produced = self.outputs[connection.output_step.id]
        if produced is STEP_OUTPUT_DELAYED:
            raise modules.DelayedWorkflowEvaluation()

        resolved = produced[connection.output_name]

        # Only collection associations need the population check below.
        if not isinstance(resolved, model.HistoryDatasetCollectionAssociation):
            return resolved
        if resolved.collection.populated:
            return resolved

        if resolved.collection.waiting_for_elements:
            raise modules.DelayedWorkflowEvaluation()

        # Not waiting for elements means creating the collection went wrong
        # and it will never be populated.
        # TODO: consider distinguish between cancelled and failed?
        raise modules.CancelWorkflowEvaluation()
Exemplo n.º 6
0
    def replacement_for_connection(self, connection, is_data=True):
        """Look up the upstream output that ``connection`` refers to.

        :param connection: connection whose ``output_step.id`` and
            ``output_name`` select the entry in ``self.outputs``.
        :param is_data: when true a missing output name is an error; when
            false ``modules.NO_REPLACEMENT`` is returned instead.
        :returns: the recorded output or ``modules.NO_REPLACEMENT``.
        :raises Exception: if the step has no recorded outputs, or the
            output name is missing and ``is_data`` is true.
        :raises modules.DelayedWorkflowEvaluation: if the upstream step is
            delayed or its collection is still waiting for elements.
        :raises modules.CancelWorkflowEvaluation: if the collection is
            unpopulated and not waiting for elements.
        """
        output_step_id = connection.output_step.id
        if output_step_id not in self.outputs:
            template = "No outputs found for step id %s, outputs are %s"
            message = template % (output_step_id, self.outputs)
            raise Exception(message)
        step_outputs = self.outputs[output_step_id]
        if step_outputs is STEP_OUTPUT_DELAYED:
            raise modules.DelayedWorkflowEvaluation()
        output_name = connection.output_name
        try:
            replacement = step_outputs[output_name]
        except KeyError:
            if is_data:
                # Must resolve. The exception message already carries the
                # output name and the full step_outputs mapping.
                template = "Workflow evaluation problem - failed to find output_name %s in step_outputs %s"
                message = template % (output_name, step_outputs)
                raise Exception(message)
            else:
                replacement = modules.NO_REPLACEMENT
        if isinstance(replacement, model.HistoryDatasetCollectionAssociation):
            if not replacement.collection.populated:
                if not replacement.collection.waiting_for_elements:
                    # If we are not waiting for elements, there was some
                    # problem creating the collection. Collection will never
                    # be populated.
                    # TODO: consider distinguish between cancelled and failed?
                    raise modules.CancelWorkflowEvaluation()

                raise modules.DelayedWorkflowEvaluation()
        return replacement
Exemplo n.º 7
0
    def __check_implicitly_dependent_step(self, output_id):
        """Verify that every job created for step ``output_id`` succeeded.

        :param output_id: id of the step this step depends on.
        :raises modules.DelayedWorkflowEvaluation: if the step has no
            invocations yet or one of its jobs is not finished.
        :raises modules.CancelWorkflowEvaluation: if a finished job's state
            is not ``job.states.OK``.
        """
        invocations = self.workflow_invocation.step_invocations_for_step_id(output_id)

        # Nothing has been created for the step yet, so evaluation must wait.
        if not invocations:
            raise modules.DelayedWorkflowEvaluation()

        for invocation in invocations:
            job = invocation.job
            if not job:
                # TODO: Handle implicit dependency on stuff like
                # pause steps.
                continue

            # An unfinished job means the dependency is not satisfied yet.
            if not job.finished:
                raise modules.DelayedWorkflowEvaluation()

            if job.state != job.states.OK:
                raise modules.CancelWorkflowEvaluation()
Exemplo n.º 8
0
    def replacement_for_connection(self, connection):
        """Look up the upstream output that ``connection`` refers to.

        :param connection: connection whose ``output_step.id`` and
            ``output_name`` select the entry in ``self.outputs``.
        :returns: the recorded output.
        :raises Exception: if the output name is missing from the step's
            recorded outputs.
        :raises modules.DelayedWorkflowEvaluation: if the upstream step is
            delayed or its collection is still waiting for elements.
        :raises modules.CancelWorkflowEvaluation: if the collection is
            unpopulated and not waiting for elements.
        """
        produced = self.outputs[connection.output_step.id]
        if produced is STEP_OUTPUT_DELAYED:
            raise modules.DelayedWorkflowEvaluation()

        wanted_name = connection.output_name
        try:
            resolved = produced[wanted_name]
        except KeyError:
            raise Exception(
                "Workflow evaluation problem - failed to find output_name %s in step_outputs %s"
                % (wanted_name, produced)
            )

        # Only collection associations need the population check below.
        if not isinstance(resolved, model.HistoryDatasetCollectionAssociation):
            return resolved
        if resolved.collection.populated:
            return resolved

        if resolved.collection.waiting_for_elements:
            raise modules.DelayedWorkflowEvaluation()

        # Not waiting for elements means creating the collection went wrong
        # and it will never be populated.
        # TODO: consider distinguish between cancelled and failed?
        raise modules.CancelWorkflowEvaluation()
Exemplo n.º 9
0
    def invoke(self):
        """Schedule the remaining steps of ``self.workflow_invocation``.

        The invocation is marked FAILED when it has exceeded the configured
        ``maximum_workflow_invocation_duration``. Otherwise each remaining
        step is invoked; the invocation ends up READY if any step was
        delayed and SCHEDULED if none was.

        :returns: ``self.progress.outputs``.
        :raises modules.CancelWorkflowEvaluation: if the invocation's
            history has been deleted.
        :raises Exception: any other error raised while scheduling a step is
            logged and re-raised.
        """
        workflow_invocation = self.workflow_invocation
        config = self.trans.app.config
        maximum_duration = getattr(config,
                                   "maximum_workflow_invocation_duration", -1)
        if maximum_duration > 0 and workflow_invocation.seconds_since_created > maximum_duration:
            log.debug(
                "Workflow invocation [%s] exceeded maximum number of seconds allowed for scheduling [%s], failing.",
                workflow_invocation.id, maximum_duration)
            workflow_invocation.state = model.WorkflowInvocation.states.FAILED
            # Stage the failed invocation so the new state gets persisted.
            self.trans.sa_session.add(workflow_invocation)

            # Not flushing in here, because web controller may create multiple
            # invocations.
            return self.progress.outputs

        if workflow_invocation.history.deleted:
            log.info("Cancelled workflow evaluation due to deleted history")
            raise modules.CancelWorkflowEvaluation()

        remaining_steps = self.progress.remaining_steps()
        delayed_steps = False
        for (step, workflow_invocation_step) in remaining_steps:
            step_delayed = False
            step_timer = ExecutionTimer()
            try:
                self.__check_implicitly_dependent_steps(step)

                # First attempt at this step: create its invocation record.
                if not workflow_invocation_step:
                    workflow_invocation_step = model.WorkflowInvocationStep()
                    workflow_invocation_step.workflow_invocation = workflow_invocation
                    workflow_invocation_step.workflow_step = step
                    workflow_invocation_step.state = 'new'

                    workflow_invocation.steps.append(workflow_invocation_step)

                incomplete_or_none = self._invoke_step(
                    workflow_invocation_step)
                # Only an explicit False means "incomplete"; None counts as done.
                if incomplete_or_none is False:
                    step_delayed = delayed_steps = True
                    workflow_invocation_step.state = 'ready'
                    self.progress.mark_step_outputs_delayed(
                        step, why="Not all jobs scheduled for state.")
                else:
                    workflow_invocation_step.state = 'scheduled'
            except modules.DelayedWorkflowEvaluation as de:
                step_delayed = delayed_steps = True
                self.progress.mark_step_outputs_delayed(step, why=de.why)
            except Exception:
                log.exception(
                    "Failed to schedule %s, problem occurred on %s.",
                    self.workflow_invocation.workflow.log_str(),
                    step.log_str(),
                )
                raise

            if not step_delayed:
                log.debug("Workflow step %s of invocation %s invoked %s",
                          step.id, workflow_invocation.id, step_timer)

        if delayed_steps:
            state = model.WorkflowInvocation.states.READY
        else:
            state = model.WorkflowInvocation.states.SCHEDULED
        workflow_invocation.state = state

        # Stage the invocation with its updated state for persistence.
        self.trans.sa_session.add(workflow_invocation)

        # Not flushing in here, because web controller may create multiple
        # invocations.
        return self.progress.outputs
Exemplo n.º 10
0
    def invoke(self):
        """Schedule the remaining steps of ``self.workflow_invocation``.

        The invocation is marked FAILED when it has exceeded the configured
        ``maximum_workflow_invocation_duration``. Otherwise each remaining
        step is invoked and one ``WorkflowInvocationStep`` is created per
        returned job (or a single one when no jobs are returned). The
        invocation ends up READY if any step was delayed and SCHEDULED if
        none was.

        :returns: ``self.progress.outputs``.
        :raises modules.CancelWorkflowEvaluation: if the invocation's
            history has been deleted.
        :raises Exception: any other error raised while scheduling a step is
            logged and re-raised.
        """
        workflow_invocation = self.workflow_invocation
        maximum_duration = getattr(self.trans.app.config,
                                   "maximum_workflow_invocation_duration", -1)
        if maximum_duration > 0 and workflow_invocation.seconds_since_created > maximum_duration:
            log.debug(
                "Workflow invocation [%s] exceeded maximum number of seconds allowed for scheduling [%s], failing.",
                workflow_invocation.id, maximum_duration)
            workflow_invocation.state = model.WorkflowInvocation.states.FAILED
            # Stage the failed invocation so the new state gets persisted.
            self.trans.sa_session.add(workflow_invocation)

            # Not flushing in here, because web controller may create multiple
            # invocations.
            return self.progress.outputs

        if workflow_invocation.history.deleted:
            log.info("Cancelled workflow evaluation due to deleted history")
            raise modules.CancelWorkflowEvaluation()

        remaining_steps = self.progress.remaining_steps()
        delayed_steps = False
        for step in remaining_steps:
            step_delayed = False
            step_timer = ExecutionTimer()
            try:
                self.__check_implicitly_dependent_steps(step)

                # TODO: step may fail to invoke, do something about that.
                jobs = self._invoke_step(step)
                # ``or [None]`` guarantees one invocation step is recorded
                # even when the step produced no jobs.
                for job in (util.listify(jobs) or [None]):
                    # Record invocation
                    workflow_invocation_step = model.WorkflowInvocationStep()
                    workflow_invocation_step.workflow_invocation = workflow_invocation
                    workflow_invocation_step.workflow_step = step
                    # Job may not be generated in this thread if bursting is enabled
                    # https://github.com/galaxyproject/galaxy/issues/2259
                    if job:
                        workflow_invocation_step.job_id = job.id
            except modules.DelayedWorkflowEvaluation as de:
                step_delayed = delayed_steps = True
                self.progress.mark_step_outputs_delayed(step, why=de.why)
            except Exception:
                log.exception(
                    "Failed to schedule %s, problem occurred on %s.",
                    self.workflow_invocation.workflow.log_str(),
                    step.log_str(),
                )
                raise

            if not step_delayed:
                log.debug("Workflow step %s of invocation %s invoked %s",
                          step.id, workflow_invocation.id, step_timer)

        if delayed_steps:
            state = model.WorkflowInvocation.states.READY
        else:
            state = model.WorkflowInvocation.states.SCHEDULED
        workflow_invocation.state = state

        # Stage the invocation with its updated state for persistence.
        self.trans.sa_session.add(workflow_invocation)

        # Not flushing in here, because web controller may create multiple
        # invocations.
        return self.progress.outputs