def replacement_for_connection(self, connection, is_data=True): output_step_id = connection.output_step.id if output_step_id not in self.outputs: template = "No outputs found for step id %s, outputs are %s" message = template % (output_step_id, self.outputs) raise Exception(message) step_outputs = self.outputs[output_step_id] if step_outputs is STEP_OUTPUT_DELAYED: delayed_why = "dependent step [%s] delayed, so this step must be delayed" % output_step_id raise modules.DelayedWorkflowEvaluation(why=delayed_why) output_name = connection.output_name try: replacement = step_outputs[output_name] except KeyError: replacement = self.inputs_by_step_id.get(output_step_id) if connection.output_step.type == 'parameter_input' and output_step_id is not None: # FIXME: parameter_input step outputs should be properly recorded as step outputs, but for now we can # short-circuit and just pick the input value pass else: # Must resolve. template = "Workflow evaluation problem - failed to find output_name %s in step_outputs %s" message = template % (output_name, step_outputs) raise Exception(message) if isinstance(replacement, model.HistoryDatasetCollectionAssociation): if not replacement.collection.populated: if not replacement.collection.waiting_for_elements: # If we are not waiting for elements, there was some # problem creating the collection. Collection will never # be populated. # TODO: consider distinguish between cancelled and failed? raise modules.CancelWorkflowEvaluation() delayed_why = "dependent collection [%s] not yet populated with datasets" % replacement.id raise modules.DelayedWorkflowEvaluation(why=delayed_why) data_inputs = (model.HistoryDatasetAssociation, model.HistoryDatasetCollectionAssociation, model.DatasetCollection) if not is_data and isinstance(replacement, data_inputs): if isinstance(replacement, model.HistoryDatasetAssociation): if replacement.is_pending: raise modules.DelayedWorkflowEvaluation() if not replacement.is_ok: raise modules.CancelWorkflowEvaluation() else: if not replacement.collection.populated: raise modules.DelayedWorkflowEvaluation() pending = False for dataset_instance in replacement.dataset_instances: if dataset_instance.is_pending: pending = True elif not dataset_instance.is_ok: raise modules.CancelWorkflowEvaluation() if pending: raise modules.DelayedWorkflowEvaluation() return replacement
def replacement_for_connection(self, connection, is_data=True): output_step_id = connection.output_step.id if output_step_id not in self.outputs: message = f"No outputs found for step id {output_step_id}, outputs are {self.outputs}" raise Exception(message) step_outputs = self.outputs[output_step_id] if step_outputs is STEP_OUTPUT_DELAYED: delayed_why = f"dependent step [{output_step_id}] delayed, so this step must be delayed" raise modules.DelayedWorkflowEvaluation(why=delayed_why) output_name = connection.output_name try: replacement = step_outputs[output_name] except KeyError: # Must resolve. template = "Workflow evaluation problem - failed to find output_name %s in step_outputs %s" message = template % (output_name, step_outputs) raise Exception(message) if isinstance(replacement, model.HistoryDatasetCollectionAssociation): if not replacement.collection.populated: if not replacement.waiting_for_elements: # If we are not waiting for elements, there was some # problem creating the collection. Collection will never # be populated. # TODO: consider distinguish between cancelled and failed? raise modules.CancelWorkflowEvaluation() delayed_why = f"dependent collection [{replacement.id}] not yet populated with datasets" raise modules.DelayedWorkflowEvaluation(why=delayed_why) if isinstance(replacement, model.DatasetCollection): raise NotImplementedError if not is_data and isinstance( replacement, (model.HistoryDatasetAssociation, model.HistoryDatasetCollectionAssociation)): if isinstance(replacement, model.HistoryDatasetAssociation): if replacement.is_pending: raise modules.DelayedWorkflowEvaluation() if not replacement.is_ok: raise modules.CancelWorkflowEvaluation() else: if not replacement.collection.populated: raise modules.DelayedWorkflowEvaluation() pending = False for dataset_instance in replacement.dataset_instances: if dataset_instance.is_pending: pending = True elif not dataset_instance.is_ok: raise modules.CancelWorkflowEvaluation() if pending: raise modules.DelayedWorkflowEvaluation() return replacement
def __check_implicitly_dependent_step(self, output_id): step_invocation = self.workflow_invocation.step_invocation_for_step_id( output_id) # No steps created yet - have to delay evaluation. if not step_invocation: delayed_why = "depends on step [%s] but that step has not been invoked yet" % output_id raise modules.DelayedWorkflowEvaluation(why=delayed_why) if step_invocation.state != 'scheduled': delayed_why = "depends on step [%s] job has not finished scheduling yet" % output_id raise modules.DelayedWorkflowEvaluation(delayed_why) for job_assoc in step_invocation.jobs: job = job_assoc.job if job: # At least one job in incomplete. if not job.finished: delayed_why = "depends on step [%s] but one or more jobs created from that step have not finished yet" % output_id raise modules.DelayedWorkflowEvaluation(why=delayed_why) if job.state != job.states.OK: raise modules.CancelWorkflowEvaluation() else: # TODO: Handle implicit dependency on stuff like # pause steps. pass
def replacement_for_connection(self, connection, is_data=True): output_step_id = connection.output_step.id if output_step_id not in self.outputs: template = "No outputs found for step id %s, outputs are %s" message = template % (output_step_id, self.outputs) raise Exception(message) step_outputs = self.outputs[output_step_id] if step_outputs is STEP_OUTPUT_DELAYED: delayed_why = "dependent step [%s] delayed, so this step must be delayed" % output_step_id raise modules.DelayedWorkflowEvaluation(why=delayed_why) output_name = connection.output_name try: replacement = step_outputs[output_name] except KeyError: if is_data: # Must resolve. template = "Workflow evaluation problem - failed to find output_name %s in step_outputs %s" message = template % (output_name, step_outputs) raise Exception(message) else: replacement = modules.NO_REPLACEMENT if isinstance(replacement, model.HistoryDatasetCollectionAssociation): if not replacement.collection.populated: if not replacement.collection.waiting_for_elements: # If we are not waiting for elements, there was some # problem creating the collection. Collection will never # be populated. # TODO: consider distinguish between cancelled and failed? raise modules.CancelWorkflowEvaluation() delayed_why = "dependent collection [%s] not yet populated with datasets" % replacement.id raise modules.DelayedWorkflowEvaluation(why=delayed_why) return replacement
def replacement_for_connection(self, connection): step_outputs = self.outputs[connection.output_step.id] if step_outputs is STEP_OUTPUT_DELAYED: raise modules.DelayedWorkflowEvaluation() replacement = step_outputs[connection.output_name] if isinstance(replacement, model.HistoryDatasetCollectionAssociation): if not replacement.collection.populated: if not replacement.collection.waiting_for_elements: # If we are not waiting for elements, there was some # problem creating the collection. Collection will never # be populated. # TODO: consider distinguish between cancelled and failed? raise modules.CancelWorkflowEvaluation() raise modules.DelayedWorkflowEvaluation() return replacement
def replacement_for_connection(self, connection, is_data=True): output_step_id = connection.output_step.id if output_step_id not in self.outputs: template = "No outputs found for step id %s, outputs are %s" message = template % (output_step_id, self.outputs) raise Exception(message) step_outputs = self.outputs[output_step_id] if step_outputs is STEP_OUTPUT_DELAYED: raise modules.DelayedWorkflowEvaluation() output_name = connection.output_name for key in step_outputs: print "Output name: " + str(output_name) #print "Output name, output1, result of object at ouput name: " + str(step_outputs["output1"]) #print "Result of object at output name: " + str(step_outputs[output_name]) #print "Object stuff: " + str(dir(step_outputs[key])) print "Type of object: " + str(type(step_outputs)) try: replacement = step_outputs[output_name] except KeyError: if is_data: # Must resolve. print "From run.py: " + str(step_outputs.viewvalues) for key in step_outputs: print "Key: " + key + " Value: " + str(step_outputs[key]) print "Object stuff: " + str(dir(step_outputs[key])) template = "Workflow evaluation problem - failed to find output_name %s in step_outputs %s" message = template % (output_name, step_outputs) raise Exception(message) else: replacement = modules.NO_REPLACEMENT if isinstance(replacement, model.HistoryDatasetCollectionAssociation): if not replacement.collection.populated: if not replacement.collection.waiting_for_elements: # If we are not waiting for elements, there was some # problem creating the collection. Collection will never # be populated. # TODO: consider distinguish between cancelled and failed? raise modules.CancelWorkflowEvaluation() raise modules.DelayedWorkflowEvaluation() return replacement
def __check_implicitly_dependent_step( self, output_id ): step_invocations = self.workflow_invocation.step_invocations_for_step_id( output_id ) # No steps created yet - have to delay evaluation. if not step_invocations: raise modules.DelayedWorkflowEvaluation() for step_invocation in step_invocations: job = step_invocation.job if job: # At least one job in incomplete. if not job.finished: raise modules.DelayedWorkflowEvaluation() if job.state != job.states.OK: raise modules.CancelWorkflowEvaluation() else: # TODO: Handle implicit dependency on stuff like # pause steps. pass
def replacement_for_connection( self, connection ): step_outputs = self.outputs[ connection.output_step.id ] if step_outputs is STEP_OUTPUT_DELAYED: raise modules.DelayedWorkflowEvaluation() output_name = connection.output_name try: replacement = step_outputs[ output_name ] except KeyError: template = "Workflow evaluation problem - failed to find output_name %s in step_outputs %s" message = template % ( output_name, step_outputs ) raise Exception( message ) if isinstance( replacement, model.HistoryDatasetCollectionAssociation ): if not replacement.collection.populated: if not replacement.collection.waiting_for_elements: # If we are not waiting for elements, there was some # problem creating the collection. Collection will never # be populated. # TODO: consider distinguish between cancelled and failed? raise modules.CancelWorkflowEvaluation() raise modules.DelayedWorkflowEvaluation() return replacement
def invoke(self): workflow_invocation = self.workflow_invocation config = self.trans.app.config maximum_duration = getattr(config, "maximum_workflow_invocation_duration", -1) if maximum_duration > 0 and workflow_invocation.seconds_since_created > maximum_duration: log.debug( "Workflow invocation [%s] exceeded maximum number of seconds allowed for scheduling [%s], failing." % (workflow_invocation.id, maximum_duration)) workflow_invocation.state = model.WorkflowInvocation.states.FAILED # All jobs ran successfully, so we can save now self.trans.sa_session.add(workflow_invocation) # Not flushing in here, because web controller may create multiple # invocations. return self.progress.outputs if workflow_invocation.history.deleted: log.info("Cancelled workflow evaluation due to deleted history") raise modules.CancelWorkflowEvaluation() remaining_steps = self.progress.remaining_steps() delayed_steps = False for (step, workflow_invocation_step) in remaining_steps: step_delayed = False step_timer = ExecutionTimer() try: self.__check_implicitly_dependent_steps(step) if not workflow_invocation_step: workflow_invocation_step = model.WorkflowInvocationStep() workflow_invocation_step.workflow_invocation = workflow_invocation workflow_invocation_step.workflow_step = step workflow_invocation_step.state = 'new' workflow_invocation.steps.append(workflow_invocation_step) incomplete_or_none = self._invoke_step( workflow_invocation_step) if incomplete_or_none is False: step_delayed = delayed_steps = True workflow_invocation_step.state = 'ready' self.progress.mark_step_outputs_delayed( step, why="Not all jobs scheduled for state.") else: workflow_invocation_step.state = 'scheduled' except modules.DelayedWorkflowEvaluation as de: step_delayed = delayed_steps = True self.progress.mark_step_outputs_delayed(step, why=de.why) except Exception: log.exception( "Failed to schedule %s, problem occurred on %s.", self.workflow_invocation.workflow.log_str(), step.log_str(), ) raise if not step_delayed: log.debug("Workflow step %s of invocation %s invoked %s" % (step.id, workflow_invocation.id, step_timer)) if delayed_steps: state = model.WorkflowInvocation.states.READY else: state = model.WorkflowInvocation.states.SCHEDULED workflow_invocation.state = state # All jobs ran successfully, so we can save now self.trans.sa_session.add(workflow_invocation) # Not flushing in here, because web controller may create multiple # invocations. return self.progress.outputs
def invoke(self): workflow_invocation = self.workflow_invocation maximum_duration = getattr(self.trans.app.config, "maximum_workflow_invocation_duration", -1) if maximum_duration > 0 and workflow_invocation.seconds_since_created > maximum_duration: log.debug( "Workflow invocation [%s] exceeded maximum number of seconds allowed for scheduling [%s], failing." % (workflow_invocation.id, maximum_duration)) workflow_invocation.state = model.WorkflowInvocation.states.FAILED # All jobs ran successfully, so we can save now self.trans.sa_session.add(workflow_invocation) # Not flushing in here, because web controller may create multiple # invocations. return self.progress.outputs if workflow_invocation.history.deleted: log.info("Cancelled workflow evaluation due to deleted history") raise modules.CancelWorkflowEvaluation() remaining_steps = self.progress.remaining_steps() delayed_steps = False for step in remaining_steps: step_delayed = False step_timer = ExecutionTimer() jobs = None try: self.__check_implicitly_dependent_steps(step) # TODO: step may fail to invoke, do something about that. jobs = self._invoke_step(step) for job in (util.listify(jobs) or [None]): # Record invocation workflow_invocation_step = model.WorkflowInvocationStep() workflow_invocation_step.workflow_invocation = workflow_invocation workflow_invocation_step.workflow_step = step # Job may not be generated in this thread if bursting is enabled # https://github.com/galaxyproject/galaxy/issues/2259 if job: workflow_invocation_step.job_id = job.id except modules.DelayedWorkflowEvaluation as de: step_delayed = delayed_steps = True self.progress.mark_step_outputs_delayed(step, why=de.why) except Exception: log.exception( "Failed to schedule %s, problem occurred on %s.", self.workflow_invocation.workflow.log_str(), step.log_str(), ) raise if not step_delayed: log.debug("Workflow step %s of invocation %s invoked %s" % (step.id, workflow_invocation.id, step_timer)) if delayed_steps: state = model.WorkflowInvocation.states.READY else: state = model.WorkflowInvocation.states.SCHEDULED workflow_invocation.state = state # All jobs ran successfully, so we can save now self.trans.sa_session.add(workflow_invocation) # Not flushing in here, because web controller may create multiple # invocations. return self.progress.outputs