class ToolModule(WorkflowModule):
    """Workflow module backed by a tool looked up in ``trans.app.toolbox``.

    ``self.tool`` holds whatever ``toolbox.get_tool`` returned for the
    requested id/version.  When that lookup yields nothing the module is
    still constructed, and ``self.errors`` records the tool as unavailable.
    """

    type = "tool"

    def __init__(self, trans, tool_id, tool_version=None):
        """Look up ``tool_id`` (optionally at ``tool_version``) and reset
        all per-module bookkeeping to empty defaults.
        """
        self.trans = trans
        self.tool_id = tool_id
        self.tool = trans.app.toolbox.get_tool(tool_id, tool_version=tool_version)
        self.post_job_actions = {}
        self.runtime_post_job_actions = {}
        self.workflow_outputs = []
        self.state = None
        self.version_changes = []
        if self.tool:
            self.errors = None
        else:
            self.errors = {tool_id: 'Tool unavailable'}

    @classmethod
    def new(Class, trans, tool_id=None):
        """Create a module for ``tool_id`` with a freshly built tool state.

        Raises ``Exception`` when the toolbox has no tool for ``tool_id``.
        """
        module = Class(trans, tool_id)
        if module.tool is None:
            error_message = "Attempted to create new workflow module for invalid tool_id, no tool with id - %s." % tool_id
            raise Exception(error_message)
        module.state = module.tool.new_state(trans, all_pages=True)
        return module

    @classmethod
    def from_dict(Class, trans, d, secure=True):
        """Build a module from its dictionary representation ``d``.

        Reads ``tool_id``, ``tool_version``, ``tool_state``, ``tool_errors``,
        ``post_job_actions`` and ``workflow_outputs`` from ``d``.  When the
        tool that was found has a different version than the one recorded in
        ``d``, a message is logged and appended to ``version_changes``.
        """
        tool_id = d['tool_id']
        # Only stringify a version that is actually present: str(None) would
        # hand the literal string 'None' to the toolbox lookup.
        requested_version = d.get('tool_version', None)
        if requested_version is not None:
            tool_version = str(requested_version)
        else:
            tool_version = None
        module = Class(trans, tool_id, tool_version=tool_version)
        module.state = galaxy.tools.DefaultToolState()
        if module.tool is not None:
            indicated_version = d.get('tool_version', 'Unspecified')
            actual_version = module.get_tool_version()
            if indicated_version != actual_version:
                # The version actually in use comes first, the one recorded
                # in the workflow second (same order as from_workflow_step).
                message = "%s: using version '%s' instead of version '%s' indicated in this workflow." % (tool_id, actual_version, indicated_version)
                log.debug(message)
                module.version_changes.append(message)
            if d["tool_state"]:
                module.state.decode(d["tool_state"], module.tool, module.trans.app, secure=secure)
        module.errors = d.get("tool_errors", None)
        module.post_job_actions = d.get("post_job_actions", {})
        module.workflow_outputs = d.get("workflow_outputs", [])
        return module

    @classmethod
    def from_workflow_step(Class, trans, step):
        """Build a module from a persisted workflow ``step``.

        Returns ``None`` when there is no toolbox or the toolbox cannot
        resolve the step's tool id.
        """
        toolbox = trans.app.toolbox
        tool_id = step.tool_id
        if toolbox:
            # See if we have access to a different version of the tool.
            # TODO: If workflows are ever enhanced to use tool version
            # in addition to tool id, enhance the selection process here
            # to retrieve the correct version of the tool.
            tool_id = toolbox.get_tool_id(tool_id)
        if not (toolbox and tool_id):
            return None
        if step.config:
            # This step has its state saved in the config field due to the
            # tool being previously unavailable.
            return module_factory.from_dict(trans, loads(step.config), secure=False)
        tool_version = step.tool_version
        module = Class(trans, tool_id, tool_version=tool_version)
        if step.tool_version and (step.tool_version != module.tool.version):
            message = "%s: using version '%s' instead of version '%s' indicated in this workflow." % (tool_id, module.tool.version, step.tool_version)
            log.debug(message)
            module.version_changes.append(message)
        module.recover_state(step.tool_inputs)
        module.errors = step.tool_errors
        module.workflow_outputs = step.workflow_outputs
        # Index the step's post job actions by their action type.
        pjadict = {}
        for pja in step.post_job_actions:
            pjadict[pja.action_type] = pja
        module.post_job_actions = pjadict
        return module

    def recover_state(self, state, **kwds):
        """ Recover module configuration state property (a `DefaultToolState`
        object) using the tool's `params_from_strings` method.

        ``ignore_errors`` may be passed as a keyword; it defaults to True.
        """
        app = self.trans.app
        self.state = galaxy.tools.DefaultToolState()
        params_from_kwds = dict(
            ignore_errors=kwds.get("ignore_errors", True)
        )
        self.state.inputs = self.tool.params_from_strings(state, app, **params_from_kwds)

    def recover_runtime_state(self, runtime_state):
        """ Take secure runtime state from persisted invocation and convert it
        into a DefaultToolState object for use during workflow invocation.

        Step-level runtime metadata stored under
        ``RUNTIME_STEP_META_STATE_KEY`` is restored onto this module as well.
        """
        state = galaxy.tools.DefaultToolState()
        app = self.trans.app
        state.decode(runtime_state, self.tool, app, secure=False)
        state_dict = loads(runtime_state)
        if RUNTIME_STEP_META_STATE_KEY in state_dict:
            self.__restore_step_meta_runtime_state(loads(state_dict[RUNTIME_STEP_META_STATE_KEY]))
        return state

    def normalize_runtime_state(self, runtime_state):
        """Encode ``runtime_state`` against this module's tool (non-secure)."""
        return runtime_state.encode(self.tool, self.trans.app, secure=False)

    def save_to_step(self, step):
        """Copy this module's type, tool id/version, inputs and errors onto
        ``step`` and add one post job action per entry to the session.
        """
        step.type = self.type
        step.tool_id = self.tool_id
        if self.tool:
            step.tool_version = self.get_tool_version()
            step.tool_inputs = self.tool.params_to_strings(self.state.inputs, self.trans.app)
        else:
            step.tool_version = None
            step.tool_inputs = None
        step.tool_errors = self.errors
        for k, v in self.post_job_actions.iteritems():
            pja = self.__to_pja(k, v, step)
            self.trans.sa_session.add(pja)

    def __to_pja(self, key, value, step):
        """Build a ``PostJobAction`` for ``step`` from the mapping ``value``.

        ``output_name`` and ``action_arguments`` are optional in ``value``;
        ``key`` is not used.
        """
        output_name = value['output_name'] if 'output_name' in value else None
        action_arguments = value['action_arguments'] if 'action_arguments' in value else None
        return PostJobAction(value['action_type'], step, output_name, action_arguments)

    def get_name(self):
        """Return the tool's name, or ``'unavailable'`` without a tool."""
        if self.tool:
            return self.tool.name
        return 'unavailable'

    def get_tool_id(self):
        """Return the tool id this module was created with."""
        return self.tool_id

    def get_tool_version(self):
        """Return the version of the tool that was actually found."""
        return self.tool.version

    def get_state(self, secure=True):
        """Encode ``self.state`` against this module's tool."""
        return self.state.encode(self.tool, self.trans.app, secure=secure)

    def get_errors(self):
        """Return the current errors (``None`` when there are none)."""
        return self.errors

    def get_tooltip(self, static_path=''):
        """Render the tool's help, or return ``None`` if it has none."""
        if self.tool.help:
            return self.tool.help.render(host_url=web.url_for('/'), static_path=static_path)
        else:
            return None

    def get_data_inputs(self):
        """Return one descriptive dict per dataset / dataset collection
        parameter found while visiting the tool's inputs.
        """
        data_inputs = []

        def callback(input, value, prefixed_name, prefixed_label):
            if isinstance(input, DataToolParameter):
                data_inputs.append(dict(
                    name=prefixed_name,
                    label=prefixed_label,
                    multiple=input.multiple,
                    extensions=input.extensions,
                    input_type="dataset",
                ))
            if isinstance(input, DataCollectionToolParameter):
                data_inputs.append(dict(
                    name=prefixed_name,
                    label=prefixed_label,
                    multiple=input.multiple,
                    input_type="dataset_collection",
                    collection_type=input.collection_type,
                    extensions=input.extensions,
                ))

        visit_input_values(self.tool.inputs, self.state.inputs, callback)
        return data_inputs

    def get_data_outputs(self):
        """Return one dict per tool output with its name, the list of
        possible extensions and, for collection outputs, collection details.
        """
        data_outputs = []
        data_inputs = None
        for name, tool_output in self.tool.outputs.iteritems():
            extra_kwds = {}
            if tool_output.collection:
                extra_kwds["collection"] = True
                extra_kwds["collection_type"] = tool_output.structure.collection_type
                formats = ['input']  # TODO: fix
            elif tool_output.format_source is not None:
                formats = ['input']  # default to special name "input" which remove restrictions on connections
                if data_inputs is None:
                    data_inputs = self.get_data_inputs()
                # find the input parameter referenced by format_source
                for di in data_inputs:
                    # input names come prefixed with conditional and repeat names separated by '|'
                    # remove prefixes when comparing with format_source
                    if di['name'] is not None and di['name'].split('|')[-1] == tool_output.format_source:
                        # Copy: change_format entries are appended below and
                        # must not leak into the input's own extensions list.
                        formats = list(di['extensions'])
            else:
                formats = [tool_output.format]
            for change_elem in tool_output.change_format:
                for when_elem in change_elem.findall('when'):
                    fmt = when_elem.get('format', None)
                    if fmt and fmt not in formats:
                        formats.append(fmt)
            data_outputs.append(dict(name=name, extensions=formats, **extra_kwds))
        return data_outputs

    def get_runtime_input_dicts(self, step_annotation):
        """Describe every input whose value is a ``RuntimeValue``, either
        directly or as a value of a dict-typed input.

        ``step_annotation`` is accepted but not used.
        """
        # Step is a tool and may have runtime inputs.
        input_dicts = []
        for name, val in self.state.inputs.items():
            input_type = type(val)
            if input_type == RuntimeValue:
                input_dicts.append({"name": name, "description": "runtime parameter for tool %s" % self.get_name()})
            elif input_type == dict:
                # Input type is described by a dict, e.g. indexed parameters.
                for partval in val.values():
                    if type(partval) == RuntimeValue:
                        input_dicts.append({"name": name, "description": "runtime parameter for tool %s" % self.get_name()})
        return input_dicts

    def get_post_job_actions(self, incoming=None):
        """Return the stored post job actions, or those derived from
        ``incoming`` by ``ActionBox.handle_incoming`` when it is given.
        """
        if incoming is None:
            return self.post_job_actions
        else:
            return ActionBox.handle_incoming(incoming)

    def get_config_form(self):
        """Render the workflow editor tool form for this module."""
        self.add_dummy_datasets()
        return self.trans.fill_template("workflow/editor_tool_form.mako",
                                        module=self, tool=self.tool,
                                        values=self.state.inputs,
                                        errors=(self.errors or {}))

    def encode_runtime_state(self, trans, state):
        """Encode ``state`` against this module's tool.

        The ``trans`` argument is not used; ``self.trans.app`` is.
        """
        encoded = state.encode(self.tool, self.trans.app)
        return encoded

    def update_state(self, incoming):
        """Update post job actions, tool state and errors from ``incoming``."""
        # Build a callback that handles setting an input to be required at
        # runtime. We still process all other parameters the user might have
        # set. We also need to make sure all datasets have a dummy value
        # for dependencies to see

        self.post_job_actions = ActionBox.handle_incoming(incoming)

        make_runtime_key = incoming.get('make_runtime', None)
        make_buildtime_key = incoming.get('make_buildtime', None)

        def item_callback(trans, key, input, value, error, old_value, context):
            # Dummy value for Data parameters
            if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                return DummyDataset(), None
            # Deal with build/runtime (does not apply to Data parameters)
            if key == make_buildtime_key:
                return input.get_initial_value(trans, context), None
            elif isinstance(old_value, RuntimeValue):
                return old_value, None
            elif key == make_runtime_key:
                return RuntimeValue(), None
            elif isinstance(value, basestring) and re.search(r"\$\{.+?\}", value):
                # Workflow Parameter Replacement, so suppress error from going to the workflow level.
                # (value is searched as-is: coercing it with str() fails on
                # non-ASCII unicode input.)
                return value, None
            else:
                return value, error

        # Update state using incoming values
        errors = self.tool.update_state(self.trans, self.tool.inputs, self.state.inputs, incoming, item_callback=item_callback)
        self.errors = errors or None

    def check_and_update_state(self):
        """Delegate to the tool's ``check_and_update_param_values`` with
        workflow parameters allowed.
        """
        inputs = self.state.inputs
        return self.tool.check_and_update_param_values(inputs, self.trans, allow_workflow_parameters=True)

    def compute_runtime_state(self, trans, step_updates=None, source="html"):
        """Apply ``step_updates`` to the module state.

        Returns a ``(state, step_errors)`` tuple; ``step_errors`` is ``None``
        when no updates were supplied.
        """
        # Warning: This method destructively modifies existing step state.
        step_errors = None
        state = self.state
        self.runtime_post_job_actions = {}
        if step_updates:
            # Get the tool
            tool = self.tool
            # Get old errors
            old_errors = state.inputs.pop("__errors__", {})
            # Update the state
            self.runtime_post_job_actions = step_updates.get(RUNTIME_POST_JOB_ACTIONS_KEY, {})
            step_errors = tool.update_state(trans, tool.inputs, state.inputs, step_updates,
                                            update_only=True, old_errors=old_errors, source=source)
            step_metadata_runtime_state = self.__step_meta_runtime_state()
            if step_metadata_runtime_state:
                state.inputs[RUNTIME_STEP_META_STATE_KEY] = step_metadata_runtime_state
        return state, step_errors

    def __step_meta_runtime_state(self):
        """ Build a dictionary a of meta-step runtime state (state about how
        the workflow step - not the tool state) to be serialized with the Tool
        state.
        """
        return {RUNTIME_POST_JOB_ACTIONS_KEY: self.runtime_post_job_actions}

    def __restore_step_meta_runtime_state(self, step_runtime_state):
        """Restore runtime post job actions from ``step_runtime_state``."""
        if RUNTIME_POST_JOB_ACTIONS_KEY in step_runtime_state:
            self.runtime_post_job_actions = step_runtime_state[RUNTIME_POST_JOB_ACTIONS_KEY]

    def execute(self, trans, progress, invocation, step):
        """Execute the tool for ``step`` once per parameter combination.

        Step outputs are registered on ``progress`` and post job actions are
        handled for every successful job.  Returns the successful jobs.

        Raises ``exceptions.MessageException`` when visiting the tool inputs
        fails with a ``KeyError``.
        """
        tool = trans.app.toolbox.get_tool(step.tool_id, tool_version=step.tool_version)
        tool_state = step.state
        # Not strictly needed - but keep Tool state clean by stripping runtime
        # metadata parameters from it.
        if RUNTIME_STEP_META_STATE_KEY in tool_state.inputs:
            del tool_state.inputs[RUNTIME_STEP_META_STATE_KEY]
        collections_to_match = self._find_collections_to_match(tool, progress, step)
        # Have implicit collections...
        if collections_to_match.has_collections():
            collection_info = self.trans.app.dataset_collections_service.match_collections(collections_to_match)
        else:
            collection_info = None

        param_combinations = []
        if collection_info:
            iteration_elements_iter = collection_info.slice_collections()
        else:
            iteration_elements_iter = [None]

        for iteration_elements in iteration_elements_iter:
            execution_state = tool_state.copy()
            # TODO: Move next step into copy()
            execution_state.inputs = make_dict_copy(execution_state.inputs)

            # Connect up
            def callback(input, value, prefixed_name, prefixed_label):
                replacement = None
                if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                    if iteration_elements and prefixed_name in iteration_elements:
                        if isinstance(input, DataToolParameter):
                            # Pull out dataset instance from element.
                            replacement = iteration_elements[prefixed_name].dataset_instance
                        else:
                            # If collection - just use element model object.
                            replacement = iteration_elements[prefixed_name]
                    else:
                        replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
                return replacement

            try:
                # Replace DummyDatasets with historydatasetassociations
                visit_input_values(tool.inputs, execution_state.inputs, callback)
            except KeyError as k:
                # BaseException.message is deprecated (PEP 352); it only ever
                # mirrored a single positional argument, so read that instead.
                missing_key = k.args[0] if len(k.args) == 1 else ""
                message_template = "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow."
                message = message_template % (tool.name, missing_key)
                raise exceptions.MessageException(message)
            param_combinations.append(execution_state.inputs)

        execution_tracker = execute(
            trans=self.trans,
            tool=tool,
            param_combinations=param_combinations,
            history=invocation.history,
            collection_info=collection_info,
            workflow_invocation_uuid=invocation.uuid.hex
        )
        if collection_info:
            step_outputs = dict(execution_tracker.implicit_collections)
        else:
            step_outputs = dict(execution_tracker.output_datasets)
            step_outputs.update(execution_tracker.output_collections)
        progress.set_step_outputs(step, step_outputs)
        jobs = execution_tracker.successful_jobs
        for job in jobs:
            self._handle_post_job_actions(step, job, invocation.replacement_dict)
        return jobs
class WorkflowInvoker(object):
    """Invoke the steps of a workflow, collecting outputs per step id.

    Configuration (target history, inputs, replacement dict, parameter map)
    is read from the ``workflow_run_config`` object passed to the constructor.
    """

    def __init__(self, trans, workflow, workflow_run_config):
        """Store ``trans``/``workflow`` and copy settings off the run config."""
        self.trans = trans
        self.workflow = workflow
        self.target_history = workflow_run_config.target_history
        self.replacement_dict = workflow_run_config.replacement_dict
        self.copy_inputs_to_history = workflow_run_config.copy_inputs_to_history
        self.inputs = workflow_run_config.inputs
        self.inputs_by = workflow_run_config.inputs_by
        self.inputs_by_step_id = {}
        self.param_map = workflow_run_config.param_map
        self.outputs = odict()

    def invoke(self):
        """Invoke every workflow step in order and return ``self.outputs``.

        A ``WorkflowInvocation`` is created and added to the session (not
        flushed); one ``WorkflowInvocationStep`` is created per job.
        """
        workflow_invocation = model.WorkflowInvocation()
        workflow_invocation.workflow = self.workflow

        # Web controller will populate state on each step before calling
        # invoke but not API controller. More work should be done to further
        # harmonize these methods going forward if possible - if possible
        # moving more web controller logic here.
        state_populated = not self.workflow.steps or hasattr(self.workflow.steps[0], "state")
        if not state_populated:
            self._populate_state()

        for step in self.workflow.steps:
            jobs = self._invoke_step(step)
            for job in util.listify(jobs):
                # Record invocation
                workflow_invocation_step = model.WorkflowInvocationStep()
                workflow_invocation_step.workflow_invocation = workflow_invocation
                workflow_invocation_step.workflow_step = step
                workflow_invocation_step.job = job

        # All jobs ran successfully, so we can save now
        self.trans.sa_session.add(workflow_invocation)

        # Not flushing in here, because web controller may create multiple
        # invocations.
        return self.outputs

    def _invoke_step(self, step):
        """Dispatch ``step`` to the tool or the input execution path.

        Steps whose type is ``'tool'`` or ``None`` are run as tool steps.
        """
        if step.type == 'tool' or step.type is None:
            jobs = self._execute_tool_step(step)
        else:
            jobs = self._execute_input_step(step)
        return jobs

    def _execute_tool_step(self, step):
        """Execute the tool behind ``step`` once per parameter combination.

        Outputs are stored in ``self.outputs`` under ``step.id`` and post job
        actions are handled for each successful job.  Returns those jobs.

        Raises ``exceptions.MessageException`` when visiting the tool inputs
        fails with a ``KeyError``.
        """
        trans = self.trans
        outputs = self.outputs

        tool = trans.app.toolbox.get_tool(step.tool_id)
        tool_state = step.state

        collections_to_match = self._find_collections_to_match(tool, step)
        # Have implicit collections...
        if collections_to_match.has_collections():
            collection_info = self.trans.app.dataset_collections_service.match_collections(collections_to_match)
        else:
            collection_info = None

        param_combinations = []
        if collection_info:
            iteration_elements_iter = collection_info.slice_collections()
        else:
            iteration_elements_iter = [None]

        for iteration_elements in iteration_elements_iter:
            execution_state = tool_state.copy()
            # TODO: Move next step into copy()
            execution_state.inputs = make_dict_copy(execution_state.inputs)

            # Connect up
            def callback(input, value, prefixed_name, prefixed_label):
                replacement = None
                if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                    if iteration_elements and prefixed_name in iteration_elements:
                        if isinstance(input, DataToolParameter):
                            # Pull out dataset instance from element.
                            replacement = iteration_elements[prefixed_name].dataset_instance
                        else:
                            # If collection - just use element model object.
                            replacement = iteration_elements[prefixed_name]
                    else:
                        replacement = self._replacement_for_input(input, prefixed_name, step)
                return replacement

            try:
                # Replace DummyDatasets with historydatasetassociations
                visit_input_values(tool.inputs, execution_state.inputs, callback)
            except KeyError as k:
                # BaseException.message is deprecated (PEP 352); it only ever
                # mirrored a single positional argument, so read that instead.
                missing_key = k.args[0] if len(k.args) == 1 else ""
                message_template = "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow."
                message = message_template % (tool.name, missing_key)
                raise exceptions.MessageException(message)
            param_combinations.append(execution_state.inputs)

        execution_tracker = execute(
            trans=self.trans,
            tool=tool,
            param_combinations=param_combinations,
            history=self.target_history,
            collection_info=collection_info,
        )
        if collection_info:
            outputs[step.id] = dict(execution_tracker.created_collections)
        else:
            outputs[step.id] = dict(execution_tracker.output_datasets)

        jobs = execution_tracker.successful_jobs
        for job in jobs:
            self._handle_post_job_actions(step, job)
        return jobs
def execute(self, trans, progress, invocation, step):
    """Execute the tool for ``step`` once per parameter combination.

    Step outputs are registered on ``progress`` and post job actions are
    handled for every successful job.  Returns the successful jobs.

    Raises:
        exceptions.MessageException: visiting the tool inputs failed with a
            ``KeyError``.
        DelayedWorkflowEvaluation: tool execution raised
            ``ToolInputsNotReadyException``.
        Exception: the execution tracker reported execution errors.
    """
    tool = trans.app.toolbox.get_tool(step.tool_id, tool_version=step.tool_version)
    tool_state = step.state
    # Not strictly needed - but keep Tool state clean by stripping runtime
    # metadata parameters from it.
    if RUNTIME_STEP_META_STATE_KEY in tool_state.inputs:
        del tool_state.inputs[RUNTIME_STEP_META_STATE_KEY]
    collections_to_match = self._find_collections_to_match(tool, progress, step)
    # Have implicit collections...
    if collections_to_match.has_collections():
        collection_info = self.trans.app.dataset_collections_service.match_collections(collections_to_match)
    else:
        collection_info = None

    param_combinations = []
    if collection_info:
        iteration_elements_iter = collection_info.slice_collections()
    else:
        iteration_elements_iter = [None]

    for iteration_elements in iteration_elements_iter:
        execution_state = tool_state.copy()
        # TODO: Move next step into copy()
        execution_state.inputs = make_dict_copy(execution_state.inputs)

        # Track which connected inputs actually received a replacement so
        # unused connections can be reported below.
        expected_replacement_keys = set(step.input_connections_by_name.keys())
        found_replacement_keys = set()

        # Connect up
        def callback(input, prefixed_name, **kwargs):
            replacement = NO_REPLACEMENT
            if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                if iteration_elements and prefixed_name in iteration_elements:
                    if isinstance(input, DataToolParameter):
                        # Pull out dataset instance from element.
                        replacement = iteration_elements[prefixed_name].dataset_instance
                        if hasattr(iteration_elements[prefixed_name], u'element_identifier') and iteration_elements[prefixed_name].element_identifier:
                            replacement.element_identifier = iteration_elements[prefixed_name].element_identifier
                    else:
                        # If collection - just use element model object.
                        replacement = iteration_elements[prefixed_name]
                else:
                    replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
            else:
                replacement = progress.replacement_for_tool_input(step, input, prefixed_name)

            if replacement is not NO_REPLACEMENT:
                found_replacement_keys.add(prefixed_name)

            return replacement

        try:
            # Replace DummyDatasets with historydatasetassociations
            visit_input_values(tool.inputs, execution_state.inputs, callback, no_replacement_value=NO_REPLACEMENT)
        except KeyError as k:
            # BaseException.message is deprecated (PEP 352) and missing on
            # Python 3, where reading it here would raise AttributeError and
            # hide the real problem.  It only ever mirrored a single
            # positional argument, so read that instead.
            missing_key = k.args[0] if len(k.args) == 1 else ""
            message_template = "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow."
            message = message_template % (tool.name, missing_key)
            raise exceptions.MessageException(message)

        unmatched_input_connections = expected_replacement_keys - found_replacement_keys
        if unmatched_input_connections:
            log.warning("Failed to use input connections for inputs [%s]", unmatched_input_connections)

        param_combinations.append(execution_state.inputs)

    try:
        execution_tracker = execute(
            trans=self.trans,
            tool=tool,
            param_combinations=param_combinations,
            history=invocation.history,
            collection_info=collection_info,
            workflow_invocation_uuid=invocation.uuid.hex
        )
    except ToolInputsNotReadyException:
        delayed_why = "tool [%s] inputs are not ready, this special tool requires inputs to be ready" % tool.id
        raise DelayedWorkflowEvaluation(why=delayed_why)

    if collection_info:
        step_outputs = dict(execution_tracker.implicit_collections)
    else:
        step_outputs = dict(execution_tracker.output_datasets)
        step_outputs.update(execution_tracker.output_collections)
    progress.set_step_outputs(step, step_outputs)
    jobs = execution_tracker.successful_jobs
    for job in jobs:
        self._handle_post_job_actions(step, job, invocation.replacement_dict)
    if execution_tracker.execution_errors:
        failed_count = len(execution_tracker.execution_errors)
        success_count = len(execution_tracker.successful_jobs)
        all_count = failed_count + success_count
        message = "Failed to create %d out of %s job(s) for workflow step." % (failed_count, all_count)
        raise Exception(message)
    return jobs
def execute(self, trans, progress, invocation, step):
    """Execute the tool for ``step`` once per parameter combination.

    Step outputs are registered on ``progress`` and post job actions are
    handled for every successful job.  Returns the successful jobs.

    Raises:
        exceptions.MessageException: visiting the tool inputs failed with a
            ``KeyError``.
        DelayedWorkflowEvaluation: tool execution raised
            ``ToolInputsNotReadyException``.
        Exception: the execution tracker reported execution errors.
    """
    tool = trans.app.toolbox.get_tool(step.tool_id, tool_version=step.tool_version)
    tool_state = step.state
    # Not strictly needed - but keep Tool state clean by stripping runtime
    # metadata parameters from it.
    if RUNTIME_STEP_META_STATE_KEY in tool_state.inputs:
        del tool_state.inputs[RUNTIME_STEP_META_STATE_KEY]
    collections_to_match = self._find_collections_to_match(tool, progress, step)
    # Have implicit collections...
    if collections_to_match.has_collections():
        collection_info = self.trans.app.dataset_collections_service.match_collections(collections_to_match)
    else:
        collection_info = None

    param_combinations = []
    if collection_info:
        iteration_elements_iter = collection_info.slice_collections()
    else:
        iteration_elements_iter = [None]

    for iteration_elements in iteration_elements_iter:
        execution_state = tool_state.copy()
        # TODO: Move next step into copy()
        execution_state.inputs = make_dict_copy(execution_state.inputs)

        # Connect up
        def callback(input, prefixed_name, **kwargs):
            replacement = NO_REPLACEMENT
            if isinstance(input, DataToolParameter) or isinstance(input, DataCollectionToolParameter):
                if iteration_elements and prefixed_name in iteration_elements:
                    if isinstance(input, DataToolParameter):
                        # Pull out dataset instance from element.
                        replacement = iteration_elements[prefixed_name].dataset_instance
                        if hasattr(iteration_elements[prefixed_name], u'element_identifier') and iteration_elements[prefixed_name].element_identifier:
                            replacement.element_identifier = iteration_elements[prefixed_name].element_identifier
                    else:
                        # If collection - just use element model object.
                        replacement = iteration_elements[prefixed_name]
                else:
                    replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
            else:
                replacement = progress.replacement_for_tool_input(step, input, prefixed_name)
            return replacement

        try:
            # Replace DummyDatasets with historydatasetassociations
            visit_input_values(tool.inputs, execution_state.inputs, callback, no_replacement_value=NO_REPLACEMENT)
        except KeyError as k:
            # BaseException.message is deprecated (PEP 352) and missing on
            # Python 3, where reading it here would raise AttributeError and
            # hide the real problem.  It only ever mirrored a single
            # positional argument, so read that instead.
            missing_key = k.args[0] if len(k.args) == 1 else ""
            message_template = "Error due to input mapping of '%s' in '%s'. A common cause of this is conditional outputs that cannot be determined until runtime, please review your workflow."
            message = message_template % (tool.name, missing_key)
            raise exceptions.MessageException(message)
        param_combinations.append(execution_state.inputs)

    try:
        execution_tracker = execute(
            trans=self.trans,
            tool=tool,
            param_combinations=param_combinations,
            history=invocation.history,
            collection_info=collection_info,
            workflow_invocation_uuid=invocation.uuid.hex
        )
    except ToolInputsNotReadyException:
        delayed_why = "tool [%s] inputs are not ready, this special tool requires inputs to be ready" % tool.id
        raise DelayedWorkflowEvaluation(why=delayed_why)

    if collection_info:
        step_outputs = dict(execution_tracker.implicit_collections)
    else:
        step_outputs = dict(execution_tracker.output_datasets)
        step_outputs.update(execution_tracker.output_collections)
    progress.set_step_outputs(step, step_outputs)
    jobs = execution_tracker.successful_jobs
    for job in jobs:
        self._handle_post_job_actions(step, job, invocation.replacement_dict)
    if execution_tracker.execution_errors:
        failed_count = len(execution_tracker.execution_errors)
        success_count = len(execution_tracker.successful_jobs)
        all_count = failed_count + success_count
        message = "Failed to create %d out of %s job(s) for workflow step." % (failed_count, all_count)
        raise Exception(message)
    return jobs