def check_inputs_ready(self, tool, trans, incoming, history, execution_cache=None, collection_info=None):
    if execution_cache is None:
        execution_cache = ToolExecutionCache(trans)
    current_user_roles = execution_cache.current_user_roles
    history, inp_data, inp_dataset_collections, _, _ = self._collect_inputs(
        tool, trans, incoming, history, current_user_roles, collection_info
    )
    tool.check_inputs_ready(inp_data, inp_dataset_collections)
def execute(self, tool, trans, incoming=None, set_output_hid=False, overwrite=True, history=None,
            job_params=None, execution_cache=None, collection_info=None, job_callback=None, **kwargs):
    incoming = incoming or {}
    trans.check_user_activation()
    if execution_cache is None:
        execution_cache = ToolExecutionCache(trans)
    current_user_roles = execution_cache.current_user_roles
    history, inp_data, inp_dataset_collections, preserved_tags, all_permissions = self._collect_inputs(
        tool, trans, incoming, history, current_user_roles, collection_info
    )

    # Build name for output datasets based on tool name and input names
    on_text = self._get_on_text(inp_data)

    # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
    wrapped_params = self._wrapped_params(trans, tool, incoming)

    out_data = {}
    input_collections = {k: v[0][0] for k, v in inp_dataset_collections.items()}
    output_collections = OutputCollections(
        trans,
        history,
        tool=tool,
        tool_action=self,
        input_collections=input_collections,
        dataset_collection_elements=kwargs.get("dataset_collection_elements", None),
        on_text=on_text,
        incoming=incoming,
        params=wrapped_params.params,
        job_params=job_params,
        tags=preserved_tags,
    )

    #
    # Create job.
    #
    job, galaxy_session = self._new_job_for_session(trans, tool, history)
    self._produce_outputs(trans, tool, out_data, output_collections, incoming=incoming, history=history, tags=preserved_tags)
    self._record_inputs(trans, tool, job, incoming, inp_data, inp_dataset_collections)
    self._record_outputs(job, out_data, output_collections)
    if job_callback:
        job_callback(job)
    job.state = job.states.OK
    trans.sa_session.add(job)

    # Queue the job for execution
    # trans.app.job_manager.job_queue.put( job.id, tool.id )
    # trans.log_event( "Added database job action to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
    log.info("Calling produce_outputs, tool is %s" % tool)
    return job, out_data, history
def execute( self, tool, trans, incoming={}, set_output_hid=False, overwrite=True, history=None, job_params=None, mapping_over_collection=False, execution_cache=None, **kwargs ):
    if execution_cache is None:
        execution_cache = ToolExecutionCache(trans)
    current_user_roles = execution_cache.current_user_roles
    history, inp_data, inp_dataset_collections = self._collect_inputs(tool, trans, incoming, history, current_user_roles)

    # Build name for output datasets based on tool name and input names
    on_text = self._get_on_text( inp_data )

    # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
    wrapped_params = self._wrapped_params( trans, tool, incoming )

    out_data = odict()
    input_collections = dict( [ (k, v[0][0]) for k, v in inp_dataset_collections.iteritems() ] )
    output_collections = OutputCollections(
        trans,
        history,
        tool=tool,
        tool_action=self,
        input_collections=input_collections,
        mapping_over_collection=mapping_over_collection,
        on_text=on_text,
        incoming=incoming,
        params=wrapped_params.params,
        job_params=job_params,
    )

    #
    # Create job.
    #
    job, galaxy_session = self._new_job_for_session( trans, tool, history )
    self._produce_outputs( trans, tool, out_data, output_collections, incoming=incoming, history=history )
    self._record_inputs( trans, tool, job, incoming, inp_data, inp_dataset_collections, current_user_roles )
    self._record_outputs( job, out_data, output_collections )
    job.state = job.states.OK
    trans.sa_session.add( job )
    trans.sa_session.flush()  # ensure job.id is available

    # Queue the job for execution
    # trans.app.job_queue.put( job.id, tool.id )
    # trans.log_event( "Added database job action to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
    log.info("Calling produce_outputs, tool is %s" % tool)
    return job, out_data
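# The action methods above share one ToolExecutionCache per request so that
# per-user lookups such as current_user_roles are computed once and reused for
# every job in the batch. Below is a minimal, illustrative sketch of such a
# request-scoped cache; it is an assumption about the shape of ToolExecutionCache,
# not its actual implementation, and trans.get_current_user_roles() is assumed.
from functools import cached_property


class RequestScopedCacheSketch:
    """Hypothetical stand-in for ToolExecutionCache: memoize per-request values."""

    def __init__(self, trans):
        self.trans = trans

    @cached_property
    def current_user_roles(self):
        # Computed on first access, then reused for every job in the batch.
        return self.trans.get_current_user_roles()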
def execute(trans, tool, mapping_params, history, rerun_remap_job_id=None, collection_info=None, workflow_invocation_uuid=None, invocation_step=None, max_num_jobs=None, job_callback=None, completed_jobs=None):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    if max_num_jobs:
        assert invocation_step is not None
    if rerun_remap_job_id:
        assert invocation_step is None

    all_jobs_timer = ExecutionTimer()
    if invocation_step is None:
        execution_tracker = ToolExecutionTracker(trans, tool, mapping_params, collection_info)
    else:
        execution_tracker = WorkflowStepExecutionTracker(trans, tool, mapping_params, collection_info, invocation_step, job_callback=job_callback)
    app = trans.app
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(execution_slice, completed_job):
        job_timer = ExecutionTimer()
        params = execution_slice.param_combination
        if workflow_invocation_uuid:
            params['__workflow_invocation_uuid__'] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_invocation_uuid__']
        job, result = tool.handle_single_execution(trans, rerun_remap_job_id, execution_slice, history, execution_cache, completed_job)
        if job:
            message = EXECUTION_SUCCESS_MESSAGE % (tool.id, job.id, job_timer)
            log.debug(message)
            execution_tracker.record_success(execution_slice, job, result)
        else:
            execution_tracker.record_error(result)

    tool_action = tool.tool_action
    if hasattr(tool_action, "check_inputs_ready"):
        for params in execution_tracker.param_combinations:
            # This will throw an exception if the tool is not ready.
            tool_action.check_inputs_ready(tool, trans, params, history)

    execution_tracker.ensure_implicit_collections_populated(history, mapping_params.param_template)
    config = app.config
    burst_at = getattr(config, 'tool_submission_burst_at', 10)
    burst_threads = getattr(config, 'tool_submission_burst_threads', 1)

    job_count = len(execution_tracker.param_combinations)

    jobs_executed = 0
    has_remaining_jobs = False

    if job_count < burst_at or burst_threads < 2:
        for i, execution_slice in enumerate(execution_tracker.new_execution_slices()):
            if max_num_jobs and jobs_executed >= max_num_jobs:
                has_remaining_jobs = True
                break
            else:
                execute_single_job(execution_slice, completed_jobs[i])
                jobs_executed += 1
    else:
        # TODO: re-record success...
        q = Queue()

        def worker():
            while True:
                # Each queued item is an (execution_slice, completed_job) pair.
                execution_slice, completed_job = q.get()
                execute_single_job(execution_slice, completed_job)
                q.task_done()

        for i in range(burst_threads):
            t = Thread(target=worker)
            t.daemon = True
            t.start()

        for i, execution_slice in enumerate(execution_tracker.new_execution_slices()):
            if max_num_jobs and jobs_executed >= max_num_jobs:
                has_remaining_jobs = True
                break
            else:
                q.put((execution_slice, completed_jobs[i]))
                jobs_executed += 1

        q.join()

    if has_remaining_jobs:
        raise PartialJobExecution(execution_tracker)
    else:
        execution_tracker.finalize_dataset_collections(trans)

    log.debug("Executed %d job(s) for tool %s request: %s" % (job_count, tool.id, all_jobs_timer))
    return execution_tracker
def execute(trans, tool, param_combinations, history, rerun_remap_job_id=None, collection_info=None, workflow_invocation_uuid=None):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    all_jobs_timer = ExecutionTimer()
    execution_tracker = ToolExecutionTracker(tool, param_combinations, collection_info)
    app = trans.app
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(params):
        job_timer = ExecutionTimer()
        if workflow_invocation_uuid:
            params['__workflow_invocation_uuid__'] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_invocation_uuid__']
        job, result = tool.handle_single_execution(trans, rerun_remap_job_id, params, history, collection_info, execution_cache)
        if job:
            message = EXECUTION_SUCCESS_MESSAGE % (tool.id, job.id, job_timer)
            log.debug(message)
            execution_tracker.record_success(job, result)
        else:
            execution_tracker.record_error(result)

    config = app.config
    burst_at = getattr(config, 'tool_submission_burst_at', 10)
    burst_threads = getattr(config, 'tool_submission_burst_threads', 1)

    tool_action = tool.action
    if hasattr(tool_action, "check_inputs_ready"):
        for params in execution_tracker.param_combinations:
            # This will throw an exception if the tool is not ready.
            tool_action.check_inputs_ready(tool, trans, params, history)

    job_count = len(execution_tracker.param_combinations)
    if job_count < burst_at or burst_threads < 2:
        for params in execution_tracker.param_combinations:
            execute_single_job(params)
    else:
        q = Queue()

        def worker():
            while True:
                params = q.get()
                execute_single_job(params)
                q.task_done()

        for i in range(burst_threads):
            t = Thread(target=worker)
            t.daemon = True
            t.start()

        for params in execution_tracker.param_combinations:
            q.put(params)

        q.join()

    log.debug("Executed %d job(s) for tool %s request: %s" % (job_count, tool.id, all_jobs_timer))
    if collection_info:
        history = history or tool.get_default_history_by_trans(trans)
        if len(param_combinations) == 0:
            template = "Attempting to map over an empty collection, this is not yet implemented. collection_info is [%s]"
            message = template % collection_info
            log.warn(message)
            raise Exception(message)
        params = param_combinations[0]
        execution_tracker.create_output_collections(trans, history, params)

    return execution_tracker
def execute( trans, tool, param_combinations, history, rerun_remap_job_id=None, collection_info=None, workflow_invocation_uuid=None ):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    all_jobs_timer = ExecutionTimer()
    execution_tracker = ToolExecutionTracker( tool, param_combinations, collection_info )
    app = trans.app
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(params):
        job_timer = ExecutionTimer()
        if workflow_invocation_uuid:
            params[ '__workflow_invocation_uuid__' ] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params[ '__workflow_invocation_uuid__' ]
        # If this is a workflow, everything has now been connected so we should validate
        # the state we are about to execute one last time. Consider whether tool executions
        # should run this as well.
        if workflow_invocation_uuid:
            messages = tool.check_and_update_param_values( params, trans, update_values=False, allow_workflow_parameters=False )
            if messages:
                execution_tracker.record_error( messages )
                return
        job, result = tool.handle_single_execution( trans, rerun_remap_job_id, params, history, collection_info, execution_cache )
        if job:
            message = EXECUTION_SUCCESS_MESSAGE % (tool.id, job.id, job_timer)
            log.debug(message)
            execution_tracker.record_success( job, result )
        else:
            execution_tracker.record_error( result )

    config = app.config
    burst_at = getattr( config, 'tool_submission_burst_at', 10 )
    burst_threads = getattr( config, 'tool_submission_burst_threads', 1 )

    if len(execution_tracker.param_combinations) < burst_at or burst_threads < 2:
        for params in execution_tracker.param_combinations:
            execute_single_job(params)
    else:
        q = Queue()

        def worker():
            while True:
                params = q.get()
                execute_single_job(params)
                q.task_done()

        for i in range(burst_threads):
            t = Thread(target=worker)
            t.daemon = True
            t.start()

        for params in execution_tracker.param_combinations:
            q.put(params)

        q.join()

    log.debug("Executed all jobs for tool request: %s" % all_jobs_timer)
    if collection_info:
        history = history or tool.get_default_history_by_trans( trans )
        params = param_combinations[0]
        execution_tracker.create_output_collections( trans, history, params )

    return execution_tracker
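# The execute() versions above dispatch jobs either serially or through a small
# pool of daemon worker threads draining a shared queue, depending on the
# tool_submission_burst_at / tool_submission_burst_threads settings. Below is a
# minimal, self-contained sketch of that producer/consumer pattern; run_burst and
# submit_one are illustrative names, not Galaxy APIs.
from queue import Queue
from threading import Thread


def run_burst(param_combinations, submit_one, burst_at=10, burst_threads=4):
    """Submit serially for small batches, via worker threads for large ones."""
    if len(param_combinations) < burst_at or burst_threads < 2:
        for params in param_combinations:
            submit_one(params)
        return

    q = Queue()

    def worker():
        while True:
            params = q.get()
            try:
                submit_one(params)
            finally:
                # Mark the item done even if submission raised, so q.join() returns.
                q.task_done()

    for _ in range(burst_threads):
        Thread(target=worker, daemon=True).start()

    for params in param_combinations:
        q.put(params)
    q.join()  # block until every queued submission has been processed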
def execute(trans, tool, mapping_params, history, rerun_remap_job_id=None, collection_info=None, workflow_invocation_uuid=None, invocation_step=None, max_num_jobs=None, job_callback=None, completed_jobs=None, workflow_resource_parameters=None, validate_outputs=False):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    if max_num_jobs is not None:
        assert invocation_step is not None
    if rerun_remap_job_id:
        assert invocation_step is None

    all_jobs_timer = tool.app.execution_timer_factory.get_timer(
        'internals.galaxy.tools.execute.job_batch', BATCH_EXECUTION_MESSAGE
    )

    if invocation_step is None:
        execution_tracker = ToolExecutionTracker(trans, tool, mapping_params, collection_info, completed_jobs=completed_jobs)
    else:
        execution_tracker = WorkflowStepExecutionTracker(trans, tool, mapping_params, collection_info, invocation_step, completed_jobs=completed_jobs)
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(execution_slice, completed_job):
        job_timer = tool.app.execution_timer_factory.get_timer(
            'internals.galaxy.tools.execute.job_single', SINGLE_EXECUTION_SUCCESS_MESSAGE
        )
        params = execution_slice.param_combination
        if workflow_invocation_uuid:
            params['__workflow_invocation_uuid__'] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_invocation_uuid__']
        if workflow_resource_parameters:
            params['__workflow_resource_params__'] = workflow_resource_parameters
        elif '__workflow_resource_params__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_resource_params__']
        if validate_outputs:
            params['__validate_outputs__'] = True
        job, result = tool.handle_single_execution(trans, rerun_remap_job_id, execution_slice, history, execution_cache, completed_job, collection_info, job_callback=job_callback, flush_job=False)
        if job:
            log.debug(job_timer.to_str(tool_id=tool.id, job_id=job.id))
            execution_tracker.record_success(execution_slice, job, result)
            # associate dataset instances with the job that creates them
            if result:
                instance_types = (model.HistoryDatasetAssociation, model.LibraryDatasetDatasetAssociation)
                datasets = [pair[1] for pair in result if type(pair[1]) in instance_types]
                if datasets:
                    job_datasets[job] = datasets
        else:
            execution_tracker.record_error(result)

    tool_action = tool.tool_action
    if hasattr(tool_action, "check_inputs_ready"):
        for params in execution_tracker.param_combinations:
            # This will throw an exception if the tool is not ready.
            tool_action.check_inputs_ready(
                tool,
                trans,
                params,
                history,
                execution_cache=execution_cache,
                collection_info=collection_info,
            )

    execution_tracker.ensure_implicit_collections_populated(history, mapping_params.param_template)
    job_count = len(execution_tracker.param_combinations)

    jobs_executed = 0
    has_remaining_jobs = False
    execution_slice = None
    job_datasets = {}  # job: list of dataset instances created by job

    for i, execution_slice in enumerate(execution_tracker.new_execution_slices()):
        if max_num_jobs is not None and jobs_executed >= max_num_jobs:
            has_remaining_jobs = True
            break
        else:
            execute_single_job(execution_slice, completed_jobs[i])
            history = execution_slice.history or history
            jobs_executed += 1

    if execution_slice:
        # a side effect of adding datasets to a history is a commit within db_next_hid (even with flush=False)
        history.add_pending_items()
    else:
        # Make sure collections, implicit jobs etc are flushed even if there are no precreated output datasets
        trans.sa_session.flush()

    if job_datasets:
        for job, datasets in job_datasets.items():
            for dataset_instance in datasets:
                dataset_instance.dataset.job = job

    tool_id = tool.id
    for job in execution_tracker.successful_jobs:
        # Put the job in the queue if tracking in memory
        tool.app.job_manager.enqueue(job, tool=tool, flush=False)
        trans.log_event(f"Added job to the job queue, id: {str(job.id)}", tool_id=tool_id)
    trans.sa_session.flush()

    if has_remaining_jobs:
        raise PartialJobExecution(execution_tracker)
    else:
        execution_tracker.finalize_dataset_collections(trans)

    log.debug(all_jobs_timer.to_str(job_count=job_count, tool_id=tool.id))
    return execution_tracker
def execute(trans, tool, mapping_params, history, rerun_remap_job_id=None, collection_info=None, workflow_invocation_uuid=None, invocation_step=None, max_num_jobs=None, job_callback=None, completed_jobs=None, workflow_resource_parameters=None):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    if max_num_jobs:
        assert invocation_step is not None
    if rerun_remap_job_id:
        assert invocation_step is None

    all_jobs_timer = ExecutionTimer()
    if invocation_step is None:
        execution_tracker = ToolExecutionTracker(trans, tool, mapping_params, collection_info)
    else:
        execution_tracker = WorkflowStepExecutionTracker(trans, tool, mapping_params, collection_info, invocation_step, job_callback=job_callback)
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(execution_slice, completed_job):
        job_timer = ExecutionTimer()
        params = execution_slice.param_combination
        if workflow_invocation_uuid:
            params['__workflow_invocation_uuid__'] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_invocation_uuid__']
        if workflow_resource_parameters:
            params['__workflow_resource_params__'] = workflow_resource_parameters
        elif '__workflow_resource_params__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_resource_params__']
        job, result = tool.handle_single_execution(trans, rerun_remap_job_id, execution_slice, history, execution_cache, completed_job, collection_info)
        if job:
            message = EXECUTION_SUCCESS_MESSAGE % (tool.id, job.id, job_timer)
            log.debug(message)
            execution_tracker.record_success(execution_slice, job, result)
        else:
            execution_tracker.record_error(result)

    tool_action = tool.tool_action
    if hasattr(tool_action, "check_inputs_ready"):
        for params in execution_tracker.param_combinations:
            # This will throw an exception if the tool is not ready.
            tool_action.check_inputs_ready(
                tool,
                trans,
                params,
                history,
                execution_cache=execution_cache,
                collection_info=collection_info,
            )

    execution_tracker.ensure_implicit_collections_populated(history, mapping_params.param_template)
    job_count = len(execution_tracker.param_combinations)

    jobs_executed = 0
    has_remaining_jobs = False

    for i, execution_slice in enumerate(execution_tracker.new_execution_slices()):
        if max_num_jobs and jobs_executed >= max_num_jobs:
            has_remaining_jobs = True
            break
        else:
            execute_single_job(execution_slice, completed_jobs[i])
            jobs_executed += 1

    if has_remaining_jobs:
        raise PartialJobExecution(execution_tracker)
    else:
        execution_tracker.finalize_dataset_collections(trans)

    log.debug("Executed %d job(s) for tool %s request: %s" % (job_count, tool.id, all_jobs_timer))
    return execution_tracker
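# The newer execute() versions above cap a batch at max_num_jobs slices and raise
# PartialJobExecution when work remains, so the caller can resume the rest on a
# later scheduling pass. Below is a minimal sketch of that control flow; the
# exception class and execute_at_most are illustrative stand-ins, not the Galaxy
# implementations.
class PartialJobExecutionSketch(Exception):
    """Hypothetical stand-in: signals that not every slice was scheduled."""


def execute_at_most(execution_slices, run_slice, max_num_jobs=None):
    """Run slices in order; stop and signal the caller once max_num_jobs are scheduled."""
    jobs_executed = 0
    has_remaining_jobs = False
    for execution_slice in execution_slices:
        if max_num_jobs is not None and jobs_executed >= max_num_jobs:
            has_remaining_jobs = True
            break
        run_slice(execution_slice)
        jobs_executed += 1
    if has_remaining_jobs:
        # The caller catches this, keeps the partial results, and reschedules later.
        raise PartialJobExecutionSketch("stopped after %d job(s)" % jobs_executed)
    return jobs_executed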