Example No. 1
    def check_inputs_ready(self, tool, trans, incoming, history, execution_cache=None, collection_info=None):
        if execution_cache is None:
            execution_cache = ToolExecutionCache(trans)

        current_user_roles = execution_cache.current_user_roles
        history, inp_data, inp_dataset_collections, _, _ = self._collect_inputs(tool, trans, incoming, history, current_user_roles, collection_info)

        tool.check_inputs_ready(inp_data, inp_dataset_collections)
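The readiness check above is normally driven from the execution loop: callers fetch the tool's action, probe for the optional hook with hasattr, and call it once per parameter combination before any job is created, as Examples No. 4, 7 and 8 below do. A minimal caller-side sketch, assuming tool, trans, params_list and history come from the surrounding Galaxy request context (the variable names here are illustrative, not part of the API shown above):

tool_action = tool.tool_action
if hasattr(tool_action, "check_inputs_ready"):
    for params in params_list:
        # Raises an exception if any input dataset is not yet ready,
        # aborting the whole submission before any job is created.
        tool_action.check_inputs_ready(tool, trans, params, history)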
Example No. 2
    def execute(self, tool, trans, incoming=None, set_output_hid=False, overwrite=True, history=None, job_params=None, execution_cache=None, collection_info=None, job_callback=None, **kwargs):
        incoming = incoming or {}
        trans.check_user_activation()

        if execution_cache is None:
            execution_cache = ToolExecutionCache(trans)

        current_user_roles = execution_cache.current_user_roles
        history, inp_data, inp_dataset_collections, preserved_tags, all_permissions = self._collect_inputs(tool, trans, incoming, history, current_user_roles, collection_info)

        # Build name for output datasets based on tool name and input names
        on_text = self._get_on_text(inp_data)

        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = self._wrapped_params(trans, tool, incoming)

        out_data = {}
        input_collections = {k: v[0][0] for k, v in inp_dataset_collections.items()}
        output_collections = OutputCollections(
            trans,
            history,
            tool=tool,
            tool_action=self,
            input_collections=input_collections,
            dataset_collection_elements=kwargs.get("dataset_collection_elements", None),
            on_text=on_text,
            incoming=incoming,
            params=wrapped_params.params,
            job_params=job_params,
            tags=preserved_tags,
        )

        #
        # Create job.
        #
        job, galaxy_session = self._new_job_for_session(trans, tool, history)
        self._produce_outputs(trans, tool, out_data, output_collections, incoming=incoming, history=history, tags=preserved_tags)
        self._record_inputs(trans, tool, job, incoming, inp_data, inp_dataset_collections)
        self._record_outputs(job, out_data, output_collections)
        if job_callback:
            job_callback(job)
        job.state = job.states.OK
        trans.sa_session.add(job)

        # Queue the job for execution
        # trans.app.job_manager.job_queue.put( job.id, tool.id )
        # trans.log_event( "Added database job action to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
        log.info("Calling produce_outputs, tool is %s" % tool)
        return job, out_data, history
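For context, a caller would invoke this action roughly as sketched below. This is only an assumed usage pattern: tool, trans, params and history come from the surrounding Galaxy request context, and tool.tool_action is the accessor used in the later examples. Note that this version of execute() returns a (job, out_data, history) triple, while the older variant in Example No. 3 returns only (job, out_data).

# Hypothetical invocation of the action defined above
execution_cache = ToolExecutionCache(trans)
job, out_data, history = tool.tool_action.execute(
    tool,
    trans,
    incoming=params,               # dict of tool parameter values
    history=history,
    execution_cache=execution_cache,
)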
Example No. 3
    def execute( self, tool, trans, incoming={}, set_output_hid=False, overwrite=True, history=None, job_params=None, mapping_over_collection=False, execution_cache=None, **kwargs ):
        if execution_cache is None:
            execution_cache = ToolExecutionCache(trans)

        current_user_roles = execution_cache.current_user_roles
        history, inp_data, inp_dataset_collections = self._collect_inputs(tool, trans, incoming, history, current_user_roles)

        # Build name for output datasets based on tool name and input names
        on_text = self._get_on_text( inp_data )

        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = self._wrapped_params( trans, tool, incoming )

        out_data = odict()
        input_collections = dict( [ (k, v[0][0]) for k, v in inp_dataset_collections.iteritems() ] )
        output_collections = OutputCollections(
            trans,
            history,
            tool=tool,
            tool_action=self,
            input_collections=input_collections,
            mapping_over_collection=mapping_over_collection,
            on_text=on_text,
            incoming=incoming,
            params=wrapped_params.params,
            job_params=job_params,
        )

        #
        # Create job.
        #
        job, galaxy_session = self._new_job_for_session( trans, tool, history )
        self._produce_outputs( trans, tool, out_data, output_collections, incoming=incoming, history=history )
        self._record_inputs( trans, tool, job, incoming, inp_data, inp_dataset_collections, current_user_roles )
        self._record_outputs( job, out_data, output_collections )
        job.state = job.states.OK
        trans.sa_session.add( job )
        trans.sa_session.flush()  # ensure job.id are available

        # Queue the job for execution
        # trans.app.job_queue.put( job.id, tool.id )
        # trans.log_event( "Added database job action to the job queue, id: %s" % str(job.id), tool_id=job.tool_id )
        log.info("Calling produce_outputs, tool is %s" % tool)
        return job, out_data
Example No. 4
def execute(trans,
            tool,
            mapping_params,
            history,
            rerun_remap_job_id=None,
            collection_info=None,
            workflow_invocation_uuid=None,
            invocation_step=None,
            max_num_jobs=None,
            job_callback=None,
            completed_jobs=None):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    if max_num_jobs:
        assert invocation_step is not None
    if rerun_remap_job_id:
        assert invocation_step is None

    all_jobs_timer = ExecutionTimer()
    if invocation_step is None:
        execution_tracker = ToolExecutionTracker(trans, tool, mapping_params,
                                                 collection_info)
    else:
        execution_tracker = WorkflowStepExecutionTracker(
            trans,
            tool,
            mapping_params,
            collection_info,
            invocation_step,
            job_callback=job_callback)
    app = trans.app
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(execution_slice, completed_job):
        job_timer = ExecutionTimer()
        params = execution_slice.param_combination
        if workflow_invocation_uuid:
            params['__workflow_invocation_uuid__'] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_invocation_uuid__']

        job, result = tool.handle_single_execution(trans, rerun_remap_job_id,
                                                   execution_slice, history,
                                                   execution_cache,
                                                   completed_job)
        if job:
            message = EXECUTION_SUCCESS_MESSAGE % (tool.id, job.id, job_timer)
            log.debug(message)
            execution_tracker.record_success(execution_slice, job, result)
        else:
            execution_tracker.record_error(result)

    tool_action = tool.tool_action
    if hasattr(tool_action, "check_inputs_ready"):
        for params in execution_tracker.param_combinations:
            # This will throw an exception if the tool is not ready.
            tool_action.check_inputs_ready(tool, trans, params, history)

    execution_tracker.ensure_implicit_collections_populated(
        history, mapping_params.param_template)
    config = app.config
    burst_at = getattr(config, 'tool_submission_burst_at', 10)
    burst_threads = getattr(config, 'tool_submission_burst_threads', 1)

    job_count = len(execution_tracker.param_combinations)

    jobs_executed = 0
    has_remaining_jobs = False

    if (job_count < burst_at or burst_threads < 2):
        for i, execution_slice in enumerate(
                execution_tracker.new_execution_slices()):
            if max_num_jobs and jobs_executed >= max_num_jobs:
                has_remaining_jobs = True
                break
            else:
                execute_single_job(execution_slice, completed_jobs[i])
                jobs_executed += 1
    else:
        # TODO: re-record success...
        q = Queue()

        def worker():
            while True:
                # Each queued item is an (execution_slice, completed_job) pair.
                execution_slice, completed_job = q.get()
                execute_single_job(execution_slice, completed_job)
                q.task_done()

        for i in range(burst_threads):
            t = Thread(target=worker)
            t.daemon = True
            t.start()

        for i, execution_slice in enumerate(
                execution_tracker.new_execution_slices()):
            if max_num_jobs and jobs_executed >= max_num_jobs:
                has_remaining_jobs = True
                break
            else:
                q.put((execution_slice, completed_jobs[i]))
                jobs_executed += 1

        q.join()

    if has_remaining_jobs:
        raise PartialJobExecution(execution_tracker)
    else:
        execution_tracker.finalize_dataset_collections(trans)

    log.debug("Executed %d job(s) for tool %s request: %s" %
              (job_count, tool.id, all_jobs_timer))
    return execution_tracker
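The burst branch above (and in Examples No. 5 and 6) is a standard producer/consumer pattern: once the job count reaches burst_at and more than one thread is allowed, work items are pushed onto a Queue, daemon worker threads drain it, and q.join() blocks until every item has been processed. A self-contained sketch of just that pattern, with illustrative names (run_burst, handle, items) that are not part of the Galaxy API:

from queue import Queue
from threading import Thread

def run_burst(items, handle, burst_threads=4):
    q = Queue()

    def worker():
        while True:
            item = q.get()
            try:
                handle(item)
            finally:
                # Mark the item done even if handle() raised, so q.join() can return.
                q.task_done()

    # Daemon threads exit automatically when the main thread finishes.
    for _ in range(burst_threads):
        Thread(target=worker, daemon=True).start()

    for item in items:
        q.put(item)

    q.join()  # wait for all queued items to be handled

if __name__ == "__main__":
    run_burst(range(10), lambda i: print("executed slice", i))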
Example No. 5
def execute(trans,
            tool,
            param_combinations,
            history,
            rerun_remap_job_id=None,
            collection_info=None,
            workflow_invocation_uuid=None):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    all_jobs_timer = ExecutionTimer()
    execution_tracker = ToolExecutionTracker(tool, param_combinations,
                                             collection_info)
    app = trans.app
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(params):
        job_timer = ExecutionTimer()
        if workflow_invocation_uuid:
            params['__workflow_invocation_uuid__'] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_invocation_uuid__']
        job, result = tool.handle_single_execution(trans, rerun_remap_job_id,
                                                   params, history,
                                                   collection_info,
                                                   execution_cache)
        if job:
            message = EXECUTION_SUCCESS_MESSAGE % (tool.id, job.id, job_timer)
            log.debug(message)
            execution_tracker.record_success(job, result)
        else:
            execution_tracker.record_error(result)

    config = app.config
    burst_at = getattr(config, 'tool_submission_burst_at', 10)
    burst_threads = getattr(config, 'tool_submission_burst_threads', 1)

    tool_action = tool.action
    if hasattr(tool_action, "check_inputs_ready"):
        for params in execution_tracker.param_combinations:
            # This will throw an exception if the tool is not ready.
            tool_action.check_inputs_ready(tool, trans, params, history)

    job_count = len(execution_tracker.param_combinations)
    if job_count < burst_at or burst_threads < 2:
        for params in execution_tracker.param_combinations:
            execute_single_job(params)
    else:
        q = Queue()

        def worker():
            while True:
                params = q.get()
                execute_single_job(params)
                q.task_done()

        for i in range(burst_threads):
            t = Thread(target=worker)
            t.daemon = True
            t.start()

        for params in execution_tracker.param_combinations:
            q.put(params)

        q.join()

    log.debug("Executed %d job(s) for tool %s request: %s" %
              (job_count, tool.id, all_jobs_timer))
    if collection_info:
        history = history or tool.get_default_history_by_trans(trans)
        if len(param_combinations) == 0:
            template = "Attempting to map over an empty collection, this is not yet implemented. colleciton_info is [%s]"
            message = template % collection_info
            log.warn(message)
            raise Exception(message)
        params = param_combinations[0]
        execution_tracker.create_output_collections(trans, history, params)

    return execution_tracker
Example No. 6
def execute( trans, tool, param_combinations, history, rerun_remap_job_id=None, collection_info=None, workflow_invocation_uuid=None ):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    all_jobs_timer = ExecutionTimer()
    execution_tracker = ToolExecutionTracker( tool, param_combinations, collection_info )
    app = trans.app
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(params):
        job_timer = ExecutionTimer()
        if workflow_invocation_uuid:
            params[ '__workflow_invocation_uuid__' ] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params[ '__workflow_invocation_uuid__' ]

        # If this is a workflow, everything has now been connected so we should validate
        # the state we are about to execute one last time. Consider whether tool executions
        # should run this as well.
        if workflow_invocation_uuid:
            messages = tool.check_and_update_param_values( params, trans, update_values=False, allow_workflow_parameters=False )
            if messages:
                execution_tracker.record_error( messages )
                return

        job, result = tool.handle_single_execution( trans, rerun_remap_job_id, params, history, collection_info, execution_cache )
        if job:
            message = EXECUTION_SUCCESS_MESSAGE % (tool.id, job.id, job_timer)
            log.debug(message)
            execution_tracker.record_success( job, result )
        else:
            execution_tracker.record_error( result )

    config = app.config
    burst_at = getattr( config, 'tool_submission_burst_at', 10 )
    burst_threads = getattr( config, 'tool_submission_burst_threads', 1 )

    if len(execution_tracker.param_combinations) < burst_at or burst_threads < 2:
        for params in execution_tracker.param_combinations:
            execute_single_job(params)
    else:
        q = Queue()

        def worker():
            while True:
                params = q.get()
                execute_single_job(params)
                q.task_done()

        for i in range(burst_threads):
            t = Thread(target=worker)
            t.daemon = True
            t.start()

        for params in execution_tracker.param_combinations:
            q.put(params)

        q.join()

    log.debug("Executed all jobs for tool request: %s" % all_jobs_timer)
    if collection_info:
        history = history or tool.get_default_history_by_trans( trans )
        params = param_combinations[0]
        execution_tracker.create_output_collections( trans, history, params )

    return execution_tracker
Example No. 7
def execute(trans,
            tool,
            mapping_params,
            history,
            rerun_remap_job_id=None,
            collection_info=None,
            workflow_invocation_uuid=None,
            invocation_step=None,
            max_num_jobs=None,
            job_callback=None,
            completed_jobs=None,
            workflow_resource_parameters=None,
            validate_outputs=False):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    if max_num_jobs is not None:
        assert invocation_step is not None
    if rerun_remap_job_id:
        assert invocation_step is None

    all_jobs_timer = tool.app.execution_timer_factory.get_timer(
        'internals.galaxy.tools.execute.job_batch', BATCH_EXECUTION_MESSAGE)

    if invocation_step is None:
        execution_tracker = ToolExecutionTracker(trans,
                                                 tool,
                                                 mapping_params,
                                                 collection_info,
                                                 completed_jobs=completed_jobs)
    else:
        execution_tracker = WorkflowStepExecutionTracker(
            trans,
            tool,
            mapping_params,
            collection_info,
            invocation_step,
            completed_jobs=completed_jobs)
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(execution_slice, completed_job):
        job_timer = tool.app.execution_timer_factory.get_timer(
            'internals.galaxy.tools.execute.job_single',
            SINGLE_EXECUTION_SUCCESS_MESSAGE)
        params = execution_slice.param_combination
        if workflow_invocation_uuid:
            params['__workflow_invocation_uuid__'] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_invocation_uuid__']
        if workflow_resource_parameters:
            params['__workflow_resource_params__'] = workflow_resource_parameters
        elif '__workflow_resource_params__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_resource_params__']
        if validate_outputs:
            params['__validate_outputs__'] = True
        job, result = tool.handle_single_execution(trans,
                                                   rerun_remap_job_id,
                                                   execution_slice,
                                                   history,
                                                   execution_cache,
                                                   completed_job,
                                                   collection_info,
                                                   job_callback=job_callback,
                                                   flush_job=False)
        if job:
            log.debug(job_timer.to_str(tool_id=tool.id, job_id=job.id))
            execution_tracker.record_success(execution_slice, job, result)
            # associate dataset instances with the job that creates them
            if result:
                instance_types = (model.HistoryDatasetAssociation,
                                  model.LibraryDatasetDatasetAssociation)
                datasets = [
                    pair[1] for pair in result
                    if type(pair[1]) in instance_types
                ]
                if datasets:
                    job_datasets[job] = datasets
        else:
            execution_tracker.record_error(result)

    tool_action = tool.tool_action
    if hasattr(tool_action, "check_inputs_ready"):
        for params in execution_tracker.param_combinations:
            # This will throw an exception if the tool is not ready.
            tool_action.check_inputs_ready(
                tool,
                trans,
                params,
                history,
                execution_cache=execution_cache,
                collection_info=collection_info,
            )

    execution_tracker.ensure_implicit_collections_populated(
        history, mapping_params.param_template)
    job_count = len(execution_tracker.param_combinations)

    jobs_executed = 0
    has_remaining_jobs = False
    execution_slice = None
    job_datasets = {}  # job: list of dataset instances created by job

    for i, execution_slice in enumerate(
            execution_tracker.new_execution_slices()):
        if max_num_jobs is not None and jobs_executed >= max_num_jobs:
            has_remaining_jobs = True
            break
        else:
            execute_single_job(execution_slice, completed_jobs[i])
            history = execution_slice.history or history
            jobs_executed += 1

    if execution_slice:
        # a side effect of adding datasets to a history is a commit within db_next_hid (even with flush=False).
        history.add_pending_items()
    else:
        # Make sure collections, implicit jobs etc are flushed even if there are no precreated output datasets
        trans.sa_session.flush()

    if job_datasets:
        for job, datasets in job_datasets.items():
            for dataset_instance in datasets:
                dataset_instance.dataset.job = job

    tool_id = tool.id
    for job in execution_tracker.successful_jobs:
        # Put the job in the queue if tracking in memory
        tool.app.job_manager.enqueue(job, tool=tool, flush=False)
        trans.log_event(f"Added job to the job queue, id: {str(job.id)}",
                        tool_id=tool_id)
    trans.sa_session.flush()

    if has_remaining_jobs:
        raise PartialJobExecution(execution_tracker)
    else:
        execution_tracker.finalize_dataset_collections(trans)

    log.debug(all_jobs_timer.to_str(job_count=job_count, tool_id=tool.id))
    return execution_tracker
Example No. 8
def execute(trans,
            tool,
            mapping_params,
            history,
            rerun_remap_job_id=None,
            collection_info=None,
            workflow_invocation_uuid=None,
            invocation_step=None,
            max_num_jobs=None,
            job_callback=None,
            completed_jobs=None,
            workflow_resource_parameters=None):
    """
    Execute a tool and return object containing summary (output data, number of
    failures, etc...).
    """
    if max_num_jobs:
        assert invocation_step is not None
    if rerun_remap_job_id:
        assert invocation_step is None

    all_jobs_timer = ExecutionTimer()
    if invocation_step is None:
        execution_tracker = ToolExecutionTracker(trans, tool, mapping_params,
                                                 collection_info)
    else:
        execution_tracker = WorkflowStepExecutionTracker(
            trans,
            tool,
            mapping_params,
            collection_info,
            invocation_step,
            job_callback=job_callback)
    execution_cache = ToolExecutionCache(trans)

    def execute_single_job(execution_slice, completed_job):
        job_timer = ExecutionTimer()
        params = execution_slice.param_combination
        if workflow_invocation_uuid:
            params['__workflow_invocation_uuid__'] = workflow_invocation_uuid
        elif '__workflow_invocation_uuid__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_invocation_uuid__']
        if workflow_resource_parameters:
            params['__workflow_resource_params__'] = workflow_resource_parameters
        elif '__workflow_resource_params__' in params:
            # Only workflow invocation code gets to set this, ignore user supplied
            # values or rerun parameters.
            del params['__workflow_resource_params__']
        job, result = tool.handle_single_execution(trans, rerun_remap_job_id,
                                                   execution_slice, history,
                                                   execution_cache,
                                                   completed_job,
                                                   collection_info)
        if job:
            message = EXECUTION_SUCCESS_MESSAGE % (tool.id, job.id, job_timer)
            log.debug(message)
            execution_tracker.record_success(execution_slice, job, result)
        else:
            execution_tracker.record_error(result)

    tool_action = tool.tool_action
    if hasattr(tool_action, "check_inputs_ready"):
        for params in execution_tracker.param_combinations:
            # This will throw an exception if the tool is not ready.
            tool_action.check_inputs_ready(
                tool,
                trans,
                params,
                history,
                execution_cache=execution_cache,
                collection_info=collection_info,
            )

    execution_tracker.ensure_implicit_collections_populated(
        history, mapping_params.param_template)
    job_count = len(execution_tracker.param_combinations)

    jobs_executed = 0
    has_remaining_jobs = False

    for i, execution_slice in enumerate(
            execution_tracker.new_execution_slices()):
        if max_num_jobs and jobs_executed >= max_num_jobs:
            has_remaining_jobs = True
            break
        else:
            execute_single_job(execution_slice, completed_jobs[i])

    if has_remaining_jobs:
        raise PartialJobExecution(execution_tracker)
    else:
        execution_tracker.finalize_dataset_collections(trans)

    log.debug("Executed %d job(s) for tool %s request: %s" %
              (job_count, tool.id, all_jobs_timer))
    return execution_tracker