Example #1
    def __connect_workflow_steps(self, steps, steps_by_external_id):
        """ Second pass to deal with connections between steps.

        Create workflow connection objects using externally specified ids
        using during creation or update.
        """
        for step in steps:
            # Input connections
            for input_name, conn_list in step.temp_input_connections.items():
                if not conn_list:
                    continue
                if not isinstance(conn_list, list):
                    # Older-style singleton connection
                    conn_list = [conn_list]
                for conn_dict in conn_list:
                    if 'output_name' not in conn_dict or 'id' not in conn_dict:
                        template = "Invalid connection [%s] - must be dict with output_name and id fields."
                        message = template % conn_dict
                        raise exceptions.MessageException(message)
                    conn = model.WorkflowStepConnection()
                    conn.input_step = step
                    conn.input_name = input_name
                    conn.output_name = conn_dict['output_name']
                    conn.output_step = steps_by_external_id[conn_dict['id']]

                    input_subworkflow_step_index = conn_dict.get('input_subworkflow_step_id')
                    if input_subworkflow_step_index is not None:
                        conn.input_subworkflow_step = step.subworkflow.step_by_index(
                            input_subworkflow_step_index)

            del step.temp_input_connections
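
For reference, a minimal sketch of the temp_input_connections structure this second pass consumes. The field names ('output_name', 'id', 'input_subworkflow_step_id') come from the checks above; the concrete input names and ids are hypothetical:

# Hypothetical payload; 'id' is the external id of the upstream step.
step.temp_input_connections = {
    "input1": [{"output_name": "out_file1", "id": 0}],
    "queries_0|input2": {"output_name": "out_file1", "id": 1},  # older singleton form
}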
Example #2
def yaml_to_model(has_dict, id_offset=100):
    if isinstance(has_dict, str):
        has_dict = yaml.safe_load(has_dict)

    workflow = model.Workflow()
    workflow.steps = []
    for i, step in enumerate(has_dict.get("steps", [])):
        workflow_step = model.WorkflowStep()
        if "order_index" not in step:
            step["order_index"] = i
        if "id" not in step:
            # Offset the ids so tests exercise the case order_index != id
            step["id"] = id_offset
            id_offset += 1
        step_type = step.get("type")
        assert step_type is not None

        if step_type == "subworkflow":
            subworkflow_dict = step["subworkflow"]
            del step["subworkflow"]
            subworkflow = yaml_to_model(subworkflow_dict, id_offset=id_offset)
            step["subworkflow"] = subworkflow
            id_offset += len(subworkflow.steps)

        for key, value in step.items():
            if key == "input_connections":
                raise NotImplementedError()
            if key == "inputs":
                inputs = []
                for input_name, input_def in value.items():
                    step_input = model.WorkflowStepInput(workflow_step)
                    step_input.name = input_name
                    connections = []
                    for conn_dict in input_def.get("connections", []):
                        conn = model.WorkflowStepConnection()
                        for conn_key, conn_value in conn_dict.items():
                            if conn_key == "@output_step":
                                conn_value = workflow.steps[conn_value]
                                conn_key = "output_step"
                            elif conn_key == "@input_subworkflow_step":
                                conn_value = step["subworkflow"].step_by_index(conn_value)
                                conn_key = "input_subworkflow_step"
                            setattr(conn, conn_key, conn_value)
                        connections.append(conn)
                    step_input.connections = connections
                    inputs.append(step_input)
                value = inputs
            if key == "workflow_outputs":
                value = [
                    partial(_dict_to_workflow_output, workflow_step)(_)
                    for _ in value
                ]
            setattr(workflow_step, key, value)
        workflow.steps.append(workflow_step)

    return workflow
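
A minimal usage sketch for yaml_to_model, assuming PyYAML and the Galaxy model module are importable; the asserted ids follow from the default id_offset of 100, and the tool_id is hypothetical:

TEST_YAML = """
steps:
  - type: data_input
  - type: tool
    tool_id: cat1
"""

workflow = yaml_to_model(TEST_YAML)
assert len(workflow.steps) == 2
assert workflow.steps[0].order_index == 0
assert workflow.steps[0].id == 100  # first auto-assigned id
assert workflow.steps[1].id == 101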
Example #3
    def test_connect_tool_output(self):
        self._setup_workflow(TEST_WORKFLOW_YAML)
        hda = model.HistoryDatasetAssociation()

        progress = self._new_workflow_progress()
        progress.set_step_outputs(self._invocation_step(2), {"out1": hda})

        conn = model.WorkflowStepConnection()
        conn.output_name = "out1"
        conn.output_step = self._step(2)
        assert progress.replacement_for_connection(conn) is hda
Example #4
    def test_connect_data_input(self):
        self._setup_workflow(TEST_WORKFLOW_YAML)
        hda = model.HistoryDatasetAssociation()

        self.inputs_by_step_id = {100: hda}
        progress = self._new_workflow_progress()
        progress.set_outputs_for_input(self._invocation_step(0))

        conn = model.WorkflowStepConnection()
        conn.output_name = "output"
        conn.output_step = self._step(0)
        assert progress.replacement_for_connection(conn) is hda
Example #5
def extract_steps(trans,
                  history=None,
                  job_ids=None,
                  dataset_ids=None,
                  dataset_collection_ids=None,
                  dataset_names=None,
                  dataset_collection_names=None):
    # Ensure the id arguments are lists (possibly empty)
    if job_ids is None:
        job_ids = []
    elif not isinstance(job_ids, list):
        job_ids = [job_ids]
    if dataset_ids is None:
        dataset_ids = []
    elif not isinstance(dataset_ids, list):
        dataset_ids = [dataset_ids]
    if dataset_collection_ids is None:
        dataset_collection_ids = []
    elif not isinstance(dataset_collection_ids, list):
        dataset_collection_ids = [dataset_collection_ids]
    # Convert all ids to integers
    job_ids = [int(_) for _ in job_ids]
    dataset_ids = [int(_) for _ in dataset_ids]
    dataset_collection_ids = [int(_) for _ in dataset_collection_ids]
    # Find each job; for security we (implicitly) check that each is
    # associated with the current history.
    summary = WorkflowSummary(trans, history)
    jobs = summary.jobs
    steps = []
    hid_to_output_pair = {}
    # Input dataset steps
    for i, hid in enumerate(dataset_ids):
        step = model.WorkflowStep()
        step.type = 'data_input'
        if dataset_names:
            name = dataset_names[i]
        else:
            name = "Input Dataset"
        step.tool_inputs = dict(name=name)
        hid_to_output_pair[hid] = (step, 'output')
        steps.append(step)
    for i, hid in enumerate(dataset_collection_ids):
        step = model.WorkflowStep()
        step.type = 'data_collection_input'
        if hid not in summary.collection_types:
            raise exceptions.RequestParameterInvalidException(
                "hid %s does not appear to be a collection" % hid)
        collection_type = summary.collection_types[hid]
        if dataset_collection_names:
            name = dataset_collection_names[i]
        else:
            name = "Input Dataset Collection"
        step.tool_inputs = dict(name=name, collection_type=collection_type)
        hid_to_output_pair[hid] = (step, 'output')
        steps.append(step)
    # Tool steps
    for job_id in job_ids:
        if job_id not in summary.job_id2representative_job:
            log.warning("job_id %s not found in job_id2representative_job %s" %
                        (job_id, summary.job_id2representative_job))
            raise AssertionError(
                "Attempt to create workflow with job not connected to current history"
            )
        job = summary.job_id2representative_job[job_id]
        tool_inputs, associations = step_inputs(trans, job)
        step = model.WorkflowStep()
        step.type = 'tool'
        step.tool_id = job.tool_id
        step.tool_version = job.tool_version
        step.tool_inputs = tool_inputs
        # NOTE: We shouldn't need to do two passes here since only
        #       an earlier job can be used as an input to a later
        #       job.
        for other_hid, input_name in associations:
            if job in summary.implicit_map_jobs:
                an_implicit_output_collection = jobs[job][0][1]
                input_collection = an_implicit_output_collection.find_implicit_input_collection(
                    input_name)
                if input_collection:
                    other_hid = input_collection.hid
                else:
                    log.info("Cannot find implicit input collection for %s" %
                             input_name)
            if other_hid in hid_to_output_pair:
                other_step, other_name = hid_to_output_pair[other_hid]
                conn = model.WorkflowStepConnection()
                conn.input_step = step
                conn.input_name = input_name
                # Should always be connected to an earlier step
                conn.output_step = other_step
                conn.output_name = other_name
        steps.append(step)
        # Store created dataset hids
        for assoc in (job.output_datasets +
                      job.output_dataset_collection_instances):
            assoc_name = assoc.name
            if ToolOutputCollectionPart.is_named_collection_part_name(assoc_name):
                continue
            if job in summary.implicit_map_jobs:
                hid = None
                for implicit_pair in jobs[job]:
                    query_assoc_name, dataset_collection = implicit_pair
                    if query_assoc_name == assoc_name:
                        hid = dataset_collection.hid
                if hid is None:
                    template = "Failed to find matching implicit job - job id is %s, implicit pairs are %s, assoc_name is %s."
                    message = template % (job.id, jobs[job], assoc_name)
                    log.warning(message)
                    raise Exception("Failed to extract job.")
            else:
                if hasattr(assoc, "dataset"):
                    hid = assoc.dataset.hid
                else:
                    hid = assoc.dataset_collection_instance.hid
            hid_to_output_pair[hid] = (step, assoc.name)
    return steps
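
The None/scalar/list normalization at the top of extract_steps is a recurring pattern; it could be factored into a single helper. A minimal sketch (listify_ids is a hypothetical name, not part of the module):

def listify_ids(value):
    # None -> [], scalar -> [scalar], then coerce every element to int.
    if value is None:
        return []
    if not isinstance(value, list):
        value = [value]
    return [int(v) for v in value]

assert listify_ids(None) == []
assert listify_ids("3") == [3]
assert listify_ids([1, "2"]) == [1, 2]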
Example #6
def connection(**kwds):
    conn = model.WorkflowStepConnection()
    for key, value in kwds.items():
        setattr(conn, key, value)
    return conn
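
A usage sketch for the helper, assuming the Galaxy model module is importable; any attribute of WorkflowStepConnection can be passed as a keyword:

output_step = model.WorkflowStep()
conn = connection(output_step=output_step, output_name="out1", input_name="input1")
assert conn.output_step is output_step
assert conn.output_name == "out1"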
Example #7
    def _workflow_from_dict(self, trans, data, source=None):
        """
        RPARK: copied from galaxy.webapps.galaxy.controllers.workflows.py
        Creates a workflow from a dict. The created workflow is stored in the
        database and returned.
        """
        # Put parameters in workflow mode
        trans.workflow_building_mode = True
        # Create new workflow from incoming dict
        workflow = model.Workflow()
        # If there's a source, put it in the workflow name.
        if source:
            name = "%s (imported from %s)" % (data['name'], source)
        else:
            name = data['name']
        workflow.name = name
        # Assume no errors until we find a step that has some
        workflow.has_errors = False
        # Create each step
        steps = []
        # The editor will provide ids for each step that we don't need to save,
        # but do need to use to make connections
        steps_by_external_id = {}
        # Keep track of tools required by the workflow that are not available in
        # the local Galaxy instance.  Each tuple in the list of missing_tool_tups
        # will be ( tool_id, tool_name, tool_version ).
        missing_tool_tups = []
        # First pass to build step objects and populate basic values
        for key, step_dict in data['steps'].items():
            # Create the model class for the step
            step = model.WorkflowStep()
            steps.append(step)
            steps_by_external_id[step_dict['id']] = step
            # FIXME: Position should be handled inside module
            step.position = step_dict['position']
            module = module_factory.from_dict(trans, step_dict, secure=False)
            if module.type == 'tool' and module.tool is None:
                # A required tool is not available in the local Galaxy instance.
                missing_tool_tup = (step_dict['tool_id'], step_dict['name'],
                                    step_dict['tool_version'])
                if missing_tool_tup not in missing_tool_tups:
                    missing_tool_tups.append(missing_tool_tup)
            module.save_to_step(step)
            if step.tool_errors:
                workflow.has_errors = True
            # Stick this in the step temporarily
            step.temp_input_connections = step_dict['input_connections']
            # RPARK: user annotation removed because of the API; the original
            # controller sanitized step_dict['annotation'] with sanitize_html()
            # and saved it via self.add_item_annotation() here.
            # Unpack and add post-job actions.
            post_job_actions = step_dict.get('post_job_actions', {})
            for name, pja_dict in post_job_actions.items():
                model.PostJobAction(pja_dict['action_type'], step,
                                    pja_dict['output_name'],
                                    pja_dict['action_arguments'])
        # Second pass to deal with connections between steps
        for step in steps:
            # Input connections
            for input_name, conn_dict in step.temp_input_connections.items():
                if conn_dict:
                    conn = model.WorkflowStepConnection()
                    conn.input_step = step
                    conn.input_name = input_name
                    conn.output_name = conn_dict['output_name']
                    conn.output_step = steps_by_external_id[conn_dict['id']]
            del step.temp_input_connections
        # Order the steps if possible
        attach_ordered_steps(workflow, steps)
        # Connect up
        stored = model.StoredWorkflow()
        stored.name = workflow.name
        workflow.stored_workflow = stored
        stored.latest_workflow = workflow
        stored.user = trans.user
        # Persist
        trans.sa_session.add(stored)
        trans.sa_session.flush()
        return stored, missing_tool_tups
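
For orientation, a sketch of one entry in data['steps'] as read by the first pass above. The key names are taken from the accesses in the code; every value is hypothetical:

step_dict = {
    "id": 1,  # external editor id, used only to wire connections
    "type": "tool",
    "name": "Concatenate datasets",
    "tool_id": "cat1",
    "tool_version": "1.0.0",
    "position": {"left": 10, "top": 10},
    "input_connections": {"input1": {"output_name": "out_file1", "id": 0}},
    "post_job_actions": {},
}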
Example #8
def extract_steps(trans,
                  history=None,
                  job_ids=None,
                  dataset_ids=None,
                  dataset_collection_ids=None):
    # Ensure the id arguments are lists (possibly empty)
    if job_ids is None:
        job_ids = []
    elif not isinstance(job_ids, list):
        job_ids = [job_ids]
    if dataset_ids is None:
        dataset_ids = []
    elif not isinstance(dataset_ids, list):
        dataset_ids = [dataset_ids]
    if dataset_collection_ids is None:
        dataset_collection_ids = []
    elif not isinstance(dataset_collection_ids, list):
        dataset_collection_ids = [dataset_collection_ids]
    # Convert all ids to integers
    job_ids = [int(_) for _ in job_ids]
    dataset_ids = [int(_) for _ in dataset_ids]
    dataset_collection_ids = [int(_) for _ in dataset_collection_ids]
    # Find each job; for security we (implicitly) check that each is
    # associated with the current history.
    summary = WorkflowSummary(trans, history)
    jobs = summary.jobs
    jobs_by_id = {job.id: job for job in jobs}
    steps = []
    steps_by_job_id = {}
    hid_to_output_pair = {}
    # Input dataset steps
    for hid in dataset_ids:
        step = model.WorkflowStep()
        step.type = 'data_input'
        step.tool_inputs = dict(name="Input Dataset")
        hid_to_output_pair[hid] = (step, 'output')
        steps.append(step)
    for hid in dataset_collection_ids:
        step = model.WorkflowStep()
        step.type = 'data_collection_input'
        if hid not in summary.collection_types:
            raise exceptions.RequestParameterInvalidException(
                "hid %s does not appear to be a collection" % hid)
        collection_type = summary.collection_types[hid]
        step.tool_inputs = dict(name="Input Dataset Collection",
                                collection_type=collection_type)
        hid_to_output_pair[hid] = (step, 'output')
        steps.append(step)
    # Tool steps
    for job_id in job_ids:
        if job_id not in jobs_by_id:
            log.warn("job_id %s not found in jobs_by_id %s" %
                     (job_id, jobs_by_id))
            raise AssertionError(
                "Attempt to create workflow with job not connected to current history"
            )
        job = jobs_by_id[job_id]
        tool_inputs, associations = step_inputs(trans, job)
        step = model.WorkflowStep()
        step.type = 'tool'
        step.tool_id = job.tool_id
        step.tool_inputs = tool_inputs
        # NOTE: We shouldn't need to do two passes here since only
        #       an earlier job can be used as an input to a later
        #       job.
        for other_hid, input_name in associations:
            if job in summary.implicit_map_jobs:
                an_implicit_output_collection = jobs[job][0][1]
                input_collection = an_implicit_output_collection.find_implicit_input_collection(
                    input_name)
                if input_collection:
                    other_hid = input_collection.hid
            if other_hid in hid_to_output_pair:
                other_step, other_name = hid_to_output_pair[other_hid]
                conn = model.WorkflowStepConnection()
                conn.input_step = step
                conn.input_name = input_name
                # Should always be connected to an earlier step
                conn.output_step = other_step
                conn.output_name = other_name
        steps.append(step)
        steps_by_job_id[job_id] = step
        # Store created dataset hids
        for assoc in job.output_datasets:
            if job in summary.implicit_map_jobs:
                hid = None
                for implicit_pair in jobs[job]:
                    query_assoc_name, dataset_collection = implicit_pair
                    if query_assoc_name == assoc.name:
                        hid = dataset_collection.hid
                if hid is None:
                    log.warn("Failed to find matching implicit job.")
                    raise Exception("Failed to extract job.")
            else:
                hid = assoc.dataset.hid
            hid_to_output_pair[hid] = (step, assoc.name)
    return steps