Пример #1
0
    def test_generates_workflow_int_inputs(self):

        w = cwlgen.Workflow()
        tool = cwlgen.parse_cwl("test/int_tool.cwl")

        i = cwlgen.workflow.InputParameter('INTEGER', param_type='int')
        o1 = w.add('step', tool, {"INTEGER": i})
        o1['OUTPUT1'].store()

        expected = b"""#!/usr/bin/env cwl-runner

class: Workflow
cwlVersion: v1.0
inputs:
  INTEGER: {id: INTEGER, type: int}
outputs:
  step_OUTPUT1: {id: step_OUTPUT1, outputSource: step/OUTPUT1, type: File}
steps:
  step:
    id: step
    in: {INTEGER: INTEGER}
    out: [OUTPUT1]
    run: test/int_tool.cwl
"""
        generated = self.capture_tempfile(w.export)
        self.assertEqual(expected, generated)
Пример #2
0
    def test_generates_workflow_two_steps(self):

        w = cwlgen.Workflow()
        tool = cwlgen.parse_cwl("test/import_cwl.cwl")

        f = cwlgen.File("input_file")

        o1 = w.add('step-a', tool, {"INPUT1" : f})
        o2 = w.add('step-b', tool, {"INPUT1" : o1['OUTPUT1']})
        o2['OUTPUT1'].store()
        generated = self.capture_tempfile(w.export)
        expected = b"""#!/usr/bin/env cwl-runner

class: Workflow
cwlVersion: v1.0
inputs:
  INPUT1: {id: INPUT1, type: File}
outputs:
  step-b_OUTPUT1: {id: step-b_OUTPUT1, outputSource: step-b/OUTPUT1, type: File}
steps:
  step-a:
    id: step-a
    in: {INPUT1: INPUT1}
    out: [OUTPUT1]
    run: test/import_cwl.cwl
  step-b:
    id: step-b
    in: {INPUT1: step-a/OUTPUT1}
    out: [OUTPUT1]
    run: test/import_cwl.cwl
"""
        self.assertEqual(expected, generated)
Пример #3
0
    def translate_workflow_to_all_in_one(
            cls,
            wf,
            with_resource_overrides=False,
            is_nested_tool=False) -> cwlgen.Workflow:
        """Translate *wf* into a single ``cwlgen.Workflow`` object.

        Every step is translated with ``use_run_ref=False``.

        :param wf: workflow to translate; its ``identifier``, ``metadata``,
            ``input_nodes``, ``step_nodes`` and ``_outputs`` are read.
        :param with_resource_overrides: when true, the inputs returned by
            ``build_resource_override_maps_for_workflow`` are added to the
            workflow and handed to the matching steps.
        :param is_nested_tool: forwarded unchanged to ``translate_step``.
        :return: the populated ``cwlgen.Workflow``.
        """
        from janis_core.workflow.workflow import Workflow

        metadata = wf.metadata
        cwl_wf = cwlgen.Workflow(
            wf.identifier,
            wf.friendly_name(),
            metadata.documentation,
            cwl_version=CWL_VERSION,
        )

        cwl_wf.inputs: List[cwlgen.InputParameter] = [
            translate_input(node) for node in wf.input_nodes.values()
        ]

        resource_inputs = []
        if with_resource_overrides:
            resource_inputs = build_resource_override_maps_for_workflow(wf)
            cwl_wf.inputs.extend(resource_inputs)

        cwl_wf.steps: List[cwlgen.WorkflowStep] = []
        for step_node in wf.step_nodes.values():
            step_id = step_node.id()
            # Resource inputs are matched to a step by id prefix; the key
            # handed to the step is the id with "<step id>" plus one
            # separator character removed.
            suffix_start = len(step_id) + 1
            overrides = {
                res.id[suffix_start:]: res.id
                for res in resource_inputs
                if res.id.startswith(step_id)
            }
            translated_step = translate_step(
                step_node,
                is_nested_tool=is_nested_tool,
                resource_overrides=overrides,
                use_run_ref=False,
            )
            cwl_wf.steps.append(translated_step)

        cwl_wf.outputs = [translate_output_node(out) for out in wf._outputs]

        # These two requirements are always declared.
        cwl_wf.requirements.append(cwlgen.InlineJavascriptReq())
        cwl_wf.requirements.append(cwlgen.StepInputExpressionRequirement())

        # Feature requirements are declared only when the workflow flags them.
        if wf.has_scatter:
            cwl_wf.requirements.append(cwlgen.ScatterFeatureRequirement())
        if wf.has_subworkflow:
            cwl_wf.requirements.append(cwlgen.SubworkflowFeatureRequirement())
        if wf.has_multiple_inputs:
            cwl_wf.requirements.append(
                cwlgen.MultipleInputFeatureRequirement())

        return cwl_wf
Пример #4
0
 def test_inlinejs(self):
     """An inline-JS requirement with an expression lib appears in ``get_dict()``."""
     workflow = cwlgen.Workflow()
     js_req = cwlgen.InlineJavascriptRequirement(["expression"])
     workflow.requirements.append(js_req)

     exported = workflow.get_dict()
     self.assertIn("requirements", exported)

     # Requirements are keyed by their class name.
     requirements = exported["requirements"]
     self.assertIn(js_req.get_class(), requirements)

     js_entry = requirements[js_req.get_class()]
     self.assertIn("expressionLib", js_entry)
     self.assertEqual(js_entry["expressionLib"], ["expression"])
Пример #5
0
    def test_add_requirements(self):
        w = cwlgen.Workflow()
        req = cwlgen.InlineJavascriptReq()
        w.requirements.append(req)
        generated = self.capture_tempfile(w.export)
        expected = b"""#!/usr/bin/env cwl-runner

class: Workflow
cwlVersion: v1.0
inputs: {}
outputs: {}
requirements:
  InlineJavascriptRequirement: {}
"""
        self.assertEqual(expected, generated)
Пример #6
0
 def test_workflow_export(self):
     """Smoke test: exporting a workflow that only has an id must not raise."""
     import cwlgen

     workflow = cwlgen.Workflow("identifier")
     # No assertion on the output; the call itself is the check.
     workflow.export()
Пример #7
0
    def _convert_composite(run, tmpdir, basedir, filename=None):
        """Convert a workflow made up of several steps into one CWL workflow.

        Each subprocess of ``run`` is converted to its own tool file by
        ``CWLConverter._convert_step`` and referenced from a step named
        ``step_<index>``. Paths consumed by steps become workflow inputs
        (``input_<n>``) unless an earlier step produced them, in which case
        the step is wired to ``<producer step>/<output id>``. Arguments
        become workflow inputs named ``argument_<n>`` with the argument value
        as default. Every produced path becomes a workflow output.

        :param run: the composite run; handed to ``_recurse_subprocesses``.
        :param tmpdir: directory the step files and the parent workflow file
            are written to.
        :param basedir: base directory used to build the absolute default
            path of each workflow input.
        :param filename: file name of the parent workflow inside ``tmpdir``;
            a ``parent_<uuid>.cwl`` name is generated when empty.
        :return: ``(workflow_object, path)`` - the ``cwlgen.Workflow`` and
            the path it was exported to.
        """
        inputs = {}  # consumed path -> workflow input id
        arguments = {}  # workflow argument id -> argument value
        outputs = {}  # produced path -> (step output id, step id)
        steps = []

        input_index = 1
        argument_index = 1

        subprocesses, _ = _recurse_subprocesses(run, 1)

        # preprocess to add dummy outputs in case of output directories:
        # when a later step consumes an entity that lives inside a collection
        # produced by an earlier step, that earlier step gets an explicit
        # output for the entity so it can be referenced.
        previous_output_dirs = defaultdict(list)
        for _, subprocess in subprocesses:
            for input_ in subprocess.inputs:
                entity = input_.consumes
                key = (entity.commit.hexsha, entity.path)
                if key not in previous_output_dirs:
                    continue

                for previous_process in previous_output_dirs[key]:
                    previous_process.outputs.append(
                        CommandOutput(produces=entity, create_folder=False))

            for output in subprocess.outputs:
                entity = output.produces
                if not isinstance(entity, Collection):
                    continue

                for member in entity.entities:
                    # only members created in the same commit as the
                    # collection itself are tracked
                    if member.commit.hexsha != entity.commit.hexsha:
                        continue

                    key = (member.commit.hexsha, member.path)
                    previous_output_dirs[key].append(subprocess)

        # Convert workflow steps
        for i, subprocess in subprocesses:
            _, step_path = CWLConverter._convert_step(subprocess, tmpdir,
                                                      basedir)
            step = WorkflowStep('step_{}'.format(i), step_path)

            for input_ in subprocess.inputs:
                input_path = input_.consumes.path

                # A stdin-mapped input is addressed as 'input_stdin'
                # (presumably the id the generated step tool gives it -
                # confirm against _convert_step). The outputs loop below
                # applies the equivalent renaming. Previously this id was
                # computed but the raw `sanitized_id` was used instead.
                sanitized_id = input_.sanitized_id
                if input_.mapped_to:
                    sanitized_id = 'input_stdin'

                if input_path in inputs:
                    # already used as top-level input elsewhere, reuse
                    source = inputs[input_path]
                elif input_path in outputs:
                    # output of a previous step, refer to it
                    output_id, producer_step_id = outputs[input_path]
                    source = '{}/{}'.format(producer_step_id, output_id)
                else:
                    # input isn't output and doesn't exist yet, add new
                    source = 'input_{}'.format(input_index)
                    inputs[input_path] = source
                    input_index += 1

                step.inputs.append(
                    cwlgen.WorkflowStepInput(sanitized_id, source=source))

            for argument in subprocess.arguments:
                argument_id = 'argument_{}'.format(argument_index)
                arguments[argument_id] = argument.value
                step.inputs.append(
                    cwlgen.WorkflowStepInput(argument.sanitized_id,
                                             source=argument_id))
                argument_index += 1

            for output in subprocess.outputs:
                sanitized_id = output.sanitized_id

                if output.mapped_to:
                    sanitized_id = 'output_{}'.format(
                        output.mapped_to.stream_type)
                outputs[output.produces.path] = (sanitized_id, step.id)
                step.out.append(cwlgen.WorkflowStepOutput(sanitized_id))

            steps.append(step)

        workflow_object = cwlgen.Workflow(str(uuid4()), cwl_version='v1.0')
        workflow_object.hints = []
        workflow_object.requirements = []

        # check types of paths and add as top level inputs/outputs
        # NOTE(review): isdir() is evaluated on the path as stored, i.e.
        # relative to the current working directory, while the default path
        # is built from basedir - confirm these always agree.
        for input_path, id_ in inputs.items():
            type_ = 'Directory' if os.path.isdir(input_path) else 'File'
            default = {
                'path': os.path.abspath(os.path.join(basedir, input_path)),
                'class': type_,
            }
            workflow_object.inputs.append(
                cwlgen.InputParameter(id_, param_type=type_,
                                      default=default))

        for id_, value in arguments.items():
            value, type_ = _get_argument_type(value)
            workflow_object.inputs.append(
                cwlgen.InputParameter(id_, param_type=type_, default=value))

        for index, (output_path, (id_, step_id)) in enumerate(
                outputs.items(), 1):
            type_ = 'Directory' if os.path.isdir(output_path) else 'File'
            workflow_object.outputs.append(
                cwlgen.WorkflowOutputParameter('output_{}'.format(index),
                                               output_source='{}/{}'.format(
                                                   step_id, id_),
                                               param_type=type_))
        workflow_object.steps.extend(steps)

        if not filename:
            filename = 'parent_{}.cwl'.format(uuid4())
        path = os.path.join(tmpdir, filename)
        workflow_object.export(path)

        return workflow_object, path
Пример #8
0
import cwlgen

# Build a CWL v1.0 workflow with an empty id, label and doc string.
cwl_workflow = cwlgen.Workflow('', label='', doc='', cwl_version='v1.0')

# Debugging aids (disabled): list the attributes of the cwlgen module.
#print("*** cwlgen ***")
#print(dir(cwlgen))

# Debugging aids (disabled): list the attributes of the workflow object.
#print("*** workflow ***")
#print(dir(cwl_workflow))

# add scatter requirement to workflow (the step below uses `scatter=`)
cwl_workflow.requirements.append(cwlgen.ScatterFeatureRequirement())

# add multiple inputs requirement to workflow
cwl_workflow.requirements.append(cwlgen.MultipleInputFeatureRequirement())

# add inputs: a single workflow input, an array of strings
# NOTE(review): the name `input` shadows the builtin for the rest of the module.
input = cwlgen.InputParameter('message_array', param_type='string[]')
cwl_workflow.inputs.append(input)

# add outputs: one File taken from the `output` of a step called `cat_2`
# NOTE(review): no `cat_2` step is defined in the visible part of this script.
output = cwlgen.WorkflowOutputParameter('output',
                                        output_source='cat_2/output',
                                        param_type='File')
cwl_workflow.outputs.append(output)

# add step 1 (echo): runs 1st-tool.cwl, scattered over its `message` input
# NOTE(review): step1 is created here but is not appended to
# cwl_workflow.steps anywhere in the visible part of this script.
step1 = cwlgen.WorkflowStep('echo', run='1st-tool.cwl', scatter='message')

# Debugging aid (disabled): list the attributes of the step object.
#print(dir(step1))
Пример #9
0
def _with_id(entry_id, spec):
    """Return a copy of the parameter mapping *spec* with its key under ``'id'``."""
    entry = {'id': entry_id}
    entry.update(spec)
    return entry


def main():
    """Interactively chain command line tools into one CWL workflow file.

    Usage: ``<script> <workflow-file> <tool.cwl> [<tool.cwl> ...]``

    The tools are chained in the order given. Each becomes a workflow step
    whose id is the tool's file name without extension. All inputs of the
    first tool become workflow inputs, and all outputs of the last tool
    become workflow outputs. For every later tool, only its ``File`` and
    ``Directory`` inputs are wired: automatically when the tool has exactly
    one such input and the previous tool exactly one output, otherwise by
    asking the user where each input comes from.

    Every step input is named after the consuming tool's own input id, and
    every ``<step>/<output>`` source uses the step id (not the file name),
    so the references resolve inside the generated workflow.

    The workflow is written to ``sys.argv[1]``. The process exits with a
    non-zero status on bad usage or when a tool file does not declare
    ``inputs`` and ``outputs`` as mappings.
    """
    if len(sys.argv) < 3:
        sys.exit("Usage: {} <workflow-file> <tool.cwl> [<tool.cwl> ...]".format(
            sys.argv[0]))

    workflowName = sys.argv[1]
    listOfNames = sys.argv[2:]
    # Step ids as they are declared in the workflow; every step source below
    # must be built from these, never from the raw file names.
    stepIds = [os.path.splitext(name)[0] for name in listOfNames]

    # Get information about workflow from the user
    questions = [
        inquirer.Text(
            'label',
            message=
            "Please provide some information about the workflow you are creating"
        )
    ]
    answers = inquirer.prompt(questions)

    # Initialize the tool we want to build
    cwl_tool = cwlgen.Workflow(workflow_id=os.path.splitext(workflowName)[0],
                               label=answers['label'],
                               cwl_version='v1.1')

    # Parse CLT tools which were provided on the command line to get a list of inputs and outputs
    CLT_Inputs = {}
    CLT_Outputs = {}
    for name in listOfNames:
        with open(os.path.abspath(name), 'r') as cwl_file:
            cwl_dict = yaml.safe_load(cwl_file)
        # A document that is not a mapping (e.g. an empty file) used to be
        # skipped silently and then failed later with a KeyError; treat it
        # like any other tool in the wrong format.
        if (not isinstance(cwl_dict, dict)
                or not isinstance(cwl_dict.get('inputs'), dict)
                or not isinstance(cwl_dict.get('outputs'), dict)):
            print(
                "Your CWL files are not all of the same format. Please use ToolJig to make sure they all"
                " have the same format.")
            sys.exit(1)
        CLT_Inputs[name] = cwl_dict['inputs']
        CLT_Outputs[name] = cwl_dict['outputs']

    # Declare first step of our Workflow (cwl_tool)
    firstName = listOfNames[0]
    step = cwlgen.workflow.WorkflowStep(step_id=stepIds[0], run=firstName)

    # Parse the inputs of the first file to save as Workflow inputs
    workflowInputs = []
    for item, spec in CLT_Inputs[firstName].items():
        cwl_tool.inputs.append(
            cwlgen.workflow.InputParameter(param_id=item,
                                           label=spec.get('label'),
                                           doc=spec.get('doc'),
                                           param_type=spec.get('type')))
        workflowInputs.append({
            'ID': item,
            'Label': spec.get('label'),
            'Type': spec.get('type')
        })
        step.inputs.append(cwlgen.WorkflowStepInput(input_id=item,
                                                    source=item))

    # Get outputs of first step and append it to the whole workflow
    for outputId in CLT_Outputs[firstName]:
        step.out.append(cwlgen.WorkflowStepOutput(output_id=outputId))
    cwl_tool.steps.append(step)

    externalInputToName = 'It is an external input that has yet to be referenced'
    previousOutput = 'It is the output of the workflow, but not the most recently previous step'

    # LARGE LOOP: create step "i + 1" and decide how its inputs are fed from
    # the outputs of step "i", earlier steps, or workflow inputs. Iterating
    # over consecutive pairs means the last step is added exactly once.
    for i in range(len(listOfNames) - 1):
        prevStepId = stepIds[i]
        nextName = listOfNames[i + 1]

        # All outputs of step "i"
        importantOutputs = [
            _with_id(key, spec)
            for key, spec in CLT_Outputs[listOfNames[i]].items()
        ]
        # Inputs of step "i + 1" that are of type Directory or File
        importantInputs = [
            _with_id(key, spec)
            for key, spec in CLT_Inputs[nextName].items()
            if spec['type'] == 'File' or spec['type'] == 'Directory'
        ]

        step = cwlgen.workflow.WorkflowStep(step_id=stepIds[i + 1],
                                            run=nextName)

        if len(importantInputs) == 1 and len(importantOutputs) == 1:
            # Unambiguous: feed the only output into the only input.
            step.inputs.append(
                cwlgen.WorkflowStepInput(
                    input_id=importantInputs[0].get('id'),
                    source=prevStepId + "/" + importantOutputs[0].get('id')))
        else:
            for position, m in enumerate(importantInputs, 1):
                # Provide options ----------------------------------------------
                print("Input ", position, "/", len(importantInputs),
                      "of Command Line File ", i + 2, "/", len(listOfNames))
                print(
                    "Your inputs and outputs don't match. Please specify where this input should be retrieved from:",
                    m)
                print("")
                options = ['It is the output of the previous step:']
                options.extend(importantOutputs)
                # Caption rows are headings; their indices are handed to
                # cutie via caption_indices. len(options) is the index of
                # whichever heading is appended next.
                captions = [0, len(options)]
                if cwl_tool.inputs:
                    options.append(
                        'It is an external input that already exists:')
                    options.extend(workflowInputs)
                    captions.append(len(options))
                options.append('Other')
                options.append(externalInputToName)
                options.append(previousOutput)
                selection = options[cutie.select(options,
                                                 caption_indices=captions)]

                # Logic for selection ----------------------------------------------
                if selection == externalInputToName:
                    questions = [
                        inquirer.Text(
                            'newID',
                            message="What is the ID of the new input?"),
                        inquirer.Text(
                            'newLabel',
                            message="What is the label of the new input?")
                    ]
                    answers = inquirer.prompt(questions)
                    # add it as a master input
                    cwl_tool.inputs.append(
                        cwlgen.workflow.InputParameter(
                            param_id=answers.get('newID'),
                            label=answers.get('newLabel'),
                            param_type=m.get('type')))
                    workflowInputs.append({
                        'ID': answers.get('newID'),
                        'Label': answers.get('newLabel'),
                        'Type': m.get('type')
                    })
                    source = answers.get('newID')
                elif selection == previousOutput:
                    print(
                        "\nPlease select which previous output corresponds to your input:"
                    )
                    listOfAllOutputs = []
                    allSources = []  # parallel to listOfAllOutputs
                    for o in range(0, i + 1):
                        stepOutputs = CLT_Outputs.get(listOfNames[o])
                        for outputId in stepOutputs:
                            toAdd = {
                                'ID': outputId,
                                'From step': listOfNames[o]
                            }
                            toAdd.update(stepOutputs[outputId])
                            listOfAllOutputs.append(toAdd)
                            allSources.append(stepIds[o] + "/" + outputId)
                    source = allSources[cutie.select(listOfAllOutputs)]
                elif selection in workflowInputs:
                    print(selection)
                    source = selection.get('ID')
                else:
                    # one of the previous step's outputs was picked
                    source = prevStepId + "/" + selection.get('id')

                step.inputs.append(
                    cwlgen.WorkflowStepInput(input_id=m.get('id'),
                                             source=source))

        for outputId in CLT_Outputs[nextName]:
            step.out.append(cwlgen.WorkflowStepOutput(output_id=outputId))
        cwl_tool.steps.append(step)

    # The outputs of the last step are the outputs of the workflow.
    lastStepId = stepIds[-1]
    for key, spec in CLT_Outputs[listOfNames[-1]].items():
        x = _with_id(key, spec)
        cwl_tool.outputs.append(
            cwlgen.workflow.WorkflowOutputParameter(
                param_id=x.get('id'),
                doc=x.get('doc'),
                param_type=x.get('type'),
                output_source=lastStepId + "/" + x.get('id')))

    cwl_tool.export(workflowName)
Пример #10
0
    def translate_workflow(cls,
                           wf,
                           with_docker=True,
                           with_resource_overrides=False,
                           is_nested_tool=False) -> Tuple[any, Dict[str, any]]:
        """Translate *wf* and every tool its steps use into CWL objects.

        :param wf: workflow to translate; its ``metadata``, ``input_nodes``,
            ``step_nodes`` and ``output_nodes`` are read.
        :param with_docker: forwarded to the tool translation and to the
            recursive translation of nested workflows.
        :param with_resource_overrides: when true, the inputs returned by
            ``build_resource_override_maps_for_workflow`` are added to the
            workflow and handed to the matching steps; also forwarded to
            the tool and nested-workflow translations.
        :param is_nested_tool: forwarded unchanged to ``translate_step``.
        :return: ``(workflow, tools)`` where ``tools`` maps tool id to its
            translated object, including the tools of nested workflows.
        :raises Exception: when a step's tool is neither a ``Workflow`` nor
            a ``CommandTool``.
        """
        from janis_core.workflow.workflow import Workflow

        metadata = wf.metadata
        cwl_wf = cwlgen.Workflow(wf.id(),
                                 wf.friendly_name(),
                                 metadata.documentation,
                                 cwl_version=CWL_VERSION)

        cwl_wf.inputs: List[cwlgen.InputParameter] = [
            translate_input(node) for node in wf.input_nodes.values()
        ]

        resource_inputs = []
        if with_resource_overrides:
            resource_inputs = build_resource_override_maps_for_workflow(wf)
            cwl_wf.inputs.extend(resource_inputs)

        cwl_wf.steps: List[cwlgen.WorkflowStep] = []
        for step_node in wf.step_nodes.values():
            step_id = step_node.id()
            # Resource inputs are matched to a step by id prefix; the key
            # handed to the step is the id with "<step id>" plus one
            # separator character removed.
            suffix_start = len(step_id) + 1
            overrides = {
                res.id[suffix_start:]: res.id
                for res in resource_inputs
                if res.id.startswith(step_id)
            }
            translated_step = translate_step(
                step_node,
                is_nested_tool=is_nested_tool,
                resource_overrides=overrides,
            )
            cwl_wf.steps.append(translated_step)

        cwl_wf.outputs = [
            translate_output_node(out) for out in wf.output_nodes.values()
        ]

        # These two requirements are always declared.
        cwl_wf.requirements.append(cwlgen.InlineJavascriptReq())
        cwl_wf.requirements.append(cwlgen.StepInputExpressionRequirement())

        # Feature requirements are declared only when the workflow flags them.
        if wf.has_scatter:
            cwl_wf.requirements.append(cwlgen.ScatterFeatureRequirement())
        if wf.has_subworkflow:
            cwl_wf.requirements.append(cwlgen.SubworkflowFeatureRequirement())
        if wf.has_multiple_inputs:
            cwl_wf.requirements.append(
                cwlgen.MultipleInputFeatureRequirement())

        # Translate each distinct tool once; steps sharing a tool id collapse
        # to a single entry.
        translated = {}
        unique_tools: Dict[str, Tool] = {
            step_node.tool.id(): step_node.tool
            for step_node in wf.step_nodes.values()
        }
        for tool in unique_tools.values():
            if isinstance(tool, Workflow):
                # Nested workflow: recurse and merge in its own tools.
                nested_cwl, nested_tools = cls.translate_workflow(
                    tool,
                    is_nested_tool=True,
                    with_docker=with_docker,
                    with_resource_overrides=with_resource_overrides,
                )
                translated[tool.id()] = nested_cwl
                translated.update(nested_tools)
                continue

            if isinstance(tool, CommandTool):
                translated[tool.id()] = cls.translate_tool_internal(
                    tool,
                    with_docker=with_docker,
                    with_resource_overrides=with_resource_overrides,
                )
                continue

            raise Exception(f"Unknown tool type: '{type(tool)}'")

        return cwl_wf, translated
Пример #11
0
def initWorkflow():
    """Return a new ``cwlgen.Workflow`` that declares the subworkflow feature requirement."""
    wf = cwlgen.Workflow()
    subworkflow_req = cwlgen.SubworkflowFeatureRequirement()
    wf.requirements.append(subworkflow_req)
    return wf
Пример #12
0
def create_workflow(drops, cwl_filename, buffer):
    """
    Create a CWL workflow from a given Physical Graph Template

    A CWL workflow consists of multiple files. A single file describing the
    workflow, and multiple files each describing one step in the workflow. All
    the files are combined into one zip file, so that a single file can be
    downloaded by the user.

    NOTE: CWL only supports workflow steps that are bash shell applications
          Non-BashShellApp nodes are unable to be implemented in CWL

    :param drops: sequence of node dicts; the keys 'dt', 'inputs' and
        'outputs' are read here, and each BashShellApp node is passed to
        ``create_command_line_tool``.
    :param cwl_filename: name of the workflow file inside the zip archive.
    :param buffer: file name or file-like object the zip archive is
        written to.
    :raises Exception: if any node's 'dt' is not in SUPPORTED_CATEGORIES.
    :raises KeyError: if a step input is not the first output of any node.
    """

    # search the drops for unsupported categories,
    # if found, the graph cannot be translated into CWL
    for index, node in enumerate(drops):
        dataType = node.get('dt', '')
        if dataType not in SUPPORTED_CATEGORIES:
            raise Exception('Node {0} has an unsupported category: {1}'.format(
                index, dataType))

    # create list for command line tool description files
    step_files = []

    # create the workflow
    cwl_workflow = cwlgen.Workflow('', label='', doc='', cwl_version='v1.0')

    # map the first output of every node to the step output that produces it
    # NOTE(review): only outputs[0] is registered, so a step consuming any
    # other output of a node fails with KeyError below - confirm whether
    # multi-output nodes are expected.
    files = {}
    for index, node in enumerate(drops):
        outputs = node.get('outputs', [])
        if len(outputs) > 0:
            files[outputs[0]] = "step" + str(index) + "/output_file_0"

    # add steps to the workflow
    for index, node in enumerate(drops):
        if node.get('dt', '') != 'BashShellApp':
            continue

        inputs = node.get('inputs', [])
        outputs = node.get('outputs', [])

        # create command line tool description
        filename = "step" + str(index) + ".cwl"
        contents = create_command_line_tool(node)

        # add contents of command line tool description to list of step files
        step_files.append({"filename": filename, "contents": contents})

        # create step
        step = cwlgen.WorkflowStep("step" + str(index), run=filename)

        # add input to step; separate counter names keep the node index
        # (`index`) intact inside this loop body
        for input_index, input_id in enumerate(inputs):
            step.inputs.append(
                cwlgen.WorkflowStepInput('input_file_' + str(input_index),
                                         source=files[input_id]))

        # add output to step
        for output_index in range(len(outputs)):
            step.out.append(
                cwlgen.WorkflowStepOutput('output_file_' + str(output_index)))

        # add step to workflow
        cwl_workflow.steps.append(step)

    # put workflow and command line tool description files all together in a
    # zip; the context manager finalises the archive even if a write fails
    with ZipFile(buffer, 'w') as zipObj:
        for step_file in step_files:
            zipObj.writestr(step_file["filename"],
                            six.b(step_file["contents"]))
        zipObj.writestr(cwl_filename, six.b(cwl_workflow.export_string()))