def test_dot_4(self):
    """Dot-scattering four fields should emit right-nested zips (zip is binary in WDL)."""
    wf = WorkflowBuilder("sbmf")
    for tag in ("inp", "inp2", "inp3", "inp4"):
        wf.input(tag, Array(str))

    stp = wf.step(
        "dotTool",
        SingleTestTool(
            inputs=wf.inp, input2=wf.inp2, input3=wf.inp3, input4=wf.inp4
        ),
        scatter=ScatterDescription(
            fields=["inputs", "input2", "input3", "input4"],
            method=ScatterMethods.dot,
        ),
    )

    translated = wdl.translate_step_node(
        stp, "A.SingleTestTool", {}, {"inp", "inp2", "inp3", "inp4"}
    )
    expected = """\
scatter (Q in zip(inp, zip(inp2, zip(inp3, inp4)))) {
  call A.SingleTestTool as dotTool {
    input:
      inputs=Q.left,
      input2=Q.right.left,
      input3=Q.right.right.left,
      input4=Q.right.right.right
  }
}"""
    self.assertEqual(expected, translated.get_string(indent=0))
def constructor(self):
    """Build the batch-run wrapper workflow.

    Arrayifies each batched field, exposes every inner input on this
    wrapper, adds the inner tool as a single dot-scattered step, and
    re-exposes the inner outputs (optionally grouped by ``group_by``).

    :raises Exception: if any field in ``self.fields`` is not an input of
        the inner tool.
    """
    ins = self.inner.tool_inputs()
    inkeys = set(i.id() for i in ins)
    invalid_keys = self.fields - inkeys
    if len(invalid_keys) > 0:
        raise Exception(
            f"Couldn't create BatchRunTool from fields {', '.join(invalid_keys)} "
            f"as they do not exist on '{self.inner.id()}'"
        )

    innode_map = {}
    for i in ins:
        intype = i.intype
        default = i.default
        if i.id() in self.fields:
            # Batched fields are received as arrays; a scalar default no
            # longer makes sense for an array input, so drop it.
            intype = Array(intype)
            default = None
        innode_map[i.id()] = self.input(i.id(), intype, default=default, doc=i.doc)

    self.step(
        self.inner.id(),
        self.inner(**innode_map),
        scatter=ScatterDescription(list(self.fields), ScatterMethod.dot),
    )

    if isinstance(self.inner, WorkflowBase):
        # We can do special output_folder grouping.
        # Snapshot items() because self.output(...) registers nodes while
        # we iterate.
        # NOTE(review): this reads self.output_nodes (the wrapper's own
        # nodes) — confirm this isn't meant to be the inner workflow's
        # output nodes.
        for oid, o in list(self.output_nodes.items()):
            folders = o.output_folder
            if self.group_by:
                # Copy before appending: the original appended in place,
                # mutating the output node's own output_folder list.
                folders = list(folders) if folders else []
                folders.append(self[self.group_by])
            self.output(
                oid, o.datatype, output_folder=folders, output_name=o.output_name
            )
    else:
        for o in self.inner.tool_outputs():
            # Guard on group_by (not on the output object) so we never
            # index self[None] when no grouping was requested; this
            # matches the WorkflowBase branch above.
            f = self[self.group_by] if self.group_by else None
            self.output(o.id(), o.outtype, output_folder=f)
def test_scatter_single(self):
    """A single scattered field should produce a plain scatter with no zip."""
    wf = WorkflowBuilder("sbmf")
    wf.input("inp", Array(str))
    wf.input("inp2", str)

    stp = wf.step(
        "dotTool",
        SingleTestTool(inputs=wf.inp, input2=wf.inp2),
        scatter=ScatterDescription(fields=["inputs"], method=ScatterMethods.dot),
    )

    translated = wdl.translate_step_node(stp, "A.SingleTestTool", {}, {"inp", "inp2"})
    expected = """\
scatter (i in inp) {
  call A.SingleTestTool as dotTool {
    input:
      inputs=i,
      input2=inp2
  }
}"""
    self.assertEqual(expected, translated.get_string(indent=0))
def test_dot_2_secondary(self):
    """Dot-scattering a secondary-file input should transpose the file/secondary arrays before zipping."""
    wf = WorkflowBuilder("sbmf")
    wf.input("inp", Array(TxtSecondary))
    wf.input("inp2", Array(str))

    stp = wf.step(
        "dotTool",
        MultipleEcho(input1=wf.inp, input2=wf.inp2),
        scatter=ScatterDescription(
            fields=["input1", "input2"], method=ScatterMethods.dot
        ),
    )

    translated = wdl.translate_step_node(stp, "A.SingleTestTool", {}, {"inp", "inp2"})
    expected = """\
scatter (Q in zip(transpose([inp, inp_qt]), inp2)) {
  call A.SingleTestTool as dotTool {
    input:
      input1=Q.left[0],
      input1_qt=Q.left[1],
      input2=Q.right
  }
}"""
    self.assertEqual(expected, translated.get_string(indent=0))
def step(
    self,
    identifier: str,
    tool: Tool,
    scatter: Union[str, ScatterDescription] = None,
    ignore_missing=False,
):
    """
    Add a step to this workflow, connecting the tool's inputs as edges.

    :param identifier: Unique identifier of the step within the workflow
    :param tool: The tool to run for this step
    :param scatter: Indicate whether a scatter should occur, on what, and how
    :type scatter: Union[str, ScatterDescription]
    :param ignore_missing: Don't throw an error if required params are missing from this function
    :return: the created StepNode
    """
    self.verify_identifier(identifier, tool.id())

    # Promote a bare field name to a full ScatterDescription
    # (the original re-checked isinstance inside this branch redundantly).
    if isinstance(scatter, str):
        scatter = ScatterDescription([scatter])

    # verify scatter
    if scatter:
        ins = set(tool.inputs_map().keys())
        fields = set(scatter.fields)
        if any(f not in ins for f in fields):
            # if there is a field not in the input map, we have a problem
            extra_keys = ", ".join(f"'{f}'" for f in (fields - ins))
            raise Exception(
                f"Couldn't scatter the field(s) for step '{identifier}' "
                f"(tool: '{tool}') {extra_keys} as they were not found in the input map"
            )

    tool.workflow = self
    inputs = tool.inputs_map()

    connections = tool.connections
    provided_keys = set(connections.keys())
    all_keys = set(inputs.keys())
    # An input is required unless it's optional or has a default.
    # Compare against None explicitly so falsy defaults (0, "", False)
    # still count as defaults (was: `not v.default`).
    required_keys = set(
        i
        for i, v in inputs.items()
        if not (v.input_type.optional or v.default is not None)
    )

    if not provided_keys.issubset(all_keys):
        unrecparams = ", ".join(provided_keys - all_keys)
        tags = ", ".join([f"in.{i}" for i in all_keys])
        raise Exception(
            f"Unrecognised parameters {unrecparams} when creating '{identifier}' ({tool.id()}). "
            f"Expected types: {tags}"
        )

    if not ignore_missing and not required_keys.issubset(provided_keys):
        missing = ", ".join(f"'{i}'" for i in (required_keys - provided_keys))
        raise Exception(
            f"Missing the parameters {missing} when creating '{identifier}' ({tool.id()})"
        )

    stp = StepNode(self, identifier=identifier, tool=tool, scatter=scatter)

    added_edges = []
    for (k, v) in connections.items():
        if is_python_primitive(v):
            # A literal value: promote it to a workflow input with the
            # step's own (optional) copy of the declared type.
            inp_identifier = f"{identifier}_{k}"
            referencedtype = copy.copy(inputs[k].input_type)

            parsed_type = get_instantiated_type(v)
            if parsed_type and not referencedtype.can_receive_from(parsed_type):
                raise TypeError(
                    f"The type {parsed_type.id()} inferred from the value '{v}' is not "
                    f"compatible with the '{identifier}.{k}' type: {referencedtype.id()}"
                )

            referencedtype.optional = True
            v = self.input(inp_identifier, referencedtype, default=v)
        if v is None:
            # Explicit None: still create a (defaulted) workflow input.
            inp_identifier = f"{identifier}_{k}"
            v = self.input(inp_identifier, inputs[k].input_type, default=v)

        verifiedsource = verify_or_try_get_source(v)
        if isinstance(verifiedsource, list):
            for vv in verifiedsource:
                added_edges.append(stp._add_edge(k, vv))
        else:
            added_edges.append(stp._add_edge(k, verifiedsource))

    for e in added_edges:
        si = e.finish.sources[e.ftag] if e.ftag else first_value(e.finish.sources)
        self.has_multiple_inputs = self.has_multiple_inputs or si.multiple_inputs

    self.has_scatter = self.has_scatter or scatter is not None
    self.has_subworkflow = self.has_subworkflow or isinstance(tool, Workflow)

    self.nodes[identifier] = stp
    self.step_nodes[identifier] = stp

    return stp
def step(
    self,
    identifier: str,
    tool: Tool,
    scatter: Union[str, List[str], ScatterDescription] = None,
    ignore_missing=False,
    doc: str = None,
):
    """
    Construct a step on this workflow.

    :param identifier: The identifier of the step, unique within the workflow.
    :param tool: The tool that should run for this step.
    :param scatter: Indicate whether a scatter should occur, on what, and how.
    :type scatter: Union[str, ScatterDescription]
    :param ignore_missing: Don't throw an error if required params are missing from this function
    :return:
    """
    self.verify_identifier(identifier, tool.id())

    # Normalise a str / list-of-str scatter into a ScatterDescription
    # using the dot method; anything else is rejected.
    if scatter is not None and not isinstance(scatter, ScatterDescription):
        fields = None
        if isinstance(scatter, str):
            fields = [scatter]
        elif isinstance(scatter, list):
            fields = scatter
        else:
            raise Exception(
                f"Couldn't scatter with field '{scatter}' ({type(scatter)}"
            )
        scatter = ScatterDescription(fields, method=ScatterMethods.dot)

    # verify scatter
    if scatter:
        ins = set(tool.inputs_map().keys())
        fields = set(scatter.fields)
        if any(f not in ins for f in fields):
            # if there is a field not in the input map, we have a problem
            extra_keys = ", ".join(f"'{f}'" for f in (fields - ins))
            raise Exception(
                f"Couldn't scatter the field(s) {extra_keys} for step '{identifier}' "
                f"as they are not inputs to the tool '{tool.id()}'")

    tool.workflow = self
    inputs = tool.inputs_map()

    connections = tool.connections

    provided_keys = set(connections.keys())
    all_keys = set(inputs.keys())
    required_keys = set(
        # The input is optional if it's optional or has default)
        i for i, v in inputs.items()
        if not (v.intype.optional or v.default is not None))

    if not provided_keys.issubset(all_keys):
        unrecparams = ", ".join(provided_keys - all_keys)
        tags = ", ".join([f"in.{i}" for i in all_keys])
        raise Exception(
            f"Unrecognised parameters {unrecparams} when creating '{identifier}' ({tool.id()}). "
            f"Expected types: {tags}")

    if not ignore_missing and not required_keys.issubset(provided_keys):
        missing = ", ".join(f"'{i}'" for i in (required_keys - provided_keys))
        raise Exception(
            f"Missing the parameters {missing} when creating '{identifier}' ({tool.id()})"
        )

    # Wrap a plain-string doc into DocumentationMeta for the StepNode.
    d = doc if isinstance(doc, DocumentationMeta) else DocumentationMeta(
        doc=doc)

    stp = StepNode(self, identifier=identifier, tool=tool, scatter=scatter, doc=d)

    added_edges = []
    for (k, v) in connections.items():

        isfilename = isinstance(v, Filename)
        if is_python_primitive(v) or isfilename:
            # Literal (or Filename) value: promote it to a generated
            # workflow input named '<step>_<field>'.
            inp_identifier = f"{identifier}_{k}"
            # A Filename is itself a type, so use it directly; otherwise
            # take a private copy of the declared type before mutating it.
            referencedtype = copy.copy(
                inputs[k].intype) if not isfilename else v

            parsed_type = get_instantiated_type(v)

            if parsed_type and not referencedtype.can_receive_from(
                    parsed_type):
                raise TypeError(
                    f"The type {parsed_type.id()} inferred from the value '{v}' is not "
                    f"compatible with the '{identifier}.{k}' type: {referencedtype.id()}"
                )

            # The generated input always carries a default, so mark it optional.
            referencedtype.optional = True

            # Mark the generated input as configuration-quality.
            # NOTE(review): this mutates the tool input's own doc object
            # in place — confirm that's intended.
            indoc = inputs[k].doc
            indoc.quality = InputQualityType.configuration

            v = self.input(
                inp_identifier,
                referencedtype,
                default=v.generated_filename() if isfilename else v,
                doc=indoc,
            )
        if v is None:
            # Explicit None connection: still expose a workflow input for it.
            inp_identifier = f"{identifier}_{k}"
            v = self.input(
                inp_identifier,
                inputs[k].intype,
                default=v,
                doc=InputDocumentation(
                    doc=None, quality=InputQualityType.configuration),
            )

        verifiedsource = verify_or_try_get_source(v)
        if isinstance(verifiedsource, list):
            for vv in verifiedsource:
                added_edges.append(stp._add_edge(k, vv))
        else:
            added_edges.append(stp._add_edge(k, verifiedsource))

    # Propagate per-edge multiplicity up to the workflow-level flag.
    for e in added_edges:
        si = e.finish.sources[e.ftag] if e.ftag else first_value(
            e.finish.sources)
        self.has_multiple_inputs = self.has_multiple_inputs or si.multiple_inputs

    self.has_scatter = self.has_scatter or scatter is not None
    self.has_subworkflow = self.has_subworkflow or isinstance(
        tool, Workflow)
    self.nodes[identifier] = stp
    self.step_nodes[identifier] = stp

    return stp