def check_types(self):
    """
    Check that the output selected on ``self.start`` can feed the input
    selected on ``self.finish`` and store the verdict on
    ``self.compatible_types``.

    The source type is wrapped in an ``Array`` when the start node is a
    scattered step, and unwrapped by one level when this edge scatters.
    An incompatible pairing is passed to ``Logger.critical``.

    :raises Exception: if this edge scatters but the source type is not an Array
    """
    from janis_core.workflow.workflow import InputNode, StepNode

    # Fall back to the first declared port when no tag was given.
    if self.stag is not None:
        source_port = self.start.outputs()[self.stag]
    else:
        source_port = first_value(self.start.outputs())

    if self.ftag is not None:
        sink_port = self.finish.inputs()[self.ftag]
    else:
        sink_port = first_value(self.finish.inputs())

    stype = source_port.outtype
    ftype = sink_port.intype

    if isinstance(self.start, StepNode) and self.start.scatter is not None:
        # The outputs of a scattered step are merged, so the source is
        # treated as an Array of the declared output type.
        Logger.log(
            f"This edge merges the inputs from '{full_dot(self.start, self.stag)}' for "
            f"'{full_dot(self.finish, self.ftag)}'"
        )
        stype = Array(stype)

    if self.scatter:
        if not isinstance(stype, Array):
            raise Exception(
                f"Scatter was required for '{self.start.id()}.{self.stag} → '{self.finish.id()}.{self.ftag}' but "
                f"the input type was {type(stype).__name__} and not an array"
            )
        # Scattering consumes one element at a time.
        stype = stype.subtype()

    source_has_default = False
    if isinstance(self.start, InputNode):
        source_has_default = self.start.default is not None

    self.compatible_types = ftype.can_receive_from(stype, source_has_default)
    if (
        not self.compatible_types
        and isinstance(ftype, Array)
        and ftype.subtype().can_receive_from(stype)
    ):
        # A single value is accepted by an Array input of that element type.
        self.compatible_types = True

    if self.compatible_types:
        return

    start_label = full_dot(self.start, self.stag)
    finish_label = full_dot(self.finish, self.ftag)
    report = (
        f"Mismatch of types when joining '{start_label}' to '{finish_label}': "
        f"{source_port.outtype.id()} -/→ {sink_port.intype.id()}"
    )
    if isinstance(stype, Array) and ftype.can_receive_from(stype.subtype()):
        report += " (did you forget to SCATTER?)"
    Logger.critical(report)
def add_source(self, start: Node, stag: Optional[str], should_scatter) -> Edge:
    """
    Connect the output ``stag`` of ``start`` to this step input.

    :param start: node providing the value
    :param stag: output tag on ``start``; ``None`` selects its first output
    :param should_scatter: whether the connection scatters over the source
    :return: the Edge that was created and stored in ``self.source_map``
    :raises Exception: if scattering is requested on a non-Array source, or a
        second distinct source is added to an input whose type is not an Array
    """
    from janis_core.workflow.workflow import StepNode

    if stag is not None:
        source_port = start.outputs()[stag]
    else:
        source_port = first_value(start.outputs())
    stype = source_port.outtype

    if self.ftag is not None:
        sink_port = self.finish.inputs()[self.ftag]
    else:
        sink_port = first_value(self.finish.inputs())
    ftype = sink_port.intype

    if isinstance(start, StepNode) and start.scatter is not None:
        # The outputs of a scattered step are merged into an Array.
        Logger.log(
            f"This edge merges the inputs from '{full_dot(start, stag)}' for "
            f"'{full_dot(self.finish, self.ftag)}'"
        )
        stype = Array(stype)

    if should_scatter:
        if not isinstance(stype, Array):
            raise Exception(
                f"Scatter was required for '{start.id()}.{stag} → '{self.finish.id()}.{self.ftag}' but "
                f"the input type was {type(stype).__name__} and not an array"
            )
        stype = stype.subtype()

    sink_is_array = isinstance(ftype, Array)

    adding_second_source = (
        len(self.source_map) == 1 and start.id() not in self.source_map
    )
    if adding_second_source:
        self.multiple_inputs = True
        if not sink_is_array:
            raise Exception(
                f"Adding multiple inputs to '{self.finish.id()}' and '{ftype.id()}' is not an array"
            )

    if sink_is_array and not isinstance(stype, Array):
        # https://www.commonwl.org/user_guide/misc/#connect-a-solo-value-to-an-input-that-expects-an-array-of-that-type
        self.multiple_inputs = True

    edge = Edge(start, stag, self.finish, self.ftag, should_scatter=should_scatter)
    self.source_map[start.id()] = edge
    return edge
def find_or_generate_config(self, identifier, config: CromwellConfiguration, config_path):
    """
    Select the Cromwell configuration for this engine.

    Precedence: an explicit ``config`` object, then an explicit
    ``config_path`` (copied to ``self.config_path``), then the path from the
    Janis configuration (also copied), and finally a configuration generated
    from the template, or a blank ``CromwellConfiguration``.

    :param identifier: value stored as ``system.cromwell_id`` on a generated config
    :param config: a ready-made configuration object, or a falsy value
    :param config_path: path to an existing configuration file, or a falsy value
    """
    from janis_assistant.management.configuration import JanisConfiguration

    jc = JanisConfiguration.manager()

    if config:
        self.config = config
    elif config_path:
        shutil.copyfile(config_path, self.config_path)
    elif jc.cromwell.configpath:
        shutil.copyfile(jc.cromwell.configpath, self.config_path)
    else:
        from_template = jc.template.template.engine_config(EngineType.cromwell, jc)
        self.config = from_template or CromwellConfiguration()
        if not self.config.system:
            self.config.system = CromwellConfiguration.System()
        system = self.config.system
        system.cromwell_id = identifier
        system.cromwell_id_random_suffix = False
        system.job_shell = "/bin/sh"

    # Nothing further to adjust when only a file was copied.
    if not self.config:
        return

    if not self.config.backend:
        self.config.backend = CromwellConfiguration.Backend.with_new_local_exec_dir(
            self.execution_dir
        )
        return

    providers = self.config.backend.providers
    if len(providers) == 1:
        # Point the sole provider at our execution directory when it has no root.
        only_provider = first_value(providers)
        if not only_provider.config.root:
            only_provider.config.root = self.execution_dir
def slashed_source(self):
    """
    Return the slashed form of the source(s) of this input.

    :return: ``None`` when there are no sources, the single edge's
        ``source_slashed()`` when there is exactly one, otherwise a list with
        one entry per edge in ``self.source_map``
    """
    count = len(self.source_map)
    if count > 1:
        return [edge.source_slashed() for edge in self.source_map.values()]
    if count == 1:
        return first_value(self.source_map).source_slashed()
    return None
def source(self):
    """
    Return the edge(s) feeding this input.

    :return: ``None`` when ``self.source_map`` is empty, its only value when
        it holds one entry, otherwise a list of all its values
    """
    count = len(self.source_map)
    if not count:
        return None
    if count == 1:
        return first_value(self.source_map)
    return list(self.source_map.values())
def returntype(self):
    """
    Return the type of the first output of ``self.input_node``.

    When the input node carries a default, a shallow copy of that type with
    ``optional`` set to ``False`` is returned instead, leaving the node's own
    type object untouched.
    """
    declared = first_value(self.input_node.outputs()).outtype

    if self.input_node is None or self.input_node.default is None:
        return declared

    import copy

    concrete = copy.copy(declared)
    concrete.optional = False
    return concrete
def add_source(self, operator: Selector, should_scatter) -> Edge:
    """
    Connect ``operator`` to this step input.

    :param operator: selector whose ``returntype()`` gives the source type
    :param should_scatter: whether the connection scatters over the source
    :return: the Edge that was created and appended to ``self.source_map``
    :raises Exception: if scattering is requested on a non-array source, or a
        second source is added to an input whose type is not an array
    """
    # NOTE: the source type is taken from the operator as-is; no Array
    # wrapping for scattered upstream steps is applied here.
    stype = get_instantiated_type(operator.returntype())
    ftype = (
        self.finish.inputs()[self.ftag]
        if self.ftag is not None
        else first_value(self.finish.inputs())
    ).intype

    if should_scatter:
        if not stype.is_array():
            raise Exception(
                f"Scatter was required for '{operator}' → '{self.finish.id()}.{self.ftag}' but "
                f"the input type was {type(stype).__name__} and not an array"
            )
        # Scattering consumes one element at a time.
        stype = get_instantiated_type(stype.subtype())

    if len(self.source_map) == 1:
        # A source already exists, so this input now merges several sources.
        self.multiple_inputs = True

        if not ftype.is_array():
            raise Exception(
                f"Adding multiple inputs to '{self.finish.id()}' and '{ftype.id()}' is not an array"
            )

    if not stype.is_array() and ftype.is_array():
        # https://www.commonwl.org/user_guide/misc/#connect-a-solo-value-to-an-input-that-expects-an-array-of-that-type
        self.multiple_inputs = True

    e = Edge(operator, self.finish, self.ftag, should_scatter=should_scatter)
    # todo: deal with source_map
    self.source_map.append(e)
    return e
def evaluate(self, inputs):
    """
    Resolve every keyword argument against ``inputs`` and format the result.

    Arguments that resolve to lists are expanded into several combinations:

    * all lists have the same length: element-wise (dot product);
    * exactly one list differs in length from the first list: the
      combinations come from ``generate_combinations_of_input_dicts``;
    * otherwise the evaluation is rejected.

    :param inputs: input values handed to ``evaluate_arg`` for each argument
    :return: the single formatted value, or a list of them when more than one
        combination was produced
    :raises Exception: if more than one list differs in length from the first,
        or if no combination could be evaluated (e.g. empty lists)
    """
    resolvedvalues = {
        k: self.evaluate_arg(v, inputs) for k, v in self.kwargs.items()
    }
    values_that_are_lists = {
        k: v for k, v in resolvedvalues.items() if isinstance(v, list)
    }

    # A single empty override means "format once with the resolved values".
    inp_combinations: List[dict] = [{}]

    if values_that_are_lists:
        first_len = len(first_value(values_that_are_lists))
        list_values_that_are_different = sum(
            1 for v in values_that_are_lists.values() if len(v) != first_len
        )

        if list_values_that_are_different == 0:
            # dot product
            inp_combinations = [
                {k: v[i] for k, v in values_that_are_lists.items()}
                for i in range(first_len)
            ]
        elif list_values_that_are_different == 1:
            # cross product
            inp_combinations = self.generate_combinations_of_input_dicts(
                values_that_are_lists=list(values_that_are_lists.items())
            )
        else:
            l_lengths = ", ".join(
                f"{k}={len(v)}" for k, v in values_that_are_lists.items()
            )
            raise Exception(
                "String Formatter evaluation doesn't support scattering for list of "
                f"values with these differing lengths: {l_lengths}"
            )

    evaluated_combinations = [
        self.resolve_with_resolved_values(**{**resolvedvalues, **c})
        for c in inp_combinations
    ]

    if len(evaluated_combinations) == 0:
        raise Exception(
            "Something happened when resolving inputs with input values "
            + str(inputs)
        )
    if len(evaluated_combinations) == 1:
        return evaluated_combinations[0]
    return evaluated_combinations
def try_get_outputs_for(self, inpid, wf, inputs, output_dir, description):
    """
    Obtain the first output of the transformation workflow ``wf``.

    If ``output_dir`` already exists, the outputs recorded by the latest
    workflow manager found there are tried first; any failure while reading
    them is logged and ignored. Otherwise (or when nothing usable was
    recorded) the workflow is run via ``run_with_outputs``.

    :param inpid: identifier of the input being derived (used in log messages)
    :param wf: workflow to run when no cached output is available
    :param inputs: inputs passed to ``run_with_outputs``
    :param output_dir: directory holding / receiving the run
    :param description: description of the transformation (used in log messages)
    :return: the first output value, or ``None`` if the run produced nothing
    """
    from janis_assistant.main import WorkflowManager, run_with_outputs

    if os.path.exists(output_dir):
        try:
            manager = WorkflowManager.from_path_get_latest_manager(
                output_dir, readonly=True
            )
            cached = {}
            for record in manager.database.outputsDB.get():
                resolved = record.value or record.new_path
                if resolved:
                    cached[record.id_] = resolved

            if cached:
                cached_value = first_value(cached)
                Logger.info(
                    f"Using cached value of transformation ({description}) for {inpid}: {cached_value}"
                )
                return cached_value

            Logger.log(
                f"Didn't get any outputs from previous workflow manager when deriving input {inpid} ({description})"
            )
        except Exception as err:
            # Best effort only: fall through to actually running the workflow.
            Logger.debug(
                f"Couldn't get outputs from existing output_path for {inpid}, '{output_dir}' ({description}): {err}"
            )

    produced = run_with_outputs(wf, inputs=inputs, output_dir=output_dir)
    if produced and len(produced) >= 1:
        return first_value(produced)

    Logger.critical(
        f"Couldn't get outputs from transformation ({description}) for '{inpid}'"
    )
    return None
def check_types(self):
    """
    Check that the type returned by ``self.source`` can feed the input
    selected on ``self.finish`` and store the verdict on
    ``self.compatible_types``.

    When this edge scatters, the source must be an array and its element type
    is the one compared. An incompatible pairing is passed to
    ``Logger.critical``.

    :raises Exception: if this edge scatters but the source type is not an array
    """
    # Fall back to the first declared input when no tag was given.
    ftoolin: TInput = (
        self.finish.inputs()[self.ftag]
        if self.ftag is not None
        else first_value(self.finish.inputs())
    )

    stype = get_instantiated_type(self.source.returntype())
    ftype = get_instantiated_type(ftoolin.intype)

    if self.scatter:
        if not stype.is_array():
            raise Exception(
                f"Scatter was required for '{self.source}' → '{self.finish.id()}.{self.ftag}' but "
                f"the input type was {type(stype).__name__} and not an array"
            )
        # Instantiate the element type so the method calls below
        # (is_array / id / can_receive_from) operate on an instance.
        stype = get_instantiated_type(stype.subtype())

    # Scatters are handled automatically by the StepTagInput Array unwrapping
    self.compatible_types = ftype.can_receive_from(stype, False)
    if not self.compatible_types:
        # A single value is accepted by an array input of that element type.
        if ftype.is_array() and ftype.subtype().can_receive_from(stype):
            self.compatible_types = True

    if not self.compatible_types:
        s = str(self.source)
        f = full_dot(self.finish, self.ftag)
        message = (
            f"Mismatch of types when joining '{s}' to '{f}': "
            f"{stype.id()} -/→ {ftoolin.intype.id()}"
        )
        if stype.is_array() and ftype.can_receive_from(stype.subtype()):
            message += " (did you forget to SCATTER?)"
        Logger.critical(message)
def find_or_generate_config(self, identifier, config: CromwellConfiguration, config_path):
    """
    Select the Cromwell configuration for this engine.

    Precedence: an explicit ``config`` object, then an explicit
    ``config_path`` (copied to ``self.config_path``), then the path from the
    prepared job (also copied), and finally a configuration generated from
    the job's template, or a blank ``CromwellConfiguration``.

    :param identifier: value stored as ``system.cromwell_id`` on a generated config
    :param config: a ready-made configuration object, or a falsy value
    :param config_path: path to an existing configuration file, or a falsy value
    """
    from ...data.models.preparedjob import PreparedJob

    job = PreparedJob.instance()

    if config:
        self.config = config
    elif config_path:
        shutil.copyfile(config_path, self.config_path)
    elif job and job.cromwell.config_path:
        shutil.copyfile(job.cromwell.config_path, self.config_path)
    else:
        # `job` is treated as possibly missing above, so guard it here too
        # and fall back to a blank configuration instead of raising.
        generated = (
            job.template.template.engine_config(EngineType.cromwell, job)
            if job
            else None
        )
        self.config = generated or CromwellConfiguration()
        if not self.config.system:
            self.config.system = CromwellConfiguration.System()
        self.config.system.cromwell_id = identifier
        self.config.system.cromwell_id_random_suffix = False
        self.config.system.job_shell = "/bin/sh"

    if self.config:
        if self.config.backend:
            if len(self.config.backend.providers) == 1:
                # Point the sole provider at our execution directory when it
                # has no root of its own.
                cnf: CromwellConfiguration.Backend.Provider = first_value(
                    self.config.backend.providers
                )
                if not cnf.config.root:
                    cnf.config.root = self.execution_dir
        else:
            self.config.backend = CromwellConfiguration.Backend.with_new_local_exec_dir(
                self.execution_dir
            )
def step(
    self,
    identifier: str,
    tool: Tool,
    scatter: Union[str, ScatterDescription] = None,
    ignore_missing=False,
):
    """
    Add ``tool`` to this workflow as a step and wire up its connections.

    Primitive connection values (and ``None``) are turned into workflow
    inputs named ``{identifier}_{input}`` with the value as their default.

    :param identifier: identifier of the step
    :param tool: tool to run; its ``connections`` hold the values to connect
    :param scatter: Indicate whether a scatter should occur, on what, and how
    :type scatter: Union[str, ScatterDescription]
    :param ignore_missing: Don't throw an error if required params are missing from this function
    :return: the StepNode that was created
    :raises Exception: on unknown scatter fields, unrecognised parameters or
        missing required parameters
    :raises TypeError: if a primitive value is incompatible with the input type
    """
    self.verify_identifier(identifier, tool.id())

    if isinstance(scatter, str):
        scatter = ScatterDescription([scatter])

    # verify scatter
    if scatter:
        ins = set(tool.inputs_map().keys())
        fields = set(scatter.fields)
        if any(f not in ins for f in fields):
            # if there is a field not in the input map, we have a problem
            extra_keys = ", ".join(f"'{f}'" for f in (fields - ins))
            raise Exception(
                f"Couldn't scatter the field(s) for step '{identifier}' "
                f"(tool: '{tool}') {extra_keys} as they were not found in the input map"
            )

    tool.workflow = self
    inputs = tool.inputs_map()

    connections = tool.connections

    provided_keys = set(connections.keys())
    all_keys = set(inputs.keys())
    # An input is required unless its type is optional or it has a default;
    # compare against None so falsy defaults (0, False, "") still count.
    required_keys = set(
        i
        for i, v in inputs.items()
        if not v.input_type.optional and v.default is None
    )

    if not provided_keys.issubset(all_keys):
        unrecparams = ", ".join(provided_keys - all_keys)
        tags = ", ".join([f"in.{i}" for i in all_keys])

        raise Exception(
            f"Unrecognised parameters {unrecparams} when creating '{identifier}' ({tool.id()}). \n"
            f"Expected types: {tags}"
        )

    if not ignore_missing and not required_keys.issubset(provided_keys):
        missing = ", ".join(f"'{i}'" for i in (required_keys - provided_keys))
        raise Exception(
            f"Missing the parameters {missing} when creating '{identifier}' ({tool.id()})"
        )

    stp = StepNode(self, identifier=identifier, tool=tool, scatter=scatter)

    added_edges = []
    for (k, v) in connections.items():

        if is_python_primitive(v):
            inp_identifier = f"{identifier}_{k}"

            # Copy the type so marking it optional does not alter the tool's input.
            referencedtype = copy.copy(inputs[k].input_type)
            parsed_type = get_instantiated_type(v)

            if parsed_type and not referencedtype.can_receive_from(parsed_type):
                raise TypeError(
                    f"The type {parsed_type.id()} inferred from the value '{v}' is not "
                    f"compatible with the '{identifier}.{k}' type: {referencedtype.id()}"
                )

            referencedtype.optional = True

            v = self.input(inp_identifier, referencedtype, default=v)
        if v is None:
            inp_identifier = f"{identifier}_{k}"
            v = self.input(inp_identifier, inputs[k].input_type, default=v)

        verifiedsource = verify_or_try_get_source(v)
        if isinstance(verifiedsource, list):
            for vv in verifiedsource:
                added_edges.append(stp._add_edge(k, vv))
        else:
            added_edges.append(stp._add_edge(k, verifiedsource))

    for e in added_edges:
        si = e.finish.sources[e.ftag] if e.ftag else first_value(e.finish.sources)
        self.has_multiple_inputs = self.has_multiple_inputs or si.multiple_inputs

    self.has_scatter = self.has_scatter or scatter is not None
    self.has_subworkflow = self.has_subworkflow or isinstance(tool, Workflow)
    self.nodes[identifier] = stp
    self.step_nodes[identifier] = stp

    return stp
def translate_step(
    step: StepNode,
    is_nested_tool=False,
    resource_overrides: Dict[str, str] = None,
    use_run_ref=True,
    allow_empty_container=False,
    container_override=None,
):
    """
    Build the ``cwlgen.WorkflowStep`` for ``step``.

    :param step: step node to translate
    :param is_nested_tool: when referencing the tool by file, omit the
        ``tools/`` prefix
    :param resource_overrides: mapping of extra step-input id to source; each
        entry is appended as a step input. ``None`` means no overrides.
    :param use_run_ref: reference the tool by filename instead of embedding
        its translation
    :param allow_empty_container: forwarded to the embedded tool translation
    :param container_override: forwarded to the embedded tool translation
    :return: the populated ``cwlgen.WorkflowStep``
    :raises Exception: if a required input of the step has no connection
    """
    # The default used to be the typing alias itself (``=Dict[str, str]``),
    # which cannot be measured or looked up; treat "not given" as empty.
    if resource_overrides is None:
        resource_overrides = {}

    tool = step.tool
    if use_run_ref:
        prefix = "" if is_nested_tool else "tools/"
        run_ref = prefix + CwlTranslator.tool_filename(tool)
    else:
        from janis_core.workflow.workflow import Workflow

        has_resources_overrides = len(resource_overrides) > 0
        if isinstance(tool, Workflow):
            run_ref = CwlTranslator.translate_workflow_to_all_in_one(
                tool,
                with_resource_overrides=has_resources_overrides,
                allow_empty_container=allow_empty_container,
                container_override=container_override,
            )
        elif isinstance(tool, CodeTool):
            run_ref = CwlTranslator.translate_code_tool_internal(
                tool,
                allow_empty_container=allow_empty_container,
                container_override=container_override,
            )
        else:
            run_ref = CwlTranslator.translate_tool_internal(
                tool,
                True,
                with_resource_overrides=has_resources_overrides,
                allow_empty_container=allow_empty_container,
                container_override=container_override,
            )

    cwlstep = cwlgen.WorkflowStep(
        id=step.id(),
        run=run_ref,
        # label=step.step.label,
        doc=step.doc.doc if step.doc else None,
        scatter=None,  # Filled by StepNode
        scatterMethod=None,  # Filled by StepNode
        in_=[],
        out=[],
    )

    cwlstep.out = [
        cwlgen.WorkflowStepOutput(id=o.tag) for o in step.tool.tool_outputs()
    ]

    ins = step.inputs()

    for k in ins:
        inp = ins[k]
        if k not in step.sources:
            # Unconnected inputs are fine only if optional or defaulted.
            if inp.intype.optional or inp.default:
                continue
            else:
                raise Exception(
                    f"Error when building connections for cwlstep '{step.id()}', "
                    f"could not find required connection: '{k}'"
                )

        edge = step.sources[k]
        ss = edge.slashed_source()
        link_merge = None

        if (
            ss is not None
            and not isinstance(ss, list)
            and isinstance(inp.intype, Array)
        ):
            start = edge.source().start
            outssval = start.outputs()
            source_type = (
                first_value(outssval)
                if len(outssval) == 1
                else outssval[edge.source().stag]
            ).outtype
            # A single non-array, non-scattered source feeding an array input
            # is wrapped in a list and merged as nested.
            if not isinstance(source_type, Array) and not (
                isinstance(start, StepNode) and start.scatter
            ):
                ss = [ss]
                link_merge = "merge_nested"

        d = cwlgen.WorkflowStepInput(
            id=inp.tag,
            source=ss,
            linkMerge=link_merge,  # this will need to change when edges have multiple source_map
            valueFrom=None,
        )

        cwlstep.in_.append(d)

    for r in resource_overrides:
        cwlstep.in_.append(
            cwlgen.WorkflowStepInput(id=r, source=resource_overrides[r])
        )

    if step.scatter:
        # A scatter method is only set when scattering over several fields.
        if len(step.scatter.fields) > 1:
            cwlstep.scatterMethod = step.scatter.method.cwl()
        cwlstep.scatter = step.scatter.fields

    return cwlstep
def translate_step(
    step: StepNode,
    is_nested_tool=False,
    resource_overrides: Dict[str, str] = None,
    use_run_ref=True,
):
    """
    Build the ``cwlgen.WorkflowStep`` for ``step``.

    :param step: step node to translate
    :param is_nested_tool: when referencing the tool by file, use
        ``{tool}.cwl`` rather than ``tools/{tool}.cwl``
    :param resource_overrides: mapping of extra step-input id to source; each
        entry is appended as a step input. ``None`` means no overrides.
    :param use_run_ref: reference the tool by filename instead of embedding
        its translation
    :return: the populated ``cwlgen.WorkflowStep``
    :raises Exception: if a required input of the step has no connection
    """
    # The default used to be the typing alias itself (``=Dict[str, str]``),
    # which cannot be measured or looked up; treat "not given" as empty.
    if resource_overrides is None:
        resource_overrides = {}

    tool = step.tool
    if use_run_ref:
        run_ref = (
            "{tool}.cwl" if is_nested_tool else "tools/{tool}.cwl"
        ).format(tool=tool.id())
    else:
        from janis_core.workflow.workflow import Workflow

        has_resources_overrides = len(resource_overrides) > 0
        if isinstance(tool, Workflow):
            run_ref = CwlTranslator.translate_workflow_to_all_in_one(
                tool, with_resource_overrides=has_resources_overrides
            )
        else:
            run_ref = CwlTranslator.translate_tool_internal(
                tool, True, with_resource_overrides=has_resources_overrides
            )

    cwlstep = cwlgen.WorkflowStep(
        step_id=step.id(),
        run=run_ref,
        # label=step.step.label,
        doc=step.doc,
        scatter=None,  # Filled by StepNode
        scatter_method=None,  # Filled by StepNode
    )

    cwlstep.out = [
        cwlgen.WorkflowStepOutput(output_id=o.tag) for o in step.tool.outputs()
    ]

    ins = step.inputs()

    for k in ins:
        inp = ins[k]
        if k not in step.sources:
            # Unconnected inputs are fine only if optional or defaulted.
            if inp.input_type.optional or inp.default:
                continue
            else:
                raise Exception(
                    f"Error when building connections for cwlstep '{step.id()}', "
                    f"could not find required connection: '{k}'"
                )

        edge = step.sources[k]
        ss = edge.slashed_source()
        link_merge = None

        if (
            ss is not None
            and not isinstance(ss, list)
            and isinstance(inp.input_type, Array)
        ):
            start = edge.source().start
            outssval = start.outputs()
            source_type = (
                first_value(outssval)
                if len(outssval) == 1
                else outssval[edge.source().stag]
            ).output_type
            # A single non-array, non-scattered source feeding an array input
            # is wrapped in a list and merged as nested.
            if not isinstance(source_type, Array) and not (
                isinstance(start, StepNode) and start.scatter
            ):
                ss = [ss]
                link_merge = "merge_nested"

        d = cwlgen.WorkflowStepInput(
            input_id=inp.tag,
            source=ss,
            link_merge=link_merge,  # this will need to change when edges have multiple source_map
            value_from=None,
        )

        cwlstep.inputs.append(d)

    for r in resource_overrides:
        cwlstep.inputs.append(
            cwlgen.WorkflowStepInput(input_id=r, source=resource_overrides[r])
        )

    if step.scatter:
        # A scatter method is only set when scattering over several fields.
        if len(step.scatter.fields) > 1:
            Logger.info(
                "Discovered more than one scatterable field on cwlstep '{step_id}', "
                "deciding scatterMethod to be '{method}'".format(
                    step_id=step.id(), method=step.scatter.method
                )
            )
            cwlstep.scatterMethod = step.scatter.method.cwl()
        cwlstep.scatter = step.scatter.fields

    return cwlstep
def step(
    self,
    identifier: str,
    tool: Tool,
    scatter: Union[str, List[str], ScatterDescription] = None,
    ignore_missing=False,
    doc: str = None,
):
    """
    Construct a step on this workflow.

    Primitive and ``Filename`` connection values (and ``None``) are turned
    into workflow inputs named ``{identifier}_{input}`` with the value as
    their default.

    :param identifier: The identifier of the step, unique within the workflow.
    :param tool: The tool that should run for this step.
    :param scatter: Indicate whether a scatter should occur, on what, and how.
        A string or list of strings is converted to a dot-method
        ScatterDescription.
    :type scatter: Union[str, List[str], ScatterDescription]
    :param ignore_missing: Don't throw an error if required params are missing from this function
    :param doc: documentation for the step; wrapped in a DocumentationMeta
        unless it already is one
    :return: the StepNode that was created
    :raises Exception: on an unsupported scatter value, unknown scatter
        fields, unrecognised parameters or missing required parameters
    :raises TypeError: if a primitive value is incompatible with the input type
    """
    self.verify_identifier(identifier, tool.id())

    if scatter is not None and not isinstance(scatter, ScatterDescription):
        if isinstance(scatter, str):
            fields = [scatter]
        elif isinstance(scatter, list):
            fields = scatter
        else:
            raise Exception(
                f"Couldn't scatter with field '{scatter}' ({type(scatter)})"
            )

        scatter = ScatterDescription(fields, method=ScatterMethods.dot)

    # verify scatter
    if scatter:
        ins = set(tool.inputs_map().keys())
        fields = set(scatter.fields)
        if any(f not in ins for f in fields):
            # if there is a field not in the input map, we have a problem
            extra_keys = ", ".join(f"'{f}'" for f in (fields - ins))
            raise Exception(
                f"Couldn't scatter the field(s) {extra_keys} for step '{identifier}' "
                f"as they are not inputs to the tool '{tool.id()}'"
            )

    tool.workflow = self
    inputs = tool.inputs_map()

    connections = tool.connections

    provided_keys = set(connections.keys())
    all_keys = set(inputs.keys())
    required_keys = set(
        # The input is optional if it's optional or has default)
        i
        for i, v in inputs.items()
        if not (v.intype.optional or v.default is not None)
    )

    if not provided_keys.issubset(all_keys):
        unrecparams = ", ".join(provided_keys - all_keys)
        tags = ", ".join([f"in.{i}" for i in all_keys])

        raise Exception(
            f"Unrecognised parameters {unrecparams} when creating '{identifier}' ({tool.id()}). \n"
            f"Expected types: {tags}"
        )

    if not ignore_missing and not required_keys.issubset(provided_keys):
        missing = ", ".join(f"'{i}'" for i in (required_keys - provided_keys))
        raise Exception(
            f"Missing the parameters {missing} when creating '{identifier}' ({tool.id()})"
        )

    d = doc if isinstance(doc, DocumentationMeta) else DocumentationMeta(doc=doc)
    stp = StepNode(self, identifier=identifier, tool=tool, scatter=scatter, doc=d)

    added_edges = []
    for (k, v) in connections.items():

        isfilename = isinstance(v, Filename)
        if is_python_primitive(v) or isfilename:
            inp_identifier = f"{identifier}_{k}"

            # Copy the declared type so marking it optional does not alter
            # the tool's input; a Filename value serves as its own type.
            referencedtype = copy.copy(inputs[k].intype) if not isfilename else v
            parsed_type = get_instantiated_type(v)

            if parsed_type and not referencedtype.can_receive_from(parsed_type):
                raise TypeError(
                    f"The type {parsed_type.id()} inferred from the value '{v}' is not "
                    f"compatible with the '{identifier}.{k}' type: {referencedtype.id()}"
                )

            referencedtype.optional = True

            # NOTE(review): this sets `quality` on the doc object held by the
            # tool input itself (no copy is taken) -- confirm that is intended.
            indoc = inputs[k].doc
            indoc.quality = InputQualityType.configuration

            v = self.input(
                inp_identifier,
                referencedtype,
                default=v.generated_filename() if isfilename else v,
                doc=indoc,
            )
        if v is None:
            inp_identifier = f"{identifier}_{k}"
            v = self.input(
                inp_identifier,
                inputs[k].intype,
                default=v,
                doc=InputDocumentation(
                    doc=None, quality=InputQualityType.configuration
                ),
            )

        verifiedsource = verify_or_try_get_source(v)
        if isinstance(verifiedsource, list):
            for vv in verifiedsource:
                added_edges.append(stp._add_edge(k, vv))
        else:
            added_edges.append(stp._add_edge(k, verifiedsource))

    for e in added_edges:
        si = e.finish.sources[e.ftag] if e.ftag else first_value(e.finish.sources)
        self.has_multiple_inputs = self.has_multiple_inputs or si.multiple_inputs

    self.has_scatter = self.has_scatter or scatter is not None
    self.has_subworkflow = self.has_subworkflow or isinstance(tool, Workflow)
    self.nodes[identifier] = stp
    self.step_nodes[identifier] = stp

    return stp