def print_pack(
    loadingContext: LoadingContext,
    uri: str,
) -> str:
    """Return a CWL serialization of the CWL document in JSON."""
    packed = pack(loadingContext, uri)
    if len(cast(Sized, packed["$graph"])) > 1:
        return json_dumps(packed, indent=4)
    return json_dumps(
        cast(MutableSequence[CWLObjectType], packed["$graph"])[0], indent=4
    )
def print_pack(
    loadingContext,  # type: LoadingContext
    uri,  # type: str
):  # type: (...) -> str
    """Return a CWL serialization of the CWL document in JSON."""
    packed = pack(loadingContext, uri)
    if len(packed["$graph"]) > 1:
        return json_dumps(packed, indent=4)
    return json_dumps(packed["$graph"][0], indent=4)
def __eq__(self, actual):
    expected_json = json_dumps(self.expected, sort_keys=True, indent=2)
    actual_json = json_dumps(actual, sort_keys=True, indent=2)
    if expected_json != actual_json:
        raise AssertionError(
            "".join(
                difflib.context_diff(
                    expected_json.splitlines(1),
                    actual_json.splitlines(1),
                    fromfile="Expected",
                    tofile="Actual",
                )
            )
        )
    return True
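# A minimal, self-contained sketch (not cwltool code) of the diff-based comparison
# technique used by __eq__ above: serialize both objects with sorted keys, then
# raise with a context diff when they differ. Only the standard library is used;
# the helper name is illustrative.
import difflib
import json


def assert_json_equal(expected, actual):
    expected_json = json.dumps(expected, sort_keys=True, indent=2)
    actual_json = json.dumps(actual, sort_keys=True, indent=2)
    if expected_json != actual_json:
        raise AssertionError(
            "".join(
                difflib.context_diff(
                    expected_json.splitlines(keepends=True),
                    actual_json.splitlines(keepends=True),
                    fromfile="Expected",
                    tofile="Actual",
                )
            )
        )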
def print_pack(
    document_loader,  # type: Loader
    processobj,  # type: CommentedMap
    uri,  # type: str
    metadata,  # type: Dict[str, Any]
):  # type: (...) -> str
    """Return a CWL serialization of the CWL document in JSON."""
    packed = pack(document_loader, processobj, uri, metadata)
    if len(packed["$graph"]) > 1:
        return json_dumps(packed, indent=4)
    return json_dumps(packed["$graph"][0], indent=4)
def create_job(
    self, builder_job: CWLObjectType, is_output: bool = False
) -> CWLObjectType:
    # TODO customise the file
    """Generate the new job object with RO specific relative paths."""
    copied = copy.deepcopy(builder_job)
    relativised_input_objecttemp = {}  # type: CWLObjectType
    self._relativise_files(copied)

    def jdefault(o: Any) -> Dict[Any, Any]:
        return dict(o)

    if is_output:
        rel_path = PurePosixPath(WORKFLOW) / "primary-output.json"
    else:
        rel_path = PurePosixPath(WORKFLOW) / "primary-job.json"
    j = json_dumps(copied, indent=4, ensure_ascii=False, default=jdefault)
    with self.write_bag_file(str(rel_path)) as file_path:
        file_path.write(j + "\n")
    _logger.debug("[provenance] Generated customised job file: %s", rel_path)
    # Generate dictionary with keys as workflow level input IDs and values
    # as
    # 1) for files the relativised location containing hash
    # 2) for other attributes, the actual value.
    for key, value in copied.items():
        if isinstance(value, MutableMapping):
            if value.get("class") in ("File", "Directory"):
                relativised_input_objecttemp[key] = value
        else:
            relativised_input_objecttemp[key] = value
    self.relativised_input_object.update(
        {k: v for k, v in relativised_input_objecttemp.items() if v}
    )
    return self.relativised_input_object
def _setup(self, runtimeContext: RuntimeContext) -> None:
    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)
    for knownfile in self.pathmapper.files():
        p = self.pathmapper.mapper(knownfile)
        if p.type == "File" and not os.path.isfile(p[0]) and p.staged:
            raise WorkflowException(
                "Input file %s (at %s) not found or is not a regular "
                "file." % (knownfile, self.pathmapper.mapper(knownfile)[0])
            )
    if "listing" in self.generatefiles:
        runtimeContext = runtimeContext.copy()
        runtimeContext.outdir = self.outdir
        self.generatemapper = self.make_path_mapper(
            self.generatefiles["listing"],
            self.builder.outdir,
            runtimeContext,
            False,
        )
        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                "[job %s] initial work dir %s",
                self.name,
                json_dumps(
                    {
                        p: self.generatemapper.mapper(p)
                        for p in self.generatemapper.files()
                    },
                    indent=4,
                ),
            )
def receive_output(self, step, outputparms, final_output_callback, jobout, processStatus):
    # type: (WorkflowJobStep, List[Dict[str,str]], Callable[[Any, Any], Any], Dict[str,str], str) -> None
    for i in outputparms:
        if "id" in i:
            if i["id"] in jobout:
                self.state[i["id"]] = WorkflowStateItem(
                    i, jobout[i["id"]], processStatus)
            else:
                _logger.error("[%s] Output is missing expected field %s",
                              step.name, i["id"])
                processStatus = "permanentFail"
    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug("[%s] produced output %s", step.name,
                      json_dumps(jobout, indent=4))
    if processStatus != "success":
        if self.processStatus != "permanentFail":
            self.processStatus = processStatus
        _logger.warning("[%s] completed %s", step.name, processStatus)
    else:
        _logger.info("[%s] completed %s", step.name, processStatus)
    step.completed = True
    # Release the iterable related to this step to
    # reclaim memory.
    step.iterable = None
    self.made_progress = True
    completed = sum(1 for s in self.steps if s.completed)
    if completed == len(self.steps):
        self.do_output_callback(final_output_callback)
def _write_ro_manifest(self) -> None:
    # Does not have to be this order, but it's nice to be consistent
    filename = "manifest.json"
    createdOn, createdBy = self._self_made()
    manifest = OrderedDict(
        {
            "@context": [
                {"@base": "%s%s/" % (self.base_uri, posix_path(METADATA))},
                "https://w3id.org/bundle/context",
            ],
            "id": "/",
            "conformsTo": CWLPROV_VERSION,
            "manifest": filename,
            "createdOn": createdOn,
            "createdBy": createdBy,
            "authoredBy": self._authored_by(),
            "aggregates": self._ro_aggregates(),
            "annotations": self._ro_annotations(),
        }
    )
    json_manifest = json_dumps(manifest, indent=4, ensure_ascii=False)
    rel_path = str(PurePosixPath(METADATA) / filename)
    json_manifest += "\n"
    with self.write_bag_file(rel_path) as manifest_file:
        manifest_file.write(json_manifest)
def jshead(engine_config: List[str], rootvars: CWLObjectType) -> str:
    # make sure all the byte strings are converted
    # to str in `rootvars` dict.
    return "\n".join(
        engine_config
        + [
            "var {} = {};".format(k, json_dumps(v, indent=4))
            for k, v in rootvars.items()
        ]
    )
def _convert_stdstreams_to_files(
    workflowobj: Union[
        MutableMapping[str, Any],
        MutableSequence[Union[Dict[str, Any], str, int]],
        str,
    ]
) -> None:
    if isinstance(workflowobj, MutableMapping):
        if workflowobj.get("class") == "CommandLineTool":
            with SourceLine(
                workflowobj,
                "outputs",
                ValidationException,
                _logger.isEnabledFor(logging.DEBUG),
            ):
                outputs = workflowobj.get("outputs", [])
                if not isinstance(outputs, CommentedSeq):
                    raise ValidationException('"outputs" section is not ' "valid.")
                for out in workflowobj.get("outputs", []):
                    if not isinstance(out, CommentedMap):
                        raise ValidationException(
                            "Output '{}' is not a valid "
                            "OutputParameter.".format(out)
                        )
                    for streamtype in ["stdout", "stderr"]:
                        if out.get("type") == streamtype:
                            if "outputBinding" in out:
                                raise ValidationException(
                                    "Not allowed to specify outputBinding when"
                                    " using %s shortcut." % streamtype
                                )
                            if streamtype in workflowobj:
                                filename = workflowobj[streamtype]
                            else:
                                filename = str(
                                    hashlib.sha1(  # nosec
                                        json_dumps(workflowobj, sort_keys=True).encode(
                                            "utf-8"
                                        )
                                    ).hexdigest()
                                )
                                workflowobj[streamtype] = filename
                            out["type"] = "File"
                            out["outputBinding"] = cmap({"glob": filename})
            for inp in workflowobj.get("inputs", []):
                if inp.get("type") == "stdin":
                    if "inputBinding" in inp:
                        raise ValidationException(
                            "Not allowed to specify inputBinding when"
                            " using stdin shortcut."
                        )
                    if "stdin" in workflowobj:
                        raise ValidationException(
                            "Not allowed to specify stdin path when"
                            " using stdin type shortcut."
                        )
                    else:
                        workflowobj["stdin"] = (
                            "$(inputs.%s.path)" % inp["id"].rpartition("#")[2]
                        )
                        inp["type"] = "File"
        else:
            for entry in workflowobj.values():
                _convert_stdstreams_to_files(entry)
    if isinstance(workflowobj, MutableSequence):
        for entry in workflowobj:
            _convert_stdstreams_to_files(entry)
def jshead(engine_config, rootvars):
    # type: (List[str], Dict[str, Any]) -> str
    # make sure all the byte strings are converted
    # to str in `rootvars` dict.
    return "\n".join(
        engine_config
        + [
            "var {} = {};".format(k, json_dumps(v, indent=4))
            for k, v in rootvars.items()
        ]
    )
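# Illustrative, standalone sketch of what jshead() produces: a JavaScript preamble
# that exposes each root variable as a JSON literal, prefixed by any engine
# configuration lines. The concrete values below are made up for the example and
# only the standard-library json module is used.
import json

rootvars = {"inputs": {"threads": 4}, "runtime": {"cores": 2}}
preamble = "\n".join(
    ["'use strict';"]
    + ["var {} = {};".format(k, json.dumps(v, indent=4)) for k, v in rootvars.items()]
)
# preamble now contains "var inputs = {...};" and "var runtime = {...};" lines,
# ready to be prepended to a user expression before evaluation.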
def check_format(
    actual_file: Union[CWLObjectType, List[CWLObjectType]],
    input_formats: Union[List[str], str],
    ontology: Optional[Graph],
) -> None:
    """Confirm that the format present is valid for the allowed formats."""
    for afile in aslist(actual_file):
        if not afile:
            continue
        if "format" not in afile:
            raise ValidationException(
                "File has no 'format' defined: {}".format(json_dumps(afile, indent=4))
            )
        for inpf in aslist(input_formats):
            if afile["format"] == inpf or formatSubclassOf(
                afile["format"], inpf, ontology, set()
            ):
                return
        raise ValidationException(
            "File has an incompatible format: {}".format(json_dumps(afile, indent=4))
        )
def check_format(
    actual_file,  # type: Union[Dict[str, Any], List[Dict[str, Any]], str]
    input_formats,  # type: Union[List[str], str]
    ontology,  # type: Optional[Graph]
):  # type: (...) -> None
    """Confirm that the format present is valid for the allowed formats."""
    for afile in aslist(actual_file):
        if not afile:
            continue
        if "format" not in afile:
            raise validate.ValidationException(
                "File has no 'format' defined: {}".format(json_dumps(afile, indent=4))
            )
        for inpf in aslist(input_formats):
            if afile["format"] == inpf or formatSubclassOf(
                afile["format"], inpf, ontology, set()
            ):
                return
        raise validate.ValidationException(
            "File has an incompatible format: {}".format(json_dumps(afile, indent=4))
        )
def _setup(self, runtimeContext: RuntimeContext) -> None:
    cuda_req, _ = self.builder.get_requirement(
        "http://commonwl.org/cwltool#CUDARequirement"
    )
    if cuda_req:
        count = cuda_check(
            cuda_req, math.ceil(self.builder.resources["cudaDeviceCount"])
        )
        if count == 0:
            raise WorkflowException("Could not satisfy CUDARequirement")

    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)

    def is_streamable(file: str) -> bool:
        if not runtimeContext.streaming_allowed:
            return False
        for inp in self.joborder.values():
            if isinstance(inp, dict) and inp.get("location", None) == file:
                return inp.get("streamable", False)
        return False

    for knownfile in self.pathmapper.files():
        p = self.pathmapper.mapper(knownfile)
        if p.type == "File" and not os.path.isfile(p[0]) and p.staged:
            if not (
                is_streamable(knownfile) and stat.S_ISFIFO(os.stat(p[0]).st_mode)
            ):
                raise WorkflowException(
                    "Input file %s (at %s) not found or is not a regular "
                    "file." % (knownfile, self.pathmapper.mapper(knownfile)[0])
                )

    if "listing" in self.generatefiles:
        runtimeContext = runtimeContext.copy()
        runtimeContext.outdir = self.outdir
        self.generatemapper = self.make_path_mapper(
            self.generatefiles["listing"],
            self.builder.outdir,
            runtimeContext,
            False,
        )
        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                "[job %s] initial work dir %s",
                self.name,
                json_dumps(
                    {
                        p: self.generatemapper.mapper(p)
                        for p in self.generatemapper.files()
                    },
                    indent=4,
                ),
            )
    self.base_path_logs = runtimeContext.set_log_dir(
        self.outdir, runtimeContext.log_dir, self.name
    )
def do_output_callback(self, final_output_callback: OutputCallbackType) -> None:
    supportsMultipleInput = bool(
        self.workflow.get_requirement("MultipleInputFeatureRequirement")[0]
    )
    wo = None  # type: Optional[CWLObjectType]
    try:
        wo = object_from_state(
            self.state,
            self.tool["outputs"],
            True,
            supportsMultipleInput,
            "outputSource",
            incomplete=True,
        )
    except WorkflowException as err:
        _logger.error(
            "[%s] Cannot collect workflow output: %s", self.name, str(err)
        )
        self.processStatus = "permanentFail"
    if (
        self.prov_obj
        and self.parent_wf
        and self.prov_obj.workflow_run_uri != self.parent_wf.workflow_run_uri
    ):
        process_run_id = None  # type: Optional[str]
        self.prov_obj.generate_output_prov(wo or {}, process_run_id, self.name)
        self.prov_obj.document.wasEndedBy(
            self.prov_obj.workflow_run_uri,
            None,
            self.prov_obj.engine_uuid,
            datetime.datetime.now(),
        )
        prov_ids = self.prov_obj.finalize_prov_profile(self.name)
        # Tell parent to associate our provenance files with our wf run
        self.parent_wf.activity_has_provenance(
            self.prov_obj.workflow_run_uri, prov_ids
        )
    _logger.info("[%s] completed %s", self.name, self.processStatus)
    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug("[%s] outputs %s", self.name, json_dumps(wo, indent=4))
    self.did_callback = True
    final_output_callback(wo, self.processStatus)
def printdeps(
    obj: CWLObjectType,
    document_loader: Loader,
    stdout: Union[TextIO, StreamWriter],
    relative_deps: str,
    uri: str,
    basedir: Optional[str] = None,
    nestdirs: bool = True,
) -> None:
    """Print a JSON representation of the dependencies of the CWL document."""
    deps = find_deps(obj, document_loader, uri, basedir=basedir, nestdirs=nestdirs)
    if relative_deps == "primary":
        base = basedir if basedir else os.path.dirname(uri_file_path(str(uri)))
    elif relative_deps == "cwd":
        base = os.getcwd()
    visit_class(deps, ("File", "Directory"), functools.partial(make_relative, base))
    stdout.write(json_dumps(deps, indent=4))
def interpolate(
    scan,  # type: str
    rootvars,  # type: Dict[str, Any]
    timeout=default_timeout,  # type: float
    fullJS=False,  # type: bool
    jslib="",  # type: str
    force_docker_pull=False,  # type: bool
    debug=False,  # type: bool
    js_console=False,  # type: bool
    strip_whitespace=True,  # type: bool
):  # type: (...) -> JSON
    if strip_whitespace:
        scan = scan.strip()
    parts = []
    w = scanner(scan)
    while w:
        parts.append(scan[0:w[0]])
        if scan[w[0]] == "$":
            e = evaluator(
                scan[w[0] + 1:w[1]],
                jslib,
                rootvars,
                timeout,
                fullJS=fullJS,
                force_docker_pull=force_docker_pull,
                debug=debug,
                js_console=js_console,
            )
            if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1:
                return e
            leaf = json_dumps(e, sort_keys=True)
            if leaf[0] == '"':
                leaf = leaf[1:-1]
            parts.append(leaf)
        elif scan[w[0]] == "\\":
            e = scan[w[1] - 1]
            parts.append(e)
        scan = scan[w[1]:]
        w = scanner(scan)
    parts.append(scan)
    return "".join(parts)
def interpolate(
    scan: str,
    rootvars: CWLObjectType,
    timeout: float = default_timeout,
    fullJS: bool = False,
    jslib: str = "",
    force_docker_pull: bool = False,
    debug: bool = False,
    js_console: bool = False,
    strip_whitespace: bool = True,
) -> Optional[CWLOutputType]:
    if strip_whitespace:
        scan = scan.strip()
    parts = []
    w = scanner(scan)
    while w:
        parts.append(scan[0 : w[0]])
        if scan[w[0]] == "$":
            e = evaluator(
                scan[w[0] + 1 : w[1]],
                jslib,
                rootvars,
                timeout,
                fullJS=fullJS,
                force_docker_pull=force_docker_pull,
                debug=debug,
                js_console=js_console,
            )
            if w[0] == 0 and w[1] == len(scan) and len(parts) <= 1:
                return e
            leaf = json_dumps(e, sort_keys=True)
            if leaf[0] == '"':
                leaf = leaf[1:-1]
            parts.append(leaf)
        elif scan[w[0]] == "\\":
            e = scan[w[1] - 1]
            parts.append(e)
        scan = scan[w[1] :]
        w = scanner(scan)
    parts.append(scan)
    return "".join(parts)
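# Standalone sketch of the leaf-stringification rule interpolate() applies when an
# expression result is embedded inside a larger string: JSON-encode the value with
# sorted keys, but drop the surrounding quotes for plain strings so "abc" embeds as
# abc rather than "abc". Not cwltool code; the helper name is made up.
import json


def leaf_repr(value):
    leaf = json.dumps(value, sort_keys=True)
    if leaf.startswith('"'):
        leaf = leaf[1:-1]  # embed bare string content without quote characters
    return leaf


assert leaf_repr("abc") == "abc"
assert leaf_repr([1, 2]) == "[1, 2]"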
def _setup(self, runtimeContext: RuntimeContext) -> None:
    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)

    def is_streamable(file: str) -> bool:
        if not runtimeContext.streaming_allowed:
            return False
        for inp in self.joborder.values():
            if isinstance(inp, dict) and inp.get("location", None) == file:
                return inp.get("streamable", False)
        return False

    for knownfile in self.pathmapper.files():
        p = self.pathmapper.mapper(knownfile)
        if p.type == "File" and not os.path.isfile(p[0]) and p.staged:
            if not (
                is_streamable(knownfile) and stat.S_ISFIFO(os.stat(p[0]).st_mode)
            ):
                raise WorkflowException(
                    "Input file %s (at %s) not found or is not a regular "
                    "file." % (knownfile, self.pathmapper.mapper(knownfile)[0])
                )

    if "listing" in self.generatefiles:
        runtimeContext = runtimeContext.copy()
        runtimeContext.outdir = self.outdir
        self.generatemapper = self.make_path_mapper(
            self.generatefiles["listing"],
            self.builder.outdir,
            runtimeContext,
            False,
        )
        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                "[job %s] initial work dir %s",
                self.name,
                json_dumps(
                    {
                        p: self.generatemapper.mapper(p)
                        for p in self.generatemapper.files()
                    },
                    indent=4,
                ),
            )
def receive_output(
    self,
    step: WorkflowJobStep,
    outputparms: List[CWLObjectType],
    final_output_callback: OutputCallbackType,
    jobout: CWLObjectType,
    processStatus: str,
) -> None:
    for i in outputparms:
        if "id" in i:
            iid = cast(str, i["id"])
            if iid in jobout:
                self.state[iid] = WorkflowStateItem(i, jobout[iid], processStatus)
            else:
                _logger.error(
                    "[%s] Output is missing expected field %s", step.name, iid
                )
                processStatus = "permanentFail"
    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug(
            "[%s] produced output %s", step.name, json_dumps(jobout, indent=4)
        )
    if processStatus not in ("success", "skipped"):
        if self.processStatus != "permanentFail":
            self.processStatus = processStatus
        _logger.warning("[%s] completed %s", step.name, processStatus)
    else:
        _logger.info("[%s] completed %s", step.name, processStatus)
    step.completed = True
    # Release the iterable related to this step to
    # reclaim memory.
    step.iterable = None
    self.made_progress = True
    completed = sum(1 for s in self.steps if s.completed)
    if completed == len(self.steps):
        self.do_output_callback(final_output_callback)
def printdeps(
    obj,  # type: Mapping[str, Any]
    document_loader,  # type: Loader
    stdout,  # type: Union[TextIO, StreamWriter]
    relative_deps,  # type: bool
    uri,  # type: str
    basedir=None,  # type: Optional[str]
    nestdirs=True,  # type: bool
):  # type: (...) -> None
    """Print a JSON representation of the dependencies of the CWL document."""
    deps = find_deps(obj, document_loader, uri, basedir=basedir, nestdirs=nestdirs)
    if relative_deps == "primary":
        base = basedir if basedir else os.path.dirname(uri_file_path(str(uri)))
    elif relative_deps == "cwd":
        base = os.getcwd()
    visit_class(deps, ("File", "Directory"), functools.partial(make_relative, base))
    stdout.write(json_dumps(deps, indent=4))
def get_metaschema() -> Tuple[Names, List[Dict[str, str]], Loader]:
    """Instantiate the metaschema."""
    loader = ref_resolver.Loader(
        {
            "Any": saladp + "Any",
            "ArraySchema": saladp + "ArraySchema",
            "Array_symbol": saladp + "ArraySchema/type/Array_symbol",
            "DocType": saladp + "DocType",
            "Documentation": saladp + "Documentation",
            "Documentation_symbol": saladp + "Documentation/type/Documentation_symbol",
            "Documented": saladp + "Documented",
            "EnumSchema": saladp + "EnumSchema",
            "Enum_symbol": saladp + "EnumSchema/type/Enum_symbol",
            "JsonldPredicate": saladp + "JsonldPredicate",
            "NamedType": saladp + "NamedType",
            "PrimitiveType": saladp + "PrimitiveType",
            "RecordField": saladp + "RecordField",
            "RecordSchema": saladp + "RecordSchema",
            "Record_symbol": saladp + "RecordSchema/type/Record_symbol",
            "SaladEnumSchema": saladp + "SaladEnumSchema",
            "SaladRecordField": saladp + "SaladRecordField",
            "SaladRecordSchema": saladp + "SaladRecordSchema",
            "SchemaDefinedType": saladp + "SchemaDefinedType",
            "SpecializeDef": saladp + "SpecializeDef",
            "_container": saladp + "JsonldPredicate/_container",
            "_id": {"@id": saladp + "_id", "@type": "@id", "identity": True},
            "_type": saladp + "JsonldPredicate/_type",
            "abstract": saladp + "SaladRecordSchema/abstract",
            "array": saladp + "array",
            "boolean": "http://www.w3.org/2001/XMLSchema#boolean",
            "dct": "http://purl.org/dc/terms/",
            "default": {"@id": saladp + "default", "noLinkCheck": True},
            "doc": "rdfs:comment",
            "docAfter": {"@id": saladp + "docAfter", "@type": "@id"},
            "docChild": {"@id": saladp + "docChild", "@type": "@id"},
            "docParent": {"@id": saladp + "docParent", "@type": "@id"},
            "documentRoot": saladp + "SchemaDefinedType/documentRoot",
            "documentation": saladp + "documentation",
            "double": "http://www.w3.org/2001/XMLSchema#double",
            "enum": saladp + "enum",
            "extends": {"@id": saladp + "extends", "@type": "@id", "refScope": 1},
            "fields": {
                "@id": saladp + "fields",
                "mapPredicate": "type",
                "mapSubject": "name",
            },
            "float": "http://www.w3.org/2001/XMLSchema#float",
            "identity": saladp + "JsonldPredicate/identity",
            "inVocab": saladp + "NamedType/inVocab",
            "int": "http://www.w3.org/2001/XMLSchema#int",
            "items": {"@id": saladp + "items", "@type": "@vocab", "refScope": 2},
            "jsonldPredicate": "sld:jsonldPredicate",
            "long": "http://www.w3.org/2001/XMLSchema#long",
            "mapPredicate": saladp + "JsonldPredicate/mapPredicate",
            "mapSubject": saladp + "JsonldPredicate/mapSubject",
            "name": "@id",
            "noLinkCheck": saladp + "JsonldPredicate/noLinkCheck",
            "null": saladp + "null",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "record": saladp + "record",
            "refScope": saladp + "JsonldPredicate/refScope",
            "sld": saladp,
            "specialize": {
                "@id": saladp + "specialize",
                "mapPredicate": "specializeTo",
                "mapSubject": "specializeFrom",
            },
            "specializeFrom": {
                "@id": saladp + "specializeFrom",
                "@type": "@id",
                "refScope": 1,
            },
            "specializeTo": {
                "@id": saladp + "specializeTo",
                "@type": "@id",
                "refScope": 1,
            },
            "string": "http://www.w3.org/2001/XMLSchema#string",
            "subscope": saladp + "JsonldPredicate/subscope",
            "symbols": {"@id": saladp + "symbols", "@type": "@id", "identity": True},
            "type": {
                "@id": saladp + "type",
                "@type": "@vocab",
                "refScope": 2,
                "typeDSL": True,
            },
            "typeDSL": saladp + "JsonldPredicate/typeDSL",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
        }
    )
    for salad in SALAD_FILES:
        with resource_stream("schema_salad", "metaschema/" + salad) as stream:
            loader.cache["https://w3id.org/cwl/" + salad] = stream.read().decode(
                "UTF-8"
            )
    with resource_stream("schema_salad", "metaschema/metaschema.yml") as stream:
        loader.cache["https://w3id.org/cwl/salad"] = stream.read().decode("UTF-8")
    j = yaml.main.round_trip_load(loader.cache["https://w3id.org/cwl/salad"])
    add_lc_filename(j, "metaschema.yml")
    j2 = loader.resolve_all(j, saladp)[0]
    if not isinstance(j2, list):
        _logger.error("%s", j2)
        raise SchemaParseException("Not a list: {}".format(j2))
    else:
        sch_obj = make_avro(j2, loader)
    try:
        sch_names = make_avro_schema_from_avro(sch_obj)
    except SchemaParseException:
        _logger.error("Metaschema error, avro was:\n%s", json_dumps(sch_obj, indent=4))
        raise
    validate_doc(sch_names, j2, loader, strict=True)
    return (sch_names, j2, loader)
def get_metaschema():
    # type: () -> Tuple[Names, List[Dict[Text, Any]], Loader]
    loader = ref_resolver.Loader({
        "Any": "https://w3id.org/cwl/salad#Any",
        "ArraySchema": "https://w3id.org/cwl/salad#ArraySchema",
        "Array_symbol": "https://w3id.org/cwl/salad#ArraySchema/type/Array_symbol",
        "DocType": "https://w3id.org/cwl/salad#DocType",
        "Documentation": "https://w3id.org/cwl/salad#Documentation",
        "Documentation_symbol": "https://w3id.org/cwl/salad#Documentation/type/Documentation_symbol",
        "Documented": "https://w3id.org/cwl/salad#Documented",
        "EnumSchema": "https://w3id.org/cwl/salad#EnumSchema",
        "Enum_symbol": "https://w3id.org/cwl/salad#EnumSchema/type/Enum_symbol",
        "JsonldPredicate": "https://w3id.org/cwl/salad#JsonldPredicate",
        "NamedType": "https://w3id.org/cwl/salad#NamedType",
        "PrimitiveType": "https://w3id.org/cwl/salad#PrimitiveType",
        "RecordField": "https://w3id.org/cwl/salad#RecordField",
        "RecordSchema": "https://w3id.org/cwl/salad#RecordSchema",
        "Record_symbol": "https://w3id.org/cwl/salad#RecordSchema/type/Record_symbol",
        "SaladEnumSchema": "https://w3id.org/cwl/salad#SaladEnumSchema",
        "SaladRecordField": "https://w3id.org/cwl/salad#SaladRecordField",
        "SaladRecordSchema": "https://w3id.org/cwl/salad#SaladRecordSchema",
        "SchemaDefinedType": "https://w3id.org/cwl/salad#SchemaDefinedType",
        "SpecializeDef": "https://w3id.org/cwl/salad#SpecializeDef",
        "_container": "https://w3id.org/cwl/salad#JsonldPredicate/_container",
        "_id": {"@id": "https://w3id.org/cwl/salad#_id", "@type": "@id", "identity": True},
        "_type": "https://w3id.org/cwl/salad#JsonldPredicate/_type",
        "abstract": "https://w3id.org/cwl/salad#SaladRecordSchema/abstract",
        "array": "https://w3id.org/cwl/salad#array",
        "boolean": "http://www.w3.org/2001/XMLSchema#boolean",
        "dct": "http://purl.org/dc/terms/",
        "default": {"@id": "https://w3id.org/cwl/salad#default", "noLinkCheck": True},
        "doc": "rdfs:comment",
        "docAfter": {"@id": "https://w3id.org/cwl/salad#docAfter", "@type": "@id"},
        "docChild": {"@id": "https://w3id.org/cwl/salad#docChild", "@type": "@id"},
        "docParent": {"@id": "https://w3id.org/cwl/salad#docParent", "@type": "@id"},
        "documentRoot": "https://w3id.org/cwl/salad#SchemaDefinedType/documentRoot",
        "documentation": "https://w3id.org/cwl/salad#documentation",
        "double": "http://www.w3.org/2001/XMLSchema#double",
        "enum": "https://w3id.org/cwl/salad#enum",
        "extends": {"@id": "https://w3id.org/cwl/salad#extends", "@type": "@id", "refScope": 1},
        "fields": {"@id": "https://w3id.org/cwl/salad#fields", "mapPredicate": "type", "mapSubject": "name"},
        "float": "http://www.w3.org/2001/XMLSchema#float",
        "identity": "https://w3id.org/cwl/salad#JsonldPredicate/identity",
        "inVocab": "https://w3id.org/cwl/salad#NamedType/inVocab",
        "int": "http://www.w3.org/2001/XMLSchema#int",
        "items": {"@id": "https://w3id.org/cwl/salad#items", "@type": "@vocab", "refScope": 2},
        "jsonldPredicate": "sld:jsonldPredicate",
        "long": "http://www.w3.org/2001/XMLSchema#long",
        "mapPredicate": "https://w3id.org/cwl/salad#JsonldPredicate/mapPredicate",
        "mapSubject": "https://w3id.org/cwl/salad#JsonldPredicate/mapSubject",
        "name": "@id",
        "noLinkCheck": "https://w3id.org/cwl/salad#JsonldPredicate/noLinkCheck",
        "null": "https://w3id.org/cwl/salad#null",
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        "record": "https://w3id.org/cwl/salad#record",
        "refScope": "https://w3id.org/cwl/salad#JsonldPredicate/refScope",
        "sld": "https://w3id.org/cwl/salad#",
        "specialize": {"@id": "https://w3id.org/cwl/salad#specialize", "mapPredicate": "specializeTo", "mapSubject": "specializeFrom"},
        "specializeFrom": {"@id": "https://w3id.org/cwl/salad#specializeFrom", "@type": "@id", "refScope": 1},
        "specializeTo": {"@id": "https://w3id.org/cwl/salad#specializeTo", "@type": "@id", "refScope": 1},
        "string": "http://www.w3.org/2001/XMLSchema#string",
        "subscope": "https://w3id.org/cwl/salad#JsonldPredicate/subscope",
        "symbols": {"@id": "https://w3id.org/cwl/salad#symbols", "@type": "@id", "identity": True},
        "type": {"@id": "https://w3id.org/cwl/salad#type", "@type": "@vocab", "refScope": 2, "typeDSL": True},
        "typeDSL": "https://w3id.org/cwl/salad#JsonldPredicate/typeDSL",
        "xsd": "http://www.w3.org/2001/XMLSchema#"
    })

    for f in salad_files:
        rs = resource_stream(__name__, 'metaschema/' + f)
        loader.cache["https://w3id.org/cwl/" + f] = rs.read()
        rs.close()

    rs = resource_stream(__name__, 'metaschema/metaschema.yml')
    loader.cache["https://w3id.org/cwl/salad"] = rs.read()
    rs.close()

    j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"])
    add_lc_filename(j, "metaschema.yml")
    j, _ = loader.resolve_all(j, "https://w3id.org/cwl/salad#")

    (sch_names, sch_obj) = make_avro_schema(j, loader)
    if isinstance(sch_names, Exception):
        _logger.error("Metaschema error, avro was:\n%s",
                      json_dumps(sch_obj, indent=4))
        raise sch_names
    validate_doc(sch_names, j, loader, strict=True)
    return (sch_names, j, loader)
def jshint_js(
    js_text: str,
    globals: Optional[List[str]] = None,
    options: Optional[Dict[str, Union[List[str], str, int]]] = None,
) -> JSHintJSReturn:
    if globals is None:
        globals = []
    if options is None:
        options = {
            "includewarnings": [
                "W117",  # <VARIABLE> not defined
                "W104",
                "W119",  # using ES6 features
            ],
            "strict": "implied",
            "esversion": 5,
        }

    with resource_stream(__name__, "jshint/jshint.js") as res:
        # NOTE: we need a global variable for lodash (which jshint depends on)
        jshint_functions_text = "var global = this;" + res.read().decode("utf-8")

    with resource_stream(__name__, "jshint/jshint_wrapper.js") as res2:
        # NOTE: we need to assign to ob, as the expression {validateJS: validateJS}
        # as an expression is interpreted as a block with a label `validateJS`
        jshint_functions_text += (
            "\n" + res2.read().decode("utf-8") + "\nvar ob = {validateJS: validateJS}; ob"
        )

    returncode, stdout, stderr = exec_js_process(
        "validateJS(%s)"
        % json_dumps({"code": js_text, "options": options, "globals": globals}),
        timeout=30,
        context=jshint_functions_text,
    )

    def dump_jshint_error():
        # type: () -> None
        raise RuntimeError(
            'jshint failed to run successfully\nreturncode: %d\nstdout: "%s"\nstderr: "%s"'
            % (returncode, stdout, stderr)
        )

    if returncode == -1:
        _logger.warning("jshint process timed out")

    if returncode != 0:
        dump_jshint_error()

    try:
        jshint_json = json.loads(stdout)
    except ValueError:
        dump_jshint_error()

    jshint_errors = []  # type: List[str]
    js_text_lines = js_text.split("\n")
    for jshint_error_obj in jshint_json.get("errors", []):
        text = "JSHINT: " + js_text_lines[jshint_error_obj["line"] - 1] + "\n"
        text += "JSHINT: " + " " * (jshint_error_obj["character"] - 1) + "^\n"
        text += "JSHINT: %s: %s" % (
            jshint_error_obj["code"],
            jshint_error_obj["reason"],
        )
        jshint_errors.append(text)

    return JSHintJSReturn(jshint_errors, jshint_json.get("globals", []))
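# Standalone sketch of the caret-pointer formatting used when reporting JSHint
# errors above: print the offending source line, then a caret under the failing
# column. The error-dict shape and the sample values are assumed for illustration.
js_text_lines = ["var x = y;"]
jshint_error_obj = {"line": 1, "character": 9, "code": "W117", "reason": "'y' is not defined."}
text = "JSHINT: " + js_text_lines[jshint_error_obj["line"] - 1] + "\n"
text += "JSHINT: " + " " * (jshint_error_obj["character"] - 1) + "^\n"
text += "JSHINT: %s: %s" % (jshint_error_obj["code"], jshint_error_obj["reason"])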
def _execute(
    self,
    runtime: List[str],
    env: MutableMapping[str, str],
    runtimeContext: RuntimeContext,
    monitor_function=None,  # type: Optional[Callable[[subprocess.Popen[str]], None]]
) -> None:
    """Execute the tool, either directly or via script.

    Note: we are now at the point where self.environment is ignored. The
    caller is responsible for correctly splitting that into the runtime
    and env arguments.

    `runtime` is the list of arguments to put at the start of the command
    (e.g. docker run).

    `env` is the environment to be set for running the resulting command line.
    """
    scr = self.get_requirement("ShellCommandRequirement")[0]

    shouldquote = needs_shell_quoting_re.search
    if scr is not None:
        shouldquote = neverquote

    # If mpi_procs (is not None and > 0) then prepend the
    # appropriate MPI job launch command and flags before the
    # execution.
    if self.mpi_procs:
        menv = runtimeContext.mpi_config
        mpi_runtime = [
            menv.runner,
            menv.nproc_flag,
            str(self.mpi_procs),
        ] + menv.extra_flags
        runtime = mpi_runtime + runtime
        menv.pass_through_env_vars(env)
        menv.set_env_vars(env)

    _logger.info(
        "[job %s] %s$ %s%s%s%s",
        self.name,
        self.outdir,
        " \\\n    ".join(
            [
                shellescape.quote(str(arg)) if shouldquote(str(arg)) else str(arg)
                for arg in (runtime + self.command_line)
            ]
        ),
        " < %s" % self.stdin if self.stdin else "",
        " > %s" % os.path.join(self.outdir, self.stdout) if self.stdout else "",
        " 2> %s" % os.path.join(self.outdir, self.stderr) if self.stderr else "",
    )
    if self.joborder is not None and runtimeContext.research_obj is not None:
        job_order = self.joborder
        if (
            runtimeContext.process_run_id is not None
            and runtimeContext.prov_obj is not None
            and isinstance(job_order, (list, dict))
        ):
            runtimeContext.prov_obj.used_artefacts(
                job_order, runtimeContext.process_run_id, str(self.name)
            )
        else:
            _logger.warning(
                "research_obj set but one of process_run_id "
                "or prov_obj is missing from runtimeContext: "
                "{}".format(runtimeContext)
            )
    outputs = {}  # type: CWLObjectType
    try:
        stdin_path = None
        if self.stdin is not None:
            rmap = self.pathmapper.reversemap(self.stdin)
            if rmap is None:
                raise WorkflowException(f"{self.stdin} missing from pathmapper")
            else:
                stdin_path = rmap[1]

        stderr_path = None
        if self.stderr is not None:
            abserr = os.path.join(self.outdir, self.stderr)
            dnerr = os.path.dirname(abserr)
            if dnerr and not os.path.exists(dnerr):
                os.makedirs(dnerr)
            stderr_path = abserr

        stdout_path = None
        if self.stdout is not None:
            absout = os.path.join(self.outdir, self.stdout)
            dnout = os.path.dirname(absout)
            if dnout and not os.path.exists(dnout):
                os.makedirs(dnout)
            stdout_path = absout

        commands = [str(x) for x in runtime + self.command_line]
        if runtimeContext.secret_store is not None:
            commands = cast(
                List[str],
                runtimeContext.secret_store.retrieve(cast(CWLOutputType, commands)),
            )
            env = cast(
                MutableMapping[str, str],
                runtimeContext.secret_store.retrieve(cast(CWLOutputType, env)),
            )

        job_script_contents = None  # type: Optional[str]
        builder = getattr(self, "builder", None)  # type: Builder
        if builder is not None:
            job_script_contents = builder.build_job_script(commands)
        rcode = _job_popen(
            commands,
            stdin_path=stdin_path,
            stdout_path=stdout_path,
            stderr_path=stderr_path,
            env=env,
            cwd=self.outdir,
            make_job_dir=lambda: runtimeContext.create_outdir(),
            job_script_contents=job_script_contents,
            timelimit=self.timelimit,
            name=self.name,
            monitor_function=monitor_function,
            default_stdout=runtimeContext.default_stdout,
            default_stderr=runtimeContext.default_stderr,
        )

        if rcode in self.successCodes:
            processStatus = "success"
        elif rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"

        if "listing" in self.generatefiles:
            if self.generatemapper:
                relink_initialworkdir(
                    self.generatemapper,
                    self.outdir,
                    self.builder.outdir,
                    inplace_update=self.inplace_update,
                )
            else:
                raise ValueError(
                    "'listing' in self.generatefiles but no "
                    "generatemapper was setup."
                )

        outputs = self.collect_outputs(self.outdir, rcode)
        outputs = bytes2str_in_dicts(outputs)  # type: ignore
    except OSError as e:
        if e.errno == 2:
            if runtime:
                _logger.error("'%s' not found: %s", runtime[0], str(e))
            else:
                _logger.error("'%s' not found: %s", self.command_line[0], str(e))
        else:
            _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    except WorkflowException as err:
        _logger.error("[job %s] Job error:\n%s", self.name, str(err))
        processStatus = "permanentFail"
    except Exception:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    if (
        runtimeContext.research_obj is not None
        and self.prov_obj is not None
        and runtimeContext.process_run_id is not None
    ):
        # creating entities for the outputs produced by each step (in the provenance document)
        self.prov_obj.record_process_end(
            str(self.name),
            runtimeContext.process_run_id,
            outputs,
            datetime.datetime.now(),
        )
    if processStatus != "success":
        _logger.warning("[job %s] completed %s", self.name, processStatus)
    else:
        _logger.info("[job %s] completed %s", self.name, processStatus)

    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug(
            "[job %s] outputs %s", self.name, json_dumps(outputs, indent=4)
        )

    if self.generatemapper is not None and runtimeContext.secret_store is not None:
        # Delete any runtime-generated files containing secrets.
        for _, p in self.generatemapper.items():
            if p.type == "CreateFile":
                if runtimeContext.secret_store.has_secret(p.resolved):
                    host_outdir = self.outdir
                    container_outdir = self.builder.outdir
                    host_outdir_tgt = p.target
                    if p.target.startswith(container_outdir + "/"):
                        host_outdir_tgt = os.path.join(
                            host_outdir, p.target[len(container_outdir) + 1 :]
                        )
                    os.remove(host_outdir_tgt)

    if runtimeContext.workflow_eval_lock is None:
        raise WorkflowException("runtimeContext.workflow_eval_lock must not be None")

    if self.output_callback:
        with runtimeContext.workflow_eval_lock:
            self.output_callback(outputs, processStatus)

    if self.stagedir is not None and os.path.exists(self.stagedir):
        _logger.debug(
            "[job %s] Removing input staging directory %s",
            self.name,
            self.stagedir,
        )
        shutil.rmtree(self.stagedir, True)

    if runtimeContext.rm_tmpdir:
        _logger.debug(
            "[job %s] Removing temporary directory %s", self.name, self.tmpdir
        )
        shutil.rmtree(self.tmpdir, True)
def main(
    argsl: Optional[List[str]] = None,
    args: Optional[argparse.Namespace] = None,
    job_order_object: Optional[CWLObjectType] = None,
    stdin: IO[Any] = sys.stdin,
    stdout: Optional[Union[TextIO, StreamWriter]] = None,
    stderr: IO[Any] = sys.stderr,
    versionfunc: Callable[[], str] = versionstring,
    logger_handler: Optional[logging.Handler] = None,
    custom_schema_callback: Optional[Callable[[], None]] = None,
    executor: Optional[JobExecutor] = None,
    loadingContext: Optional[LoadingContext] = None,
    runtimeContext: Optional[RuntimeContext] = None,
    input_required: bool = True,
) -> int:
    if not stdout:
        # force UTF-8 even if the console is configured differently
        if hasattr(sys.stdout, "encoding") and sys.stdout.encoding.upper() not in (
            "UTF-8",
            "UTF8",
        ):
            if hasattr(sys.stdout, "detach"):
                stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
            else:
                stdout = getwriter("utf-8")(sys.stdout)  # type: ignore
        else:
            stdout = sys.stdout

    _logger.removeHandler(defaultStreamHandler)
    stderr_handler = logger_handler
    if stderr_handler is not None:
        _logger.addHandler(stderr_handler)
    else:
        coloredlogs.install(logger=_logger, stream=stderr)
        stderr_handler = _logger.handlers[-1]
    workflowobj = None
    prov_log_handler = None  # type: Optional[logging.StreamHandler]
    try:
        if args is None:
            if argsl is None:
                argsl = sys.argv[1:]
            addl = []  # type: List[str]
            if "CWLTOOL_OPTIONS" in os.environ:
                addl = os.environ["CWLTOOL_OPTIONS"].split(" ")
            parser = arg_parser()
            argcomplete.autocomplete(parser)
            args = parser.parse_args(addl + argsl)
            if args.record_container_id:
                if not args.cidfile_dir:
                    args.cidfile_dir = os.getcwd()
                del args.record_container_id

        if runtimeContext is None:
            runtimeContext = RuntimeContext(vars(args))
        else:
            runtimeContext = runtimeContext.copy()

        # If on Windows platform, a default Docker Container is used if not
        # explicitly provided by user
        if onWindows() and not runtimeContext.default_container:
            # This docker image is a minimal alpine image with bash installed
            # (size 6 mb). source: https://github.com/frol/docker-alpine-bash
            runtimeContext.default_container = windows_default_container_id

        # If caller parsed its own arguments, it may not include every
        # cwltool option, so fill in defaults to avoid crashing when
        # dereferencing them in args.
        for key, val in get_default_args().items():
            if not hasattr(args, key):
                setattr(args, key, val)

        configure_logging(args, stderr_handler, runtimeContext)

        if args.version:
            print(versionfunc())
            return 0
        _logger.info(versionfunc())

        if args.print_supported_versions:
            print("\n".join(supported_cwl_versions(args.enable_dev)))
            return 0

        if not args.workflow:
            if os.path.isfile("CWLFile"):
                args.workflow = "CWLFile"
            else:
                _logger.error("CWL document required, no input file was provided")
                parser.print_help()
                return 1
        if args.relax_path_checks:
            command_line_tool.ACCEPTLIST_RE = command_line_tool.ACCEPTLIST_EN_RELAXED_RE

        if args.ga4gh_tool_registries:
            ga4gh_tool_registries[:] = args.ga4gh_tool_registries
        if not args.enable_ga4gh_tool_registry:
            del ga4gh_tool_registries[:]

        if args.mpi_config_file is not None:
            runtimeContext.mpi_config = MpiConfig.load(args.mpi_config_file)

        setup_schema(args, custom_schema_callback)

        if args.provenance:
            if argsl is None:
                raise Exception("argsl cannot be None")
            if setup_provenance(args, argsl, runtimeContext) is not None:
                return 1

        loadingContext = setup_loadingContext(loadingContext, runtimeContext, args)

        uri, tool_file_uri = resolve_tool_uri(
            args.workflow,
            resolver=loadingContext.resolver,
            fetcher_constructor=loadingContext.fetcher_constructor,
        )

        try_again_msg = (
            "" if args.debug else ", try again with --debug for more information"
        )

        try:
            job_order_object, input_basedir, jobloader = load_job_order(
                args,
                stdin,
                loadingContext.fetcher_constructor,
                loadingContext.overrides_list,
                tool_file_uri,
            )

            if args.overrides:
                loadingContext.overrides_list.extend(
                    load_overrides(
                        file_uri(os.path.abspath(args.overrides)), tool_file_uri
                    )
                )

            loadingContext, workflowobj, uri = fetch_document(uri, loadingContext)

            if args.print_deps and loadingContext.loader:
                printdeps(
                    workflowobj, loadingContext.loader, stdout, args.relative_deps, uri
                )
                return 0

            loadingContext, uri = resolve_and_validate_document(
                loadingContext,
                workflowobj,
                uri,
                preprocess_only=(args.print_pre or args.pack),
                skip_schemas=args.skip_schemas,
            )

            if loadingContext.loader is None:
                raise Exception("Impossible code path.")
            processobj, metadata = loadingContext.loader.resolve_ref(uri)
            processobj = cast(CommentedMap, processobj)
            if args.pack:
                stdout.write(print_pack(loadingContext, uri))
                return 0

            if args.provenance and runtimeContext.research_obj:
                # Can't really be combined with args.pack at same time
                runtimeContext.research_obj.packed_workflow(
                    print_pack(loadingContext, uri)
                )

            if args.print_pre:
                stdout.write(
                    json_dumps(
                        processobj, indent=4, sort_keys=True, separators=(",", ": ")
                    )
                )
                return 0

            tool = make_tool(uri, loadingContext)
            if args.make_template:
                make_template(tool)
                return 0

            if args.validate:
                print("{} is valid CWL.".format(args.workflow))
                return 0

            if args.print_rdf:
                stdout.write(
                    printrdf(tool, loadingContext.loader.ctx, args.rdf_serializer)
                )
                return 0

            if args.print_dot:
                printdot(tool, loadingContext.loader.ctx, stdout)
                return 0

            if args.print_targets:
                for f in ("outputs", "steps", "inputs"):
                    if tool.tool[f]:
                        _logger.info("%s%s targets:", f[0].upper(), f[1:-1])
                        stdout.write(
                            "  "
                            + "\n  ".join([shortname(t["id"]) for t in tool.tool[f]])
                            + "\n"
                        )
                return 0

            if args.target:
                ctool = choose_target(args, tool, loadingContext)
                if ctool is None:
                    return 1
                else:
                    tool = ctool

            if args.print_subgraph:
                if "name" in tool.tool:
                    del tool.tool["name"]
                stdout.write(
                    json_dumps(
                        tool.tool, indent=4, sort_keys=True, separators=(",", ": ")
                    )
                )
                return 0

        except ValidationException as exc:
            _logger.error(
                "Tool definition failed validation:\n%s", str(exc), exc_info=args.debug
            )
            return 1
        except (RuntimeError, WorkflowException) as exc:
            _logger.error(
                "Tool definition failed initialization:\n%s",
                str(exc),
                exc_info=args.debug,
            )
            return 1
        except Exception as exc:
            _logger.error(
                "I'm sorry, I couldn't load this CWL file%s.\nThe error was: %s",
                try_again_msg,
                str(exc) if not args.debug else "",
                exc_info=args.debug,
            )
            return 1

        if isinstance(tool, int):
            return tool

        # If on MacOS platform, TMPDIR must be set to be under one of the
        # shared volumes in Docker for Mac
        # More info: https://dockstore.org/docs/faq
        if sys.platform == "darwin":
            default_mac_path = "/private/tmp/docker_tmp"
            if runtimeContext.tmp_outdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmp_outdir_prefix = default_mac_path
            if runtimeContext.tmpdir_prefix == DEFAULT_TMP_PREFIX:
                runtimeContext.tmpdir_prefix = default_mac_path

        if check_working_directories(runtimeContext) is not None:
            return 1

        if args.cachedir:
            if args.move_outputs == "move":
                runtimeContext.move_outputs = "copy"
            runtimeContext.tmp_outdir_prefix = args.cachedir

        runtimeContext.secret_store = getdefault(
            runtimeContext.secret_store, SecretStore()
        )
        runtimeContext.make_fs_access = getdefault(
            runtimeContext.make_fs_access, StdFsAccess
        )

        if not executor:
            if args.parallel:
                temp_executor = MultithreadedJobExecutor()
                runtimeContext.select_resources = temp_executor.select_resources
                real_executor = temp_executor  # type: JobExecutor
            else:
                real_executor = SingleJobExecutor()
        else:
            real_executor = executor

        try:
            runtimeContext.basedir = input_basedir

            if isinstance(tool, ProcessGenerator):
                tfjob_order = {}  # type: CWLObjectType
                if loadingContext.jobdefaults:
                    tfjob_order.update(loadingContext.jobdefaults)
                if job_order_object:
                    tfjob_order.update(job_order_object)
                tfout, tfstatus = real_executor(
                    tool.embedded_tool, tfjob_order, runtimeContext
                )
                if not tfout or tfstatus != "success":
                    raise WorkflowException(
                        "ProcessGenerator failed to generate workflow"
                    )
                tool, job_order_object = tool.result(tfjob_order, tfout, runtimeContext)
                if not job_order_object:
                    job_order_object = None

            try:
                initialized_job_order_object = init_job_order(
                    job_order_object,
                    args,
                    tool,
                    jobloader,
                    stdout,
                    print_input_deps=args.print_input_deps,
                    relative_deps=args.relative_deps,
                    make_fs_access=runtimeContext.make_fs_access,
                    input_basedir=input_basedir,
                    secret_store=runtimeContext.secret_store,
                    input_required=input_required,
                )
            except SystemExit as err:
                return err.code

            del args.workflow
            del args.job_order

            conf_file = getattr(
                args, "beta_dependency_resolvers_configuration", None
            )  # str
            use_conda_dependencies = getattr(
                args, "beta_conda_dependencies", None
            )  # str

            if conf_file or use_conda_dependencies:
                runtimeContext.job_script_provider = DependenciesConfiguration(args)
            else:
                runtimeContext.find_default_container = functools.partial(
                    find_default_container,
                    default_container=runtimeContext.default_container,
                    use_biocontainers=args.beta_use_biocontainers,
                )

            (out, status) = real_executor(
                tool, initialized_job_order_object, runtimeContext, logger=_logger
            )

            if out is not None:
                if runtimeContext.research_obj is not None:
                    runtimeContext.research_obj.create_job(out, True)

                    def remove_at_id(doc: CWLObjectType) -> None:
                        for key in list(doc.keys()):
                            if key == "@id":
                                del doc[key]
                            else:
                                value = doc[key]
                                if isinstance(value, MutableMapping):
                                    remove_at_id(value)
                                elif isinstance(value, MutableSequence):
                                    for entry in value:
                                        if isinstance(entry, MutableMapping):
                                            remove_at_id(entry)

                    remove_at_id(out)
                    visit_class(
                        out,
                        ("File",),
                        functools.partial(add_sizes, runtimeContext.make_fs_access("")),
                    )

                def loc_to_path(obj: CWLObjectType) -> None:
                    for field in ("path", "nameext", "nameroot", "dirname"):
                        if field in obj:
                            del obj[field]
                    if cast(str, obj["location"]).startswith("file://"):
                        obj["path"] = uri_file_path(cast(str, obj["location"]))

                visit_class(out, ("File", "Directory"), loc_to_path)

                # Unsetting the Generation from final output object
                visit_class(out, ("File",), MutationManager().unset_generation)

                if isinstance(out, str):
                    stdout.write(out)
                else:
                    stdout.write(json_dumps(out, indent=4, ensure_ascii=False))
                stdout.write("\n")
                if hasattr(stdout, "flush"):
                    stdout.flush()

            if status != "success":
                _logger.warning("Final process status is %s", status)
                return 1
            _logger.info("Final process status is %s", status)
            return 0

        except ValidationException as exc:
            _logger.error(
                "Input object failed validation:\n%s", str(exc), exc_info=args.debug
            )
            return 1
        except UnsupportedRequirement as exc:
            _logger.error(
                "Workflow or tool uses unsupported feature:\n%s",
                str(exc),
                exc_info=args.debug,
            )
            return 33
        except WorkflowException as exc:
            _logger.error(
                "Workflow error%s:\n%s",
                try_again_msg,
                strip_dup_lineno(str(exc)),
                exc_info=args.debug,
            )
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            _logger.error(
                "Unhandled error%s:\n %s",
                try_again_msg,
                str(exc),
                exc_info=args.debug,
            )
            return 1

    finally:
        if (
            args
            and runtimeContext
            and runtimeContext.research_obj
            and workflowobj
            and loadingContext
        ):
            research_obj = runtimeContext.research_obj
            if loadingContext.loader is not None:
                research_obj.generate_snapshot(
                    prov_deps(workflowobj, loadingContext.loader, uri)
                )
            else:
                _logger.warning(
                    "Unable to generate provenance snapshot "
                    " due to missing loadingContext.loader."
                )
            if prov_log_handler is not None:
                # Stop logging so we won't half-log adding ourself to RO
                _logger.debug(
                    "[provenance] Closing provenance log file %s", prov_log_handler
                )
                _logger.removeHandler(prov_log_handler)
                # Ensure last log lines are written out
                prov_log_handler.flush()
                # Underlying WritableBagFile will add the tagfile to the manifest
                prov_log_handler.stream.close()
                prov_log_handler.close()
            research_obj.close(args.provenance)

        _logger.removeHandler(stderr_handler)
        _logger.addHandler(defaultStreamHandler)
def init_job_order(
    job_order_object: Optional[CWLObjectType],
    args: argparse.Namespace,
    process: Process,
    loader: Loader,
    stdout: Union[TextIO, StreamWriter],
    print_input_deps: bool = False,
    relative_deps: str = "primary",
    make_fs_access: Callable[[str], StdFsAccess] = StdFsAccess,
    input_basedir: str = "",
    secret_store: Optional[SecretStore] = None,
    input_required: bool = True,
) -> CWLObjectType:
    secrets_req, _ = process.get_requirement("http://commonwl.org/cwltool#Secrets")
    if job_order_object is None:
        namemap = {}  # type: Dict[str, str]
        records = []  # type: List[str]
        toolparser = generate_parser(
            argparse.ArgumentParser(prog=args.workflow),
            process,
            namemap,
            records,
            input_required,
        )
        if args.tool_help:
            toolparser.print_help()
            exit(0)
        cmd_line = vars(toolparser.parse_args(args.job_order))
        for record_name in records:
            record = {}
            record_items = {
                k: v for k, v in cmd_line.items() if k.startswith(record_name)
            }
            for key, value in record_items.items():
                record[key[len(record_name) + 1:]] = value
                del cmd_line[key]
            cmd_line[str(record_name)] = record

        if "job_order" in cmd_line and cmd_line["job_order"]:
            try:
                job_order_object = cast(
                    CWLObjectType,
                    loader.resolve_ref(cmd_line["job_order"])[0],
                )
            except Exception:
                _logger.exception(
                    "Failed to resolv job_order: %s", cmd_line["job_order"]
                )
                exit(1)
        else:
            job_order_object = {"id": args.workflow}

        del cmd_line["job_order"]

        job_order_object.update({namemap[k]: v for k, v in cmd_line.items()})

        if secret_store and secrets_req:
            secret_store.store(
                [shortname(sc) for sc in cast(List[str], secrets_req["secrets"])],
                job_order_object,
            )

        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                "Parsed job order from command line: %s",
                json_dumps(job_order_object, indent=4),
            )

    for inp in process.tool["inputs"]:
        if "default" in inp and (
            not job_order_object or shortname(inp["id"]) not in job_order_object
        ):
            if not job_order_object:
                job_order_object = {}
            job_order_object[shortname(inp["id"])] = inp["default"]

    if job_order_object is None:
        if process.tool["inputs"]:
            if toolparser is not None:
                print("\nOptions for {} ".format(args.workflow))
                toolparser.print_help()
            _logger.error("")
            _logger.error("Input object required, use --help for details")
            exit(1)
        else:
            job_order_object = {}

    if print_input_deps:
        basedir = None  # type: Optional[str]
        uri = cast(str, job_order_object["id"])
        if uri == args.workflow:
            basedir = os.path.dirname(uri)
            uri = ""
        printdeps(
            job_order_object,
            loader,
            stdout,
            relative_deps,
            uri,
            basedir=basedir,
            nestdirs=False,
        )
        exit(0)

    def path_to_loc(p: CWLObjectType) -> None:
        if "location" not in p and "path" in p:
            p["location"] = p["path"]
            del p["path"]

    ns = {}  # type: ContextType
    ns.update(cast(ContextType, job_order_object.get("$namespaces", {})))
    ns.update(cast(ContextType, process.metadata.get("$namespaces", {})))
    ld = Loader(ns)

    def expand_formats(p: CWLObjectType) -> None:
        if "format" in p:
            p["format"] = ld.expand_url(cast(str, p["format"]), "")

    visit_class(job_order_object, ("File", "Directory"), path_to_loc)
    visit_class(
        job_order_object,
        ("File",),
        functools.partial(add_sizes, make_fs_access(input_basedir)),
    )
    visit_class(job_order_object, ("File",), expand_formats)
    adjustDirObjs(job_order_object, trim_listing)
    normalizeFilesDirs(job_order_object)

    if secret_store and secrets_req:
        secret_store.store(
            [shortname(sc) for sc in cast(List[str], secrets_req["secrets"])],
            job_order_object,
        )

    if "cwl:tool" in job_order_object:
        del job_order_object["cwl:tool"]
    if "id" in job_order_object:
        del job_order_object["id"]
    return job_order_object
def job(
    self,
    joborder: CWLObjectType,
    output_callback: Optional[OutputCallbackType],
    runtimeContext: RuntimeContext,
) -> JobsGeneratorType:
    self.state = {}
    self.processStatus = "success"

    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug("[%s] inputs %s", self.name, json_dumps(joborder, indent=4))

    runtimeContext = runtimeContext.copy()
    runtimeContext.outdir = None
    debug = runtimeContext.debug

    for index, inp in enumerate(self.tool["inputs"]):
        with SourceLine(self.tool["inputs"], index, WorkflowException, debug):
            inp_id = shortname(inp["id"])
            if inp_id in joborder:
                self.state[inp["id"]] = WorkflowStateItem(
                    inp, joborder[inp_id], "success"
                )
            elif "default" in inp:
                self.state[inp["id"]] = WorkflowStateItem(
                    inp, inp["default"], "success"
                )
            else:
                raise WorkflowException(
                    "Input '%s' not in input object and does not have a "
                    " default value." % (inp["id"])
                )

    for step in self.steps:
        for out in step.tool["outputs"]:
            self.state[out["id"]] = None

    completed = 0
    while completed < len(self.steps):
        self.made_progress = False

        for step in self.steps:
            if (
                getdefault(runtimeContext.on_error, "stop") == "stop"
                and self.processStatus != "success"
            ):
                break

            if not step.submitted:
                try:
                    step.iterable = self.try_make_job(
                        step, output_callback, runtimeContext
                    )
                except WorkflowException as exc:
                    _logger.error("[%s] Cannot make job: %s", step.name, str(exc))
                    _logger.debug("", exc_info=True)
                    self.processStatus = "permanentFail"

            if step.iterable is not None:
                try:
                    for newjob in step.iterable:
                        if (
                            getdefault(runtimeContext.on_error, "stop") == "stop"
                            and self.processStatus != "success"
                        ):
                            break
                        if newjob is not None:
                            self.made_progress = True
                            yield newjob
                        else:
                            break
                except WorkflowException as exc:
                    _logger.error("[%s] Cannot make job: %s", step.name, str(exc))
                    _logger.debug("", exc_info=True)
                    self.processStatus = "permanentFail"

        completed = sum(1 for s in self.steps if s.completed)

        if not self.made_progress and completed < len(self.steps):
            if self.processStatus != "success":
                break
            else:
                yield None

    if not self.did_callback and output_callback:
        # could have called earlier on line 336;
        self.do_output_callback(output_callback)
def postScatterEval(io: CWLObjectType) -> Optional[CWLObjectType]:
    shortio = cast(CWLObjectType, {shortname(k): v for k, v in io.items()})

    fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)("")
    for k, v in io.items():
        if k in loadContents:
            val = cast(CWLObjectType, v)
            if val.get("contents") is None:
                with fs_access.open(cast(str, val["location"]), "rb") as f:
                    val["contents"] = content_limit_respected_read(f)

    def valueFromFunc(
        k: str, v: Optional[CWLOutputType]
    ) -> Optional[CWLOutputType]:
        if k in valueFrom:
            adjustDirObjs(
                v, functools.partial(get_listing, fs_access, recursive=True)
            )
            return expression.do_eval(
                valueFrom[k],
                shortio,
                self.workflow.requirements,
                None,
                None,
                {},
                context=v,
                debug=runtimeContext.debug,
                js_console=runtimeContext.js_console,
                timeout=runtimeContext.eval_timeout,
                container_engine=container_engine,
            )
        return v

    psio = {k: valueFromFunc(k, v) for k, v in io.items()}
    if "when" in step.tool:
        evalinputs = {shortname(k): v for k, v in psio.items()}
        whenval = expression.do_eval(
            step.tool["when"],
            evalinputs,
            self.workflow.requirements,
            None,
            None,
            {},
            context=cast(Optional[CWLObjectType], v),
            debug=runtimeContext.debug,
            js_console=runtimeContext.js_console,
            timeout=runtimeContext.eval_timeout,
            container_engine=container_engine,
        )
        if whenval is True:
            pass
        elif whenval is False:
            _logger.debug(
                "[%s] conditional %s evaluated to %s",
                step.name,
                step.tool["when"],
                whenval,
            )
            _logger.debug(
                "[%s] inputs was %s",
                step.name,
                json_dumps(evalinputs, indent=2),
            )
            return None
        else:
            raise WorkflowException(
                "Conditional 'when' must evaluate to 'true' or 'false'"
            )
    return psio
def try_make_job(
    self,
    step: WorkflowJobStep,
    final_output_callback: Optional[OutputCallbackType],
    runtimeContext: RuntimeContext,
) -> JobsGeneratorType:
    container_engine = "docker"
    if runtimeContext.podman:
        container_engine = "podman"
    elif runtimeContext.singularity:
        container_engine = "singularity"
    if step.submitted:
        return

    inputparms = step.tool["inputs"]
    outputparms = step.tool["outputs"]

    supportsMultipleInput = bool(
        self.workflow.get_requirement("MultipleInputFeatureRequirement")[0]
    )

    try:
        inputobj = object_from_state(
            self.state, inputparms, False, supportsMultipleInput, "source"
        )
        if inputobj is None:
            _logger.debug("[%s] job step %s not ready", self.name, step.id)
            return

        if step.submitted:
            return
        _logger.info("[%s] starting %s", self.name, step.name)

        callback = functools.partial(
            self.receive_output, step, outputparms, final_output_callback
        )

        valueFrom = {
            i["id"]: i["valueFrom"] for i in step.tool["inputs"] if "valueFrom" in i
        }

        loadContents = {
            i["id"] for i in step.tool["inputs"] if i.get("loadContents")
        }

        if len(valueFrom) > 0 and not bool(
            self.workflow.get_requirement("StepInputExpressionRequirement")[0]
        ):
            raise WorkflowException(
                "Workflow step contains valueFrom but StepInputExpressionRequirement not in requirements"
            )

        vfinputs = {shortname(k): v for k, v in inputobj.items()}

        def postScatterEval(io: CWLObjectType) -> Optional[CWLObjectType]:
            shortio = cast(CWLObjectType, {shortname(k): v for k, v in io.items()})

            fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)("")
            for k, v in io.items():
                if k in loadContents:
                    val = cast(CWLObjectType, v)
                    if val.get("contents") is None:
                        with fs_access.open(cast(str, val["location"]), "rb") as f:
                            val["contents"] = content_limit_respected_read(f)

            def valueFromFunc(
                k: str, v: Optional[CWLOutputType]
            ) -> Optional[CWLOutputType]:
                if k in valueFrom:
                    adjustDirObjs(
                        v, functools.partial(get_listing, fs_access, recursive=True)
                    )
                    return expression.do_eval(
                        valueFrom[k],
                        shortio,
                        self.workflow.requirements,
                        None,
                        None,
                        {},
                        context=v,
                        debug=runtimeContext.debug,
                        js_console=runtimeContext.js_console,
                        timeout=runtimeContext.eval_timeout,
                        container_engine=container_engine,
                    )
                return v

            psio = {k: valueFromFunc(k, v) for k, v in io.items()}
            if "when" in step.tool:
                evalinputs = {shortname(k): v for k, v in psio.items()}
                whenval = expression.do_eval(
                    step.tool["when"],
                    evalinputs,
                    self.workflow.requirements,
                    None,
                    None,
                    {},
                    context=cast(Optional[CWLObjectType], v),
                    debug=runtimeContext.debug,
                    js_console=runtimeContext.js_console,
                    timeout=runtimeContext.eval_timeout,
                    container_engine=container_engine,
                )
                if whenval is True:
                    pass
                elif whenval is False:
                    _logger.debug(
                        "[%s] conditional %s evaluated to %s",
                        step.name,
                        step.tool["when"],
                        whenval,
                    )
                    _logger.debug(
                        "[%s] inputs was %s",
                        step.name,
                        json_dumps(evalinputs, indent=2),
                    )
                    return None
                else:
                    raise WorkflowException(
                        "Conditional 'when' must evaluate to 'true' or 'false'"
                    )
            return psio

        if "scatter" in step.tool:
            scatter = cast(List[str], aslist(step.tool["scatter"]))
            method = step.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )
            runtimeContext = runtimeContext.copy()
            runtimeContext.postScatterEval = postScatterEval

            emptyscatter = [
                shortname(s) for s in scatter if len(cast(Sized, inputobj[s])) == 0
            ]
            if emptyscatter:
                _logger.warning(
                    "[job %s] Notice: scattering over empty input in "
                    "'%s'.  All outputs will be empty.",
                    step.name,
                    "', '".join(emptyscatter),
                )

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(
                    step, inputobj, scatter, callback, runtimeContext
                )
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(
                    step, inputobj, scatter, callback, runtimeContext
                )
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(
                    step, inputobj, scatter, callback, runtimeContext
                )
        else:
            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(
                    "[%s] job input %s", step.name, json_dumps(inputobj, indent=4)
                )

            inputobj = postScatterEval(inputobj)
            if inputobj is not None:
                if _logger.isEnabledFor(logging.DEBUG):
                    _logger.debug(
                        "[%s] evaluated job input to %s",
                        step.name,
                        json_dumps(inputobj, indent=4),
                    )
                jobs = step.job(inputobj, callback, runtimeContext)
            else:
                _logger.info("[%s] will be skipped", step.name)
                callback({k["id"]: None for k in outputparms}, "skipped")
                step.completed = True
                jobs = (_ for _ in ())

        step.submitted = True

        yield from jobs
    except WorkflowException:
        raise
    except Exception:
        _logger.exception("Unhandled exception")
        self.processStatus = "permanentFail"
        step.completed = True