def collect_output_ports(self, ports, builder, outdir):
    # type: (Set[Dict[str,Any]], Builder, str) -> Dict[str,Union[str,List[Any],Dict[str,Any]]]
    """Collect the output object for this tool invocation.

    If the tool wrote a ``cwl.output.json`` file into *outdir*, that file is
    taken verbatim as the output object; otherwise each declared output port
    is collected individually via ``collect_output``.  Either way the result
    is validated against the tool's output record schema.

    :raises WorkflowException: if a port fails to collect or the final
        record fails schema validation.
    """
    try:
        ret = {}  # type: Dict[str,Union[str,List[Any],Dict[str,Any]]]
        custom_output = os.path.join(outdir, "cwl.output.json")
        if builder.fs_access.exists(custom_output):
            with builder.fs_access.open(custom_output, "r") as f:
                ret = json.load(f)
            _logger.debug(u"Raw output from %s: %s", custom_output,
                          json.dumps(ret, indent=4))
            adjustFileObjs(ret, remove_hostfs)
            adjustFileObjs(ret,
                           cast(Callable[[Any], Any],  # known bug in mypy
                                # https://github.com/python/mypy/issues/797
                                partial(revmap_file, builder, outdir)))
            # remove_hostfs is applied a second time because revmap_file may
            # (re)introduce hostfs markers on remapped File objects —
            # TODO confirm against revmap_file's implementation.
            adjustFileObjs(ret, remove_hostfs)
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret

        for port in ports:
            fragment = shortname(port["id"])
            try:
                ret[fragment] = self.collect_output(port, builder, outdir)
            except Exception as e:
                raise WorkflowException(
                    u"Error collecting output for parameter '%s': %s"
                    % (shortname(port["id"]), e))
        if ret:
            adjustFileObjs(ret, remove_hostfs)
        validate.validate_ex(
            self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException(
            "Error validating output record, " + str(e) + "\n in "
            + json.dumps(ret, indent=4))
def validate_hints(
    self, avsc_names: Names, hints: List[CWLObjectType], strict: bool
) -> None:
    """Validate each hint against its schema, resolving names via the vocab."""
    if self.doc_loader is None:
        return
    debug_enabled = _logger.isEnabledFor(logging.DEBUG)
    for index, hint in enumerate(hints):
        source = SourceLine(hints, index, ValidationException, debug_enabled)
        with source:
            hint_class = cast(str, hint["class"])
            avro_name = hint_class
            if hint_class in self.doc_loader.vocab:
                avro_name = avro_type_name(self.doc_loader.vocab[hint_class])
            schema = avsc_names.get_name(avro_name, None)
            if schema is not None:
                # Drop identifier fields before validating the hint body.
                stripped = {
                    field: hint[field]
                    for field in hint
                    if field not in self.doc_loader.identifiers
                }
                validate_ex(
                    cast(Schema, schema),
                    stripped,
                    strict=strict,
                    vocab=self.doc_loader.vocab,
                )
            elif hint["class"] in ("NetworkAccess", "LoadListingRequirement"):
                pass
            else:
                _logger.info(str(source.makeError("Unknown hint %s" % (hint["class"]))))
def collect_output_ports(self, ports, builder, outdir):
    """Collect and validate this tool's outputs from *outdir*.

    Prefers a tool-provided ``cwl.output.json`` file; otherwise collects
    each declared output port individually.  The result is validated
    against the outputs record schema.

    :raises WorkflowException: on collection or schema-validation failure.
    """
    try:
        ret = {}
        custom_output = os.path.join(outdir, "cwl.output.json")
        if builder.fs_access.exists(custom_output):
            with builder.fs_access.open(custom_output, "r") as f:
                # cwl.output.json is JSON; the previous yaml.load(f) call had
                # no explicit Loader (unsafe on untrusted input, and a
                # TypeError under PyYAML >= 6).  json.load also matches the
                # sibling implementations of this method.
                ret = json.load(f)
            _logger.debug("Raw output from %s: %s", custom_output,
                          json.dumps(ret, indent=4))
            adjustFileObjs(ret, remove_hostfs)
            adjustFileObjs(ret, functools.partial(revmap_file, builder, outdir))
            # Strip hostfs markers again after remapping paths.
            adjustFileObjs(ret, remove_hostfs)
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret
        for port in ports:
            fragment = shortname(port["id"])
            try:
                ret[fragment] = self.collect_output(port, builder, outdir)
            except Exception as e:
                raise WorkflowException(
                    "Error collecting output for parameter '%s': %s"
                    % (shortname(port["id"]), e))
        if ret:
            adjustFileObjs(ret, remove_hostfs)
        validate.validate_ex(
            self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException("Error validating output record, " + str(e)
                                + "\n in " + json.dumps(ret, indent=4))
def validate_hints(
    self, avsc_names: Names, hints: List[CWLObjectType], strict: bool
) -> None:
    """Validate each entry of the hints field against a named schema.

    Hints whose class has no registered schema are logged (unless they are
    one of the two known requirement classes, which are silently skipped);
    validation errors surface via the SourceLine context manager.
    """
    for i, r in enumerate(hints):
        sl = SourceLine(hints, i, ValidationException)
        with sl:
            if (
                avsc_names.get_name(cast(str, r["class"]), None) is not None
                and self.doc_loader is not None
            ):
                # Identifier fields are stripped before validation.
                plain_hint = dict(
                    (key, r[key])
                    for key in r
                    if key not in self.doc_loader.identifiers
                )  # strip identifiers
                validate_ex(
                    cast(
                        Schema,
                        avsc_names.get_name(cast(str, plain_hint["class"]), None),
                    ),
                    plain_hint,
                    strict=strict,
                )
            elif r["class"] in ("NetworkAccess", "LoadListingRequirement"):
                pass
            else:
                _logger.info(str(sl.makeError("Unknown hint %s" % (r["class"]))))
def collect_output_ports(self,
                         ports,  # type: Set[Dict[Text, Any]]
                         builder,  # type: Builder
                         outdir,  # type: Text
                         rcode,  # type: int
                         compute_checksum=True,  # type: bool
                         jobname="",  # type: Text
                         readers=None  # type: Dict[Text, Any]
                         ):  # type: (...) -> OutputPorts
    """Collect, normalize and validate the output object of this tool run.

    The process exit code *rcode* is exposed to expressions as
    ``runtime.exitCode`` for post-v1.0 documents.  Any readers registered
    with the mutation manager are released in the ``finally`` clause.
    """
    ret = {}  # type: OutputPorts
    debug = _logger.isEnabledFor(logging.DEBUG)
    cwl_version = self.metadata.get(
        "http://commonwl.org/cwltool#original_cwlVersion", None)
    # exitCode only became available to expressions after CWL v1.0.
    if cwl_version != "v1.0":
        builder.resources["exitCode"] = rcode
    try:
        fs_access = builder.make_fs_access(outdir)
        custom_output = fs_access.join(outdir, "cwl.output.json")
        if fs_access.exists(custom_output):
            # Tool supplied its own output object; take it verbatim.
            with fs_access.open(custom_output, "r") as f:
                ret = json.load(f)
            if debug:
                _logger.debug(u"Raw output from %s: %s", custom_output,
                              json_dumps(ret, indent=4))
        else:
            for i, port in enumerate(ports):
                def makeWorkflowException(msg):
                    return WorkflowException(
                        u"Error collecting output for parameter '%s':\n%s"
                        % (shortname(port["id"]), msg))
                with SourceLine(ports, i, makeWorkflowException, debug):
                    fragment = shortname(port["id"])
                    ret[fragment] = self.collect_output(
                        port, builder, outdir, fs_access,
                        compute_checksum=compute_checksum)
        if ret:
            # Post-process collected File/Directory objects: map container
            # paths back to host paths, strip 'path', normalize, verify
            # locations, then optionally checksum.
            revmap = partial(revmap_file, builder, outdir)
            adjustDirObjs(ret, trim_listing)
            visit_class(ret, ("File", "Directory"),
                        cast(Callable[[Any], Any], revmap))
            visit_class(ret, ("File", "Directory"), remove_path)
            normalizeFilesDirs(ret)
            visit_class(ret, ("File", "Directory"),
                        partial(check_valid_locations, fs_access))
            if compute_checksum:
                adjustFileObjs(ret, partial(compute_checksums, fs_access))
            expected_schema = cast(Schema, self.names.get_name(
                "outputs_record_schema", ""))
            validate.validate_ex(expected_schema, ret, strict=False,
                                 logger=_logger_validation_warnings)
        if ret is not None and builder.mutation_manager is not None:
            adjustFileObjs(ret, builder.mutation_manager.set_generation)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException(
            "Error validating output record. " + Text(e) + "\n in "
            + json_dumps(ret, indent=4))
    finally:
        if builder.mutation_manager and readers:
            for r in readers.values():
                builder.mutation_manager.release_reader(jobname, r)
def collect_output_ports(self, ports, builder, outdir, compute_checksum=True):
    # type: (Set[Dict[Text, Any]], Builder, Text, bool) -> Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
    """Collect the tool's outputs from *outdir* and validate the record.

    A tool-supplied ``cwl.output.json`` short-circuits per-port collection.
    Collected File/Directory objects are remapped to host paths, stripped
    of 'path', normalized, and optionally checksummed.
    """
    ret = {}  # type: Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
    try:
        fs_access = builder.make_fs_access(outdir)
        custom_output = fs_access.join(outdir, "cwl.output.json")
        if fs_access.exists(custom_output):
            with fs_access.open(custom_output, "r") as f:
                ret = json.load(f)
            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Raw output from %s: %s", custom_output,
                              json.dumps(ret, indent=4))
        else:
            for i, port in enumerate(ports):
                with SourceLine(ports, i, WorkflowException):
                    fragment = shortname(port["id"])
                    try:
                        ret[fragment] = self.collect_output(
                            port, builder, outdir, fs_access,
                            compute_checksum=compute_checksum)
                    except Exception as e:
                        _logger.debug(
                            u"Error collecting output for parameter '%s'"
                            % shortname(port["id"]), exc_info=True)
                        raise WorkflowException(
                            u"Error collecting output for parameter '%s':\n%s"
                            % (shortname(port["id"]), indent(u(str(e)))))
        if ret:
            adjustDirObjs(ret, trim_listing)
            adjustFileObjs(
                ret,
                cast(
                    Callable[[Any], Any],  # known bug in mypy
                    # https://github.com/python/mypy/issues/797
                    partial(revmap_file, builder, outdir)))
            adjustFileObjs(ret, remove_path)
            adjustDirObjs(ret, remove_path)
            normalizeFilesDirs(ret)
            if compute_checksum:
                adjustFileObjs(ret, partial(compute_checksums, fs_access))
        validate.validate_ex(
            self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException("Error validating output record, " + Text(e)
                                + "\n in " + json.dumps(ret, indent=4))
def validate_hints(self, hints, strict):
    """Validate each hint against its named schema, if one is registered.

    Unknown hint classes are only logged; validation failures are re-raised
    with the hint class named in the message.
    """
    for hint in hints:
        try:
            schema = self.names.get_name(hint["class"], "")
            if schema is not None:
                validate.validate_ex(schema, hint, strict=strict)
            else:
                _logger.info(validate.ValidationException("Unknown hint %s" % (hint["class"])))
        except validate.ValidationException as err:
            raise validate.ValidationException("Validating hint `%s`: %s" % (hint["class"], str(err)))
def collect_output_ports(self,
                         ports,  # type: Set[Dict[Text, Any]]
                         builder,  # type: Builder
                         outdir,  # type: Text
                         compute_checksum=True,  # type: bool
                         jobname="",  # type: Text
                         readers=None  # type: Dict[Text, Any]
                         ):  # type: (...) -> OutputPorts
    """Collect and validate the tool's outputs from *outdir*.

    Variant without container path remapping (the revmap step is
    deliberately disabled below).  Mutation-manager readers are released
    in the ``finally`` clause.
    """
    ret = {}  # type: OutputPorts
    debug = LOGGER.isEnabledFor(logging.DEBUG)
    try:
        fs_access = builder.make_fs_access(outdir)
        custom_output = fs_access.join(outdir, "cwl.output.json")
        if fs_access.exists(custom_output):
            # Tool supplied its own output object; take it verbatim.
            with fs_access.open(custom_output, "r") as f:
                ret = json.load(f)
            if debug:
                LOGGER.debug(u"Raw output from %s: %s", custom_output,
                             json.dumps(ret, indent=4))
        else:
            for i, port in enumerate(ports):
                def make_workflow_exception(msg):
                    return WorkflowException(
                        u"Error collecting output for parameter '%s':\n%s"
                        % (shortname(port["id"]), msg))
                with SourceLine(ports, i, make_workflow_exception, debug):
                    fragment = shortname(port["id"])
                    ret[fragment] = self.collect_output(
                        port, builder, outdir, fs_access,
                        compute_checksum=compute_checksum)
        if ret:
            # revmap = partial(command_line_tool.revmap_file, builder, outdir)
            adjustDirObjs(ret, trim_listing)
            # TODO: Attempt to avoid a crash because the revmap fct is not functional
            # (intend for a docker usage only?)
            # visit_class(ret, ("File", "Directory"), cast(Callable[[Any], Any], revmap))
            visit_class(ret, ("File", "Directory"), command_line_tool.remove_path)
            normalizeFilesDirs(ret)
            visit_class(ret, ("File", "Directory"),
                        partial(command_line_tool.check_valid_locations, fs_access))
            if compute_checksum:
                adjustFileObjs(ret, partial(compute_checksums, fs_access))
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""),
                ret, strict=False, logger=LOGGER)
        if ret is not None and builder.mutation_manager is not None:
            adjustFileObjs(ret, builder.mutation_manager.set_generation)
        return ret if ret is not None else {}
    except validate.ValidationException as exc:
        raise WorkflowException("Error validating output record: {!s}\nIn:\n{}"
                                .format(exc, json.dumps(ret, indent=4)))
    finally:
        if builder.mutation_manager and readers:
            for reader in readers.values():
                builder.mutation_manager.release_reader(jobname, reader)
def collect_output_ports(self,
                         ports,  # type: Set[Dict[Text, Any]]
                         builder,  # type: Builder
                         outdir,  # type: Text
                         compute_checksum=True,  # type: bool
                         jobname="",  # type: Text
                         readers=None  # type: Dict[Text, Any]
                         ):  # type: (...) -> OutputPorts
    """Collect, normalize and validate the tool's output record.

    A tool-supplied ``cwl.output.json`` short-circuits per-port collection.
    Mutation-manager readers are always released in the ``finally`` clause.
    """
    ret = {}  # type: OutputPorts
    debug = _logger.isEnabledFor(logging.DEBUG)
    try:
        fs_access = builder.make_fs_access(outdir)
        custom_output = fs_access.join(outdir, "cwl.output.json")
        if fs_access.exists(custom_output):
            with fs_access.open(custom_output, "r") as f:
                ret = json.load(f)
            if debug:
                _logger.debug(u"Raw output from %s: %s", custom_output,
                              json_dumps(ret, indent=4))
        else:
            for i, port in enumerate(ports):
                def makeWorkflowException(msg):
                    return WorkflowException(
                        u"Error collecting output for parameter '%s':\n%s"
                        % (shortname(port["id"]), msg))
                with SourceLine(ports, i, makeWorkflowException, debug):
                    fragment = shortname(port["id"])
                    ret[fragment] = self.collect_output(
                        port, builder, outdir, fs_access,
                        compute_checksum=compute_checksum)
        if ret:
            # Remap container paths to host paths, strip 'path', normalize,
            # verify locations, then optionally checksum the files.
            revmap = partial(revmap_file, builder, outdir)
            adjustDirObjs(ret, trim_listing)
            visit_class(ret, ("File", "Directory"),
                        cast(Callable[[Any], Any], revmap))
            visit_class(ret, ("File", "Directory"), remove_path)
            normalizeFilesDirs(ret)
            visit_class(ret, ("File", "Directory"),
                        partial(check_valid_locations, fs_access))
            if compute_checksum:
                adjustFileObjs(ret, partial(compute_checksums, fs_access))
            expected_schema = cast(Schema, self.names.get_name(
                "outputs_record_schema", ""))
            validate.validate_ex(expected_schema, ret, strict=False,
                                 logger=_logger_validation_warnings)
        if ret is not None and builder.mutation_manager is not None:
            adjustFileObjs(ret, builder.mutation_manager.set_generation)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException(
            "Error validating output record. " + Text(e) + "\n in "
            + json_dumps(ret, indent=4))
    finally:
        if builder.mutation_manager and readers:
            for r in readers.values():
                builder.mutation_manager.release_reader(jobname, r)
def validate_hints(self, avsc_names, hints, strict):
    # type: (Any, List[Dict[str, Any]], bool) -> None
    """Validate hints against *avsc_names*; unknown hint classes are only logged."""
    for hint in hints:
        try:
            named_schema = avsc_names.get_name(hint["class"], "")
            if named_schema is not None:
                validate.validate_ex(named_schema, hint, strict=strict)
            else:
                _logger.info(str(validate.ValidationException(
                    u"Unknown hint %s" % (hint["class"]))))
        except validate.ValidationException as err:
            raise validate.ValidationException(
                u"Validating hint `%s`: %s" % (hint["class"], str(err)))
def _init_job(self, joborder, input_basedir, **kwargs):
    """Build a Builder for this job from the input object *joborder*.

    Fills in declared input defaults, checks that all requirements are
    supported, validates the job order against the input record schema,
    configures output/tmp directories (container paths when running under
    Docker), checks input file formats, and binds inputs to command-line
    bindings.

    :raises WorkflowException: for unsupported requirements or an invalid
        job order.
    """
    builder = Builder()
    builder.job = copy.deepcopy(joborder)
    # Apply declared defaults for inputs missing from the job order.
    for i in self.tool["inputs"]:
        d = shortname(i["id"])
        if d not in builder.job and "default" in i:
            builder.job[d] = i["default"]
    for r in self.requirements:
        if r["class"] not in supportedProcessRequirements:
            raise WorkflowException("Unsupported process requirement %s" % (r["class"]))
    # Validate job order
    try:
        validate.validate_ex(
            self.names.get_name("input_record_schema", ""), builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))
    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")
    dockerReq, _ = self.get_requirement("DockerRequirement")
    if dockerReq and kwargs.get("use_container"):
        # In-container paths.
        builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
        builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
    else:
        builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
        builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
    builder.fs_access = kwargs.get("fs_access") or StdFsAccess(
        input_basedir)
    if self.formatgraph:
        # Check declared file formats against the format ontology.
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.requirements, self.formatgraph)
    builder.bindings.extend(
        builder.bind_input(self.inputs_record_schema, builder.job))
    builder.resources = self.evalResources(builder, kwargs)
    return builder
def validate_hints(self, avsc_names, hints, strict):
    # type: (Any, List[Dict[Text, Any]], bool) -> None
    """Validate each hint, reporting problems with source-line context."""
    for idx, hint in enumerate(hints):
        line = SourceLine(hints, idx, validate.ValidationException)
        with line:
            if avsc_names.get_name(hint["class"], "") is not None:
                # Drop identifier fields before schema validation.
                stripped = {
                    key: value
                    for key, value in hint.items()
                    if key not in self.doc_loader.identifiers
                }
                validate.validate_ex(
                    avsc_names.get_name(stripped["class"], ""),
                    stripped,
                    strict=strict)
            else:
                _logger.info(line.makeError(u"Unknown hint %s" % (hint["class"])))
def validate_hints(self, hints, strict):
    """Check every hint against the known schema names; log unknowns."""
    for entry in hints:
        try:
            resolved = self.names.get_name(entry["class"], "")
            if resolved is None:
                _logger.info(
                    validate.ValidationException("Unknown hint %s" % (entry["class"])))
            else:
                validate.validate_ex(resolved, entry, strict=strict)
        except validate.ValidationException as exc:
            raise validate.ValidationException("Validating hint `%s`: %s" % (entry["class"], str(exc)))
def _init_job(self, joborder, input_basedir, **kwargs):
    """Construct the Builder used to run this tool with *joborder*.

    Applies input defaults, verifies that every requirement is supported,
    validates the job order, sets up out/tmp directories (container paths
    under Docker), checks input formats, and computes command-line
    bindings and resource requests.
    """
    builder = Builder()
    builder.job = copy.deepcopy(joborder)
    # Apply declared defaults for inputs absent from the job order.
    for i in self.tool["inputs"]:
        d = shortname(i["id"])
        if d not in builder.job and "default" in i:
            builder.job[d] = i["default"]
    for r in self.requirements:
        if r["class"] not in supportedProcessRequirements:
            raise WorkflowException("Unsupported process requirement %s" % (r["class"]))
    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))
    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")
    dockerReq, _ = self.get_requirement("DockerRequirement")
    if dockerReq and kwargs.get("use_container"):
        # In-container output/tmp paths.
        builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
        builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
    else:
        builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
        builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
    builder.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)
    if self.formatgraph:
        # Verify declared file formats against the ontology graph.
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.requirements, self.formatgraph)
    builder.bindings.extend(builder.bind_input(self.inputs_record_schema,
                                               builder.job))
    builder.resources = self.evalResources(builder, kwargs)
    return builder
def collect_output_ports(self, ports, builder, outdir, compute_checksum=True,
                         jobname="", readers=None):
    # type: (Set[Dict[Text, Any]], Builder, Text, bool, Text, Dict[Text, Any]) -> Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
    """Collect, normalize and validate this tool's output record.

    A tool-supplied ``cwl.output.json`` short-circuits per-port collection.
    Mutation-manager readers are released in the ``finally`` clause
    regardless of success.
    """
    ret = {}  # type: Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
    try:
        fs_access = builder.make_fs_access(outdir)
        custom_output = fs_access.join(outdir, "cwl.output.json")
        if fs_access.exists(custom_output):
            with fs_access.open(custom_output, "r") as f:
                ret = json.load(f)
            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Raw output from %s: %s", custom_output,
                              json.dumps(ret, indent=4))
        else:
            for i, port in enumerate(ports):
                with SourceLine(ports, i, WorkflowException):
                    fragment = shortname(port["id"])
                    try:
                        ret[fragment] = self.collect_output(
                            port, builder, outdir, fs_access,
                            compute_checksum=compute_checksum)
                    except Exception as e:
                        _logger.debug(
                            u"Error collecting output for parameter '%s'"
                            % shortname(port["id"]), exc_info=True)
                        raise WorkflowException(
                            u"Error collecting output for parameter '%s':\n%s"
                            % (shortname(port["id"]), indent(u(str(e)))))
        if ret:
            # Remap container paths, strip 'path', normalize, verify
            # locations, optionally checksum, then validate the record.
            revmap = partial(revmap_file, builder, outdir)
            adjustDirObjs(ret, trim_listing)
            visit_class(ret, ("File", "Directory"),
                        cast(Callable[[Any], Any], revmap))
            visit_class(ret, ("File", "Directory"), remove_path)
            normalizeFilesDirs(ret)
            visit_class(ret, ("File", "Directory"),
                        partial(check_valid_locations, fs_access))
            if compute_checksum:
                adjustFileObjs(ret, partial(compute_checksums, fs_access))
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret,
                strict=False, logger=_logger_validation_warnings)
        if ret is not None and builder.mutation_manager is not None:
            adjustFileObjs(ret, builder.mutation_manager.set_generation)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException(
            "Error validating output record. " + Text(e) + "\n in "
            + json.dumps(ret, indent=4))
    finally:
        if builder.mutation_manager and readers:
            for r in readers.values():
                builder.mutation_manager.release_reader(jobname, r)
def collect_output_ports(self, ports, builder, outdir):
    """Collect tool outputs, preferring a tool-supplied ``cwl.output.json``.

    :raises WorkflowException: if the output record fails schema validation.
    """
    # Defined before the try so the except handler can always report it;
    # previously `ret` was unbound (NameError) when the custom-output
    # branch raised a ValidationException.
    ret = {}
    try:
        custom_output = os.path.join(outdir, "cwl.output.json")
        if builder.fs_access.exists(custom_output):
            # Bug fix: the *path string* itself was passed to yaml.load(),
            # which parsed the filename rather than the file's contents.
            # Read the file through fs_access and parse with safe_load
            # (yaml.load without an explicit Loader is unsafe on untrusted
            # input).
            with builder.fs_access.open(custom_output, "r") as f:
                ret = yaml.safe_load(f)
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret
        for port in ports:
            fragment = shortname(port["id"])
            ret[fragment] = self.collect_output(port, builder, outdir)
        validate.validate_ex(
            self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException("Error validating output record, " + str(e)
                                + "\n in " + json.dumps(ret, indent=4))
def validate_hints(self, avsc_names, hints, strict):
    # type: (Any, List[Dict[str, Any]], bool) -> None
    """Validate hints; unknown hint classes are logged rather than fatal."""
    for hint in hints:
        try:
            schema = avsc_names.get_name(hint["class"], "")
            if schema is None:
                _logger.info(
                    str(validate.ValidationException(
                        u"Unknown hint %s" % (hint["class"]))))
            else:
                validate.validate_ex(schema, hint, strict=strict)
        except validate.ValidationException as err:
            raise validate.ValidationException(
                u"Validating hint `%s`: %s" % (hint["class"], str(err)))
def get_expressions(
    tool: Union[CommentedMap, str],
    schema: Optional[Union[Schema, ArraySchema]],
    source_line: Optional[SourceLine] = None,
) -> List[Tuple[str, Optional[SourceLine]]]:
    """Recursively find all expression strings in *tool*, guided by *schema*.

    Returns (expression, source line) pairs for every expression found.
    """
    if is_expression(tool, schema):
        return [(cast(str, tool), source_line)]
    if isinstance(schema, UnionSchema):
        chosen = None
        for candidate in schema.schemas:
            if is_expression(tool, candidate):
                return [(cast(str, tool), source_line)]
            if validate_ex(
                candidate,
                tool,
                raise_ex=False,
                logger=_logger_validation_warnings,
            ):
                # Remember the last branch that validates; recurse into it.
                chosen = candidate
        return get_expressions(tool, chosen, source_line)
    if isinstance(schema, ArraySchema):
        if not isinstance(tool, MutableSequence):
            return []
        return [
            found
            for idx, element in enumerate(tool)
            for found in get_expressions(
                element, schema.items, SourceLine(tool, idx)  # type: ignore
            )
        ]
    if isinstance(schema, RecordSchema):
        if not isinstance(tool, MutableMapping):
            return []
        collected: List[Tuple[str, Optional[SourceLine]]] = []
        for schema_field in schema.fields:
            if schema_field.name in tool:
                collected.extend(
                    get_expressions(
                        tool[schema_field.name],
                        schema_field.type,
                        SourceLine(tool, schema_field.name),
                    )
                )
        return collected
    return []
def validate_hints(self, avsc_names, hints, strict):
    # type: (Any, List[Dict[str, Any]], bool) -> None
    """Validate each hint against its named schema with source-line context.

    Known requirement classes without schemas are skipped silently; other
    unknown hint classes are logged at INFO level.
    """
    for i, r in enumerate(hints):
        sl = SourceLine(hints, i, validate.ValidationException)
        with sl:
            if (avsc_names.get_name(r["class"], "") is not None
                    and self.doc_loader is not None):
                plain_hint = dict(
                    (key, r[key]) for key in r
                    if key not in self.doc_loader.identifiers)  # strip identifiers
                validate.validate_ex(
                    avsc_names.get_name(plain_hint["class"], ""),
                    plain_hint,
                    strict=strict,
                )
            elif r["class"] in ("NetworkAccess", "LoadListingRequirement"):
                pass
            else:
                _logger.info(
                    str(sl.makeError("Unknown hint %s" % (r["class"]))))
def collect_output_ports(self, ports, builder, outdir, compute_checksum=True):
    # type: (Set[Dict[Text, Any]], Builder, Text, bool) -> Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
    """Collect this tool's outputs from *outdir* and validate the record.

    A tool-supplied ``cwl.output.json`` is taken verbatim; otherwise each
    port is collected via ``collect_output``.  File objects are remapped
    to host paths, stripped of 'path', normalized, and optionally
    checksummed before validation.
    """
    try:
        ret = {}  # type: Dict[Text, Union[Text, List[Any], Dict[Text, Any]]]
        fs_access = builder.make_fs_access(outdir)
        custom_output = fs_access.join(outdir, "cwl.output.json")
        if fs_access.exists(custom_output):
            with fs_access.open(custom_output, "r") as f:
                ret = json.load(f)
            if _logger.isEnabledFor(logging.DEBUG):
                _logger.debug(u"Raw output from %s: %s", custom_output,
                              json.dumps(ret, indent=4))
        else:
            for i, port in enumerate(ports):
                with SourceLine(ports, i, WorkflowException):
                    fragment = shortname(port["id"])
                    try:
                        ret[fragment] = self.collect_output(
                            port, builder, outdir, fs_access,
                            compute_checksum=compute_checksum)
                    except Exception as e:
                        _logger.debug(
                            u"Error collecting output for parameter '%s'"
                            % shortname(port["id"]), exc_info=True)
                        raise WorkflowException(
                            u"Error collecting output for parameter '%s':\n%s"
                            % (shortname(port["id"]), indent(unicode(e))))
        if ret:
            adjustFileObjs(ret,
                           cast(Callable[[Any], Any],  # known bug in mypy
                                # https://github.com/python/mypy/issues/797
                                partial(revmap_file, builder, outdir)))
            adjustFileObjs(ret, remove_path)
            adjustDirObjs(ret, remove_path)
            normalizeFilesDirs(ret)
            if compute_checksum:
                adjustFileObjs(ret, partial(compute_checksums, fs_access))
        validate.validate_ex(
            self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException("Error validating output record, " + Text(e)
                                + "\n in " + json.dumps(ret, indent=4))
def collect_output_ports(self, ports, builder, outdir):
    """Collect and validate this tool's outputs from *outdir*.

    Prefers a tool-supplied ``cwl.output.json``; otherwise each port is
    collected via ``collect_output``.

    :raises WorkflowException: if the output record fails validation.
    """
    # Bound before the try: previously `ret` was referenced in the except
    # handler but never assigned when the custom-output branch raised,
    # producing a NameError instead of the intended WorkflowException.
    ret = {}
    try:
        custom_output = os.path.join(outdir, "cwl.output.json")
        if builder.fs_access.exists(custom_output):
            # Bug fix: yaml.load() was called on the *path string*, so it
            # parsed the filename rather than the file contents.  Open the
            # file through fs_access and parse it with safe_load
            # (Loader-less yaml.load is unsafe on untrusted input).
            with builder.fs_access.open(custom_output, "r") as f:
                ret = yaml.safe_load(f)
            validate.validate_ex(
                self.names.get_name("outputs_record_schema", ""), ret)
            return ret
        for port in ports:
            fragment = shortname(port["id"])
            ret[fragment] = self.collect_output(port, builder, outdir)
        validate.validate_ex(
            self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException("Error validating output record, " + str(e)
                                + "\n in " + json.dumps(ret, indent=4))
def get_expressions(
        tool,  # type: Union[CommentedMap, Any]
        schema,  # type: Optional[avro.schema.Schema]
        source_line=None  # type: Optional[SourceLine]
):  # type: (...) -> List[Tuple[Text, Optional[SourceLine]]]
    """Recursively find all expression strings in *tool*, guided by *schema*.

    Returns a list of (expression, source line) pairs.
    """
    if is_expression(tool, schema):
        return [(tool, source_line)]
    elif isinstance(schema, avro.schema.UnionSchema):
        valid_schema = None
        for possible_schema in schema.schemas:
            if is_expression(tool, possible_schema):
                return [(tool, source_line)]
            elif validate_ex(possible_schema, tool, strict=True,
                             raise_ex=False,
                             logger=_logger_validation_warnings):
                # Remember the last branch that validates and recurse into it.
                valid_schema = possible_schema
        return get_expressions(tool, valid_schema, source_line)
    elif isinstance(schema, avro.schema.ArraySchema):
        # Fix: test MutableSequence (not bare list) so CommentedSeq and other
        # sequence types are handled, matching the sibling implementations.
        if not isinstance(tool, MutableSequence):
            return []
        return list(
            itertools.chain(*map(lambda x: get_expressions(
                x[1], schema.items, SourceLine(tool, x[0])), enumerate(
                    tool))  # type: ignore
                # https://github.com/python/mypy/issues/4679
                ))
    elif isinstance(schema, avro.schema.RecordSchema):
        # Bug fix: isinstance() against the typing.Dict alias is deprecated
        # and unreliable as a runtime check; use MutableMapping, consistent
        # with the other variants of this function.
        if not isinstance(tool, MutableMapping):
            return []
        expression_nodes = []
        for schema_field in schema.fields:
            if schema_field.name in tool:
                expression_nodes.extend(
                    get_expressions(tool[schema_field.name],
                                    schema_field.type,
                                    SourceLine(tool, schema_field.name)))
        return expression_nodes
    else:
        return []
def get_expressions(
        tool,  # type: Union[CommentedMap, Text]
        schema,  # type: Optional[avro.schema.Schema]
        source_line=None  # type: Optional[SourceLine]
):  # type: (...) -> List[Tuple[Text, Optional[SourceLine]]]
    """Recursively find all expression strings in *tool*, guided by *schema*.

    Returns (expression, source line) pairs.  Union branches are resolved
    by validating *tool* against each member schema; arrays and records
    are walked element-by-element / field-by-field.
    """
    if is_expression(tool, schema):
        return [(cast(Text, tool), source_line)]
    elif isinstance(schema, avro.schema.UnionSchema):
        valid_schema = None
        for possible_schema in schema.schemas:
            if is_expression(tool, possible_schema):
                return [(cast(Text, tool), source_line)]
            elif validate_ex(possible_schema, tool, raise_ex=False,
                             logger=_logger_validation_warnings):
                # Keep the last branch that validates; recurse into it below.
                valid_schema = possible_schema
        return get_expressions(tool, valid_schema, source_line)
    elif isinstance(schema, avro.schema.ArraySchema):
        if not isinstance(tool, MutableSequence):
            return []
        return list(
            itertools.chain(*map(
                lambda x: get_expressions(x[1], schema.items, SourceLine(tool, x[0])),
                enumerate(tool))))
    elif isinstance(schema, avro.schema.RecordSchema):
        if not isinstance(tool, MutableMapping):
            return []
        expression_nodes = []
        for schema_field in schema.fields:
            if schema_field.name in tool:
                expression_nodes.extend(
                    get_expressions(tool[schema_field.name],
                                    schema_field.type,
                                    SourceLine(tool, schema_field.name)))
        return expression_nodes
    else:
        return []
def get_expressions(tool,  # type: Union[CommentedMap, Any]
                    schema,  # type: Optional[avro.schema.Schema]
                    source_line=None  # type: Optional[SourceLine]
                    ):  # type: (...) -> List[Tuple[Text, Optional[SourceLine]]]
    """Walk *tool* under *schema* and return every expression with its source line."""
    if is_expression(tool, schema):
        return [(tool, source_line)]
    if isinstance(schema, avro.schema.UnionSchema):
        matching = None
        for branch in schema.schemas:
            if is_expression(tool, branch):
                return [(tool, source_line)]
            if validate_ex(branch, tool, raise_ex=False,
                           logger=_logger_validation_warnings):
                # Keep the last validating branch; recurse into it below.
                matching = branch
        return get_expressions(tool, matching, source_line)
    if isinstance(schema, avro.schema.ArraySchema):
        if not isinstance(tool, MutableSequence):
            return []
        collected = []  # type: List[Tuple[Text, Optional[SourceLine]]]
        for index, element in enumerate(tool):
            collected.extend(
                get_expressions(element, schema.items, SourceLine(tool, index)))
        return collected
    if isinstance(schema, avro.schema.RecordSchema):
        if not isinstance(tool, MutableMapping):
            return []
        nodes = []
        for field in schema.fields:
            if field.name in tool:
                nodes.extend(get_expressions(
                    tool[field.name], field.type, SourceLine(tool, field.name)))
        return nodes
    return []
def _init_job(self, joborder, **kwargs):
    # type: (Dict[Text, Text], **Any) -> Builder
    """
    kwargs:

    eval_timeout: javascript evaluation timeout
    use_container: do/don't use Docker when DockerRequirement hint provided
    make_fs_access: make an FsAccess() object with given basedir
    basedir: basedir for FsAccess
    docker_outdir: output directory inside docker for this job
    docker_tmpdir: tmpdir inside docker for this job
    docker_stagedir: stagedir inside docker for this job
    outdir: outdir on host for this job
    tmpdir: tmpdir on host for this job
    stagedir: stagedir on host for this job
    select_resources: callback to select compute resources
    debug: enable debugging output
    js_console: enable javascript console output
    """
    builder = Builder()
    builder.job = cast(Dict[Text, Union[Dict[Text, Any], List, Text]],
                       copy.deepcopy(joborder))

    # Validate job order
    try:
        fillInDefaults(self.tool[u"inputs"], builder.job)
        normalizeFilesDirs(builder.job)
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             builder.job, strict=False,
                             logger=_logger_validation_warnings)
    except (validate.ValidationException, WorkflowException) as e:
        raise WorkflowException("Invalid job input record:\n" + Text(e))

    builder.files = []
    builder.bindings = CommentedSeq()
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.hints = self.hints
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")
    builder.debug = kwargs.get("debug")
    builder.js_console = kwargs.get("js_console")
    builder.mutation_manager = kwargs.get("mutation_manager")
    builder.make_fs_access = kwargs.get("make_fs_access") or StdFsAccess
    builder.fs_access = builder.make_fs_access(kwargs["basedir"])
    builder.force_docker_pull = kwargs.get("force_docker_pull")

    loadListingReq, _ = self.get_requirement(
        "http://commonwl.org/cwltool#LoadListingRequirement")
    if loadListingReq:
        builder.loadListing = loadListingReq.get("loadListing")

    dockerReq, is_req = self.get_requirement("DockerRequirement")
    defaultDocker = None
    if dockerReq is None and "default_container" in kwargs:
        defaultDocker = kwargs["default_container"]
    if (dockerReq or defaultDocker) and kwargs.get("use_container"):
        if dockerReq:
            # Check if docker output directory is absolute
            if dockerReq.get("dockerOutputDirectory") and \
                    dockerReq.get("dockerOutputDirectory").startswith('/'):
                builder.outdir = dockerReq.get("dockerOutputDirectory")
            else:
                builder.outdir = builder.fs_access.docker_compatible_realpath(
                    dockerReq.get("dockerOutputDirectory")
                    or kwargs.get("docker_outdir") or "/var/spool/cwl")
        elif defaultDocker:
            builder.outdir = builder.fs_access.docker_compatible_realpath(
                kwargs.get("docker_outdir") or "/var/spool/cwl")
        builder.tmpdir = builder.fs_access.docker_compatible_realpath(
            kwargs.get("docker_tmpdir") or "/tmp")
        builder.stagedir = builder.fs_access.docker_compatible_realpath(
            kwargs.get("docker_stagedir") or "/var/lib/cwl")
    else:
        builder.outdir = builder.fs_access.realpath(
            kwargs.get("outdir") or tempfile.mkdtemp())
        if self.tool[u"class"] != 'Workflow':
            builder.tmpdir = builder.fs_access.realpath(
                kwargs.get("tmpdir") or tempfile.mkdtemp())
            builder.stagedir = builder.fs_access.realpath(
                kwargs.get("stagedir") or tempfile.mkdtemp())

    if self.formatgraph:
        # Verify declared input file formats against the ontology graph.
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.formatgraph)

    builder.bindings.extend(
        builder.bind_input(self.inputs_record_schema, builder.job))

    if self.tool.get("baseCommand"):
        # baseCommand entries sort before everything else via position -1000000.
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "datum": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            lc = self.tool["arguments"].lc.data[i]
            fn = self.tool["arguments"].lc.filename
            builder.bindings.lc.add_kv_line_col(len(builder.bindings), lc)
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                builder.bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # A bare string containing an expression becomes a valueFrom
                # binding.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("valueFrom", a)
                ))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = fn
                builder.bindings.append(cm)
            else:
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("datum", a)
                ))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = fn
                builder.bindings.append(cm)

    # use python2 like sorting of heterogeneous lists
    # (containing str and int types),
    # TODO: unify for both runtime
    if six.PY3:
        key = cmp_to_key(cmp_like_py2)
    else:  # PY2
        key = lambda dict: dict["position"]
    builder.bindings.sort(key=key)

    builder.resources = self.evalResources(builder, kwargs)
    builder.job_script_provider = kwargs.get("job_script_provider", None)
    return builder
def _init_job(self, joborder, runtime_context):
    # type: (Mapping[str, str], RuntimeContext) -> Builder
    """Construct a Builder for one job invocation.

    Deep-copies the job order, fills in defaults and validates it against
    the input record schema, selects output/tmp/stage directories (inside
    or outside a Docker container), and generates the command-line
    bindings for baseCommand and arguments.

    Raises WorkflowException if the document version was not internally
    updated or if the job input record is invalid.
    """
    # Refuse to run documents that were not upgraded to the internal CWL
    # version before execution.
    if self.metadata.get("cwlVersion") != INTERNAL_VERSION:
        raise WorkflowException(
            "Process object loaded with version '%s', must update to '%s' in order to execute." %
            (self.metadata.get("cwlVersion"), INTERNAL_VERSION))
    # Work on a private copy so the caller's job order is never mutated.
    job = cast(Dict[str, expression.JSON], copy.deepcopy(joborder))
    make_fs_access = getdefault(runtime_context.make_fs_access, StdFsAccess)
    fs_access = make_fs_access(runtime_context.basedir)
    load_listing_req, _ = self.get_requirement("LoadListingRequirement")
    if load_listing_req is not None:
        load_listing = load_listing_req.get("loadListing")
    else:
        load_listing = "no_listing"
    # Validate job order
    try:
        fill_in_defaults(self.tool["inputs"], job, fs_access)
        normalizeFilesDirs(job)
        schema = self.names.get_name("input_record_schema", "")
        if schema is None:
            raise WorkflowException("Missing input record schema: "
                                    "{}".format(self.names))
        validate.validate_ex(schema, job, strict=False,
                             logger=_logger_validation_warnings)
        if load_listing and load_listing != "no_listing":
            get_listing(fs_access, job,
                        recursive=(load_listing == "deep_listing"))
        visit_class(job, ("File", ), functools.partial(add_sizes, fs_access))
        if load_listing == "deep_listing":
            # Warn when a recursively-listed Directory input expands into a
            # very large number of File objects (> FILE_COUNT_WARNING).
            for i, inparm in enumerate(self.tool["inputs"]):
                k = shortname(inparm["id"])
                if k not in job:
                    continue
                v = job[k]
                dircount = [0]

                def inc(d):  # type: (List[int]) -> None
                    d[0] += 1

                visit_class(v, ("Directory", ), lambda x: inc(dircount))
                if dircount[0] == 0:
                    continue
                filecount = [0]
                visit_class(v, ("File", ), lambda x: inc(filecount))
                if filecount[0] > FILE_COUNT_WARNING:
                    # Long lines in this message are okay, will be reflowed based on terminal columns.
                    _logger.warning(
                        strip_dup_lineno(
                            SourceLine(self.tool["inputs"], i, str).
                            makeError(
                                """Recursive directory listing has resulted in a large number of File objects (%s) passed to the input parameter '%s'.  This may negatively affect workflow performance and memory use.

If this is a problem, use the hint 'cwltool:LoadListingRequirement' with "shallow_listing" or "no_listing" to change the directory listing behavior:

  $namespaces:
    cwltool: "http://commonwl.org/cwltool#"
  hints:
    cwltool:LoadListingRequirement:
      loadListing: shallow_listing

""" % (filecount[0], k))))
    except (validate.ValidationException, WorkflowException) as err:
        raise WorkflowException("Invalid job input record:\n" + str(err)) from err

    files = []  # type: List[Dict[str, str]]
    bindings = CommentedSeq()
    tmpdir = ""
    stagedir = ""

    # Decide where output/tmp/stage directories live: inside the container
    # when a DockerRequirement (or a default container) applies, otherwise
    # on the host filesystem.
    docker_req, _ = self.get_requirement("DockerRequirement")
    default_docker = None
    if docker_req is None and runtime_context.default_container:
        default_docker = runtime_context.default_container
    if (docker_req or default_docker) and runtime_context.use_container:
        if docker_req is not None:
            # Check if docker output directory is absolute
            if docker_req.get("dockerOutputDirectory") and docker_req.get(
                    "dockerOutputDirectory").startswith("/"):
                outdir = docker_req.get("dockerOutputDirectory")
            else:
                outdir = (docker_req.get("dockerOutputDirectory") or
                          runtime_context.docker_outdir or random_outdir())
        elif default_docker is not None:
            outdir = runtime_context.docker_outdir or random_outdir()
        tmpdir = runtime_context.docker_tmpdir or "/tmp"  # nosec
        stagedir = runtime_context.docker_stagedir or "/var/lib/cwl"
    else:
        outdir = fs_access.realpath(
            runtime_context.outdir or tempfile.mkdtemp(prefix=getdefault(
                runtime_context.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
        if self.tool["class"] != "Workflow":
            tmpdir = fs_access.realpath(runtime_context.tmpdir
                                        or tempfile.mkdtemp())
            stagedir = fs_access.realpath(runtime_context.stagedir
                                          or tempfile.mkdtemp())

    builder = Builder(
        job,
        files,
        bindings,
        self.schemaDefs,
        self.names,
        self.requirements,
        self.hints,
        {},
        runtime_context.mutation_manager,
        self.formatgraph,
        make_fs_access,
        fs_access,
        runtime_context.job_script_provider,
        runtime_context.eval_timeout,
        runtime_context.debug,
        runtime_context.js_console,
        runtime_context.force_docker_pull,
        load_listing,
        outdir,
        tmpdir,
        stagedir,
    )

    bindings.extend(
        builder.bind_input(
            self.inputs_record_schema,
            job,
            discover_secondaryFiles=getdefault(runtime_context.toplevel,
                                               False),
        ))

    # baseCommand entries sort before everything else via the -1000000
    # position sentinel.
    if self.tool.get("baseCommand"):
        for index, command in enumerate(aslist(self.tool["baseCommand"])):
            bindings.append({
                "position": [-1000000, index],
                "datum": command
            })

    if self.tool.get("arguments"):
        for i, arg in enumerate(self.tool["arguments"]):
            # Carry ruamel.yaml line/column info onto each binding so error
            # messages can point at the original document.
            lc = self.tool["arguments"].lc.data[i]
            filename = self.tool["arguments"].lc.filename
            bindings.lc.add_kv_line_col(len(bindings), lc)
            if isinstance(arg, MutableMapping):
                arg = copy.deepcopy(arg)
                if arg.get("position"):
                    position = arg.get("position")
                    if isinstance(position, str):  # no need to test the
                        # CWLVersion as the v1.0
                        # schema only allows ints
                        position = builder.do_eval(position)
                        if position is None:
                            position = 0
                    arg["position"] = [position, i]
                else:
                    arg["position"] = [0, i]
                bindings.append(arg)
            elif ("$(" in arg) or ("${" in arg):
                # Bare string containing an expression: evaluate later.
                cm = CommentedMap((("position", [0, i]),
                                   ("valueFrom", arg)))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = filename
                bindings.append(cm)
            else:
                # Plain literal argument.
                cm = CommentedMap((("position", [0, i]),
                                   ("datum", arg)))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = filename
                bindings.append(cm)

    # use python2 like sorting of heterogeneous lists
    # (containing str and int types),
    key = functools.cmp_to_key(cmp_like_py2)

    # This awkward construction replaces the contents of
    # "bindings" in place (because Builder expects it to be
    # mutated in place, sigh, I'm sorry) with its contents sorted,
    # supporting different versions of Python and ruamel.yaml with
    # different behaviors/bugs in CommentedSeq.
    bindings_copy = copy.deepcopy(bindings)
    del bindings[:]
    bindings.extend(sorted(bindings_copy, key=key))

    if self.tool["class"] != "Workflow":
        builder.resources = self.evalResources(builder, runtime_context)
    return builder
def get_expressions(
    tool: Union[CommentedMap, str, CommentedSeq],
    schema: Optional[Union[Schema, ArraySchema]],
    source_line: Optional[SourceLine] = None,
) -> List[Tuple[str, Optional[SourceLine]]]:
    """Recursively collect every CWL expression found in ``tool``.

    Walks ``tool`` guided by its Avro ``schema`` (union, array, record, or
    scalar) and returns ``(expression, source_line)`` pairs for each string
    that ``is_expression`` recognizes.
    """
    include_tb = _logger.isEnabledFor(logging.DEBUG)

    # Scalar case: the value itself is an expression.
    if is_expression(tool, schema):
        return [(cast(str, tool), source_line)]

    if isinstance(schema, UnionSchema):
        # Try each branch; remember the last branch the value validates
        # against and recurse into it.
        matched = None
        for candidate in schema.schemas:
            if is_expression(tool, candidate):
                return [(cast(str, tool), source_line)]
            if validate_ex(
                    candidate,
                    tool,
                    raise_ex=False,
                    logger=_logger_validation_warnings,
                    vocab={},
            ):
                matched = candidate
        return get_expressions(tool, matched, source_line)

    if isinstance(schema, ArraySchema):
        if not isinstance(tool, MutableSequence):
            return []
        collected: List[Tuple[str, Optional[SourceLine]]] = []
        for idx, entry in enumerate(tool):
            collected.extend(
                get_expressions(
                    entry,
                    schema.items,
                    SourceLine(tool, idx, include_traceback=include_tb),
                ))
        return collected

    if isinstance(schema, RecordSchema):
        if not isinstance(tool, MutableMapping):
            return []
        collected = []
        for field in schema.fields:
            if field.name in tool:
                collected.extend(
                    get_expressions(
                        tool[field.name],
                        field.type,
                        SourceLine(tool, field.name,
                                   include_traceback=include_tb),
                    ))
        return collected

    # Anything else (primitive schema, no schema) holds no expressions.
    return []
def _init_job(self, joborder, input_basedir, **kwargs):
    # type: (Dict[str, str], str, **Any) -> Builder
    """Build and return a Builder for one job.

    Copies the job order, fills in input defaults, validates it against
    the input record schema, picks output/tmp directories (container paths
    when a DockerRequirement is in effect), and generates the command-line
    bindings.  Raises WorkflowException on an invalid job order.
    """
    builder = Builder()
    # Private copy: the caller's job order is never mutated.
    builder.job = copy.deepcopy(joborder)

    fillInDefaults(self.tool["inputs"], builder.job)

    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))

    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")

    # Directories: in-container defaults when running with Docker,
    # otherwise host temp dirs (optionally with user-supplied prefixes).
    dockerReq, _ = self.get_requirement("DockerRequirement")
    if dockerReq and kwargs.get("use_container"):
        builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
        builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
    else:
        if kwargs.get('tmp_outdir_prefix'):
            builder.outdir = tempfile.mkdtemp(
                prefix=kwargs.get('tmp_outdir_prefix'))
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
        if kwargs.get('tmpdir_prefix'):
            builder.tmpdir = tempfile.mkdtemp(
                prefix=kwargs.get('tmpdir_prefix'))
        else:
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

    builder.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)

    # Check declared file formats of inputs against the format ontology.
    if self.formatgraph:
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.formatgraph)

    builder.bindings.extend(
        builder.bind_input(self.inputs_record_schema, builder.job))

    # baseCommand entries sort first via the -1000000 position sentinel.
    if self.tool.get("baseCommand"):
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "valueFrom": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                # Defer evaluation: move the raw valueFrom into do_eval.
                # NOTE(review): assumes every dict-form argument carries
                # "valueFrom" -- a KeyError here means it did not.
                a["do_eval"] = a["valueFrom"]
                a["valueFrom"] = None
                builder.bindings.append(a)
            else:
                builder.bindings.append({
                    "position": [0, i],
                    "valueFrom": a
                })

    builder.bindings.sort(key=lambda a: a["position"])

    builder.resources = self.evalResources(builder, kwargs)

    return builder
def _init_job(self, joborder, runtimeContext):
    # type: (MutableMapping[Text, Text], RuntimeContext) -> Builder
    """Build and return a Builder for one job.

    Deep-copies and validates the job order, resolves the load-listing
    behavior, selects output/tmp/stage directories (container paths when
    Docker applies), and generates command-line bindings sorted with
    Python-2-compatible ordering.  Raises WorkflowException on an invalid
    job input record.
    """
    job = cast(Dict[Text, Union[Dict[Text, Any], List[Any], Text, None]],
               copy.deepcopy(joborder))

    make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
    fs_access = make_fs_access(runtimeContext.basedir)

    # Validate job order
    try:
        fill_in_defaults(self.tool[u"inputs"], job, fs_access)
        normalizeFilesDirs(job)
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             job, strict=False,
                             logger=_logger_validation_warnings)
    except (validate.ValidationException, WorkflowException) as e:
        raise WorkflowException("Invalid job input record:\n" + Text(e))

    files = []  # type: List[Dict[Text, Text]]
    bindings = CommentedSeq()
    tmpdir = u""
    stagedir = u""

    loadListingReq, _ = self.get_requirement(
        "http://commonwl.org/cwltool#LoadListingRequirement")
    if loadListingReq is not None:
        loadListing = loadListingReq.get("loadListing")
    else:
        loadListing = "deep_listing"  # will default to "no_listing" in CWL v1.1

    # Directory selection: container paths when a DockerRequirement (or a
    # configured default container) is in effect, host paths otherwise.
    dockerReq, _ = self.get_requirement("DockerRequirement")
    defaultDocker = None

    if dockerReq is None and runtimeContext.default_container:
        defaultDocker = runtimeContext.default_container

    if (dockerReq or defaultDocker) and runtimeContext.use_container:
        if dockerReq is not None:
            # Check if docker output directory is absolute
            if dockerReq.get("dockerOutputDirectory") and \
                    dockerReq.get("dockerOutputDirectory").startswith('/'):
                outdir = dockerReq.get("dockerOutputDirectory")
            else:
                outdir = dockerReq.get("dockerOutputDirectory") or \
                    runtimeContext.docker_outdir or random_outdir()
        elif defaultDocker is not None:
            outdir = runtimeContext.docker_outdir or random_outdir()
        tmpdir = runtimeContext.docker_tmpdir or "/tmp"
        stagedir = runtimeContext.docker_stagedir or "/var/lib/cwl"
    else:
        outdir = fs_access.realpath(
            runtimeContext.outdir or tempfile.mkdtemp(
                prefix=getdefault(runtimeContext.tmp_outdir_prefix,
                                  DEFAULT_TMP_PREFIX)))
        if self.tool[u"class"] != 'Workflow':
            tmpdir = fs_access.realpath(runtimeContext.tmpdir
                                        or tempfile.mkdtemp())
            stagedir = fs_access.realpath(runtimeContext.stagedir
                                          or tempfile.mkdtemp())

    builder = Builder(job, files, bindings, self.schemaDefs, self.names,
                      self.requirements, self.hints, {},
                      runtimeContext.mutation_manager, self.formatgraph,
                      make_fs_access, fs_access,
                      runtimeContext.job_script_provider,
                      runtimeContext.eval_timeout, runtimeContext.debug,
                      runtimeContext.js_console,
                      runtimeContext.force_docker_pull, loadListing, outdir,
                      tmpdir, stagedir)

    bindings.extend(builder.bind_input(
        self.inputs_record_schema, job,
        discover_secondaryFiles=getdefault(runtimeContext.toplevel, False)))

    # baseCommand entries sort first via the -1000000 position sentinel.
    if self.tool.get("baseCommand"):
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            bindings.append({
                "position": [-1000000, n],
                "datum": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            # Propagate ruamel.yaml line/column info onto each binding.
            lc = self.tool["arguments"].lc.data[i]
            fn = self.tool["arguments"].lc.filename
            bindings.lc.add_kv_line_col(len(bindings), lc)
            if isinstance(a, MutableMapping):
                a = copy.deepcopy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # Bare string containing an expression: evaluate later.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("valueFrom", a)
                ))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = fn
                bindings.append(cm)
            else:
                # Plain literal argument.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("datum", a)
                ))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = fn
                bindings.append(cm)

    # use python2 like sorting of heterogeneous lists
    # (containing str and int types),
    # TODO: unify for both runtime
    if PY3:
        key = functools.cmp_to_key(cmp_like_py2)
    else:  # PY2
        key = lambda d: d["position"]

    # This awkward construction replaces the contents of
    # "bindings" in place (because Builder expects it to be
    # mutated in place, sigh, I'm sorry) with its contents sorted,
    # supporting different versions of Python and ruamel.yaml with
    # different behaviors/bugs in CommentedSeq.
    bd = copy.deepcopy(bindings)
    del bindings[:]
    bindings.extend(sorted(bd, key=key))

    if self.tool[u"class"] != 'Workflow':
        builder.resources = self.evalResources(builder, runtimeContext)
    return builder
def _init_job(self, joborder, **kwargs):
    # type: (Dict[Text, Text], **Any) -> Builder
    """Build and return a Builder for one job.

    Validates the job order, rejects a hard DockerRequirement when
    containers are disabled, resolves output/tmp/stage directories, and
    generates the command-line bindings.

    kwargs:

    eval_timeout: javascript evaluation timeout
    use_container: do/don't use Docker when DockerRequirement hint provided
    make_fs_access: make an FsAccess() object with given basedir
    basedir: basedir for FsAccess
    docker_outdir: output directory inside docker for this job
    docker_tmpdir: tmpdir inside docker for this job
    docker_stagedir: stagedir inside docker for this job
    outdir: outdir on host for this job
    tmpdir: tmpdir on host for this job
    stagedir: stagedir on host for this job
    select_resources: callback to select compute resources
    """
    builder = Builder()
    # Private copy: the caller's job order is never mutated.
    builder.job = cast(Dict[Text, Union[Dict[Text, Any], List, Text]],
                       copy.deepcopy(joborder))

    # Validate job order
    try:
        fillInDefaults(self.tool[u"inputs"], builder.job)
        normalizeFilesDirs(builder.job)
        validate.validate_ex(
            self.names.get_name("input_record_schema", ""), builder.job)
    except (validate.ValidationException, WorkflowException) as e:
        raise WorkflowException("Invalid job input record:\n" + Text(e))

    builder.files = []
    builder.bindings = CommentedSeq()
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.hints = self.hints
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")
    builder.debug = kwargs.get("debug")

    dockerReq, is_req = self.get_requirement("DockerRequirement")

    # A DockerRequirement in "requirements" is mandatory; it cannot be
    # satisfied when container use is disabled.
    if dockerReq and is_req and not kwargs.get("use_container"):
        raise WorkflowException(
            "Document has DockerRequirement under 'requirements' but use_container is false. DockerRequirement must be under 'hints' or use_container must be true."
        )

    builder.make_fs_access = kwargs.get("make_fs_access") or StdFsAccess
    builder.fs_access = builder.make_fs_access(kwargs["basedir"])

    # Directories: in-container paths with Docker, host temp dirs otherwise.
    if dockerReq and kwargs.get("use_container"):
        builder.outdir = builder.fs_access.realpath(
            dockerReq.get("dockerOutputDirectory") or
            kwargs.get("docker_outdir") or "/var/spool/cwl")
        builder.tmpdir = builder.fs_access.realpath(
            kwargs.get("docker_tmpdir") or "/tmp")
        builder.stagedir = builder.fs_access.realpath(
            kwargs.get("docker_stagedir") or "/var/lib/cwl")
    else:
        builder.outdir = builder.fs_access.realpath(
            kwargs.get("outdir") or tempfile.mkdtemp())
        builder.tmpdir = builder.fs_access.realpath(
            kwargs.get("tmpdir") or tempfile.mkdtemp())
        builder.stagedir = builder.fs_access.realpath(
            kwargs.get("stagedir") or tempfile.mkdtemp())

    # Check declared file formats of inputs against the format ontology.
    if self.formatgraph:
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.formatgraph)

    builder.bindings.extend(
        builder.bind_input(self.inputs_record_schema, builder.job))

    # baseCommand entries sort first via the -1000000 position sentinel.
    if self.tool.get("baseCommand"):
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "datum": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            # Propagate ruamel.yaml line/column info onto each binding.
            lc = self.tool["arguments"].lc.data[i]
            fn = self.tool["arguments"].lc.filename
            builder.bindings.lc.add_kv_line_col(len(builder.bindings), lc)
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                builder.bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # Bare string containing an expression: evaluate later.
                cm = CommentedMap((("position", [0, i]), ("valueFrom", a)))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = fn
                builder.bindings.append(cm)
            else:
                # Plain literal argument.
                cm = CommentedMap((("position", [0, i]), ("datum", a)))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = fn
                builder.bindings.append(cm)

    builder.bindings.sort(key=lambda a: a["position"])

    builder.resources = self.evalResources(builder, kwargs)

    return builder
def _init_job(self, joborder, **kwargs):
    # type: (Dict[unicode, unicode], **Any) -> Builder
    """Build and return a Builder for one job.

    Fills in input defaults, validates the job order, rejects a hard
    DockerRequirement when containers are disabled, selects
    output/tmp/stage directories, and generates command-line bindings.
    """
    builder = Builder()
    # Private copy: the caller's job order is never mutated.
    builder.job = cast(Dict[unicode, Union[Dict[unicode, Any], List, unicode]],
                       copy.deepcopy(joborder))

    fillInDefaults(self.tool[u"inputs"], builder.job)
    normalizeFilesDirs(builder.job)

    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))

    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")

    dockerReq, is_req = self.get_requirement("DockerRequirement")

    # A DockerRequirement in "requirements" is mandatory; it cannot be
    # satisfied when container use is disabled.
    if dockerReq and is_req and not kwargs.get("use_container"):
        raise WorkflowException("Document has DockerRequirement under 'requirements' but use_container is false. DockerRequirement must be under 'hints' or use_container must be true.")

    # Directories: in-container defaults with Docker, host temp dirs otherwise.
    if dockerReq and kwargs.get("use_container"):
        builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
        builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
        builder.stagedir = kwargs.get("docker_stagedir") or "/var/lib/cwl"
    else:
        builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
        builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
        builder.stagedir = kwargs.get("stagedir") or tempfile.mkdtemp()

    builder.fs_access = kwargs.get("fs_access") or StdFsAccess(kwargs["basedir"])

    # Check declared file formats of inputs against the format ontology.
    if self.formatgraph:
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.formatgraph)

    builder.bindings.extend(
        builder.bind_input(self.inputs_record_schema, builder.job))

    # baseCommand entries sort first via the -1000000 position sentinel.
    if self.tool.get("baseCommand"):
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "valueFrom": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                # Defer evaluation: move the raw valueFrom into do_eval.
                # NOTE(review): assumes every dict-form argument carries
                # "valueFrom" -- a KeyError here means it did not.
                a["do_eval"] = a["valueFrom"]
                a["valueFrom"] = None
                builder.bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # Bare string containing an expression: evaluate later.
                builder.bindings.append({
                    "position": [0, i],
                    "do_eval": a,
                    "valueFrom": None
                })
            else:
                builder.bindings.append({
                    "position": [0, i],
                    "valueFrom": a
                })

    builder.bindings.sort(key=lambda a: a["position"])

    builder.resources = self.evalResources(builder, kwargs)

    return builder
def _init_job(self, joborder, **kwargs):
    # type: (Dict[Text, Text], **Any) -> Builder
    """Build and return a Builder for one job.

    Validates the job order, resolves load-listing and Docker settings
    (including a configured default container), selects output/tmp/stage
    directories, and generates the command-line bindings.

    kwargs:

    eval_timeout: javascript evaluation timeout
    use_container: do/don't use Docker when DockerRequirement hint provided
    make_fs_access: make an FsAccess() object with given basedir
    basedir: basedir for FsAccess
    docker_outdir: output directory inside docker for this job
    docker_tmpdir: tmpdir inside docker for this job
    docker_stagedir: stagedir inside docker for this job
    outdir: outdir on host for this job
    tmpdir: tmpdir on host for this job
    stagedir: stagedir on host for this job
    select_resources: callback to select compute resources
    debug: enable debugging output
    js_console: enable javascript console output
    """
    builder = Builder()
    # Private copy: the caller's job order is never mutated.
    builder.job = cast(Dict[Text, Union[Dict[Text, Any], List, Text]],
                       copy.deepcopy(joborder))

    # Validate job order
    try:
        fillInDefaults(self.tool[u"inputs"], builder.job)
        normalizeFilesDirs(builder.job)
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             builder.job, strict=False,
                             logger=_logger_validation_warnings)
    except (validate.ValidationException, WorkflowException) as e:
        raise WorkflowException("Invalid job input record:\n" + Text(e))

    builder.files = []
    builder.bindings = CommentedSeq()
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.hints = self.hints
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")
    builder.debug = kwargs.get("debug")
    builder.js_console = kwargs.get("js_console")
    builder.mutation_manager = kwargs.get("mutation_manager")
    builder.make_fs_access = kwargs.get("make_fs_access") or StdFsAccess
    builder.fs_access = builder.make_fs_access(kwargs["basedir"])
    builder.force_docker_pull = kwargs.get("force_docker_pull")

    loadListingReq, _ = self.get_requirement(
        "http://commonwl.org/cwltool#LoadListingRequirement")
    if loadListingReq:
        builder.loadListing = loadListingReq.get("loadListing")

    dockerReq, is_req = self.get_requirement("DockerRequirement")
    defaultDocker = None

    if dockerReq is None and "default_container" in kwargs:
        defaultDocker = kwargs["default_container"]

    # Directories: Docker-compatible paths when a DockerRequirement (or a
    # default container) applies, host paths otherwise.
    if (dockerReq or defaultDocker) and kwargs.get("use_container"):
        if dockerReq:
            # Check if docker output directory is absolute
            if dockerReq.get("dockerOutputDirectory") and dockerReq.get(
                    "dockerOutputDirectory").startswith('/'):
                builder.outdir = dockerReq.get("dockerOutputDirectory")
            else:
                builder.outdir = builder.fs_access.docker_compatible_realpath(
                    dockerReq.get("dockerOutputDirectory") or
                    kwargs.get("docker_outdir") or "/var/spool/cwl")
        elif defaultDocker:
            builder.outdir = builder.fs_access.docker_compatible_realpath(
                kwargs.get("docker_outdir") or "/var/spool/cwl")
        builder.tmpdir = builder.fs_access.docker_compatible_realpath(
            kwargs.get("docker_tmpdir") or "/tmp")
        builder.stagedir = builder.fs_access.docker_compatible_realpath(
            kwargs.get("docker_stagedir") or "/var/lib/cwl")
    else:
        builder.outdir = builder.fs_access.realpath(
            kwargs.get("outdir") or tempfile.mkdtemp())
        builder.tmpdir = builder.fs_access.realpath(
            kwargs.get("tmpdir") or tempfile.mkdtemp())
        builder.stagedir = builder.fs_access.realpath(
            kwargs.get("stagedir") or tempfile.mkdtemp())

    # Check declared file formats of inputs against the format ontology.
    if self.formatgraph:
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.formatgraph)

    builder.bindings.extend(
        builder.bind_input(self.inputs_record_schema, builder.job))

    # baseCommand entries sort first via the -1000000 position sentinel.
    if self.tool.get("baseCommand"):
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "datum": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            # Propagate ruamel.yaml line/column info onto each binding.
            lc = self.tool["arguments"].lc.data[i]
            fn = self.tool["arguments"].lc.filename
            builder.bindings.lc.add_kv_line_col(len(builder.bindings), lc)
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                builder.bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # Bare string containing an expression: evaluate later.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("valueFrom", a)
                ))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = fn
                builder.bindings.append(cm)
            else:
                # Plain literal argument.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("datum", a)
                ))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = fn
                builder.bindings.append(cm)

    # use python2 like sorting of heterogeneous lists
    # (containing str and int types),
    # TODO: unify for both runtime
    if six.PY3:
        key = cmp_to_key(cmp_like_py2)
    else:  # PY2
        key = lambda dict: dict["position"]
    builder.bindings.sort(key=key)

    builder.resources = self.evalResources(builder, kwargs)

    builder.job_script_provider = kwargs.get("job_script_provider", None)

    return builder
def _init_job(self, joborder, input_basedir, **kwargs):
    # type: (Dict[str, str], str, **Any) -> Builder
    """Build and return a Builder for one job.

    Copies the job order, applies input defaults, validates against the
    input record schema, chooses output/tmp directories (container
    defaults when Docker is in use), and generates command-line bindings.
    """
    builder = Builder()
    builder.job = copy.deepcopy(joborder)

    fillInDefaults(self.tool["inputs"], builder.job)

    # Validate job order
    try:
        validate.validate_ex(
            self.names.get_name("input_record_schema", ""), builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))

    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")

    docker_req, _ = self.get_requirement("DockerRequirement")
    if docker_req and kwargs.get("use_container"):
        # Inside the container: fixed default paths unless overridden.
        builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
        builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
    else:
        # On the host: honor explicit prefixes first, then explicit dirs,
        # then fall back to fresh temp directories.
        out_prefix = kwargs.get('tmp_outdir_prefix')
        if out_prefix:
            builder.outdir = tempfile.mkdtemp(prefix=out_prefix)
        else:
            builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
        tmp_prefix = kwargs.get('tmpdir_prefix')
        if tmp_prefix:
            builder.tmpdir = tempfile.mkdtemp(prefix=tmp_prefix)
        else:
            builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()

    builder.fs_access = kwargs.get("fs_access") or StdFsAccess(input_basedir)

    # Check declared file formats of inputs against the format ontology.
    if self.formatgraph:
        for inp in self.tool["inputs"]:
            inp_name = shortname(inp["id"])
            if inp_name in builder.job and inp.get("format"):
                checkFormat(builder.job[inp_name],
                            builder.do_eval(inp["format"]),
                            self.formatgraph)

    builder.bindings.extend(
        builder.bind_input(self.inputs_record_schema, builder.job))

    # baseCommand words sort before everything else (-1000000 sentinel).
    if self.tool.get("baseCommand"):
        for word_idx, word in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, word_idx],
                "valueFrom": word
            })

    if self.tool.get("arguments"):
        for arg_idx, arg in enumerate(self.tool["arguments"]):
            if isinstance(arg, dict):
                binding = copy.copy(arg)
                if binding.get("position"):
                    binding["position"] = [binding["position"], arg_idx]
                else:
                    binding["position"] = [0, arg_idx]
                # Defer evaluation: stash the raw valueFrom in do_eval.
                binding["do_eval"] = binding["valueFrom"]
                binding["valueFrom"] = None
                builder.bindings.append(binding)
            else:
                builder.bindings.append({
                    "position": [0, arg_idx],
                    "valueFrom": arg
                })

    builder.bindings.sort(key=lambda binding: binding["position"])

    builder.resources = self.evalResources(builder, kwargs)

    return builder
def _init_job(self, joborder, runtime_context):
    # type: (Mapping[Text, Text], RuntimeContext) -> Builder
    """Construct a Builder for one job invocation.

    Deep-copies the job order, fills in defaults and validates it, applies
    the load-listing setting (warning on very large deep listings),
    selects output/tmp/stage directories, and generates the command-line
    bindings.  Raises WorkflowException on an invalid job input record.
    """
    # Work on a private copy so the caller's job order is never mutated.
    job = cast(Dict[Text, Union[Dict[Text, Any], List[Any], Text, None]],
               copy.deepcopy(joborder))

    make_fs_access = getdefault(runtime_context.make_fs_access, StdFsAccess)
    fs_access = make_fs_access(runtime_context.basedir)

    load_listing_req, _ = self.get_requirement(
        "LoadListingRequirement")

    if load_listing_req is not None:
        load_listing = load_listing_req.get("loadListing")
    else:
        load_listing = "no_listing"

    # Validate job order
    try:
        fill_in_defaults(self.tool[u"inputs"], job, fs_access)
        normalizeFilesDirs(job)
        schema = self.names.get_name("input_record_schema", "")
        if schema is None:
            raise WorkflowException("Missing input record schema: "
                                    "{}".format(self.names))
        validate.validate_ex(schema, job, strict=False,
                             logger=_logger_validation_warnings)

        if load_listing and load_listing != "no_listing":
            get_listing(fs_access, job,
                        recursive=(load_listing == "deep_listing"))

        visit_class(job, ("File",), functools.partial(add_sizes, fs_access))

        if load_listing == "deep_listing":
            # Warn when a recursively-listed Directory input expands into
            # more than FILE_COUNT_WARNING File objects.
            for i, inparm in enumerate(self.tool["inputs"]):
                k = shortname(inparm["id"])
                if k not in job:
                    continue
                v = job[k]
                dircount = [0]

                def inc(d):  # type: (List[int]) -> None
                    d[0] += 1

                visit_class(v, ("Directory",), lambda x: inc(dircount))
                if dircount[0] == 0:
                    continue
                filecount = [0]
                visit_class(v, ("File",), lambda x: inc(filecount))
                if filecount[0] > FILE_COUNT_WARNING:
                    # Long lines in this message are okay, will be reflowed based on terminal columns.
                    _logger.warning(strip_dup_lineno(SourceLine(
                        self.tool["inputs"], i, Text).makeError(
                            """Recursive directory listing has resulted in a large number of File objects (%s) passed to the input parameter '%s'.  This may negatively affect workflow performance and memory use.

If this is a problem, use the hint 'cwltool:LoadListingRequirement' with "shallow_listing" or "no_listing" to change the directory listing behavior:

  $namespaces:
    cwltool: "http://commonwl.org/cwltool#"
  hints:
    cwltool:LoadListingRequirement:
      loadListing: shallow_listing

""" % (filecount[0], k))))
    except (validate.ValidationException, WorkflowException) as err:
        raise WorkflowException("Invalid job input record:\n" + Text(err))

    files = []  # type: List[Dict[Text, Text]]
    bindings = CommentedSeq()
    tmpdir = u""
    stagedir = u""

    # Directory selection: container paths when a DockerRequirement (or a
    # configured default container) is in effect, host paths otherwise.
    docker_req, _ = self.get_requirement("DockerRequirement")
    default_docker = None

    if docker_req is None and runtime_context.default_container:
        default_docker = runtime_context.default_container

    if (docker_req or default_docker) and runtime_context.use_container:
        if docker_req is not None:
            # Check if docker output directory is absolute
            if docker_req.get("dockerOutputDirectory") and \
                    docker_req.get("dockerOutputDirectory").startswith('/'):
                outdir = docker_req.get("dockerOutputDirectory")
            else:
                outdir = docker_req.get("dockerOutputDirectory") or \
                    runtime_context.docker_outdir or random_outdir()
        elif default_docker is not None:
            outdir = runtime_context.docker_outdir or random_outdir()
        tmpdir = runtime_context.docker_tmpdir or "/tmp"
        stagedir = runtime_context.docker_stagedir or "/var/lib/cwl"
    else:
        outdir = fs_access.realpath(
            runtime_context.outdir or tempfile.mkdtemp(
                prefix=getdefault(runtime_context.tmp_outdir_prefix,
                                  DEFAULT_TMP_PREFIX)))
        if self.tool[u"class"] != 'Workflow':
            tmpdir = fs_access.realpath(runtime_context.tmpdir
                                        or tempfile.mkdtemp())
            stagedir = fs_access.realpath(runtime_context.stagedir
                                          or tempfile.mkdtemp())

    builder = Builder(job, files, bindings, self.schemaDefs, self.names,
                      self.requirements, self.hints, {},
                      runtime_context.mutation_manager, self.formatgraph,
                      make_fs_access, fs_access,
                      runtime_context.job_script_provider,
                      runtime_context.eval_timeout, runtime_context.debug,
                      runtime_context.js_console,
                      runtime_context.force_docker_pull, load_listing,
                      outdir, tmpdir, stagedir)

    bindings.extend(builder.bind_input(
        self.inputs_record_schema, job,
        discover_secondaryFiles=getdefault(runtime_context.toplevel,
                                           False)))

    # baseCommand entries sort first via the -1000000 position sentinel.
    if self.tool.get("baseCommand"):
        for index, command in enumerate(aslist(self.tool["baseCommand"])):
            bindings.append({
                "position": [-1000000, index],
                "datum": command
            })

    if self.tool.get("arguments"):
        for i, arg in enumerate(self.tool["arguments"]):
            # Propagate ruamel.yaml line/column info onto each binding.
            lc = self.tool["arguments"].lc.data[i]
            filename = self.tool["arguments"].lc.filename
            bindings.lc.add_kv_line_col(len(bindings), lc)
            if isinstance(arg, MutableMapping):
                arg = copy.deepcopy(arg)
                if arg.get("position"):
                    arg["position"] = [arg["position"], i]
                else:
                    arg["position"] = [0, i]
                bindings.append(arg)
            elif ("$(" in arg) or ("${" in arg):
                # Bare string containing an expression: evaluate later.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("valueFrom", arg)
                ))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = filename
                bindings.append(cm)
            else:
                # Plain literal argument.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("datum", arg)
                ))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = filename
                bindings.append(cm)

    # use python2 like sorting of heterogeneous lists
    # (containing str and int types),
    if PY3:
        key = functools.cmp_to_key(cmp_like_py2)
    else:  # PY2
        key = lambda d: d["position"]

    # This awkward construction replaces the contents of
    # "bindings" in place (because Builder expects it to be
    # mutated in place, sigh, I'm sorry) with its contents sorted,
    # supporting different versions of Python and ruamel.yaml with
    # different behaviors/bugs in CommentedSeq.
    bindings_copy = copy.deepcopy(bindings)
    del bindings[:]
    bindings.extend(sorted(bindings_copy, key=key))

    if self.tool[u"class"] != 'Workflow':
        builder.resources = self.evalResources(builder, runtime_context)
    return builder
def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs):
    """Validate the tool document and build its input/output record schemas.

    toolpath_object: the parsed tool document; stored as ``self.tool``.
    validateAs: name of the schema to validate the document against.
    do_validate: when False, skip document validation entirely.
    kwargs: may carry ``strict`` plus ``requirements``/``hints``
        inherited from an enclosing process.

    Raises ``validate.ValidationException`` when the document or either
    derived record schema is invalid.
    """
    (_, self.names, _) = get_schema()
    self.tool = toolpath_object
    if do_validate:
        try:
            # Validate tool document
            validate.validate_ex(self.names.get_name(validateAs, ""), self.tool,
                                 strict=kwargs.get("strict"))
        except validate.ValidationException as v:
            raise validate.ValidationException("Could not validate %s as %s:\n%s" % (
                self.tool.get("id"), validateAs, validate.indent(str(v))))

    # Inherited requirements/hints are concatenated ahead of the tool's own.
    self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
    self.validate_hints(self.tool.get("hints", []), strict=kwargs.get("strict"))

    self.schemaDefs = {}
    sd, _ = self.get_requirement("SchemaDefRequirement")
    if sd:
        # Register user-supplied schema definitions so parameter types can
        # reference them by name.
        sdtypes = sd["types"]
        av = schema_salad.schema.make_valid_avro(
            sdtypes, {t["name"]: t for t in sdtypes}, set())
        for i in av:
            self.schemaDefs[i["name"]] = i
        avro.schema.make_avsc_object(av, self.names)

    # Build record schemas from the tool's inputs and outputs.
    self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []}
    self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []}
    for key in ("inputs", "outputs"):
        for i in self.tool[key]:
            c = copy.copy(i)
            c["name"] = shortname(c["id"])
            del c["id"]
            if "type" not in c:
                raise validate.ValidationException(
                    "Missing `type` in parameter `%s`" % c["name"])
            if "default" in c and "null" not in aslist(c["type"]):
                # A parameter with a default value is implicitly optional.
                c["type"] = ["null"] + aslist(c["type"])
            if key == "inputs":
                self.inputs_record_schema["fields"].append(c)
            elif key == "outputs":
                self.outputs_record_schema["fields"].append(c)

    try:
        self.inputs_record_schema = schema_salad.schema.make_valid_avro(
            self.inputs_record_schema, {}, set())
        avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
    except avro.schema.SchemaParseException as e:
        raise validate.ValidationException(
            "Got error `%s` while processing inputs of %s:\n%s" %
            (str(e), self.tool["id"],
             json.dumps(self.inputs_record_schema, indent=4)))

    try:
        self.outputs_record_schema = schema_salad.schema.make_valid_avro(
            self.outputs_record_schema, {}, set())
        avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
    except avro.schema.SchemaParseException as e:
        raise validate.ValidationException(
            "Got error `%s` while processing outputs of %s:\n%s" %
            (str(e), self.tool["id"],
             json.dumps(self.outputs_record_schema, indent=4)))
def _init_job(self, joborder, **kwargs):
    # type: (Dict[unicode, unicode], **Any) -> Builder
    """Build a Builder for this tool from a job order.

    Deep-copies *joborder*, fills in input defaults, validates it against
    the input record schema, resolves outdir/tmpdir/stagedir (container
    paths when running under Docker), and assembles the sorted
    command-line bindings from ``baseCommand`` and ``arguments``.

    Raises WorkflowException on input validation failure, or when a
    DockerRequirement appears under 'requirements' while use_container
    is false.
    """
    builder = Builder()
    # Work on a copy so the caller's job order is never mutated.
    builder.job = cast(Dict[unicode, Union[Dict[unicode, Any], List, unicode]],
                       copy.deepcopy(joborder))
    fillInDefaults(self.tool[u"inputs"], builder.job)
    normalizeFilesDirs(builder.job)
    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""), builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))

    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")

    dockerReq, is_req = self.get_requirement("DockerRequirement")
    # A hard DockerRequirement (under 'requirements') cannot be satisfied
    # without a container, so refuse to run when containers are disabled.
    if dockerReq and is_req and not kwargs.get("use_container"):
        raise WorkflowException(
            "Document has DockerRequirement under 'requirements' but use_container is false.  DockerRequirement must be under 'hints' or use_container must be true."
        )

    if dockerReq and kwargs.get("use_container"):
        # In-container paths; defaults match the CWL reference runner.
        builder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
        builder.tmpdir = kwargs.get("docker_tmpdir") or "/tmp"
        builder.stagedir = kwargs.get("docker_stagedir") or "/var/lib/cwl"
    else:
        # Host paths; fall back to fresh temporary directories.
        builder.outdir = kwargs.get("outdir") or tempfile.mkdtemp()
        builder.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp()
        builder.stagedir = kwargs.get("stagedir") or tempfile.mkdtemp()

    builder.fs_access = kwargs.get("fs_access") or StdFsAccess(
        kwargs["basedir"])

    if self.formatgraph:
        # Check declared file formats against the format ontology.
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.formatgraph)

    builder.bindings.extend(
        builder.bind_input(self.inputs_record_schema, builder.job))

    if self.tool.get("baseCommand"):
        # Position -1000000 forces baseCommand tokens before all arguments.
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "valueFrom": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                # Defer expression evaluation: stash the expression in
                # "do_eval" and clear "valueFrom" until evaluation time.
                a["do_eval"] = a["valueFrom"]
                a["valueFrom"] = None
                builder.bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # Bare string containing a CWL expression.
                builder.bindings.append({
                    "position": [0, i],
                    "do_eval": a,
                    "valueFrom": None
                })
            else:
                # Literal string argument.
                builder.bindings.append({
                    "position": [0, i],
                    "valueFrom": a
                })

    # Bindings sort by their [position, index] pairs.
    builder.bindings.sort(key=lambda a: a["position"])

    builder.resources = self.evalResources(builder, kwargs)
    return builder
def _init_job(self, joborder, runtimeContext):
    # type: (Dict[Text, Text], RuntimeContext) -> Builder
    """Build a Builder for this tool from a job order and runtime context.

    RuntimeContext fields used here:

    use_container: do/don't use Docker when DockerRequirement hint provided
    make_fs_access: make an FsAccess() object with given basedir
    docker_outdir: output directory inside docker for this job
    docker_tmpdir: tmpdir inside docker for this job
    docker_stagedir: stagedir inside docker for this job
    outdir: outdir on host for this job
    tmpdir: tmpdir on host for this job
    stagedir: stagedir on host for this job
    select_resources: callback to select compute resources
    tmp_outdir_prefix: Path prefix for intermediate output directories
    """

    # Work on a copy so the caller's job order is never mutated.
    job = cast(Dict[Text, Union[Dict[Text, Any], List, Text]],
               copy.deepcopy(joborder))

    # Validate job order
    try:
        fillInDefaults(self.tool[u"inputs"], job)
        normalizeFilesDirs(job)
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             job, strict=False,
                             logger=_logger_validation_warnings)
    except (validate.ValidationException, WorkflowException) as e:
        raise WorkflowException("Invalid job input record:\n" + Text(e))

    files = []  # type: List[Dict[Text, Text]]
    bindings = CommentedSeq()
    make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
    fs_access = make_fs_access(runtimeContext.basedir)
    tmpdir = u""
    stagedir = u""

    loadListingReq, _ = self.get_requirement(
        "http://commonwl.org/cwltool#LoadListingRequirement")
    if loadListingReq:
        loadListing = loadListingReq.get("loadListing")
    else:
        loadListing = "deep_listing"  # will default to "no_listing" in CWL v1.1

    dockerReq, _ = self.get_requirement("DockerRequirement")
    defaultDocker = None

    if dockerReq is None and runtimeContext.default_container:
        defaultDocker = runtimeContext.default_container

    if (dockerReq or defaultDocker) and runtimeContext.use_container:
        if dockerReq:
            # Check if docker output directory is absolute
            if dockerReq.get("dockerOutputDirectory") and \
                    dockerReq.get("dockerOutputDirectory").startswith('/'):
                # Absolute in-container path: use it verbatim.
                outdir = dockerReq.get("dockerOutputDirectory")
            else:
                outdir = fs_access.docker_compatible_realpath(
                    dockerReq.get("dockerOutputDirectory") or
                    runtimeContext.docker_outdir or "/var/spool/cwl")
        elif defaultDocker:
            outdir = fs_access.docker_compatible_realpath(
                runtimeContext.docker_outdir or "/var/spool/cwl")
        tmpdir = fs_access.docker_compatible_realpath(
            runtimeContext.docker_tmpdir or "/tmp")
        stagedir = fs_access.docker_compatible_realpath(
            runtimeContext.docker_stagedir or "/var/lib/cwl")
    else:
        outdir = fs_access.realpath(
            runtimeContext.outdir or tempfile.mkdtemp(prefix=getdefault(
                runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
        # Workflows delegate tmp/stage dirs to their steps.
        if self.tool[u"class"] != 'Workflow':
            tmpdir = fs_access.realpath(runtimeContext.tmpdir
                                        or tempfile.mkdtemp())
            stagedir = fs_access.realpath(runtimeContext.stagedir
                                          or tempfile.mkdtemp())

    builder = Builder(
        job, files, bindings, self.schemaDefs, self.names, self.requirements,
        self.hints, runtimeContext.eval_timeout, runtimeContext.debug, {},
        runtimeContext.js_console, runtimeContext.mutation_manager,
        self.formatgraph, make_fs_access, fs_access,
        runtimeContext.force_docker_pull, loadListing, outdir, tmpdir,
        stagedir, runtimeContext.job_script_provider)

    bindings.extend(
        builder.bind_input(self.inputs_record_schema, job,
                           discover_secondaryFiles=getdefault(
                               runtimeContext.toplevel, False)))

    if self.tool.get("baseCommand"):
        # Position -1000000 forces baseCommand tokens before all arguments.
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            bindings.append({"position": [-1000000, n], "datum": b})

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            # Propagate source line/column info for error reporting.
            lc = self.tool["arguments"].lc.data[i]
            fn = self.tool["arguments"].lc.filename
            bindings.lc.add_kv_line_col(len(bindings), lc)
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # Bare string containing a CWL expression.
                cm = CommentedMap((("position", [0, i]),
                                   ("valueFrom", a)))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = fn
                bindings.append(cm)
            else:
                # Literal string argument.
                cm = CommentedMap((("position", [0, i]),
                                   ("datum", a)))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = fn
                bindings.append(cm)

    # use python2 like sorting of heterogeneous lists
    # (containing str and int types),
    # TODO: unify for both runtime
    if six.PY3:
        key = cmp_to_key(cmp_like_py2)
    else:  # PY2
        key = lambda dict: dict["position"]
    bindings.sort(key=key)

    builder.resources = self.evalResources(builder, runtimeContext)
    return builder
def _init_job(self, joborder, runtimeContext):
    # type: (Dict[Text, Text], RuntimeContext) -> Builder
    """Build a Builder for this tool from a job order and runtime context.

    Copies and validates the job order, resolves output/tmp/stage
    directories (raw in-container paths under Docker, realpath'd host
    temp dirs otherwise), and assembles the sorted command-line bindings.

    Raises WorkflowException when the job order fails validation.
    """

    # Work on a copy so the caller's job order is never mutated.
    job = cast(Dict[Text, Union[Dict[Text, Any], List[Any], Text, None]],
               copy.deepcopy(joborder))

    make_fs_access = getdefault(runtimeContext.make_fs_access, StdFsAccess)
    fs_access = make_fs_access(runtimeContext.basedir)

    # Validate job order
    try:
        fill_in_defaults(self.tool[u"inputs"], job, fs_access)
        normalizeFilesDirs(job)
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             job, strict=False,
                             logger=_logger_validation_warnings)
    except (validate.ValidationException, WorkflowException) as e:
        raise WorkflowException("Invalid job input record:\n" + Text(e))

    files = []  # type: List[Dict[Text, Text]]
    bindings = CommentedSeq()
    tmpdir = u""
    stagedir = u""

    loadListingReq, _ = self.get_requirement(
        "http://commonwl.org/cwltool#LoadListingRequirement")
    if loadListingReq:
        loadListing = loadListingReq.get("loadListing")
    else:
        loadListing = "deep_listing"  # will default to "no_listing" in CWL v1.1

    dockerReq, _ = self.get_requirement("DockerRequirement")
    defaultDocker = None

    if dockerReq is None and runtimeContext.default_container:
        defaultDocker = runtimeContext.default_container

    if (dockerReq or defaultDocker) and runtimeContext.use_container:
        if dockerReq:
            # Check if docker output directory is absolute
            if dockerReq.get("dockerOutputDirectory") and \
                    dockerReq.get("dockerOutputDirectory").startswith('/'):
                outdir = dockerReq.get("dockerOutputDirectory")
            else:
                outdir = dockerReq.get("dockerOutputDirectory") or \
                    runtimeContext.docker_outdir or "/var/spool/cwl"
        elif defaultDocker:
            outdir = runtimeContext.docker_outdir or "/var/spool/cwl"
        tmpdir = runtimeContext.docker_tmpdir or "/tmp"
        stagedir = runtimeContext.docker_stagedir or "/var/lib/cwl"
    else:
        outdir = fs_access.realpath(
            runtimeContext.outdir or tempfile.mkdtemp(
                prefix=getdefault(runtimeContext.tmp_outdir_prefix,
                                  DEFAULT_TMP_PREFIX)))
        # Workflows delegate tmp/stage dirs to their steps.
        if self.tool[u"class"] != 'Workflow':
            tmpdir = fs_access.realpath(runtimeContext.tmpdir
                                        or tempfile.mkdtemp())
            stagedir = fs_access.realpath(runtimeContext.stagedir
                                          or tempfile.mkdtemp())

    builder = Builder(job, files, bindings, self.schemaDefs, self.names,
                      self.requirements, self.hints,
                      runtimeContext.eval_timeout, runtimeContext.debug, {},
                      runtimeContext.js_console,
                      runtimeContext.mutation_manager, self.formatgraph,
                      make_fs_access, fs_access,
                      runtimeContext.force_docker_pull, loadListing,
                      outdir, tmpdir, stagedir,
                      runtimeContext.job_script_provider)

    bindings.extend(builder.bind_input(
        self.inputs_record_schema, job,
        discover_secondaryFiles=getdefault(runtimeContext.toplevel, False)))

    if self.tool.get("baseCommand"):
        # Position -1000000 forces baseCommand tokens before all arguments.
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            bindings.append({
                "position": [-1000000, n],
                "datum": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            # Propagate source line/column info for error reporting.
            lc = self.tool["arguments"].lc.data[i]
            fn = self.tool["arguments"].lc.filename
            bindings.lc.add_kv_line_col(len(bindings), lc)
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # Bare string containing a CWL expression.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("valueFrom", a)
                ))
                cm.lc.add_kv_line_col("valueFrom", lc)
                cm.lc.filename = fn
                bindings.append(cm)
            else:
                # Literal string argument.
                cm = CommentedMap((
                    ("position", [0, i]),
                    ("datum", a)
                ))
                cm.lc.add_kv_line_col("datum", lc)
                cm.lc.filename = fn
                bindings.append(cm)

    # use python2 like sorting of heterogeneous lists
    # (containing str and int types),
    # TODO: unify for both runtime
    if six.PY3:
        key = cmp_to_key(cmp_like_py2)
    else:  # PY2
        key = lambda dict: dict["position"]
    bindings.sort(key=key)

    # Workflows compute resources per-step rather than for the whole run.
    if self.tool[u"class"] != 'Workflow':
        builder.resources = self.evalResources(builder, runtimeContext)
    return builder
def _init_job(self, joborder, **kwargs):
    # type: (Dict[Text, Text], **Any) -> Builder
    """Build a Builder for this tool from a job order.

    kwargs:

    eval_timeout: javascript evaluation timeout
    use_container: do/don't use Docker when DockerRequirement hint provided
    make_fs_access: make an FsAccess() object with given basedir
    basedir: basedir for FsAccess
    docker_outdir: output directory inside docker for this job
    docker_tmpdir: tmpdir inside docker for this job
    docker_stagedir: stagedir inside docker for this job
    outdir: outdir on host for this job
    tmpdir: tmpdir on host for this job
    stagedir: stagedir on host for this job
    select_resources: callback to select compute resources
    """
    builder = Builder()
    # Work on a copy so the caller's job order is never mutated.
    builder.job = cast(Dict[Text, Union[Dict[Text, Any], List, Text]],
                       copy.deepcopy(joborder))
    fillInDefaults(self.tool[u"inputs"], builder.job)
    normalizeFilesDirs(builder.job)
    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""),
                             builder.job)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + Text(e))

    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.names = self.names
    builder.requirements = self.requirements
    builder.resources = {}
    builder.timeout = kwargs.get("eval_timeout")

    dockerReq, is_req = self.get_requirement("DockerRequirement")
    # A hard DockerRequirement (under 'requirements') cannot be satisfied
    # without a container, so refuse to run when containers are disabled.
    if dockerReq and is_req and not kwargs.get("use_container"):
        raise WorkflowException("Document has DockerRequirement under 'requirements' but use_container is false.  DockerRequirement must be under 'hints' or use_container must be true.")

    builder.make_fs_access = kwargs.get("make_fs_access") or StdFsAccess
    builder.fs_access = builder.make_fs_access(kwargs["basedir"])

    if dockerReq and kwargs.get("use_container"):
        # NOTE(review): host-side realpath() is applied to in-container
        # paths here — presumably harmless for the defaults, but verify
        # against the container runtime's mount handling.
        builder.outdir = builder.fs_access.realpath(
            kwargs.get("docker_outdir") or "/var/spool/cwl")
        builder.tmpdir = builder.fs_access.realpath(
            kwargs.get("docker_tmpdir") or "/tmp")
        builder.stagedir = builder.fs_access.realpath(
            kwargs.get("docker_stagedir") or "/var/lib/cwl")
    else:
        # Host paths; fall back to fresh temporary directories.
        builder.outdir = builder.fs_access.realpath(
            kwargs.get("outdir") or tempfile.mkdtemp())
        builder.tmpdir = builder.fs_access.realpath(
            kwargs.get("tmpdir") or tempfile.mkdtemp())
        builder.stagedir = builder.fs_access.realpath(
            kwargs.get("stagedir") or tempfile.mkdtemp())

    if self.formatgraph:
        # Check declared file formats against the format ontology.
        for i in self.tool["inputs"]:
            d = shortname(i["id"])
            if d in builder.job and i.get("format"):
                checkFormat(builder.job[d], builder.do_eval(i["format"]),
                            self.formatgraph)

    builder.bindings.extend(builder.bind_input(self.inputs_record_schema,
                                               builder.job))

    if self.tool.get("baseCommand"):
        # Position -1000000 forces baseCommand tokens before all arguments.
        for n, b in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, n],
                "datum": b
            })

    if self.tool.get("arguments"):
        for i, a in enumerate(self.tool["arguments"]):
            if isinstance(a, dict):
                a = copy.copy(a)
                if a.get("position"):
                    a["position"] = [a["position"], i]
                else:
                    a["position"] = [0, i]
                builder.bindings.append(a)
            elif ("$(" in a) or ("${" in a):
                # Bare string containing a CWL expression.
                builder.bindings.append({
                    "position": [0, i],
                    "valueFrom": a
                })
            else:
                # Literal string argument.
                builder.bindings.append({
                    "position": [0, i],
                    "datum": a
                })

    # Bindings sort by their [position, index] pairs.
    builder.bindings.sort(key=lambda a: a["position"])

    builder.resources = self.evalResources(builder, kwargs)
    return builder
def collect_output_ports(
    self,
    ports: Union[CommentedSeq, Set[CWLObjectType]],
    builder: Builder,
    outdir: str,
    rcode: int,
    compute_checksum: bool = True,
    jobname: str = "",
    readers: Optional[MutableMapping[str, CWLObjectType]] = None,
) -> OutputPortsType:
    """Collect and validate this tool's outputs from *outdir*.

    If the job wrote a ``cwl.output.json`` it is loaded verbatim;
    otherwise each output port is collected individually. The result is
    path-remapped, normalized, optionally checksummed, and validated
    against the outputs record schema.

    Raises WorkflowException (chained from ValidationException) when the
    output record fails validation. Any mutation-manager readers in
    *readers* are always released, even on failure.
    """
    ret = {}  # type: OutputPortsType
    debug = _logger.isEnabledFor(logging.DEBUG)
    cwl_version = self.metadata.get(
        "http://commonwl.org/cwltool#original_cwlVersion", None)
    # exitCode is only exposed to expressions for post-v1.0 documents.
    if cwl_version != "v1.0":
        builder.resources["exitCode"] = rcode
    try:
        fs_access = builder.make_fs_access(outdir)
        custom_output = fs_access.join(outdir, "cwl.output.json")
        if fs_access.exists(custom_output):
            # The tool provided its whole output record directly.
            with fs_access.open(custom_output, "r") as f:
                ret = json.load(f)
            if debug:
                _logger.debug(
                    "Raw output from %s: %s",
                    custom_output,
                    json_dumps(ret, indent=4),
                )
        else:
            # Collect each declared output port separately.
            for i, port in enumerate(ports):
                with SourceLine(
                    ports,
                    i,
                    partial(ParameterOutputWorkflowException, port=port),
                    debug,
                ):
                    fragment = shortname(port["id"])
                    ret[fragment] = self.collect_output(
                        port,
                        builder,
                        outdir,
                        fs_access,
                        compute_checksum=compute_checksum,
                    )
        if ret:
            # Map output paths back to their original (pre-staging)
            # locations and normalize File/Directory objects.
            revmap = partial(revmap_file, builder, outdir)
            adjustDirObjs(ret, trim_listing)
            visit_class(ret, ("File", "Directory"), revmap)
            visit_class(ret, ("File", "Directory"), remove_path)
            normalizeFilesDirs(ret)
            visit_class(
                ret,
                ("File", "Directory"),
                partial(check_valid_locations, fs_access),
            )

            if compute_checksum:
                adjustFileObjs(ret, partial(compute_checksums, fs_access))
            expected_schema = cast(
                Schema, self.names.get_name("outputs_record_schema", None))
            validate_ex(expected_schema, ret, strict=False,
                        logger=_logger_validation_warnings)
        if ret is not None and builder.mutation_manager is not None:
            adjustFileObjs(ret, builder.mutation_manager.set_generation)
        return ret if ret is not None else {}
    except ValidationException as e:
        raise WorkflowException(
            "Error validating output record. " + str(e) + "\n in " +
            json_dumps(ret, indent=4)) from e
    finally:
        # Always release mutation-manager readers, even on failure.
        if builder.mutation_manager and readers:
            for r in readers.values():
                builder.mutation_manager.release_reader(jobname, r)