def validate_hints(self, hints, strict):
    """Validate each hint against its registered schema.

    Hints whose `class` is not a known schema name are logged (not fatal);
    hints that fail validation raise a ValidationException naming the hint.
    """
    for hint in hints:
        try:
            # Look the hint class up once; get_name returns None when unknown.
            avsc = self.names.get_name(hint["class"], "")
            if avsc is not None:
                validate.validate_ex(avsc, hint, strict=strict)
            else:
                # Unknown hints are allowed by the spec, so only log them.
                _logger.info(
                    validate.ValidationException("Unknown hint %s" % (hint["class"])))
        except validate.ValidationException as v:
            raise validate.ValidationException("Validating hint `%s`: %s" % (hint["class"], str(v)))
def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
    """Recursively bind an input datum to a schema, producing command-line bindings.

    :param schema: CWL/Avro-style schema dict (may be a union list, a nested
        dict type, a named schemaDef, or a primitive/record/map/array/File).
    :param datum: the input value to bind (mutated in place for record
        defaults, File contents and secondaryFiles).
    :param lead_pos: position prefix for the sort key (list or scalar).
    :param tail_pos: position suffix for the sort key (list or scalar).
    :returns: list of binding dicts, each carrying a "position" sort key.
    :raises validate.ValidationException: if datum matches no branch of a union.
    """
    # Fix: the defaults were mutable ([]), which is a shared-object hazard.
    # Normalize here rather than passing None on, because aslist(None)
    # would wrap None into [None] instead of yielding [].
    lead_pos = [] if lead_pos is None else lead_pos
    tail_pos = [] if tail_pos is None else tail_pos

    bindings = []
    binding = None
    if "inputBinding" in schema and isinstance(schema["inputBinding"], dict):
        binding = copy.copy(schema["inputBinding"])
        # Compose the hierarchical sort key: lead + own position (default 0) + tail.
        if "position" in binding:
            binding["position"] = aslist(lead_pos) + aslist(
                binding["position"]) + aslist(tail_pos)
        else:
            binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)
        # Defer any valueFrom expression; the raw datum becomes the value.
        if "valueFrom" in binding:
            binding["do_eval"] = binding["valueFrom"]
        binding["valueFrom"] = datum

    # Handle union types: pick the first branch the datum validates against.
    if isinstance(schema["type"], list):
        for t in schema["type"]:
            if isinstance(t, basestring) and self.names.has_name(t, ""):
                avsc = self.names.get_name(t, "")
            elif isinstance(t, dict) and "name" in t and self.names.has_name(
                    t["name"], ""):
                avsc = self.names.get_name(t["name"], "")
            else:
                avsc = avro.schema.make_avsc_object(t, self.names)
            if validate.validate(avsc, datum):
                schema = copy.deepcopy(schema)
                schema["type"] = t
                return self.bind_input(
                    schema, datum, lead_pos=lead_pos, tail_pos=tail_pos)
        raise validate.ValidationException("'%s' is not a valid union %s" %
                                           (datum, schema["type"]))
    elif isinstance(schema["type"], dict):
        st = copy.deepcopy(schema["type"])
        # Propagate an empty inputBinding into array/map subtypes so their
        # items get bound, unless itemSeparator says to join them instead.
        if binding and "inputBinding" not in st and "itemSeparator" not in binding and st[
                "type"] in ("array", "map"):
            st["inputBinding"] = {}
        bindings.extend(
            self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos))
    else:
        if schema["type"] in self.schemaDefs:
            schema = self.schemaDefs[schema["type"]]

        if schema["type"] == "record":
            for f in schema["fields"]:
                if f["name"] in datum:
                    bindings.extend(
                        self.bind_input(f, datum[f["name"]],
                                        lead_pos=lead_pos,
                                        tail_pos=f["name"]))
                else:
                    # Fill in declared defaults on the datum itself.
                    datum[f["name"]] = f.get("default")

        if schema["type"] == "map":
            for n, item in datum.items():
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = [n, item]
                bindings.extend(
                    self.bind_input(
                        {
                            "type": schema["values"],
                            "inputBinding": b2
                        },
                        item,
                        lead_pos=n,
                        tail_pos=tail_pos))
            # Items carry the binding now; don't also emit the parent binding.
            binding = None

        if schema["type"] == "array":
            for n, item in enumerate(datum):
                b2 = None
                if binding:
                    b2 = copy.deepcopy(binding)
                    b2["valueFrom"] = item
                bindings.extend(
                    self.bind_input(
                        {
                            "type": schema["items"],
                            "inputBinding": b2
                        },
                        item,
                        lead_pos=n,
                        tail_pos=tail_pos))
            binding = None

        if schema["type"] == "File":
            self.files.append(datum)
            if binding:
                if binding.get("loadContents"):
                    # Read at most CONTENT_LIMIT bytes into the File object.
                    with self.fs_access.open(datum["path"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

                if "secondaryFiles" in binding:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(binding["secondaryFiles"]):
                        if isinstance(sf, dict):
                            # Expression form: evaluate against the File path.
                            sfpath = self.do_eval(sf, context=datum["path"])
                        else:
                            # Suffix-pattern form, e.g. ".bai" or "^.crai".
                            sfpath = {
                                "path": substitute(datum["path"], sf),
                                "class": "File"
                            }
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                            self.files.extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                            self.files.append(sfpath)

    # Prepend this binding's position to every child binding's sort key.
    if binding:
        for bi in bindings:
            bi["position"] = binding["position"] + bi["position"]
        bindings.append(binding)

    return bindings
def job(self, joborder, input_basedir, output_callback, **kwargs):
    """Yield a single configured job runner for this command-line tool.

    Builds the command-line bindings (baseCommand, arguments, inputs),
    resolves stdin/stdout redirection, maps input file paths, and applies
    Docker/CreateFile/EnvVar requirements before yielding the job object.
    """
    builder = self._init_job(joborder, input_basedir, **kwargs)

    # baseCommand tokens sort before everything else (-1000000 prefix).
    if self.tool["baseCommand"]:
        for idx, token in enumerate(aslist(self.tool["baseCommand"])):
            builder.bindings.append({
                "position": [-1000000, idx],
                "valueFrom": token
            })

    if self.tool.get("arguments"):
        for idx, arg in enumerate(self.tool["arguments"]):
            if isinstance(arg, dict):
                arg = copy.copy(arg)
                # Secondary key idx keeps argument order stable within a position.
                if arg.get("position"):
                    arg["position"] = [arg["position"], idx]
                else:
                    arg["position"] = [0, idx]
                # Defer valueFrom evaluation to command-line generation time.
                arg["do_eval"] = arg["valueFrom"]
                arg["valueFrom"] = None
                builder.bindings.append(arg)
            else:
                builder.bindings.append({
                    "position": [0, idx],
                    "valueFrom": arg
                })

    builder.bindings.sort(key=lambda b: b["position"])

    reffiles = set(f["path"] for f in builder.files)

    j = self.makeJobRunner()
    j.joborder = builder.job
    j.stdin = None
    j.stdout = None
    j.successCodes = self.tool.get("successCodes")
    j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
    j.permanentFailCodes = self.tool.get("permanentFailCodes")
    j.requirements = self.requirements
    j.hints = self.hints

    _logger.debug(
        "[job %s] initializing from %s%s", id(j), self.tool.get("id", ""),
        " as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "")
    _logger.debug("[job %s] %s", id(j), json.dumps(joborder, indent=4))

    builder.pathmapper = None

    if self.tool.get("stdin"):
        j.stdin = builder.do_eval(self.tool["stdin"])
        # A {"ref": "#name"} result points at an input File; use its path.
        if isinstance(j.stdin, dict) and "ref" in j.stdin:
            j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
        reffiles.add(j.stdin)

    if self.tool.get("stdout"):
        j.stdout = builder.do_eval(self.tool["stdout"])
        # stdout must stay inside the output directory.
        if os.path.isabs(j.stdout) or ".." in j.stdout:
            raise validate.ValidationException(
                "stdout must be a relative path")

    builder.pathmapper = self.makePathMapper(reffiles, input_basedir, **kwargs)
    builder.requirements = j.requirements

    # Rewrite every input File path to its mapped (container-side) location.
    for fileobj in builder.files:
        fileobj["path"] = builder.pathmapper.mapper(fileobj["path"])[1]

    _logger.debug("[job %s] command line bindings is %s", id(j),
                  json.dumps(builder.bindings, indent=4))
    _logger.debug(
        "[job %s] path mappings is %s", id(j),
        json.dumps(
            {
                p: builder.pathmapper.mapper(p)
                for p in builder.pathmapper.files()
            },
            indent=4))

    docker_req, _ = self.get_requirement("DockerRequirement")
    if docker_req and kwargs.get("use_container"):
        # Containerized run: allocate host-side out/tmp dirs if not given.
        j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(
            prefix=kwargs.get("tmp_outdir_prefix"))
        j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(
            prefix=kwargs.get("tmpdir_prefix"))
    else:
        j.outdir = builder.outdir
        j.tmpdir = builder.tmpdir

    create_files, _ = self.get_requirement("CreateFileRequirement")
    j.generatefiles = {}
    if create_files:
        for filedef in create_files["fileDef"]:
            name = builder.do_eval(filedef["filename"])
            j.generatefiles[name] = copy.deepcopy(
                builder.do_eval(filedef["fileContent"]))

    j.environment = {}
    env_req, _ = self.get_requirement("EnvVarRequirement")
    if env_req:
        for envdef in env_req["envDef"]:
            j.environment[envdef["envName"]] = builder.do_eval(
                envdef["envValue"])

    j.command_line = flatten(
        [builder.generate_arg(b) for b in builder.bindings])

    j.pathmapper = builder.pathmapper
    j.collect_outputs = functools.partial(self.collect_output_ports,
                                          self.tool["outputs"], builder)
    j.output_callback = output_callback

    yield j
def __init__(self, toolpath_object, validateAs, do_validate=True, **kwargs):
    """Load and validate a tool document, then register its I/O schemas.

    :param toolpath_object: the parsed tool document (dict).
    :param validateAs: schema name to validate the document against.
    :param do_validate: skip document validation when False.
    :raises validate.ValidationException: on an invalid document or a
        parameter with no `type`.
    """
    (_, self.names) = get_schema()
    self.tool = toolpath_object
    if do_validate:
        try:
            # Validate tool document
            validate.validate_ex(self.names.get_name(validateAs, ""),
                                 self.tool,
                                 strict=kwargs.get("strict"))
        except validate.ValidationException as v:
            raise validate.ValidationException(
                "Could not validate %s as %s:\n%s" %
                (self.tool.get("id"), validateAs, validate.indent(str(v))))

    # Inherited requirements/hints (from kwargs) come before the tool's own.
    self.requirements = kwargs.get("requirements", []) + self.tool.get(
        "requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
    self.validate_hints(self.tool.get("hints", []),
                        strict=kwargs.get("strict"))

    self.schemaDefs = {}
    sd, _ = self.get_requirement("SchemaDefRequirement")
    if sd:
        for t in sd["types"]:
            avro.schema.make_avsc_object(t, self.names)
            self.schemaDefs[t["name"]] = t

    # Build record schemas for the tool's inputs and outputs.
    # (Previously two duplicated loops; factored into one helper.)
    self.inputs_record_schema = self._build_record_schema(
        "input_record_schema", self.tool["inputs"])
    self.outputs_record_schema = self._build_record_schema(
        "outputs_record_schema", self.tool["outputs"])

def _build_record_schema(self, name, params):
    """Build and register an Avro record schema from CWL parameter dicts.

    Each parameter's URI fragment becomes the field name; parameters with a
    `default` become optional (nullable). Returns the record schema dict.
    """
    record = {"name": name, "type": "record", "fields": []}
    for param in params:
        c = copy.copy(param)
        doc_url, fragment = urlparse.urldefrag(c['id'])
        c["name"] = fragment
        del c["id"]
        if "type" not in c:
            raise validate.ValidationException(
                "Missing `type` in parameter `%s`" % c["name"])
        if "default" in c:
            # A default value makes the field optional.
            c["type"] = ["null"] + aslist(c["type"])
        record["fields"].append(c)
    avro.schema.make_avsc_object(record, self.names)
    return record