def _init_job(self, joborder, basedir, **kwargs):
    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)
    except validate.ValidationException as v:
        _logger.error("Failed to validate %s\n%s" % (pprint.pformat(joborder), v))
        raise

    for r in self.tool.get("requirements", []):
        if r["class"] not in supportedProcessRequirements:
            raise WorkflowException("Unsupported process requirement %s" % (r["class"]))

    # Merge requirements and hints supplied by the caller with those declared by the tool
    self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

    # Set up the Builder that holds the job inputs and command line bindings
    builder = Builder()
    builder.job = copy.deepcopy(joborder)
    builder.jslib = ''
    builder.basedir = basedir
    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.docpath = self.docpath
    builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

    return builder
def __init__(self, toolpath_object, validateAs, docpath):
    self.names = get_schema()
    self.docpath = docpath
    self.tool = toolpath_object

    # Validate tool document
    validate.validate_ex(self.names.get_name(validateAs, ""), self.tool)

    self.validate_requirements(self.tool, "requirements")
    self.validate_requirements(self.tool, "hints")

    for t in self.tool.get("requirements", []):
        t["_docpath"] = docpath

    for t in self.tool.get("hints", []):
        t["_docpath"] = docpath

    # Import schema defs
    self.schemaDefs = {
        "Any": [
            "null",
            "boolean",
            "int",
            "long",
            "float",
            "double",
            "bytes",
            "string",
            "File",
            {"type": "array", "items": "Any"},
            {"type": "map", "values": "Any"}
        ]}

    sd, _ = get_feature("SchemaDefRequirement",
                        requirements=self.tool.get("requirements"),
                        hints=self.tool.get("hints"))
    if sd:
        for i in sd["types"]:
            avro.schema.make_avsc_object(i, self.names)
            self.schemaDefs[i["name"]] = i

    # Build record schema from inputs
    self.inputs_record_schema = {"name": "input_record_schema",
                                 "type": "record",
                                 "fields": []}
    for i in self.tool["inputs"]:
        c = copy.copy(i)
        c["name"] = c["id"][1:]
        del c["id"]
        if "default" in c:
            c["type"] = ["null"] + aslist(c["type"])
        self.inputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

    # Build record schema from outputs
    self.outputs_record_schema = {"name": "outputs_record_schema",
                                  "type": "record",
                                  "fields": []}
    for i in self.tool["outputs"]:
        c = copy.copy(i)
        c["name"] = c["id"][1:]
        del c["id"]
        if "default" in c:
            c["type"] = ["null"] + aslist(c["type"])
        self.outputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.outputs_record_schema, self.names)
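# A minimal, self-contained sketch (not part of the original module; the
# helper names below are illustrative) of the input-to-record-schema
# transformation performed in __init__ above: ids like "#threads" lose their
# leading "#", and inputs carrying a "default" are made nullable so they may
# be omitted from the job order.
import copy

def _aslist(l):
    # mirrors the module's aslist helper: wrap a scalar in a list
    return l if isinstance(l, list) else [l]

def build_input_record_schema(inputs):
    fields = []
    for i in inputs:
        c = copy.copy(i)
        c["name"] = c["id"][1:]
        del c["id"]
        if "default" in c:
            c["type"] = ["null"] + _aslist(c["type"])
        fields.append(c)
    return {"name": "input_record_schema", "type": "record", "fields": fields}

# build_input_record_schema([{"id": "#threads", "type": "int", "default": 1}])
# -> {"name": "input_record_schema", "type": "record",
#     "fields": [{"name": "threads", "type": ["null", "int"], "default": 1}]}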
def _init_job(self, joborder, basedir, **kwargs):
    # Validate job order
    try:
        validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)
    except validate.ValidationException as e:
        raise WorkflowException("Error validating input record, " + str(e))

    for r in self.tool.get("requirements", []):
        if r["class"] not in supportedProcessRequirements:
            raise WorkflowException("Unsupported process requirement %s" % (r["class"]))

    self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])

    builder = Builder()
    builder.job = copy.deepcopy(joborder)
    builder.jslib = ''
    builder.basedir = basedir
    builder.files = []
    builder.bindings = []
    builder.schemaDefs = self.schemaDefs
    builder.docpath = self.docpath
    builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job))

    return builder
def job(self, joborder, basedir, output_callback, **kwargs):
    # Validate job order
    validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder)

    requirements = kwargs.get("requirements", []) + self.tool.get("requirements", [])
    hints = kwargs.get("hints", []) + self.tool.get("hints", [])

    steps = [makeTool(step, basedir) for step in self.tool.get("steps", [])]
    random.shuffle(steps)

    # Initialize workflow state from the input object, falling back to declared defaults
    self.state = {}
    for i in self.tool["inputs"]:
        iid = idk(i["id"])
        if iid in joborder:
            self.state[iid] = WorkflowStateItem(i, copy.deepcopy(joborder[iid]))
        elif "default" in i:
            self.state[iid] = WorkflowStateItem(i, copy.deepcopy(i["default"]))
        else:
            raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"]))

    for s in steps:
        for out in s.tool["outputs"]:
            self.state[idk(out["id"])] = None
        s.completed = False

    # Iterate until every step has run; yield None when no step can make
    # progress yet, so the caller can decide whether to block or poll
    completed = 0
    while completed < len(steps):
        made_progress = False
        completed = 0
        for step in steps:
            if step.completed:
                completed += 1
            else:
                for newjob in self.try_make_job(step, basedir, requirements=requirements, hints=hints, **kwargs):
                    if newjob:
                        made_progress = True
                        yield newjob
        if not made_progress and completed < len(steps):
            yield None

    # Collect workflow outputs from the final state
    wo = {}
    for i in self.tool["outputs"]:
        if "connect" in i:
            src = idk(i["connect"]["source"])
            wo[idk(i["id"])] = self.state[src].value

    output_callback(wo)
def collect_output_ports(self, ports, builder, outdir):
    custom_output = os.path.join(outdir, "cwl.output.json")
    if os.path.exists(custom_output):
        # A tool may write cwl.output.json to supply its outputs directly
        with open(custom_output) as f:
            outputdoc = yaml.load(f)
        validate.validate_ex(self.names.get_name("outputs_record_schema", ""), outputdoc)
        return outputdoc
    ret = {port["id"][1:]: self.collect_output(port, builder, outdir)
           for port in ports}
    return ret if ret is not None else {}
def collect_output_ports(self, ports, builder, outdir):
    try:
        custom_output = os.path.join(outdir, "cwl.output.json")
        if os.path.exists(custom_output):
            # A tool may write cwl.output.json to supply its outputs directly
            with open(custom_output) as f:
                outputdoc = yaml.load(f)
            validate.validate_ex(self.names.get_name("outputs_record_schema", ""), outputdoc)
            return outputdoc
        ret = {port["id"][1:]: self.collect_output(port, builder, outdir)
               for port in ports}
        validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret)
        return ret if ret is not None else {}
    except validate.ValidationException as e:
        raise WorkflowException("Error validating output record, " + str(e))
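# A hedged usage sketch (hypothetical output shape and helper name, not the
# module's API) of the cwl.output.json override handled above: a tool that
# writes this file into its output directory supplies its outputs directly,
# and per-port collection is skipped.
import json
import os

def write_direct_outputs(outdir):
    # an assumed example of an outputs record; a real tool would emit
    # whatever matches its outputs_record_schema
    doc = {"out": {"class": "File", "path": os.path.join(outdir, "result.txt")}}
    with open(os.path.join(outdir, "cwl.output.json"), "w") as f:
        json.dump(doc, f)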
def exeval(ex, jobinput, requirements, docpath, context, pull_image):
    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            if r["id"][0] != "#":
                with open(os.path.join(docpath, r["id"])) as f:
                    ex_obj = yaml.load(f)
                sch = process.get_schema()
                validate.validate_ex(sch.get_name("ExpressionEngineRequirement", ""), ex_obj)
                r = ex_obj

            runtime = []
            img_id = docker.get_from_requirements(r.get("requirements"), r.get("hints"), pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("expressionDefs", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(os.path.join(r["_docpath"], exdef["ref"])) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "expressionDefs": exdefs,
                "job": jobinput,
                "context": context
            }

            _logger.debug(json.dumps(inp))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code.")

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
def exeval(ex, jobinput, requirements, docpath, context, pull_image):
    # JsonPointer expressions are resolved in-process; no external engine is required
    if ex["engine"] == "JsonPointer":
        return ref_resolver.resolve_pointer({"job": jobinput, "context": context}, ex["script"])

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            # The requirement may reference an external document describing the engine
            if r["id"][0] != "#":
                with open(os.path.join(docpath, r["id"])) as f:
                    ex_obj = yaml.load(f)
                sch = process.get_schema()
                validate.validate_ex(sch.get_name("ExpressionEngineRequirement", ""), ex_obj)
                r = ex_obj

            # Run the engine inside a container when one is specified
            runtime = []
            img_id = docker.get_from_requirements(r.get("requirements"), r.get("hints"), pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("expressionDefs", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(os.path.join(r["_docpath"], exdef["ref"])) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            # Feed the expression and job context to the engine over stdin as JSON
            inp = {
                "script": ex["script"],
                "expressionDefs": exdefs,
                "job": jobinput,
                "context": context
            }

            _logger.debug(json.dumps(inp))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code.")

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
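# A minimal, self-contained sketch of what the JsonPointer fast path above
# evaluates (assumption: ref_resolver.resolve_pointer performs a standard
# JSON Pointer lookup; the function name below is illustrative, not the
# module's API).
def resolve_json_pointer(document, pointer):
    # "/job/threads" resolves to document["job"]["threads"]
    result = document
    for part in pointer.lstrip("/").split("/"):
        result = result[int(part)] if isinstance(result, list) else result[part]
    return result

# resolve_json_pointer({"job": {"threads": 4}, "context": None}, "/job/threads")
# -> 4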
def validate_requirements(self, tool, field):
    for r in tool.get(field, []):
        try:
            if self.names.get_name(r["class"], "") is None:
                raise validate.ValidationException("Unknown requirement %s" % (r["class"]))
            validate.validate_ex(self.names.get_name(r["class"], ""), r)
            if "requirements" in r:
                self.validate_requirements(r, "requirements")
            if "hints" in r:
                self.validate_requirements(r, "hints")
        except validate.ValidationException as v:
            err = "While validating %s %s\n%s" % (field, r["class"], validate.indent(str(v)))
            if field == "hints":
                _logger.warn(err)
            else:
                raise validate.ValidationException(err)
def validate_doc(schema_names, validate_doc, loader, strict):
    has_root = False
    for r in schema_names.names.values():
        if r.get_prop("documentRoot"):
            has_root = True
            break

    if not has_root:
        raise validate.ValidationException("No document roots defined in the schema")

    if isinstance(validate_doc, list):
        pass
    elif isinstance(validate_doc, dict):
        validate_doc = [validate_doc]
    else:
        raise validate.ValidationException("Document must be dict or list")

    anyerrors = []
    for pos, item in enumerate(validate_doc):
        errors = []
        success = False
        for r in schema_names.names.values():
            if r.get_prop("documentRoot"):
                try:
                    validate.validate_ex(r, item, loader.identifiers, strict,
                                         foreign_properties=loader.foreign_properties)
                    success = True
                    break
                except validate.ValidationException as e:
                    errors.append("Could not validate as `%s` because\n%s" % (
                        r.get_prop("name"), validate.indent(str(e), nolead=False)))
        if not success:
            objerr = "Validation error at position %i" % pos
            for ident in loader.identifiers:
                if ident in item:
                    objerr = "Validation error in object %s" % (item[ident])
                    break
            anyerrors.append("%s\n%s" % (objerr, validate.indent("\n".join(errors))))
    if anyerrors:
        raise validate.ValidationException("\n".join(anyerrors))
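# A small sketch (illustrative names, not the module's API) of the
# try-each-documentRoot pattern used in validate_doc above: an item is
# accepted if any root schema validates it, and per-root failures are
# collected so the final error message can report every attempt.
def validate_against_roots(item, root_validators):
    errors = []
    for name, check in root_validators:
        try:
            check(item)  # raises on failure, like validate.validate_ex
            return True, []
        except ValueError as e:
            errors.append("Could not validate as `%s` because %s" % (name, e))
    return False, errors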
def __init__(self, toolpath_object, validateAs, docpath):
    self.names = get_schema()
    self.docpath = docpath
    self.tool = toolpath_object

    #if self.tool.get("@context") != TOOL_CONTEXT_URL:
    #    raise Exception("Missing or invalid '@context' field in tool description document, must be %s" % TOOL_CONTEXT_URL)

    # Validate tool document
    validate.validate_ex(self.names.get_name(validateAs, ""), self.tool)

    self.validate_requirements(self.tool, "requirements")
    self.validate_requirements(self.tool, "hints")

    for t in self.tool.get("requirements", []):
        t["_docpath"] = docpath

    for t in self.tool.get("hints", []):
        t["_docpath"] = docpath

    # Import schema defs
    self.schemaDefs = {
        "Any": [
            "null",
            "boolean",
            "int",
            "long",
            "float",
            "double",
            "bytes",
            "string",
            "File",
            {"type": "array", "items": "Any"},
            {"type": "map", "values": "Any"}
        ]}

    if self.tool.get("schemaDefs"):
        for i in self.tool["schemaDefs"]:
            avro.schema.make_avsc_object(i, self.names)
            self.schemaDefs[i["name"]] = i

    # Build record schema from inputs
    self.inputs_record_schema = {"name": "input_record_schema",
                                 "type": "record",
                                 "fields": []}
    for i in self.tool["inputs"]:
        c = copy.copy(i)
        c["name"] = c["id"][1:]
        del c["id"]
        if "default" in c:
            c["type"] = ["null"] + aslist(c["type"])
        self.inputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.inputs_record_schema, self.names)

    # Build record schema from outputs
    self.outputs_record_schema = {"name": "outputs_record_schema",
                                  "type": "record",
                                  "fields": []}
    for i in self.tool["outputs"]:
        c = copy.copy(i)
        c["name"] = c["id"][1:]
        del c["id"]
        if "default" in c:
            c["type"] = ["null"] + aslist(c["type"])
        self.outputs_record_schema["fields"].append(c)
    avro.schema.make_avsc_object(self.outputs_record_schema, self.names)