Example #1
def get_overrides(
        overrides,
        toolid):  # type: (List[Dict[Text, Any]], Text) -> Dict[Text, Any]
    req = {}  # type: Dict[Text, Any]
    if not isinstance(overrides, list):
        raise validate.ValidationException(
            "Expected overrides to be a list, but was %s" % type(overrides))
    for ov in overrides:
        if ov["overrideTarget"] == toolid:
            req.update(ov)
    return req
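The example above merges every override entry whose overrideTarget matches the given tool id. A minimal, self-contained sketch of the data shape it expects; the override entries and the tool id below are invented for illustration:

# Hypothetical override entries; "overrideTarget" names the tool they apply to.
overrides = [
    {"overrideTarget": "echo.cwl#main",
     "requirements": [{"class": "ResourceRequirement", "coresMin": 4}]},
    {"overrideTarget": "sort.cwl#main",
     "requirements": [{"class": "ResourceRequirement", "ramMin": 2048}]},
]

# Entries matching the tool id are merged into one dict, later entries winning.
req = {}
for ov in overrides:
    if ov["overrideTarget"] == "echo.cwl#main":
        req.update(ov)
print(req["requirements"])  # the ResourceRequirement override for echo.cwl#main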
Example #2
    def run(self, fileStore):
        cwljob = resolve_indirect(self.cwljob)

        if isinstance(self.step.tool["scatter"], basestring):
            scatter = [self.step.tool["scatter"]]
        else:
            scatter = self.step.tool["scatter"]

        scatterMethod = self.step.tool.get("scatterMethod", None)
        if len(scatter) == 1:
            scatterMethod = "dotproduct"
        outputs = []

        self.vfinputs = cwljob

        shortscatter = [shortname(s) for s in scatter]
        cwljob = {k: self.valueFromFunc(k, v) if k not in shortscatter else v
                  for k, v in cwljob.items()}

        if scatterMethod == "dotproduct":
            for i in xrange(0, len(cwljob[shortname(scatter[0])])):
                copyjob = copy.copy(cwljob)
                for sc in scatter:
                    scatter_key = shortname(sc)
                    copyjob[scatter_key] = self.valueFromFunc(scatter_key, cwljob[scatter_key][i])
                (subjob, followOn) = makeJob(self.step.embedded_tool, copyjob, **self.executor_options)
                self.addChild(subjob)
                outputs.append(followOn.rv())
        elif scatterMethod == "nested_crossproduct":
            outputs = self.nested_crossproduct_scatter(cwljob, scatter)
        elif scatterMethod == "flat_crossproduct":
            self.flat_crossproduct_scatter(cwljob, scatter, outputs)
        else:
            if scatterMethod:
                raise validate.ValidationException(
                    "Unsupported complex scatter type '%s'" % scatterMethod)
            else:
                raise validate.ValidationException(
                    "Must provide scatterMethod to scatter over multiple inputs")

        return outputs
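A standalone sketch of how the "dotproduct" branch above slices the job: every scattered input is indexed in lockstep, producing one sub-job per position (the input names and values are invented):

import copy

cwljob = {"sample": ["a", "b", "c"], "threshold": [1, 2, 3], "reference": "ref.fa"}
scatter_keys = ["sample", "threshold"]

subjobs = []
for i in range(len(cwljob[scatter_keys[0]])):
    copyjob = copy.copy(cwljob)
    for key in scatter_keys:
        copyjob[key] = cwljob[key][i]  # take the i-th element of every scattered input
    subjobs.append(copyjob)

print(subjobs[0])  # {'sample': 'a', 'threshold': 1, 'reference': 'ref.fa'}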
Example #3
    def addLocation(d):
        if "location" not in d:
            if d["class"] == "File" and ("contents" not in d):
                raise validate.ValidationException(
                    "Anonymous file object must have 'contents' and 'basename' fields."
                )
            if d["class"] == "Directory" and ("listing" not in d
                                              or "basename" not in d):
                raise validate.ValidationException(
                    "Anonymous directory object must have 'listing' and 'basename' fields."
                )
            d["location"] = "_:" + Text(uuid.uuid4())
            if "basename" not in d:
                d["basename"] = d["location"][2:]

        parse = urllib.parse.urlparse(d["location"])
        path = parse.path
        # strip trailing slash
        if path.endswith("/"):
            if d["class"] != "Directory":
                raise validate.ValidationException(
                    "location '%s' ends with '/' but is not a Directory" %
                    d["location"])
            path = path.rstrip("/")
            d["location"] = urllib.parse.urlunparse(
                (parse.scheme, parse.netloc, path, parse.params, parse.query,
                 parse.fragment))

        if not d.get("basename"):
            if path.startswith("_:"):
                d["basename"] = Text(path[2:])
            else:
                d["basename"] = Text(
                    os.path.basename(urllib.request.url2pathname(path)))

        if d["class"] == "File":
            nr, ne = os.path.splitext(d["basename"])
            if d.get("nameroot") != nr:
                d["nameroot"] = Text(nr)
            if d.get("nameext") != ne:
                d["nameext"] = Text(ne)
Example #4
def fillInDefaults(inputs, job):
    # type: (List[Dict[str, str]], Dict[str, str]) -> None
    for inp in inputs:
        if shortname(inp["id"]) in job:
            pass
        elif shortname(inp["id"]) not in job and "default" in inp:
            job[shortname(inp["id"])] = copy.copy(inp["default"])
        elif shortname(inp["id"]) not in job and inp["type"][0] == "null":
            pass
        else:
            raise validate.ValidationException("Missing input parameter `%s`" %
                                               shortname(inp["id"]))
Example #5
def fillInDefaults(inputs, job):
    # type: (List[Dict[unicode, unicode]], Dict[unicode, Union[Dict[unicode, Any], List, unicode]]) -> None
    for inp in inputs:
        if shortname(inp[u"id"]) in job:
            pass
        elif shortname(inp[u"id"]) not in job and u"default" in inp:
            job[shortname(inp[u"id"])] = copy.copy(inp[u"default"])
        elif shortname(inp[u"id"]) not in job and inp[u"type"][0] == u"null":
            pass
        else:
            raise validate.ValidationException("Missing input parameter `%s`" %
                                               shortname(inp["id"]))
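A self-contained sketch of the shapes the two fillInDefaults variants above operate on: a parameter carrying a "default" is copied into the job order when absent, and a type list starting with "null" marks the parameter optional (the ids, values, and simplified shortname are invented):

import copy

def shortname(inputid):
    # simplified stand-in for cwltool's shortname()
    return inputid.split("/")[-1]

inputs = [
    {"id": "#main/threshold", "type": ["int"], "default": 5},
    {"id": "#main/label", "type": ["null", "string"]},
    {"id": "#main/reads", "type": ["File"]},
]
job = {"reads": {"class": "File", "location": "reads.fastq"}}

for inp in inputs:
    key = shortname(inp["id"])
    if key in job:
        continue
    if "default" in inp:
        job[key] = copy.copy(inp["default"])
    elif inp["type"][0] != "null":
        raise ValueError("Missing input parameter `%s`" % key)

print(job)  # threshold filled from its default; label stays absent because it is optional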
Example #6
    def __init__(
        self,
        toolpath_object,  # type: Dict[Text, Any]
        loadingContext  # type: LoadingContext
    ):  # type: (...) -> None
        super(Workflow, self).__init__(toolpath_object, loadingContext)
        self.provenance_object = None  # type: Optional[CreateProvProfile]
        if loadingContext.research_obj:
            orcid = loadingContext.orcid
            full_name = loadingContext.cwl_full_name
            self.provenance_object = CreateProvProfile(
                loadingContext.research_obj, full_name, orcid,
                loadingContext.host_provenance, loadingContext.user_provenance)
            self.parent_wf = self.provenance_object
        loadingContext.prov_obj = self.provenance_object
        loadingContext = loadingContext.copy()
        loadingContext.requirements = self.requirements
        loadingContext.hints = self.hints

        self.steps = []  # type: List[WorkflowStep]
        validation_errors = []
        for index, step in enumerate(self.tool.get("steps", [])):
            try:
                self.steps.append(
                    WorkflowStep(step, index, loadingContext,
                                 loadingContext.prov_obj))
            except validate.ValidationException as vexc:
                if _logger.isEnabledFor(logging.DEBUG):
                    _logger.exception("Validation failed at")
                validation_errors.append(vexc)

        if validation_errors:
            raise validate.ValidationException("\n".join(
                str(v) for v in validation_errors))

        random.shuffle(self.steps)

        # statically validate data links instead of doing it at runtime.
        workflow_inputs = self.tool["inputs"]
        workflow_outputs = self.tool["outputs"]

        step_inputs = []  # type: List[Any]
        step_outputs = []  # type: List[Any]
        param_to_step = {}  # type: Dict[Text, Dict[Text, Any]]
        for step in self.steps:
            step_inputs.extend(step.tool["inputs"])
            step_outputs.extend(step.tool["outputs"])
            for s in step.tool["inputs"]:
                param_to_step[s["id"]] = step.tool

        if getdefault(loadingContext.do_validate, True):
            static_checker(workflow_inputs, workflow_outputs, step_inputs,
                           step_outputs, param_to_step)
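A short sketch of how the data-link bookkeeping above is assembled before static_checker runs: step inputs and outputs are flattened into single lists, and param_to_step lets the checker report which step a bad link belongs to (the step dicts below are invented):

steps = [
    {"inputs": [{"id": "#main/step1/in1"}], "outputs": [{"id": "#main/step1/out1"}]},
    {"inputs": [{"id": "#main/step2/in1"}], "outputs": [{"id": "#main/step2/out1"}]},
]

step_inputs = []
step_outputs = []
param_to_step = {}
for tool in steps:
    step_inputs.extend(tool["inputs"])
    step_outputs.extend(tool["outputs"])
    for s in tool["inputs"]:
        param_to_step[s["id"]] = tool

print(sorted(param_to_step))  # ['#main/step1/in1', '#main/step2/in1']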
Example #7
def checkversion(doc, metadata, enable_dev):
    # type: (Union[CommentedSeq, CommentedMap], CommentedMap, bool) -> Tuple[Dict[Text, Any], Text]  # pylint: disable=line-too-long
    """Checks the validity of the version of the give CWL document.

    Returns the document and the validated version string.
    """

    cdoc = None  # type: Optional[CommentedMap]
    if isinstance(doc, CommentedSeq):
        lc = metadata.lc
        metadata = copy.deepcopy(metadata)
        metadata.lc.data = copy.copy(lc.data)
        metadata.lc.filename = lc.filename
        metadata[u"$graph"] = doc
        cdoc = metadata
    elif isinstance(doc, CommentedMap):
        cdoc = doc
    else:
        raise Exception("Expected CommentedMap or CommentedSeq")
    assert cdoc is not None

    version = cdoc[u"cwlVersion"]

    if version not in UPDATES:
        if version in DEVUPDATES:
            if enable_dev:
                pass
            else:
                raise validate.ValidationException(
                    u"Version '%s' is a development or deprecated version.\n "
                    "Update your document to a stable version (%s) or use "
                    "--enable-dev to enable support for development and "
                    "deprecated versions." %
                    (version, ", ".join(list(UPDATES.keys()))))
        else:
            raise validate.ValidationException(u"Unrecognized version %s" %
                                               version)

    return (cdoc, version)
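A minimal sketch of the version gating above, with invented stand-ins for the UPDATES/DEVUPDATES tables (in cwltool these map cwlVersion strings to updater functions):

UPDATES = {"v1.0": None}            # hypothetical stable versions
DEVUPDATES = {"v1.1.0-dev1": None}  # hypothetical development versions

def check(version, enable_dev=False):
    if version in UPDATES:
        return version
    if version in DEVUPDATES:
        if enable_dev:
            return version
        raise ValueError("'%s' is a development or deprecated version; "
                         "use --enable-dev to accept it" % version)
    raise ValueError("Unrecognized version %s" % version)

print(check("v1.0"))                          # accepted
print(check("v1.1.0-dev1", enable_dev=True))  # accepted only with --enable-dev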
Example #8
def fetch_document(argsworkflow):
    # type: (Union[str, unicode, Dict[unicode, Any]]) -> Tuple[Loader, Dict[unicode, Any], unicode]
    """Retrieve a CWL document."""
    document_loader = Loader({"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"})

    uri = None  # type: unicode
    workflowobj = None  # type: Dict[unicode, Any]
    if isinstance(argsworkflow, (str, unicode)):
        split = urlparse.urlsplit(argsworkflow)
        if split.scheme:
            uri = argsworkflow
        else:
            uri = "file://" + os.path.abspath(argsworkflow)
        fileuri = urlparse.urldefrag(uri)[0]
        workflowobj = document_loader.fetch(fileuri)
    elif isinstance(argsworkflow, dict):
        workflowobj = argsworkflow
        uri = "#" + str(id(argsworkflow))
    else:
        raise validate.ValidationException(
            "Must be URI or object: '%s'" % argsworkflow)

    return document_loader, workflowobj, uri
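A standalone sketch of the URI normalization branch above, written against Python 3's urllib (the original targets Python 2's urlparse module):

import os
import urllib.parse

def resolve_workflow_uri(argsworkflow):
    # strings with a scheme are used as-is; bare paths become file:// URIs
    split = urllib.parse.urlsplit(argsworkflow)
    if split.scheme:
        uri = argsworkflow
    else:
        uri = "file://" + os.path.abspath(argsworkflow)
    fileuri = urllib.parse.urldefrag(uri)[0]  # fragment stripped before fetching
    return uri, fileuri

print(resolve_workflow_uri("https://example.org/wf.cwl#main"))
print(resolve_workflow_uri("workflows/wf.cwl"))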
Example #9
    def __init__(self, toolpath_object, **kwargs):
        # type: (Dict[Text, Any], **Any) -> None
        super(Workflow, self).__init__(toolpath_object, **kwargs)

        kwargs["requirements"] = self.requirements
        kwargs["hints"] = self.hints

        makeTool = kwargs.get("makeTool")
        self.steps = []  # type: List[WorkflowStep]
        validation_errors = []
        for n, step in enumerate(self.tool.get("steps", [])):
            try:
                self.steps.append(WorkflowStep(step, n, **kwargs))
            except validate.ValidationException as v:
                if _logger.isEnabledFor(logging.DEBUG):
                    _logger.exception("Validation failed at")
                validation_errors.append(v)

        if validation_errors:
            raise validate.ValidationException("\n".join(
                str(v) for v in validation_errors))

        random.shuffle(self.steps)

        # statically validate data links instead of doing it at runtime.
        workflow_inputs = self.tool["inputs"]
        workflow_outputs = self.tool["outputs"]

        step_inputs = []  # type: List[Any]
        step_outputs = []  # type: List[Any]
        for step in self.steps:
            step_inputs.extend(step.tool["inputs"])
            step_outputs.extend(step.tool["outputs"])

        static_checker(workflow_inputs, workflow_outputs, step_inputs,
                       step_outputs)
Example #10
    def __init__(self,
                 toolpath_object,      # type: Dict[Text, Any]
                 pos,                  # type: int
                 loadingContext,       # type: LoadingContext
                 parentworkflowProv=None  # type: Optional[CreateProvProfile]
                ):  # type: (...) -> None
        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + Text(pos)

        loadingContext = loadingContext.copy()

        loadingContext.requirements = (getdefault(loadingContext.requirements, []) +
                                  toolpath_object.get("requirements", []) +
                                  get_overrides(getdefault(loadingContext.overrides_list, []),
                                                self.id).get("requirements", []))
        loadingContext.hints = getdefault(loadingContext.hints, []) + toolpath_object.get("hints", [])

        try:
            if isinstance(toolpath_object["run"], dict):
                self.embedded_tool = loadingContext.construct_tool_object(toolpath_object["run"], loadingContext)
            else:
                self.embedded_tool = load_tool(
                    toolpath_object["run"], loadingContext)
        except validate.ValidationException as vexc:
            if loadingContext.debug:
                _logger.exception("Validation exception")
            raise WorkflowException(
                u"Tool definition %s failed validation:\n%s" %
                (toolpath_object["run"], validate.indent(str(vexc))))

        validation_errors = []
        self.tool = toolpath_object = copy.deepcopy(toolpath_object)
        bound = set()
        for stepfield, toolfield in (("in", "inputs"), ("out", "outputs")):
            toolpath_object[toolfield] = []
            for index, step_entry in enumerate(toolpath_object[stepfield]):
                if isinstance(step_entry, string_types):
                    param = CommentedMap()  # type: CommentedMap
                    inputid = step_entry
                else:
                    param = CommentedMap(six.iteritems(step_entry))
                    inputid = step_entry["id"]

                shortinputid = shortname(inputid)
                found = False
                for tool_entry in self.embedded_tool.tool[toolfield]:
                    frag = shortname(tool_entry["id"])
                    if frag == shortinputid:
                        # If the step itself has a default for a parameter,
                        # we do not want the tool's default to override it.
                        step_default = None
                        if "default" in param and "default" in tool_entry:
                            step_default = param["default"]
                        param.update(tool_entry)
                        param["_tool_entry"] = tool_entry
                        if step_default is not None:
                            param["default"] = step_default
                        found = True
                        bound.add(frag)
                        break
                if not found:
                    if stepfield == "in":
                        param["type"] = "Any"
                        param["not_connected"] = True
                    else:
                        validation_errors.append(
                            SourceLine(self.tool["out"], index).makeError(
                                "Workflow step output '%s' does not correspond to"
                                % shortname(step_entry))
                            + "\n" + SourceLine(self.embedded_tool.tool, "outputs").makeError(
                                "  tool output (expected '%s')" % (
                                    "', '".join(
                                        [shortname(tool_entry["id"]) for tool_entry in
                                         self.embedded_tool.tool[toolfield]]))))
                param["id"] = inputid
                param.lc.line = toolpath_object[stepfield].lc.data[index][0]
                param.lc.col = toolpath_object[stepfield].lc.data[index][1]
                param.lc.filename = toolpath_object[stepfield].lc.filename
                toolpath_object[toolfield].append(param)

        missing = []
        for i, tool_entry in enumerate(self.embedded_tool.tool["inputs"]):
            if shortname(tool_entry["id"]) not in bound:
                if "null" not in tool_entry["type"] and "default" not in tool_entry:
                    missing.append(shortname(tool_entry["id"]))

        if missing:
            validation_errors.append(SourceLine(self.tool, "in").makeError(
                "Step is missing required parameter%s '%s'" %
                ("s" if len(missing) > 1 else "", "', '".join(missing))))

        if validation_errors:
            raise validate.ValidationException("\n".join(validation_errors))

        super(WorkflowStep, self).__init__(toolpath_object, loadingContext)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature, _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains embedded workflow but "
                    "SubworkflowFeatureRequirement not in requirements")

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains scatter but ScatterFeatureRequirement "
                    "not in requirements")

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise validate.ValidationException(
                    "Must specify scatterMethod when scattering over multiple inputs")

            inp_map = {i["id"]: i for i in inputparms}
            for inp in scatter:
                if inp not in inp_map:
                    raise validate.ValidationException(
                        SourceLine(self.tool, "scatter").makeError(
                            "Scatter parameter '%s' does not correspond to "
                            "an input parameter of this step, expecting '%s'"
                            % (shortname(inp), "', '".join(
                                shortname(k) for k in inp_map.keys()))))

                inp_map[inp]["type"] = {"type": "array", "items": inp_map[inp]["type"]}

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for index in range(0, nesting):
                for oparam in outputparms:
                    oparam["type"] = {"type": "array", "items": oparam["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
        self.prov_obj = None  # type: Optional[CreateProvProfile]
        if loadingContext.research_obj:
            self.prov_obj = parentworkflowProv
            if self.embedded_tool.tool["class"] == "Workflow":
                self.parent_wf = self.embedded_tool.parent_wf
            else:
                self.parent_wf = self.prov_obj
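A small sketch of the type rewriting above for a scattered step: each scattered input becomes an array of its original type, and with nested_crossproduct the outputs are wrapped once per scattered input (the parameter ids are invented):

inputparms = [{"id": "#step/sample", "type": "File"},
              {"id": "#step/threshold", "type": "int"}]
outputparms = [{"id": "#step/report", "type": "File"}]
scatter = ["#step/sample", "#step/threshold"]
scatter_method = "nested_crossproduct"

inp_map = {i["id"]: i for i in inputparms}
for inp in scatter:
    inp_map[inp]["type"] = {"type": "array", "items": inp_map[inp]["type"]}

# nested_crossproduct nests the output arrays once per scattered input
nesting = len(scatter) if scatter_method == "nested_crossproduct" else 1
for _ in range(nesting):
    for oparam in outputparms:
        oparam["type"] = {"type": "array", "items": oparam["type"]}

print(inputparms[0]["type"])   # {'type': 'array', 'items': 'File'}
print(outputparms[0]["type"])  # {'type': 'array', 'items': {'type': 'array', 'items': 'File'}}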
Example #11
    def bind_input(self,
                   schema,                   # type: MutableMapping[Text, Any]
                   datum,                    # type: Any
                   discover_secondaryFiles,  # type: bool
                   lead_pos=None,            # type: Optional[Union[int, List[int]]]
                   tail_pos=None,            # type: Optional[List[int]]
                  ):  # type: (...) -> List[MutableMapping[Text, Any]]

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[Text, Text]]
        binding = None  # type: Optional[MutableMapping[Text,Any]]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(schema["inputBinding"], MutableMapping):
            binding = CommentedMap(schema["inputBinding"].items())
            assert binding is not None

            bp = list(aslist(lead_pos))
            if "position" in binding:
                bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, string_types) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t, MutableMapping) and "name" in t and self.names.has_name(t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                assert avsc is not None
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
                    else:
                        self.bind_input(schema, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
                        bound_input = True
            if not bound_input:
                raise validate.ValidationException(u"'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if binding is not None\
                    and "inputBinding" not in st\
                    and "type" in st\
                    and st["type"] == "array"\
                    and "itemSeparator" not in binding:
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles)
            else:
                bindings.extend(self.bind_input(st, datum, lead_pos=lead_pos, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum and datum[f["name"]] is not None:
                        bindings.extend(self.bind_input(f, datum[f["name"]], lead_pos=lead_pos, tail_pos=f["name"], discover_secondaryFiles=discover_secondaryFiles))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding is not None:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {
                        u"type": schema["items"],
                        u"inputBinding": b2
                    }
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(itemschema, item, lead_pos=n, tail_pos=tail_pos, discover_secondaryFiles=discover_secondaryFiles))
                binding = None

            def _capture_files(f):
                self.files.append(f)
                return f

            if schema["type"] == "File":
                self.files.append(datum)
                if (binding and binding.get("loadContents")) or schema.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT).decode("utf-8")

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if 'required' in sf:
                            sf_required = self.do_eval(sf['required'], context=datum)
                        else:
                            sf_required = True

                        if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                            sfpath = self.do_eval(sf["pattern"], context=datum)
                        else:
                            sfpath = substitute(datum["basename"], sf["pattern"])

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False
                            for d in datum["secondaryFiles"]:
                                if not d.get("basename"):
                                    d["basename"] = d["location"][d["location"].rindex("/")+1:]
                                if d["basename"] == sfname:
                                    found = True
                            if not found:
                                sf_location = datum["location"][0:datum["location"].rindex("/")+1]+sfname
                                if isinstance(sfname, MutableMapping):
                                    datum["secondaryFiles"].append(sfname)
                                elif discover_secondaryFiles and self.fs_access.exists(sf_location):
                                    datum["secondaryFiles"].append({
                                        "location": sf_location,
                                        "basename": sfname,
                                        "class": "File"})
                                elif sf_required:
                                    raise WorkflowException("Missing required secondary file '%s' from file object: %s" % (
                                        sfname, json_dumps(datum, indent=4)))

                    normalizeFilesDirs(datum["secondaryFiles"])

                if "format" in schema:
                    try:
                        check_format(datum, self.do_eval(schema["format"]),
                                     self.formatgraph)
                    except validate.ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" % (schema["name"], schema["format"], ve))

                visit_class(datum.get("secondaryFiles", []), ("File", "Directory"), _capture_files)

            if schema["type"] == "Directory":
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Position to front of the sort key
        if binding is not None:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
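A compact sketch of how the sort key is assembled above: lead positions, then the binding's own position (0 when absent), then trailing positions; a parent binding's positions are later prefixed onto its children's so siblings sort stably (the values below are arbitrary):

def aslist(thing):
    # simplified stand-in for cwltool's aslist()
    return thing if isinstance(thing, list) else [thing]

lead_pos, tail_pos = [0], []   # e.g. first top-level parameter, no trailing key
binding = {"position": 2}      # inputBinding.position from the schema

bp = list(aslist(lead_pos))
bp.extend(aslist(binding["position"]) if "position" in binding else [0])
bp.extend(aslist(tail_pos))
binding["position"] = bp
print(binding["position"])  # [0, 2]; a nested binding would later become [0, 2, <child position>]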
Example #12
    def job(
            self,
            job_order,  # type: MutableMapping[Text, Text]
            output_callbacks,  # type: Callable[[Any, Any], Any]
            runtimeContext  # RuntimeContext
    ):
        # type: (...) -> Generator[Union[JobBase, CallbackJob], None, None]

        require_prefix = ""
        if self.metadata["cwlVersion"] == "v1.0":
            require_prefix = "http://commonwl.org/cwltool#"

        workReuse, _ = self.get_requirement(require_prefix + "WorkReuse")
        enableReuse = workReuse.get("enableReuse", True) if workReuse else True

        jobname = uniquename(runtimeContext.name
                             or shortname(self.tool.get("id", "job")))
        if runtimeContext.cachedir and enableReuse:
            cachecontext = runtimeContext.copy()
            cachecontext.outdir = "/out"
            cachecontext.tmpdir = "/tmp"
            cachecontext.stagedir = "/stage"
            cachebuilder = self._init_job(job_order, cachecontext)
            cachebuilder.pathmapper = PathMapper(cachebuilder.files,
                                                 runtimeContext.basedir,
                                                 cachebuilder.stagedir,
                                                 separateDirs=False)
            _check_adjust = partial(check_adjust, cachebuilder)
            visit_class([cachebuilder.files, cachebuilder.bindings],
                        ("File", "Directory"), _check_adjust)

            cmdline = flatten(
                list(map(cachebuilder.generate_arg, cachebuilder.bindings)))
            docker_req, _ = self.get_requirement("DockerRequirement")
            if docker_req is not None and runtimeContext.use_container:
                dockerimg = docker_req.get("dockerImageId") or docker_req.get(
                    "dockerPull")
            elif runtimeContext.default_container is not None and runtimeContext.use_container:
                dockerimg = runtimeContext.default_container
            else:
                dockerimg = None

            if dockerimg is not None:
                cmdline = ["docker", "run", dockerimg] + cmdline
                # not really run using docker, just for hashing purposes
            keydict = {u"cmdline": cmdline}

            for shortcut in ["stdout", "stderr"]:  # later, add "stdin"
                if shortcut in self.tool:
                    keydict[shortcut] = self.tool[shortcut]

            for location, fobj in cachebuilder.pathmapper.items():
                if fobj.type == "File":
                    checksum = next(
                        (e['checksum'] for e in cachebuilder.files
                         if 'location' in e and e['location'] == location
                         and 'checksum' in e and e['checksum'] != 'sha1$hash'),
                        None)
                    fobj_stat = os.stat(fobj.resolved)
                    if checksum is not None:
                        keydict[fobj.resolved] = [fobj_stat.st_size, checksum]
                    else:
                        keydict[fobj.resolved] = [
                            fobj_stat.st_size,
                            int(fobj_stat.st_mtime * 1000)
                        ]

            interesting = {
                "DockerRequirement", "EnvVarRequirement",
                "CreateFileRequirement", "ShellCommandRequirement"
            }
            for rh in (self.original_requirements, self.original_hints):
                for r in reversed(rh):
                    if r["class"] in interesting and r["class"] not in keydict:
                        keydict[r["class"]] = r

            keydictstr = json_dumps(keydict,
                                    separators=(',', ':'),
                                    sort_keys=True)
            cachekey = hashlib.md5(keydictstr.encode('utf-8')).hexdigest()

            _logger.debug("[job %s] keydictstr is %s -> %s", jobname,
                          keydictstr, cachekey)

            jobcache = os.path.join(runtimeContext.cachedir, cachekey)
            jobcachepending = "{}.{}.pending".format(
                jobcache,
                threading.current_thread().ident)

            if os.path.isdir(jobcache) and not os.path.isfile(jobcachepending):
                if docker_req and runtimeContext.use_container:
                    cachebuilder.outdir = runtimeContext.docker_outdir or random_outdir(
                    )
                else:
                    cachebuilder.outdir = jobcache

                _logger.info("[job %s] Using cached output in %s", jobname,
                             jobcache)
                yield CallbackJob(self, output_callbacks, cachebuilder,
                                  jobcache)
                return
            else:
                _logger.info("[job %s] Output of job will be cached in %s",
                             jobname, jobcache)
                shutil.rmtree(jobcache, True)
                os.makedirs(jobcache)
                runtimeContext = runtimeContext.copy()
                runtimeContext.outdir = jobcache
                open(jobcachepending, "w").close()

                def rm_pending_output_callback(output_callbacks,
                                               jobcachepending, outputs,
                                               processStatus):
                    if processStatus == "success":
                        os.remove(jobcachepending)
                    output_callbacks(outputs, processStatus)

                output_callbacks = partial(rm_pending_output_callback,
                                           output_callbacks, jobcachepending)

        builder = self._init_job(job_order, runtimeContext)

        reffiles = copy.deepcopy(builder.files)

        j = self.make_job_runner(runtimeContext)(builder, builder.job,
                                                 self.make_path_mapper,
                                                 self.requirements, self.hints,
                                                 jobname)
        j.prov_obj = self.prov_obj

        j.successCodes = self.tool.get("successCodes", [])
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes", [])
        j.permanentFailCodes = self.tool.get("permanentFailCodes", [])

        debug = _logger.isEnabledFor(logging.DEBUG)

        if debug:
            _logger.debug(
                u"[job %s] initializing from %s%s", j.name,
                self.tool.get("id", ""), u" as part of %s" %
                runtimeContext.part_of if runtimeContext.part_of else "")
            _logger.debug(u"[job %s] %s", j.name,
                          json_dumps(job_order, indent=4))

        builder.pathmapper = self.make_path_mapper(reffiles, builder.stagedir,
                                                   runtimeContext, True)
        builder.requirements = j.requirements

        _check_adjust = partial(check_adjust, builder)

        visit_class([builder.files, builder.bindings], ("File", "Directory"),
                    _check_adjust)

        initialWorkdir, _ = self.get_requirement("InitialWorkDirRequirement")
        if initialWorkdir is not None:
            ls = []  # type: List[Dict[Text, Any]]
            if isinstance(initialWorkdir["listing"], string_types):
                ls = builder.do_eval(initialWorkdir["listing"])
            else:
                for t in initialWorkdir["listing"]:
                    if "entry" in t:
                        et = {
                            u"entry":
                            builder.do_eval(t["entry"], strip_whitespace=False)
                        }
                        if "entryname" in t:
                            et["entryname"] = builder.do_eval(t["entryname"])
                        else:
                            et["entryname"] = None
                        et["writable"] = t.get("writable", False)
                        ls.append(et)
                    else:
                        ls.append(builder.do_eval(t))
            for i, t in enumerate(ls):
                if "entry" in t:
                    if isinstance(t["entry"], string_types):
                        ls[i] = {
                            "class": "File",
                            "basename": t["entryname"],
                            "contents": t["entry"],
                            "writable": t.get("writable")
                        }
                    else:
                        if t.get("entryname") or t.get("writable"):
                            t = copy.deepcopy(t)
                            if t.get("entryname"):
                                t["entry"]["basename"] = t["entryname"]
                            t["entry"]["writable"] = t.get("writable")
                        ls[i] = t["entry"]
            j.generatefiles["listing"] = ls
            for l in ls:
                self.updatePathmap(builder.outdir, builder.pathmapper, l)
            visit_class([builder.files, builder.bindings],
                        ("File", "Directory"), _check_adjust)

        if debug:
            _logger.debug(
                u"[job %s] path mappings is %s", j.name,
                json_dumps(
                    {
                        p: builder.pathmapper.mapper(p)
                        for p in builder.pathmapper.files()
                    },
                    indent=4))

        if self.tool.get("stdin"):
            with SourceLine(self.tool, "stdin", validate.ValidationException,
                            debug):
                j.stdin = builder.do_eval(self.tool["stdin"])
                assert j.stdin is not None
                reffiles.append({"class": "File", "path": j.stdin})

        if self.tool.get("stderr"):
            with SourceLine(self.tool, "stderr", validate.ValidationException,
                            debug):
                j.stderr = builder.do_eval(self.tool["stderr"])
                assert j.stderr is not None
                if os.path.isabs(j.stderr) or ".." in j.stderr:
                    raise validate.ValidationException(
                        "stderr must be a relative path, got '%s'" % j.stderr)

        if self.tool.get("stdout"):
            with SourceLine(self.tool, "stdout", validate.ValidationException,
                            debug):
                j.stdout = builder.do_eval(self.tool["stdout"])
                assert j.stdout is not None
                if os.path.isabs(j.stdout) or ".." in j.stdout or not j.stdout:
                    raise validate.ValidationException(
                        "stdout must be a relative path, got '%s'" % j.stdout)

        if debug:
            _logger.debug(u"[job %s] command line bindings is %s", j.name,
                          json_dumps(builder.bindings, indent=4))
        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq is not None and runtimeContext.use_container:
            out_prefix = getdefault(runtimeContext.tmp_outdir_prefix, 'tmp')
            j.outdir = runtimeContext.outdir or \
                tempfile.mkdtemp(prefix=out_prefix)  # type: ignore
            tmpdir_prefix = getdefault(runtimeContext.tmpdir_prefix, 'tmp')
            j.tmpdir = runtimeContext.tmpdir or \
                tempfile.mkdtemp(prefix=tmpdir_prefix)  # type: ignore
            j.stagedir = tempfile.mkdtemp(prefix=tmpdir_prefix)
        else:
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir
            j.stagedir = builder.stagedir

        inplaceUpdateReq, _ = self.get_requirement(
            "http://commonwl.org/cwltool#InplaceUpdateRequirement")

        if inplaceUpdateReq is not None:
            j.inplace_update = inplaceUpdateReq["inplaceUpdate"]
        normalizeFilesDirs(j.generatefiles)

        readers = {}  # type: Dict[Text, Any]
        muts = set()  # type: Set[Text]

        if builder.mutation_manager is not None:

            def register_mut(f):
                muts.add(f["location"])
                builder.mutation_manager.register_mutation(j.name, f)

            def register_reader(f):
                if f["location"] not in muts:
                    builder.mutation_manager.register_reader(j.name, f)
                    readers[f["location"]] = copy.deepcopy(f)

            for li in j.generatefiles["listing"]:
                li = cast(Dict[Text, Any], li)
                if li.get("writable") and j.inplace_update:
                    adjustFileObjs(li, register_mut)
                    adjustDirObjs(li, register_mut)
                else:
                    adjustFileObjs(li, register_reader)
                    adjustDirObjs(li, register_reader)

            adjustFileObjs(builder.files, register_reader)
            adjustFileObjs(builder.bindings, register_reader)
            adjustDirObjs(builder.files, register_reader)
            adjustDirObjs(builder.bindings, register_reader)

        timelimit, _ = self.get_requirement(require_prefix + "TimeLimit")
        if timelimit is not None:
            with SourceLine(timelimit, "timelimit",
                            validate.ValidationException, debug):
                j.timelimit = builder.do_eval(timelimit["timelimit"])
                if not isinstance(j.timelimit, int) or j.timelimit < 0:
                    raise Exception(
                        "timelimit must be an integer >= 0, got: %s" %
                        j.timelimit)

        if self.metadata["cwlVersion"] == "v1.0":
            j.networkaccess = True
        networkaccess, _ = self.get_requirement(require_prefix +
                                                "NetworkAccess")
        if networkaccess is not None:
            with SourceLine(networkaccess, "networkAccess",
                            validate.ValidationException, debug):
                j.networkaccess = builder.do_eval(
                    networkaccess["networkAccess"])
                if not isinstance(j.networkaccess, bool):
                    raise Exception(
                        "networkAccess must be a boolean, got: %s" %
                        j.networkaccess)

        j.environment = {}
        evr, _ = self.get_requirement("EnvVarRequirement")
        if evr is not None:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = builder.do_eval(t["envValue"])

        shellcmd, _ = self.get_requirement("ShellCommandRequirement")
        if shellcmd is not None:
            cmd = []  # type: List[Text]
            for b in builder.bindings:
                arg = builder.generate_arg(b)
                if b.get("shellQuote", True):
                    arg = [shellescape.quote(a) for a in aslist(arg)]
                cmd.extend(aslist(arg))
            j.command_line = ["/bin/sh", "-c", " ".join(cmd)]
        else:
            j.command_line = flatten(
                list(map(builder.generate_arg, builder.bindings)))

        j.pathmapper = builder.pathmapper
        j.collect_outputs = partial(self.collect_output_ports,
                                    self.tool["outputs"],
                                    builder,
                                    compute_checksum=getdefault(
                                        runtimeContext.compute_checksum, True),
                                    jobname=jobname,
                                    readers=readers)
        j.output_callback = output_callbacks

        yield j
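A self-contained sketch of how the cache key above is derived: a dict of the (possibly docker-prefixed) command line plus per-input size and mtime (or checksum) is serialized deterministically and hashed with MD5 (the paths and values are invented; cwltool's json_dumps is replaced by json.dumps here):

import hashlib
import json

keydict = {
    "cmdline": ["docker", "run", "debian:stable", "echo", "hello"],
    "/inputs/reads.fastq": [1048576, 1589000000000],  # [size, mtime_ms] or [size, checksum]
}

keydictstr = json.dumps(keydict, separators=(',', ':'), sort_keys=True)
cachekey = hashlib.md5(keydictstr.encode('utf-8')).hexdigest()
print(cachekey)  # identical inputs give the same key, so cached job output can be reused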
Example #13
    def __init__(self, toolpath_object, **kwargs):
        (_, self.names, _) = get_schema()
        self.tool = toolpath_object
        self.requirements = kwargs.get("requirements", []) + self.tool.get(
            "requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
        if "loader" in kwargs:
            self.formatgraph = kwargs["loader"].graph

        self.validate_hints(self.tool.get("hints", []),
                            strict=kwargs.get("strict"))

        self.schemaDefs = {}

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            sdtypes = sd["types"]
            av = schema_salad.schema.make_valid_avro(
                sdtypes, {t["name"]: t
                          for t in sdtypes}, set())
            for i in av:
                self.schemaDefs[i["name"]] = i
            avro.schema.make_avsc_object(av, self.names)

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": []
        }
        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": []
        }

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                c = copy.copy(i)
                doc_url, _ = urlparse.urldefrag(c['id'])
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise validate.ValidationException(
                        "Missing `type` in parameter `%s`" % c["name"])

                if "default" in c and "null" not in aslist(c["type"]):
                    c["type"] = ["null"] + aslist(c["type"])
                else:
                    c["type"] = c["type"]

                if key == "inputs":
                    self.inputs_record_schema["fields"].append(c)
                elif key == "outputs":
                    self.outputs_record_schema["fields"].append(c)

        try:
            self.inputs_record_schema = schema_salad.schema.make_valid_avro(
                self.inputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException(
                "Got error `%s` while prcoessing inputs of %s:\n%s" %
                (str(e), self.tool["id"],
                 json.dumps(self.inputs_record_schema, indent=4)))

        try:
            self.outputs_record_schema = schema_salad.schema.make_valid_avro(
                self.outputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.outputs_record_schema,
                                         self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException(
                "Got error `%s` while prcoessing outputs of %s:\n%s" %
                (str(e), self.tool["id"],
                 json.dumps(self.outputs_record_schema, indent=4)))
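A short sketch of the "null"-prepending rule above: a parameter that carries a default may be omitted from the job order, so its schema type is widened to also accept null (the field is invented):

def aslist(thing):
    # simplified stand-in for cwltool's aslist()
    return thing if isinstance(thing, list) else [thing]

c = {"name": "threshold", "type": "int", "default": 5}
if "default" in c and "null" not in aslist(c["type"]):
    c["type"] = ["null"] + aslist(c["type"])
print(c["type"])  # ['null', 'int']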
Example #14
    def run(self, fileStore):
        cwljob = resolve_indirect(self.cwljob)

        if isinstance(self.step.tool["scatter"], string_types):
            scatter = [self.step.tool["scatter"]]
        else:
            scatter = self.step.tool["scatter"]

        scatterMethod = self.step.tool.get("scatterMethod", None)
        if len(scatter) == 1:
            scatterMethod = "dotproduct"
        outputs = []

        valueFrom = {
            shortname(i["id"]): i["valueFrom"]
            for i in self.step.tool["inputs"] if "valueFrom" in i
        }

        def postScatterEval(io):
            shortio = {shortname(k): v for k, v in iteritems(io)}
            for k in valueFrom:
                io.setdefault(k, None)

            def valueFromFunc(k, v):
                if k in valueFrom:
                    return cwltool.expression.do_eval(valueFrom[k],
                                                      shortio,
                                                      self.step.requirements,
                                                      None,
                                                      None, {},
                                                      context=v)
                else:
                    return v

            return {k: valueFromFunc(k, v) for k, v in list(io.items())}

        if scatterMethod == "dotproduct":
            for i in range(0, len(cwljob[shortname(scatter[0])])):
                copyjob = copy.copy(cwljob)
                for sc in [shortname(x) for x in scatter]:
                    copyjob[sc] = cwljob[sc][i]
                copyjob = postScatterEval(copyjob)
                (subjob, followOn) = makeJob(self.step.embedded_tool, copyjob,
                                             **self.executor_options)
                self.addChild(subjob)
                outputs.append(followOn.rv())
        elif scatterMethod == "nested_crossproduct":
            outputs = self.nested_crossproduct_scatter(cwljob, scatter,
                                                       postScatterEval)
        elif scatterMethod == "flat_crossproduct":
            self.flat_crossproduct_scatter(cwljob, scatter, outputs,
                                           postScatterEval)
        else:
            if scatterMethod:
                raise validate.ValidationException(
                    "Unsupported complex scatter type '%s'" % scatterMethod)
            else:
                raise validate.ValidationException(
                    "Must provide scatterMethod to scatter over multiple inputs"
                )

        return outputs
Example #15
    def bind_input(
        self,
        schema: MutableMapping[str, Any],
        datum: Any,
        discover_secondaryFiles: bool,
        lead_pos: Optional[Union[int, List[int]]] = None,
        tail_pos: Optional[List[int]] = None,
    ) -> List[MutableMapping[str, Any]]:

        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []

        bindings = []  # type: List[MutableMapping[str, str]]
        binding = {}  # type: Union[MutableMapping[str, str], CommentedMap]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   MutableMapping):
            binding = CommentedMap(schema["inputBinding"].items())

            bp = list(aslist(lead_pos))
            if "position" in binding:
                position = binding["position"]
                if isinstance(position,
                              str):  # no need to test the CWL Version
                    # the schema for v1.0 only allows ints
                    binding["position"] = self.do_eval(position, context=datum)
                    bp.append(binding["position"])
                else:
                    bp.extend(aslist(binding["position"]))
            else:
                bp.append(0)
            bp.extend(aslist(tail_pos))
            binding["position"] = bp

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], MutableSequence):
            bound_input = False
            for t in schema["type"]:
                avsc = None  # type: Optional[Schema]
                if isinstance(t, str) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif (isinstance(t, MutableMapping) and "name" in t
                      and self.names.has_name(t["name"], "")):
                    avsc = self.names.get_name(t["name"], "")
                if not avsc:
                    avsc = make_avsc_object(convert_to_dict(t), self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                    else:
                        self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        )
                        bound_input = True
            if not bound_input:
                raise validate.ValidationException(
                    "'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], MutableMapping):
            st = copy.deepcopy(schema["type"])
            if (binding and "inputBinding" not in st and "type" in st
                    and st["type"] == "array"
                    and "itemSeparator" not in binding):
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles,
                )
            else:
                bindings.extend(
                    self.bind_input(
                        st,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles,
                    ))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum and datum[f["name"]] is not None:
                        bindings.extend(
                            self.bind_input(
                                f,
                                datum[f["name"]],
                                lead_pos=lead_pos,
                                tail_pos=f["name"],
                                discover_secondaryFiles=discover_secondaryFiles,
                            ))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {"type": schema["items"], "inputBinding": b2}
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(
                            itemschema,
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles,
                        ))
                binding = {}

            def _capture_files(f):  # type: (Dict[str, str]) -> Dict[str, str]
                self.files.append(f)
                return f

            if schema["type"] == "File":
                self.files.append(datum)
                if (binding and binding.get("loadContents")
                    ) or schema.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = content_limit_respected_read(f)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if "required" in sf:
                            sf_required = self.do_eval(sf["required"],
                                                       context=datum)
                        else:
                            sf_required = True

                        if "$(" in sf["pattern"] or "${" in sf["pattern"]:
                            sfpath = self.do_eval(sf["pattern"], context=datum)
                        else:
                            sfpath = substitute(datum["basename"],
                                                sf["pattern"])

                        for sfname in aslist(sfpath):
                            if not sfname:
                                continue
                            found = False

                            if isinstance(sfname, str):
                                sf_location = (
                                    datum["location"]
                                    [0:datum["location"].rindex("/") + 1] +
                                    sfname)
                                sfbasename = sfname
                            elif isinstance(sfname, MutableMapping):
                                sf_location = sfname["location"]
                                sfbasename = sfname["basename"]
                            else:
                                raise WorkflowException(
                                    "Expected secondaryFile expression to return type 'str' or 'MutableMapping', received '%s'"
                                    % (type(sfname)))

                            for d in datum["secondaryFiles"]:
                                if not d.get("basename"):
                                    d["basename"] = d["location"][
                                        d["location"].rindex("/") + 1:]
                                if d["basename"] == sfbasename:
                                    found = True

                            if not found:

                                def addsf(
                                    files: MutableSequence[MutableMapping[
                                        str, Any]],
                                    newsf: MutableMapping[str, Any],
                                ) -> None:
                                    for f in files:
                                        if f["location"] == newsf["location"]:
                                            f["basename"] = newsf["basename"]
                                            return
                                    files.append(newsf)

                                if isinstance(sfname, MutableMapping):
                                    addsf(datum["secondaryFiles"], sfname)
                                elif discover_secondaryFiles and self.fs_access.exists(
                                        sf_location):
                                    addsf(
                                        datum["secondaryFiles"],
                                        {
                                            "location": sf_location,
                                            "basename": sfname,
                                            "class": "File",
                                        },
                                    )
                                elif sf_required:
                                    raise WorkflowException(
                                        "Missing required secondary file '%s' from file object: %s"
                                        %
                                        (sfname, json_dumps(datum, indent=4)))

                    normalizeFilesDirs(datum["secondaryFiles"])

                if "format" in schema:
                    try:
                        check_format(datum, self.do_eval(schema["format"]),
                                     self.formatgraph)
                    except validate.ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n "
                            " %s" %
                            (schema["name"], schema["format"], ve)) from ve

                visit_class(
                    datum.get("secondaryFiles", []),
                    ("File", "Directory"),
                    _capture_files,
                )

            if schema["type"] == "Directory":
                ll = schema.get("loadListing") or self.loadListing
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

            if schema["type"] == "Any":
                visit_class(datum, ("File", "Directory"), _capture_files)

        # Prepend this binding's position to the front of each child binding's sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
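
A note on the position handling in the binding code above: each binding's sort key is assembled from the lead position (for example an array index), the declared inputBinding position (defaulting to 0), and the tail position (for example a record field name); once a nested input is fully bound, the parent binding's position is prefixed to every child's key before the parent itself is appended. A minimal sketch of that composition; compose_position is a hypothetical helper, and aslist is assumed to wrap scalars in a list:

def aslist(value):
    # Assumed behaviour: wrap a scalar in a list, pass lists through.
    return value if isinstance(value, list) else [value]

def compose_position(lead_pos, declared, tail_pos):
    # Hypothetical helper mirroring the sort-key construction above:
    # lead key + declared inputBinding position (default 0) + tail key.
    declared = 0 if declared is None else declared
    return aslist(lead_pos) + aslist(declared) + aslist(tail_pos)

# Binding for array item 2, record field "bam", declared position 5:
child = {"position": compose_position(2, 5, "bam")}      # [2, 5, 'bam']
# When the enclosing input finishes binding, its position is prefixed:
parent_position = compose_position([], 1, [])            # [1]
child["position"] = parent_position + child["position"]
print(child["position"])                                  # [1, 2, 5, 'bam']
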
Example #16
    def __init__(self, toolpath_object: MutableMapping[str, Any],
                 loadingContext: LoadingContext) -> None:
        """Build a Process object from the provided dictionary."""
        super(Process, self).__init__()
        self.metadata = getdefault(loadingContext.metadata,
                                   {})  # type: Dict[str,Any]
        self.provenance_object = None  # type: Optional[ProvenanceProfile]
        self.parent_wf = None  # type: Optional[ProvenanceProfile]
        global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY  # pylint: disable=global-statement
        if SCHEMA_FILE is None or SCHEMA_ANY is None or SCHEMA_DIR is None:
            get_schema("v1.0")
            SCHEMA_ANY = cast(
                Dict[str, Any],
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/salad#Any"],
            )
            SCHEMA_FILE = cast(
                Dict[str, Any],
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#File"],
            )
            SCHEMA_DIR = cast(
                Dict[str, Any],
                SCHEMA_CACHE["v1.0"]
                [3].idx["https://w3id.org/cwl/cwl#Directory"],
            )

        self.names = schema.make_avro_schema(
            [SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY], Loader({}))
        self.tool = toolpath_object
        self.requirements = copy.deepcopy(
            getdefault(loadingContext.requirements, []))
        self.requirements.extend(self.tool.get("requirements", []))
        if "id" not in self.tool:
            self.tool["id"] = "_:" + str(uuid.uuid4())
        self.requirements.extend(
            get_overrides(getdefault(loadingContext.overrides_list, []),
                          self.tool["id"]).get("requirements", []))
        self.hints = copy.deepcopy(getdefault(loadingContext.hints, []))
        self.hints.extend(self.tool.get("hints", []))
        # Versions of requirements and hints which aren't mutated.
        self.original_requirements = copy.deepcopy(self.requirements)
        self.original_hints = copy.deepcopy(self.hints)
        self.doc_loader = loadingContext.loader
        self.doc_schema = loadingContext.avsc_names

        self.formatgraph = None  # type: Optional[Graph]
        if self.doc_loader is not None:
            self.formatgraph = self.doc_loader.graph

        checkRequirements(self.tool, supportedProcessRequirements)
        self.validate_hints(
            loadingContext.avsc_names,
            self.tool.get("hints", []),
            strict=getdefault(loadingContext.strict, False),
        )

        self.schemaDefs = {}  # type: Dict[str,Dict[str, Any]]

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd is not None:
            sdtypes = avroize_type(sd["types"])
            av = schema.make_valid_avro(sdtypes,
                                        {t["name"]: t
                                         for t in sdtypes}, set())
            for i in av:
                self.schemaDefs[i["name"]] = i  # type: ignore
            schema.make_avsc_object(schema.convert_to_dict(av), self.names)

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": [],
        }  # type: Dict[str, Any]
        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": [],
        }  # type: Dict[str, Any]

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                c = copy.deepcopy(i)
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise validate.ValidationException(
                        "Missing 'type' in parameter '{}'".format(c["name"]))

                if "default" in c and "null" not in aslist(c["type"]):
                    nullable = ["null"]
                    nullable.extend(aslist(c["type"]))
                    c["type"] = nullable
                c["type"] = avroize_type(c["type"], c["name"])
                if key == "inputs":
                    self.inputs_record_schema["fields"].append(c)
                elif key == "outputs":
                    self.outputs_record_schema["fields"].append(c)

        with SourceLine(toolpath_object, "inputs",
                        validate.ValidationException):
            self.inputs_record_schema = cast(
                Dict[str, Any],
                schema.make_valid_avro(self.inputs_record_schema, {}, set()),
            )
            schema.make_avsc_object(
                schema.convert_to_dict(self.inputs_record_schema), self.names)
        with SourceLine(toolpath_object, "outputs",
                        validate.ValidationException):
            self.outputs_record_schema = cast(
                Dict[str, Any],
                schema.make_valid_avro(self.outputs_record_schema, {}, set()),
            )
            schema.make_avsc_object(
                schema.convert_to_dict(self.outputs_record_schema), self.names)

        if toolpath_object.get("class") is not None and not getdefault(
                loadingContext.disable_js_validation, False):
            if loadingContext.js_hint_options_file is not None:
                try:
                    with open(loadingContext.js_hint_options_file
                              ) as options_file:
                        validate_js_options = json.load(options_file)
                except (OSError, ValueError) as err:
                    _logger.error(
                        "Failed to read options file %s",
                        loadingContext.js_hint_options_file,
                    )
                    raise
            else:
                validate_js_options = None
            if self.doc_schema is not None:
                validate_js_expressions(
                    cast(CommentedMap, toolpath_object),
                    self.doc_schema.names[toolpath_object["class"]],
                    validate_js_options,
                )

        dockerReq, is_req = self.get_requirement("DockerRequirement")

        if (dockerReq is not None and "dockerOutputDirectory" in dockerReq
                and is_req is not None and not is_req):
            _logger.warning(
                SourceLine(item=dockerReq, raise_type=str).makeError(
                    "When 'dockerOutputDirectory' is declared, DockerRequirement "
                    "should go in the 'requirements' section, not 'hints'."
                    ""))

        if (dockerReq is not None and is_req is not None and
                dockerReq.get("dockerOutputDirectory") == "/var/spool/cwl"):
            if is_req:
                # In this specific case, it is legal to have /var/spool/cwl, so skip the check.
                pass
            else:
                # Must be a requirement
                var_spool_cwl_detector(self.tool)
        else:
            var_spool_cwl_detector(self.tool)
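
One detail of the parameter normalisation above is easy to miss: a parameter that carries a "default" but whose type does not already allow "null" has its type widened to a union with "null" before the Avro record schema is built. A minimal sketch of that rule; widen_for_default is a hypothetical helper and aslist is assumed to wrap a scalar type in a list:

def aslist(value):
    # Assumed behaviour: wrap a scalar in a list, pass lists through.
    return value if isinstance(value, list) else [value]

def widen_for_default(param):
    # A parameter with a default value becomes optional: "null" is
    # prepended to its type union, as in the inputs/outputs loop above.
    if "default" in param and "null" not in aslist(param["type"]):
        param["type"] = ["null"] + aslist(param["type"])
    return param

print(widen_for_default({"name": "threads", "type": "int", "default": 4}))
# {'name': 'threads', 'type': ['null', 'int'], 'default': 4}
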
Example #17
    def __init__(
        self,
        toolpath_object,  # type: MutableMapping[Text, Any]
        loadingContext  # type: LoadingContext
    ):  # type: (...) -> None
        """Initializet this Workflow."""
        super(Workflow, self).__init__(toolpath_object, loadingContext)
        self.provenance_object = None  # type: Optional[ProvenanceProfile]
        if loadingContext.research_obj is not None:
            run_uuid = None  # type: Optional[UUID]
            is_master = not loadingContext.prov_obj  # Not yet set
            if is_master:
                run_uuid = loadingContext.research_obj.ro_uuid

            self.provenance_object = ProvenanceProfile(
                loadingContext.research_obj,
                full_name=loadingContext.cwl_full_name,
                host_provenance=loadingContext.host_provenance,
                user_provenance=loadingContext.user_provenance,
                orcid=loadingContext.orcid,
                run_uuid=run_uuid,
                fsaccess=loadingContext.research_obj.fsaccess
            )  # inherit RO UUID for master wf run
            # TODO: Is Workflow(..) only called when we are the master workflow?
            self.parent_wf = self.provenance_object

        # FIXME: Won't this overwrite prov_obj for nested workflows?
        loadingContext.prov_obj = self.provenance_object
        loadingContext = loadingContext.copy()
        loadingContext.requirements = self.requirements
        loadingContext.hints = self.hints

        self.steps = []  # type: List[WorkflowStep]
        validation_errors = []
        for index, step in enumerate(self.tool.get("steps", [])):
            try:
                self.steps.append(
                    self.make_workflow_step(step, index, loadingContext,
                                            loadingContext.prov_obj))
            except validate.ValidationException as vexc:
                if _logger.isEnabledFor(logging.DEBUG):
                    _logger.exception("Validation failed at")
                validation_errors.append(vexc)

        if validation_errors:
            raise validate.ValidationException("\n".join(
                str(v) for v in validation_errors))

        random.shuffle(self.steps)

        # statically validate data links instead of doing it at runtime.
        workflow_inputs = self.tool["inputs"]
        workflow_outputs = self.tool["outputs"]

        step_inputs = []  # type: List[Any]
        step_outputs = []  # type: List[Any]
        param_to_step = {}  # type: Dict[Text, Dict[Text, Any]]
        for step in self.steps:
            step_inputs.extend(step.tool["inputs"])
            step_outputs.extend(step.tool["outputs"])
            for s in step.tool["inputs"]:
                param_to_step[s["id"]] = step.tool

        if getdefault(loadingContext.do_validate, True):
            static_checker(workflow_inputs, workflow_outputs, step_inputs,
                           step_outputs, param_to_step)
Example #18
    def run(self, file_store):
        cwljob = resolve_indirect(self.cwljob)

        # `promises` dict
        # from: each parameter (workflow input or step output)
        #   that may be used as a "source" for a step input workflow output
        #   parameter
        # to: the job that will produce that value.
        promises = {}

        # `jobs` dict from step id to job that implements that step.
        jobs = {}

        for inp in self.cwlwf.tool["inputs"]:
            promises[inp["id"]] = SelfJob(self, cwljob)

        alloutputs_fufilled = False
        while not alloutputs_fufilled:
            # Iteratively go over the workflow steps, scheduling jobs as their
            # dependencies can be fulfilled by upstream workflow inputs or
            # step outputs.  Loop exits when the workflow outputs
            # are satisfied.

            alloutputs_fufilled = True

            for step in self.cwlwf.steps:
                if step.tool["id"] not in jobs:
                    stepinputs_fufilled = True
                    for inp in step.tool["inputs"]:
                        if "source" in inp:
                            for s in aslist(inp["source"]):
                                if s not in promises:
                                    stepinputs_fufilled = False
                    if stepinputs_fufilled:
                        jobobj = {}

                        for inp in step.tool["inputs"]:
                            key = shortname(inp["id"])
                            if "source" in inp:
                                if inp.get("linkMerge") \
                                        or len(aslist(inp["source"])) > 1:
                                    linkMerge = inp.get(
                                        "linkMerge", "merge_nested")
                                    if linkMerge == "merge_nested":
                                        jobobj[key] = (
                                            MergeInputsNested(
                                                [(shortname(s),
                                                  promises[s].rv())
                                                 for s in aslist(
                                                     inp["source"])]))
                                    elif linkMerge == "merge_flattened":
                                        jobobj[key] = (
                                            MergeInputsFlattened(
                                                [(shortname(s),
                                                  promises[s].rv())
                                                 for s in aslist(
                                                      inp["source"])]))
                                    else:
                                        raise validate.ValidationException(
                                            "Unsupported linkMerge '%s'" %
                                            linkMerge)
                                else:
                                    inpSource = inp["source"]
                                    if isinstance(inpSource, MutableSequence):
                                        # It seems that an input source with a
                                        # '#' in the name will be returned as a
                                        # CommentedSeq list by the yaml parser.
                                        inpSource = str(inpSource[0])
                                    jobobj[key] = (shortname(inpSource),
                                                   promises[inpSource].rv())
                            if "default" in inp:
                                if key in jobobj:
                                    if isinstance(jobobj[key][1], Promise):
                                        d = copy.copy(inp["default"])
                                        jobobj[key] = DefaultWithSource(
                                            d, jobobj[key])
                                    else:
                                        if jobobj[key][1][
                                                jobobj[key][0]] is None:
                                            d = copy.copy(inp["default"])
                                            jobobj[key] = (
                                                "default", {"default": d})
                                else:
                                    d = copy.copy(inp["default"])
                                    jobobj[key] = ("default", {"default": d})

                            if "valueFrom" in inp \
                                    and "scatter" not in step.tool:
                                if key in jobobj:
                                    jobobj[key] = StepValueFrom(
                                        inp["valueFrom"], jobobj[key],
                                        self.cwlwf.requirements)
                                else:
                                    jobobj[key] = StepValueFrom(
                                        inp["valueFrom"], (
                                            "None", {"None": None}),
                                        self.cwlwf.requirements)

                        if "scatter" in step.tool:
                            wfjob = CWLScatter(step, IndirectDict(jobobj),
                                               self.runtime_context)
                            followOn = CWLGather(step, wfjob.rv())
                            wfjob.addFollowOn(followOn)
                        else:
                            (wfjob, followOn) = makeJob(
                                step.embedded_tool, IndirectDict(jobobj),
                                step.tool["inputs"],
                                self.runtime_context)

                        jobs[step.tool["id"]] = followOn

                        connected = False
                        for inp in step.tool["inputs"]:
                            for s in aslist(inp.get("source", [])):
                                if (isinstance(
                                        promises[s], (CWLJobWrapper, CWLGather)
                                              ) and
                                        not promises[s].hasFollowOn(wfjob)):
                                    promises[s].addFollowOn(wfjob)
                                    connected = True
                                if (not isinstance(
                                        promises[s], (CWLJobWrapper, CWLGather)
                                                  ) and
                                        not promises[s].hasChild(wfjob)):
                                    promises[s].addChild(wfjob)
                                    connected = True
                        if not connected:
                            # the workflow step has default inputs only & isn't
                            # connected to other jobs, so add it as child of
                            # this workflow.
                            self.addChild(wfjob)

                        for out in step.tool["outputs"]:
                            promises[out["id"]] = followOn

                for inp in step.tool["inputs"]:
                    for source in aslist(inp.get("source", [])):
                        if source not in promises:
                            alloutputs_fufilled = False

            # may need a test
            for out in self.cwlwf.tool["outputs"]:
                if "source" in out:
                    if out["source"] not in promises:
                        alloutputs_fufilled = False

        outobj = {}
        for out in self.cwlwf.tool["outputs"]:
            key = shortname(out["id"])
            if out.get("linkMerge") or len(aslist(out["outputSource"])) > 1:
                link_merge = out.get("linkMerge", "merge_nested")
                if link_merge == "merge_nested":
                    outobj[key] = (
                        MergeInputsNested(
                            [(shortname(s), promises[s].rv())
                             for s in aslist(out["outputSource"])]))
                elif link_merge == "merge_flattened":
                    outobj[key] = (
                        MergeInputsFlattened([
                            (shortname(s), promises[s].rv())
                            for s in aslist(out["source"])]))
                else:
                    raise validate.ValidationException(
                        "Unsupported linkMerge '{}'".format(link_merge))

            else:
                # A CommentedSeq of length one still appears here rarely -
                # not clear why from the CWL code. When it does, it breaks
                # the execution by causing a non-hashable type exception.
                # We simplify the list into its first (and only) element.
                src = simplify_list(out["outputSource"])
                outobj[key] = (shortname(src), promises[src].rv())

        return IndirectDict(outobj)
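
The run() above schedules steps by fixed point: it sweeps the step list repeatedly, turning a step into a job only once every one of its "source" parameters already has a promise, and it stops once no source remains unsatisfied. A toy, self-contained sketch of that loop, with plain strings standing in for Toil jobs and promises:

# Toy steps: the "source" parameters each step consumes and the outputs
# it promises once scheduled.
steps = [
    {"id": "align", "sources": ["wf/reads"], "outputs": ["align/bam"]},
    {"id": "sort",  "sources": ["align/bam"], "outputs": ["sort/bam"]},
]
promises = {"wf/reads": "workflow-input"}   # seeded from workflow inputs
scheduled = {}

all_sources_fulfilled = False
while not all_sources_fulfilled:
    all_sources_fulfilled = True
    for step in steps:
        if step["id"] not in scheduled:
            if all(src in promises for src in step["sources"]):
                # Every dependency is promised, so the step can be scheduled
                # and its outputs become available to downstream steps.
                scheduled[step["id"]] = "job(%s)" % step["id"]
                for out in step["outputs"]:
                    promises[out] = scheduled[step["id"]]
        # Any source still missing forces another sweep.
        if any(src not in promises for src in step["sources"]):
            all_sources_fulfilled = False

print(scheduled)   # {'align': 'job(align)', 'sort': 'job(sort)'}
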
Example #19
    def bind_input(self, schema, datum, lead_pos=None, tail_pos=None):
        # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int]) -> List[Dict[Text, Any]]
        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []
        bindings = []  # type: List[Dict[Text,Text]]
        binding = None  # type: Dict[Text,Any]
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   dict):
            binding = copy.copy(schema["inputBinding"])

            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(
                    binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            binding["datum"] = datum

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                if isinstance(t, (str, Text)) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t,
                                dict) and "name" in t and self.names.has_name(
                                    t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema,
                                           datum,
                                           lead_pos=lead_pos,
                                           tail_pos=tail_pos)
            raise validate.ValidationException(
                u"'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            st = copy.deepcopy(schema["type"])
            if binding and "inputBinding" not in st and st[
                    "type"] == "array" and "itemSeparator" not in binding:
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            bindings.extend(
                self.bind_input(st,
                                datum,
                                lead_pos=lead_pos,
                                tail_pos=tail_pos))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        bindings.extend(
                            self.bind_input(f,
                                            datum[f["name"]],
                                            lead_pos=lead_pos,
                                            tail_pos=f["name"]))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {
                        u"type": schema["items"],
                        u"inputBinding": b2
                    }
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(itemschema,
                                        item,
                                        lead_pos=n,
                                        tail_pos=tail_pos))
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding:
                    if binding.get("loadContents"):
                        with self.fs_access.open(datum["location"], "rb") as f:
                            datum["contents"] = f.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            secondary_eval = self.do_eval(sf, context=datum)
                            if isinstance(secondary_eval, string_types):
                                sfpath = {
                                    "location": secondary_eval,
                                    "class": "File"
                                }
                            else:
                                sfpath = secondary_eval
                        else:
                            sfpath = {
                                "location": substitute(datum["location"], sf),
                                "class": "File"
                            }
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                    normalizeFilesDirs(datum["secondaryFiles"])

                def _capture_files(f):
                    self.files.append(f)
                    return f

                visit_class(datum.get("secondaryFiles", []),
                            ("File", "Directory"), _capture_files)

            if schema["type"] == "Directory":
                ll = self.loadListing or (binding
                                          and binding.get("loadListing"))
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

        # Prepend this binding's position to the front of each child binding's sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
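
For union types, bind_input above validates the datum against each member schema in turn and re-binds with the first member that validates, raising a ValidationException when none does. A toy illustration of that dispatch, with simple Python type checks standing in for Avro schema validation:

def bind_union(members, datum):
    # Try each union member in declaration order; the first member the
    # datum validates against wins (crude type checks replace Avro here).
    checks = {
        "null": lambda d: d is None,
        "int": lambda d: isinstance(d, int) and not isinstance(d, bool),
        "string": lambda d: isinstance(d, str),
    }
    for member in members:
        if checks.get(member, lambda d: False)(datum):
            return member
    raise ValueError("%r is not a valid union %s" % (datum, members))

print(bind_union(["null", "int"], 42))        # int
print(bind_union(["null", "string"], None))   # null
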
Example #20
    def bind_input(self,
                   schema,
                   datum,
                   lead_pos=None,
                   tail_pos=None,
                   discover_secondaryFiles=False):
        # type: (Dict[Text, Any], Any, Union[int, List[int]], List[int], bool) -> List[Dict[Text, Any]]
        if tail_pos is None:
            tail_pos = []
        if lead_pos is None:
            lead_pos = []
        bindings = []  # type: List[Dict[Text,Text]]
        binding = None  # type: Dict[Text,Any]
        value_from_expression = False
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   dict):
            binding = copy.copy(schema["inputBinding"])

            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(
                    binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            binding["datum"] = datum
            if "valueFrom" in binding:
                value_from_expression = True

        # Handle union types
        if isinstance(schema["type"], list):
            bound_input = False
            for t in schema["type"]:
                if isinstance(t, (str, Text)) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t,
                                dict) and "name" in t and self.names.has_name(
                                    t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = AvroSchemaFromJSONData(t, self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    if not value_from_expression:
                        return self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles)
                    else:
                        self.bind_input(
                            schema,
                            datum,
                            lead_pos=lead_pos,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles)
                        bound_input = True
            if not bound_input:
                raise validate.ValidationException(
                    u"'%s' is not a valid union %s" % (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            st = copy.deepcopy(schema["type"])
            if binding and "inputBinding" not in st and st[
                    "type"] == "array" and "itemSeparator" not in binding:
                st["inputBinding"] = {}
            for k in ("secondaryFiles", "format", "streamable"):
                if k in schema:
                    st[k] = schema[k]
            if value_from_expression:
                self.bind_input(
                    st,
                    datum,
                    lead_pos=lead_pos,
                    tail_pos=tail_pos,
                    discover_secondaryFiles=discover_secondaryFiles)
            else:
                bindings.extend(
                    self.bind_input(
                        st,
                        datum,
                        lead_pos=lead_pos,
                        tail_pos=tail_pos,
                        discover_secondaryFiles=discover_secondaryFiles))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        bindings.extend(
                            self.bind_input(
                                f,
                                datum[f["name"]],
                                lead_pos=lead_pos,
                                tail_pos=f["name"],
                                discover_secondaryFiles=discover_secondaryFiles
                            ))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["datum"] = item
                    itemschema = {
                        u"type": schema["items"],
                        u"inputBinding": b2
                    }
                    for k in ("secondaryFiles", "format", "streamable"):
                        if k in schema:
                            itemschema[k] = schema[k]
                    bindings.extend(
                        self.bind_input(
                            itemschema,
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos,
                            discover_secondaryFiles=discover_secondaryFiles))
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if (binding and binding.get("loadContents")
                    ) or schema.get("loadContents"):
                    with self.fs_access.open(datum["location"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            sfpath = self.do_eval(sf, context=datum)
                        else:
                            sfpath = substitute(datum["basename"], sf)
                        for sfname in aslist(sfpath):
                            found = False
                            for d in datum["secondaryFiles"]:
                                if not d.get("basename"):
                                    d["basename"] = d["location"][
                                        d["location"].rindex("/") + 1:]
                                if d["basename"] == sfname:
                                    found = True
                            if not found:
                                if isinstance(sfname, dict):
                                    datum["secondaryFiles"].append(sfname)
                                elif discover_secondaryFiles:
                                    datum["secondaryFiles"].append({
                                        "location":
                                        datum["location"]
                                        [0:datum["location"].rindex("/") + 1] +
                                        sfname,
                                        "basename":
                                        sfname,
                                        "class":
                                        "File"
                                    })
                                else:
                                    raise WorkflowException(
                                        "Missing required secondary file '%s' from file object: %s"
                                        %
                                        (sfname, json.dumps(datum, indent=4)))

                    normalizeFilesDirs(datum["secondaryFiles"])

                if "format" in schema:
                    try:
                        checkFormat(datum, self.do_eval(schema["format"]),
                                    self.formatgraph)
                    except validate.ValidationException as ve:
                        raise WorkflowException(
                            "Expected value of '%s' to have format %s but\n  %s"
                            % (schema["name"], schema["format"], ve))

                def _capture_files(f):
                    self.files.append(f)
                    return f

                visit_class(datum.get("secondaryFiles", []),
                            ("File", "Directory"), _capture_files)

            if schema["type"] == "Directory":
                ll = self.loadListing or (binding
                                          and binding.get("loadListing"))
                if ll and ll != "no_listing":
                    get_listing(self.fs_access, datum, (ll == "deep_listing"))
                self.files.append(datum)

        # Prepend this binding's position to the front of each child binding's sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
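
The secondaryFiles handling in both bind_input variants leans on substitute() to derive a sibling file name from a pattern: a plain suffix such as ".bai" is appended to the basename or location, while each leading "^" strips one extension first. A sketch of that conventional behaviour, reconstructed here as an assumption rather than copied from the library:

def substitute(value, replace):
    # Assumed behaviour: "^" strips one extension from the value before
    # the remainder of the pattern is applied; otherwise the pattern is
    # simply appended.
    if replace.startswith("^"):
        try:
            return substitute(value[:value.rindex(".")], replace[1:])
        except ValueError:
            # Nothing left to strip; append what remains of the pattern.
            return value + replace.lstrip("^")
    return value + replace

print(substitute("reads.bam", ".bai"))    # reads.bam.bai
print(substitute("reads.bam", "^.bai"))   # reads.bai
print(substitute("ref.fa", "^.dict"))     # ref.dict
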
Example #21
def static_checker(workflow_inputs, workflow_outputs, step_inputs,
                   step_outputs, param_to_step):
    # type: (List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], Dict[Text, Dict[Text, Any]]) -> None
    """Check if all source and sink types of a workflow are compatible before run time."""
    # source parameters: workflow_inputs and step_outputs
    # sink parameters: step_inputs and workflow_outputs

    # make a dictionary of source parameters, indexed by the "id" field
    src_parms = workflow_inputs + step_outputs
    src_dict = {}
    for parm in src_parms:
        src_dict[parm["id"]] = parm

    step_inputs_val = check_all_types(src_dict, step_inputs, "source")
    workflow_outputs_val = check_all_types(src_dict, workflow_outputs,
                                           "outputSource")

    warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"]
    exceptions = step_inputs_val["exception"] + workflow_outputs_val[
        "exception"]

    warning_msgs = []
    exception_msgs = []
    for warning in warnings:
        src = warning.src
        sink = warning.sink
        linkMerge = warning.linkMerge
        sinksf = sorted([
            p["pattern"] for p in sink.get("secondaryFiles", [])
            if p.get("required", True)
        ])
        srcsf = sorted([p["pattern"] for p in src.get("secondaryFiles", [])])
        # Every secondaryFile required by the sink, should be declared
        # by the source
        missing = missing_subset(srcsf, sinksf)
        if missing:
            msg1 = "Parameter '%s' requires secondaryFiles %s but" % (
                shortname(sink["id"]), missing)
            msg3 = SourceLine(src, "id").makeError(
                "source '%s' does not provide those secondaryFiles." %
                (shortname(src["id"])))
            msg4 = SourceLine(
                src.get("_tool_entry", src), "secondaryFiles"
            ).makeError(
                "To resolve, add missing secondaryFiles patterns to definition of '%s' or"
                % (shortname(src["id"])))
            msg5 = SourceLine(
                sink.get("_tool_entry", sink), "secondaryFiles"
            ).makeError(
                "mark missing secondaryFiles in definition of '%s' as optional."
                % shortname(sink["id"]))
            msg = SourceLine(sink).makeError(
                "%s\n%s" % (msg1, bullets([msg3, msg4, msg5], "  ")))
        elif sink.get("not_connected"):
            msg = SourceLine(sink, "type").makeError(
                "'%s' is not an input parameter of %s, expected %s" %
                (shortname(
                    sink["id"]), param_to_step[sink["id"]]["run"], ", ".join(
                        shortname(s["id"])
                        for s in param_to_step[sink["id"]]["inputs"]
                        if not s.get("not_connected"))))
        else:
            msg = SourceLine(src, "type").makeError(
                "Source '%s' of type %s may be incompatible"
                % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
                SourceLine(sink, "type").makeError(
                    "  with sink '%s' of type %s"
                    % (shortname(sink["id"]), json_dumps(sink["type"])))
            if linkMerge is not None:
                msg += "\n" + SourceLine(sink).makeError(
                    "  source has linkMerge method %s" % linkMerge)

        warning_msgs.append(msg)
    for exception in exceptions:
        src = exception.src
        sink = exception.sink
        linkMerge = exception.linkMerge
        msg = SourceLine(src, "type").makeError(
            "Source '%s' of type %s is incompatible"
            % (shortname(src["id"]), json_dumps(src["type"]))) + "\n" + \
            SourceLine(sink, "type").makeError(
                "  with sink '%s' of type %s"
                % (shortname(sink["id"]), json_dumps(sink["type"])))
        if linkMerge is not None:
            msg += "\n" + SourceLine(sink).makeError(
                "  source has linkMerge method %s" % linkMerge)
        exception_msgs.append(msg)

    for sink in step_inputs:
        if ('null' != sink["type"] and 'null' not in sink["type"]
                and "source" not in sink and "default" not in sink
                and "valueFrom" not in sink):
            msg = SourceLine(sink).makeError(
                "Required parameter '%s' does not have source, default, or valueFrom expression"
                % shortname(sink["id"]))
            exception_msgs.append(msg)

    all_warning_msg = strip_dup_lineno("\n".join(warning_msgs))
    all_exception_msg = strip_dup_lineno("\n".join(exception_msgs))

    if warnings:
        _logger.warning("Workflow checker warning:\n%s", all_warning_msg)
    if exceptions:
        raise validate.ValidationException(all_exception_msg)
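
The secondaryFiles warning above hinges on missing_subset(), which reports the patterns a sink requires but its source does not declare. A minimal sketch of that check; the helper below is a hypothetical stand-in, not the library implementation:

def missing_subset(fullset, subset):
    # Return the members of `subset` (sink-required patterns) that are
    # absent from `fullset` (source-declared patterns).
    return [item for item in subset if item not in fullset]

src_patterns = [".bai"]
sink_patterns = [".bai", ".crai"]
print(missing_subset(src_patterns, sink_patterns))   # ['.crai']
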
Example #22
    def job(
            self,
            job_order,  # type: Mapping[str, str]
            output_callbacks,  # type: Callable[[Any, Any], Any]
            runtimeContext,  # type: RuntimeContext
    ):
        # type: (...) -> Generator[Union[JobBase, CallbackJob], None, None]

        workReuse, _ = self.get_requirement("WorkReuse")
        enableReuse = workReuse.get("enableReuse", True) if workReuse else True

        jobname = uniquename(runtimeContext.name
                             or shortname(self.tool.get("id", "job")))
        if runtimeContext.cachedir and enableReuse:
            cachecontext = runtimeContext.copy()
            cachecontext.outdir = "/out"
            cachecontext.tmpdir = "/tmp"  # nosec
            cachecontext.stagedir = "/stage"
            cachebuilder = self._init_job(job_order, cachecontext)
            cachebuilder.pathmapper = PathMapper(
                cachebuilder.files,
                runtimeContext.basedir,
                cachebuilder.stagedir,
                separateDirs=False,
            )
            _check_adjust = partial(check_adjust, cachebuilder)
            visit_class(
                [cachebuilder.files, cachebuilder.bindings],
                ("File", "Directory"),
                _check_adjust,
            )

            cmdline = flatten(
                list(map(cachebuilder.generate_arg, cachebuilder.bindings)))
            docker_req, _ = self.get_requirement("DockerRequirement")
            if docker_req is not None and runtimeContext.use_container:
                dockerimg = docker_req.get("dockerImageId") or docker_req.get(
                    "dockerPull")
            elif (runtimeContext.default_container is not None
                  and runtimeContext.use_container):
                dockerimg = runtimeContext.default_container
            else:
                dockerimg = None

            if dockerimg is not None:
                cmdline = ["docker", "run", dockerimg] + cmdline
                # not really run using docker, just for hashing purposes

            keydict = {
                "cmdline": cmdline
            }  # type: Dict[str, Union[Dict[str, Any], List[Any]]]

            for shortcut in ["stdin", "stdout", "stderr"]:
                if shortcut in self.tool:
                    keydict[shortcut] = self.tool[shortcut]

            for location, fobj in cachebuilder.pathmapper.items():
                if fobj.type == "File":
                    checksum = next(
                        (e["checksum"] for e in cachebuilder.files
                         if "location" in e and e["location"] == location
                         and "checksum" in e and e["checksum"] != "sha1$hash"),
                        None,
                    )
                    fobj_stat = os.stat(fobj.resolved)
                    if checksum is not None:
                        keydict[fobj.resolved] = [fobj_stat.st_size, checksum]
                    else:
                        keydict[fobj.resolved] = [
                            fobj_stat.st_size,
                            int(fobj_stat.st_mtime * 1000),
                        ]

            interesting = {
                "DockerRequirement",
                "EnvVarRequirement",
                "InitialWorkDirRequirement",
                "ShellCommandRequirement",
                "NetworkAccess",
            }
            for rh in (self.original_requirements, self.original_hints):
                for r in reversed(rh):
                    if r["class"] in interesting and r["class"] not in keydict:
                        keydict[r["class"]] = r

            keydictstr = json_dumps(keydict,
                                    separators=(",", ":"),
                                    sort_keys=True)
            cachekey = hashlib.md5(
                keydictstr.encode("utf-8")).hexdigest()  # nosec

            _logger.debug("[job %s] keydictstr is %s -> %s", jobname,
                          keydictstr, cachekey)

            jobcache = os.path.join(runtimeContext.cachedir, cachekey)

            # Create a lockfile to manage cache status.
            jobcachepending = "{}.status".format(jobcache)
            jobcachelock = None
            jobstatus = None

            # Opens the file for read/write, or creates an empty file.
            jobcachelock = open(jobcachepending, "a+")

            # get the shared lock to ensure no other process is trying
            # to write to this cache
            shared_file_lock(jobcachelock)
            jobcachelock.seek(0)
            jobstatus = jobcachelock.read()

            if os.path.isdir(jobcache) and jobstatus == "success":
                if docker_req and runtimeContext.use_container:
                    cachebuilder.outdir = (runtimeContext.docker_outdir
                                           or random_outdir())
                else:
                    cachebuilder.outdir = jobcache

                _logger.info("[job %s] Using cached output in %s", jobname,
                             jobcache)
                yield CallbackJob(self, output_callbacks, cachebuilder,
                                  jobcache)
                # we're done with the cache so release lock
                jobcachelock.close()
                return
            else:
                _logger.info("[job %s] Output of job will be cached in %s",
                             jobname, jobcache)

                # turn shared lock into an exclusive lock since we'll
                # be writing the cache directory
                upgrade_lock(jobcachelock)

                shutil.rmtree(jobcache, True)
                os.makedirs(jobcache)
                runtimeContext = runtimeContext.copy()
                runtimeContext.outdir = jobcache

                def update_status_output_callback(
                    output_callbacks: Callable[[List[Dict[str, Any]], str],
                                               None],
                    jobcachelock: IO[Any],
                    outputs: List[Dict[str, Any]],
                    processStatus: str,
                ) -> None:
                    # save status to the lockfile then release the lock
                    jobcachelock.seek(0)
                    jobcachelock.truncate()
                    jobcachelock.write(processStatus)
                    jobcachelock.close()
                    output_callbacks(outputs, processStatus)

                output_callbacks = partial(update_status_output_callback,
                                           output_callbacks, jobcachelock)

        builder = self._init_job(job_order, runtimeContext)

        reffiles = copy.deepcopy(builder.files)

        j = self.make_job_runner(runtimeContext)(
            builder,
            builder.job,
            self.make_path_mapper,
            self.requirements,
            self.hints,
            jobname,
        )
        j.prov_obj = self.prov_obj

        j.successCodes = self.tool.get("successCodes", [])
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes", [])
        j.permanentFailCodes = self.tool.get("permanentFailCodes", [])

        debug = _logger.isEnabledFor(logging.DEBUG)

        if debug:
            _logger.debug(
                "[job %s] initializing from %s%s",
                j.name,
                self.tool.get("id", ""),
                " as part of %s" %
                runtimeContext.part_of if runtimeContext.part_of else "",
            )
            _logger.debug("[job %s] %s", j.name,
                          json_dumps(builder.job, indent=4))

        builder.pathmapper = self.make_path_mapper(reffiles, builder.stagedir,
                                                   runtimeContext, True)
        builder.requirements = j.requirements

        _check_adjust = partial(check_adjust, builder)

        visit_class([builder.files, builder.bindings], ("File", "Directory"),
                    _check_adjust)

        initialWorkdir, _ = self.get_requirement("InitialWorkDirRequirement")
        if initialWorkdir is not None:
            ls = []  # type: List[Dict[str, Any]]
            if isinstance(initialWorkdir["listing"], str):
                ls = builder.do_eval(initialWorkdir["listing"])
            else:
                for t in initialWorkdir["listing"]:
                    if isinstance(t, Mapping) and "entry" in t:
                        entry_exp = builder.do_eval(t["entry"],
                                                    strip_whitespace=False)
                        for entry in aslist(entry_exp):
                            et = {"entry": entry}
                            if "entryname" in t:
                                et["entryname"] = builder.do_eval(
                                    t["entryname"])
                            else:
                                et["entryname"] = None
                            et["writable"] = t.get("writable", False)
                            if et["entry"] is not None:
                                ls.append(et)
                    else:
                        initwd_item = builder.do_eval(t)
                        if not initwd_item:
                            continue
                        if isinstance(initwd_item, MutableSequence):
                            ls.extend(initwd_item)
                        else:
                            ls.append(initwd_item)
            for i, t in enumerate(ls):
                if "entry" in t:
                    if isinstance(t["entry"], str):
                        ls[i] = {
                            "class": "File",
                            "basename": t["entryname"],
                            "contents": t["entry"],
                            "writable": t.get("writable"),
                        }
                    else:
                        if t.get("entryname") or t.get("writable"):
                            t = copy.deepcopy(t)
                            if t.get("entryname"):
                                t["entry"]["basename"] = t["entryname"]
                            t["entry"]["writable"] = t.get("writable")
                        ls[i] = t["entry"]
            j.generatefiles["listing"] = ls
            for l in ls:
                self.updatePathmap(builder.outdir, builder.pathmapper, l)
            visit_class([builder.files, builder.bindings],
                        ("File", "Directory"), _check_adjust)

        if debug:
            _logger.debug(
                "[job %s] path mappings is %s",
                j.name,
                json_dumps(
                    {
                        p: builder.pathmapper.mapper(p)
                        for p in builder.pathmapper.files()
                    },
                    indent=4,
                ),
            )

        if self.tool.get("stdin"):
            with SourceLine(self.tool, "stdin", validate.ValidationException,
                            debug):
                j.stdin = builder.do_eval(self.tool["stdin"])
                if j.stdin:
                    reffiles.append({"class": "File", "path": j.stdin})

        if self.tool.get("stderr"):
            with SourceLine(self.tool, "stderr", validate.ValidationException,
                            debug):
                j.stderr = builder.do_eval(self.tool["stderr"])
                if j.stderr:
                    if os.path.isabs(j.stderr) or ".." in j.stderr:
                        raise validate.ValidationException(
                            "stderr must be a relative path, got '%s'" %
                            j.stderr)

        if self.tool.get("stdout"):
            with SourceLine(self.tool, "stdout", validate.ValidationException,
                            debug):
                j.stdout = builder.do_eval(self.tool["stdout"])
                if j.stdout:
                    if os.path.isabs(
                            j.stdout) or ".." in j.stdout or not j.stdout:
                        raise validate.ValidationException(
                            "stdout must be a relative path, got '%s'" %
                            j.stdout)

        if debug:
            _logger.debug(
                "[job %s] command line bindings is %s",
                j.name,
                json_dumps(builder.bindings, indent=4),
            )
        dockerReq, _ = self.get_requirement("DockerRequirement")
        if dockerReq is not None and runtimeContext.use_container:
            out_dir, out_prefix = os.path.split(
                runtimeContext.tmp_outdir_prefix)
            j.outdir = runtimeContext.outdir or tempfile.mkdtemp(
                prefix=out_prefix, dir=out_dir)
            tmpdir_dir, tmpdir_prefix = os.path.split(
                runtimeContext.tmpdir_prefix)
            j.tmpdir = runtimeContext.tmpdir or tempfile.mkdtemp(
                prefix=tmpdir_prefix, dir=tmpdir_dir)
            j.stagedir = tempfile.mkdtemp(prefix=tmpdir_prefix, dir=tmpdir_dir)
        else:
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir
            j.stagedir = builder.stagedir

        inplaceUpdateReq, _ = self.get_requirement("InplaceUpdateRequirement")
        if inplaceUpdateReq is not None:
            j.inplace_update = inplaceUpdateReq["inplaceUpdate"]
        normalizeFilesDirs(j.generatefiles)

        readers = {}  # type: Dict[str, Any]
        muts = set()  # type: Set[str]

        if builder.mutation_manager is not None:

            def register_mut(f):  # type: (Dict[str, Any]) -> None
                mm = cast(MutationManager, builder.mutation_manager)
                muts.add(f["location"])
                mm.register_mutation(j.name, f)

            def register_reader(f):  # type: (Dict[str, Any]) -> None
                mm = cast(MutationManager, builder.mutation_manager)
                if f["location"] not in muts:
                    mm.register_reader(j.name, f)
                    readers[f["location"]] = copy.deepcopy(f)

            for li in j.generatefiles["listing"]:
                li = cast(Dict[str, Any], li)
                if li.get("writable") and j.inplace_update:
                    adjustFileObjs(li, register_mut)
                    adjustDirObjs(li, register_mut)
                else:
                    adjustFileObjs(li, register_reader)
                    adjustDirObjs(li, register_reader)

            adjustFileObjs(builder.files, register_reader)
            adjustFileObjs(builder.bindings, register_reader)
            adjustDirObjs(builder.files, register_reader)
            adjustDirObjs(builder.bindings, register_reader)

        timelimit, _ = self.get_requirement("ToolTimeLimit")
        if timelimit is not None:
            with SourceLine(timelimit, "timelimit",
                            validate.ValidationException, debug):
                j.timelimit = builder.do_eval(timelimit["timelimit"])
                if not isinstance(j.timelimit, int) or j.timelimit < 0:
                    raise Exception(
                        "timelimit must be an integer >= 0, got: %s" %
                        j.timelimit)

        networkaccess, _ = self.get_requirement("NetworkAccess")
        if networkaccess is not None:
            with SourceLine(networkaccess, "networkAccess",
                            validate.ValidationException, debug):
                j.networkaccess = builder.do_eval(
                    networkaccess["networkAccess"])
                if not isinstance(j.networkaccess, bool):
                    raise Exception(
                        "networkAccess must be a boolean, got: %s" %
                        j.networkaccess)

        j.environment = {}
        evr, _ = self.get_requirement("EnvVarRequirement")
        if evr is not None:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = builder.do_eval(t["envValue"])

        shellcmd, _ = self.get_requirement("ShellCommandRequirement")
        if shellcmd is not None:
            cmd = []  # type: List[str]
            for b in builder.bindings:
                arg = builder.generate_arg(b)
                if b.get("shellQuote", True):
                    arg = [shellescape.quote(a) for a in aslist(arg)]
                cmd.extend(aslist(arg))
            j.command_line = ["/bin/sh", "-c", " ".join(cmd)]
        else:
            j.command_line = flatten(
                list(map(builder.generate_arg, builder.bindings)))

        j.pathmapper = builder.pathmapper
        j.collect_outputs = partial(
            self.collect_output_ports,
            self.tool["outputs"],
            builder,
            compute_checksum=getdefault(runtimeContext.compute_checksum, True),
            jobname=jobname,
            readers=readers,
        )
        j.output_callback = output_callbacks

        yield j
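
The ShellCommandRequirement branch above joins the quoted bindings into a single /bin/sh -c string. Below is a minimal, self-contained sketch of that joining step, assuming shlex.quote from the standard library as a stand-in for shellescape.quote and a toy generate_arg() that simply returns a precomputed argument list; neither stands for the real Builder API.

import shlex


def aslist(value):
    # Mirror the aslist() helper used above: wrap scalars, pass lists through.
    return value if isinstance(value, list) else [value]


def build_shell_command(bindings, generate_arg):
    # Quote each generated token unless the binding opts out via shellQuote,
    # then hand the joined string to /bin/sh -c, as in the branch above.
    cmd = []
    for b in bindings:
        arg = generate_arg(b)
        if b.get("shellQuote", True):
            arg = [shlex.quote(a) for a in aslist(arg)]
        cmd.extend(aslist(arg))
    return ["/bin/sh", "-c", " ".join(cmd)]


bindings = [
    {"args": ["grep", "foo bar"], "shellQuote": True},
    {"args": ["| wc -l"], "shellQuote": False},
]
print(build_shell_command(bindings, lambda b: b["args"]))
# ['/bin/sh', '-c', "grep 'foo bar' | wc -l"]
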
Example #23
0
    def run(self, fileStore):
        cwljob = resolve_indirect(self.cwljob)

        # `promises` dict
        # from: each parameter (workflow input or step output)
        #   that may be used as a "source" for a step input or workflow
        #   output parameter
        # to: the job that will produce that value.
        promises = {}

        # `jobs` dict from step id to job that implements that step.
        jobs = {}

        for inp in self.cwlwf.tool["inputs"]:
            promises[inp["id"]] = SelfJob(self, cwljob)

        alloutputs_fufilled = False
        while not alloutputs_fufilled:
            # Iteratively go over the workflow steps, scheduling jobs as their
            # dependencies can be fulfilled by upstream workflow inputs or
            # step outputs.  Loop exits when the workflow outputs
            # are satisfied.

            alloutputs_fufilled = True

            for step in self.cwlwf.steps:
                if step.tool["id"] not in jobs:
                    stepinputs_fufilled = True
                    for inp in step.tool["inputs"]:
                        if "source" in inp:
                            for s in aslist(inp["source"]):
                                if s not in promises:
                                    stepinputs_fufilled = False
                    if stepinputs_fufilled:
                        jobobj = {}

                        for inp in step.tool["inputs"]:
                            key = shortname(inp["id"])
                            if "source" in inp:
                                if inp.get("linkMerge") or len(aslist(inp["source"])) > 1:
                                    linkMerge = inp.get("linkMerge", "merge_nested")
                                    if linkMerge == "merge_nested":
                                        jobobj[key] = (
                                            MergeInputsNested([(shortname(s), promises[s].rv())
                                                               for s in aslist(inp["source"])]))
                                    elif linkMerge == "merge_flattened":
                                        jobobj[key] = (
                                            MergeInputsFlattened([(shortname(s), promises[s].rv())
                                                                  for s in aslist(inp["source"])]))
                                    else:
                                        raise validate.ValidationException(
                                            "Unsupported linkMerge '%s'" % linkMerge)
                                else:
                                    jobobj[key] = (shortname(inp["source"]),
                                                   promises[inp["source"]].rv())
                            elif "default" in inp:
                                d = copy.copy(inp["default"])
                                jobobj[key] = ("default", {"default": d})

                            if "valueFrom" in inp and "scatter" not in step.tool:
                                if key in jobobj:
                                    jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                                jobobj[key],
                                                                self.cwlwf.requirements)
                                else:
                                    jobobj[key] = StepValueFrom(inp["valueFrom"],
                                                                ("None", {"None": None}),
                                                                self.cwlwf.requirements)

                        if "scatter" in step.tool:
                            wfjob = CWLScatter(step, IndirectDict(jobobj), **self.executor_options)
                            followOn = CWLGather(step, wfjob.rv())
                            wfjob.addFollowOn(followOn)
                        else:
                            (wfjob, followOn) = makeJob(step.embedded_tool, IndirectDict(jobobj),
                                                        step_inputs=step.tool["inputs"],
                                                        **self.executor_options)

                        jobs[step.tool["id"]] = followOn

                        connected = False
                        for inp in step.tool["inputs"]:
                            for s in aslist(inp.get("source", [])):
                                if not promises[s].hasChild(wfjob):
                                    promises[s].addChild(wfjob)
                                    connected = True
                        if not connected:
                            # workflow step has only default inputs and isn't connected to other jobs,
                            # so add it as a child of the workflow.
                            self.addChild(wfjob)

                        for out in step.tool["outputs"]:
                            promises[out["id"]] = followOn

                for inp in step.tool["inputs"]:
                    for s in aslist(inp.get("source", [])):
                        if s not in promises:
                            alloutputs_fufilled = False

            # may need a test
            for out in self.cwlwf.tool["outputs"]:
                if "source" in out:
                    if out["source"] not in promises:
                        alloutputs_fufilled = False

        outobj = {}
        for out in self.cwlwf.tool["outputs"]:
            outobj[shortname(out["id"])] = (shortname(out["outputSource"]), promises[out["outputSource"]].rv())

        return IndirectDict(outobj)
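
The while loop above keeps sweeping over the workflow steps until every step whose "source" inputs can be resolved has been scheduled. The toy loop below illustrates that promise-driven scheduling with plain dictionaries; SelfJob, Toil promises, and linkMerge handling are all left out, the step and field names are invented for the illustration, and the sweep exits when a pass makes no progress, which is a simplification of the alloutputs_fufilled bookkeeping above.

def schedule(steps, workflow_inputs):
    # Promises map a source id to whatever will produce its value; workflow
    # inputs are available immediately, step outputs appear once the step is
    # scheduled. Sweeping stops when a pass schedules nothing new.
    promises = {src: "workflow input" for src in workflow_inputs}
    scheduled = {}
    progress = True
    while progress:
        progress = False
        for step in steps:
            if step["id"] in scheduled:
                continue
            if all(src in promises for src in step["sources"]):
                scheduled[step["id"]] = step
                for out in step["outputs"]:
                    promises[out] = "output of " + step["id"]
                progress = True
    return list(scheduled)


steps = [
    {"id": "b", "sources": ["a/out"], "outputs": ["b/out"]},
    {"id": "a", "sources": ["wf/in"], "outputs": ["a/out"]},
]
print(schedule(steps, ["wf/in"]))  # ['a', 'b'] -- 'b' waits for 'a'
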
Example #24
0
def static_checker(workflow_inputs, workflow_outputs, step_inputs,
                   step_outputs):
    # type: (List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]], List[Dict[Text, Any]]) -> None
    """Check if all source and sink types of a workflow are compatible before run time.
    """

    # source parameters: workflow_inputs and step_outputs
    # sink parameters: step_inputs and workflow_outputs

    # make a dictionary of source parameters, indexed by the "id" field
    src_parms = workflow_inputs + step_outputs
    src_dict = {}
    for parm in src_parms:
        src_dict[parm["id"]] = parm

    step_inputs_val = check_all_types(src_dict, step_inputs, "source")
    workflow_outputs_val = check_all_types(src_dict, workflow_outputs,
                                           "outputSource")

    warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"]
    exceptions = step_inputs_val["exception"] + workflow_outputs_val[
        "exception"]

    warning_msgs = []
    exception_msgs = []
    for warning in warnings:
        src = warning.src
        sink = warning.sink
        linkMerge = warning.linkMerge
        msg = SourceLine(src, "type").makeError(
            "Source '%s' of type %s is partially incompatible"
            % (shortname(src["id"]), json.dumps(src["type"]))) + "\n" + \
            SourceLine(sink, "type").makeError(
            "  with sink '%s' of type %s"
            % (shortname(sink["id"]), json.dumps(sink["type"])))
        if linkMerge:
            msg += "\n" + SourceLine(sink).makeError(
                "  sink has linkMerge method %s" % linkMerge)
        warning_msgs.append(msg)
    for exception in exceptions:
        src = exception.src
        sink = exception.sink
        linkMerge = exception.linkMerge
        msg = SourceLine(src, "type").makeError(
            "Source '%s' of type %s is incompatible"
            % (shortname(src["id"]), json.dumps(src["type"]))) + "\n" + \
            SourceLine(sink, "type").makeError(
            "  with sink '%s' of type %s"
            % (shortname(sink["id"]), json.dumps(sink["type"])))
        if linkMerge:
            msg += "\n" + SourceLine(sink).makeError(
                "  sink has linkMerge method %s" % linkMerge)
        exception_msgs.append(msg)

    for sink in step_inputs:
        if ('null' != sink["type"] and 'null' not in sink["type"]
                and "source" not in sink and "default" not in sink
                and "valueFrom" not in sink):
            msg = SourceLine(sink).makeError(
                "Required parameter '%s' does not have source, default, or valueFrom expression"
                % shortname(sink["id"]))
            exception_msgs.append(msg)

    all_warning_msg = "\n".join(warning_msgs)
    all_exception_msg = "\n".join(exception_msgs)

    if warnings:
        _logger.warn("Workflow checker warning:")
        _logger.warn(all_warning_msg)
    if exceptions:
        raise validate.ValidationException(all_exception_msg)
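
The last loop in static_checker() flags required step inputs that have no source, default, or valueFrom. A reduced sketch of that check follows, assuming the type field is either a string or a list of strings and using invented parameter ids; it returns the offending ids instead of building SourceLine error messages.

def missing_required(step_inputs):
    # A sink is a problem when its type cannot be null and it has neither a
    # source, a default, nor a valueFrom expression.
    problems = []
    for sink in step_inputs:
        types = sink["type"] if isinstance(sink["type"], list) else [sink["type"]]
        if ("null" not in types
                and "source" not in sink
                and "default" not in sink
                and "valueFrom" not in sink):
            problems.append(sink["id"])
    return problems


print(missing_required([
    {"id": "#step/required_in", "type": "File"},
    {"id": "#step/optional_in", "type": ["null", "string"]},
    {"id": "#step/wired_in", "type": "File", "source": "#wf/input_file"},
]))  # ['#step/required_in']
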
Example #25
0
    def __init__(self, toolpath_object, **kwargs):
        # type: (Dict[Text, Any], **Any) -> None
        """
        kwargs:

        metadata: tool document metadata
        requirements: inherited requirements
        hints: inherited hints
        loader: schema_salad.ref_resolver.Loader used to load tool document
        avsc_names: CWL Avro schema object used to validate document
        strict: flag to determine strict validation (fail on unrecognized fields)
        """

        self.metadata = kwargs.get("metadata", {})  # type: Dict[Text,Any]
        self.names = None  # type: avro.schema.Names

        global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY  # pylint: disable=global-statement
        if SCHEMA_FILE is None:
            get_schema("v1.0")
            SCHEMA_ANY = cast(
                Dict[Text, Any],
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/salad#Any"])
            SCHEMA_FILE = cast(
                Dict[Text, Any],
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#File"])
            SCHEMA_DIR = cast(
                Dict[Text, Any], SCHEMA_CACHE["v1.0"]
                [3].idx["https://w3id.org/cwl/cwl#Directory"])

        names = schema_salad.schema.make_avro_schema(
            [SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY],
            schema_salad.ref_resolver.Loader({}))[0]
        if isinstance(names, avro.schema.SchemaParseException):
            raise names
        else:
            self.names = names
        self.tool = toolpath_object
        self.requirements = kwargs.get("requirements", []) + self.tool.get(
            "requirements", [])
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
        self.formatgraph = None  # type: Graph
        if "loader" in kwargs:
            self.formatgraph = kwargs["loader"].graph

        self.doc_loader = kwargs["loader"]
        self.doc_schema = kwargs["avsc_names"]

        checkRequirements(self.tool, supportedProcessRequirements)
        self.validate_hints(kwargs["avsc_names"],
                            self.tool.get("hints", []),
                            strict=kwargs.get("strict"))

        self.schemaDefs = {}  # type: Dict[Text,Dict[Text, Any]]

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            sdtypes = sd["types"]
            av = schema_salad.schema.make_valid_avro(
                sdtypes, {t["name"]: t
                          for t in avroize_type(sdtypes)}, set())
            for i in av:
                self.schemaDefs[i["name"]] = i
            avro.schema.make_avsc_object(av, self.names)

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": []
        }  # type: Dict[Text, Any]
        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": []
        }  # type: Dict[Text, Any]

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                c = copy.copy(i)
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise validate.ValidationException(
                        u"Missing `type` in parameter `%s`" % c["name"])

                if "default" in c and "null" not in aslist(c["type"]):
                    c["type"] = ["null"] + aslist(c["type"])
                else:
                    c["type"] = c["type"]
                c["type"] = avroize_type(c["type"], c["name"])
                if key == "inputs":
                    self.inputs_record_schema["fields"].append(c)
                elif key == "outputs":
                    self.outputs_record_schema["fields"].append(c)

        try:
            self.inputs_record_schema = schema_salad.schema.make_valid_avro(
                self.inputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.inputs_record_schema, self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException(
                u"Got error `%s` while processing inputs of %s:\n%s" %
                (Text(e), self.tool["id"],
                 json.dumps(self.inputs_record_schema, indent=4)))

        try:
            self.outputs_record_schema = schema_salad.schema.make_valid_avro(
                self.outputs_record_schema, {}, set())
            avro.schema.make_avsc_object(self.outputs_record_schema,
                                         self.names)
        except avro.schema.SchemaParseException as e:
            raise validate.ValidationException(
                u"Got error `%s` while processing outputs of %s:\n%s" %
                (Text(e), self.tool["id"],
                 json.dumps(self.outputs_record_schema, indent=4)))
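
One detail worth noting in the schema-building loop above: a parameter that carries a "default" but whose declared type does not already allow "null" gets "null" prepended, so the input may be omitted from the job order. A small sketch of just that normalisation follows, with aslist() mirrored locally and avroize_type() skipped.

def aslist(value):
    return value if isinstance(value, list) else [value]


def normalize_param(param):
    # If a default is supplied but the declared type does not allow null,
    # prepend "null" so the parameter may be omitted from the job order.
    c = dict(param)
    if "default" in c and "null" not in aslist(c["type"]):
        c["type"] = ["null"] + aslist(c["type"])
    return c


print(normalize_param({"name": "threads", "type": "int", "default": 4}))
# {'name': 'threads', 'type': ['null', 'int'], 'default': 4}
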
Example #26
0
    def job(self, joborder, output_callback, **kwargs):
        # type: (Dict[str, str], Callable[..., Any], **Any) -> Generator[Union[CommandLineJob, CallbackJob], None, None]

        jobname = uniquename(kwargs.get("name", shortname(self.tool.get("id", "job"))))

        if kwargs.get("cachedir"):
            cacheargs = kwargs.copy()
            cacheargs["outdir"] = "/out"
            cacheargs["tmpdir"] = "/tmp"
            cachebuilder = self._init_job(joborder, **cacheargs)
            cachebuilder.pathmapper = PathMapper(set((f["path"] for f in cachebuilder.files)),
                                                 kwargs["basedir"])

            cmdline = flatten(map(cachebuilder.generate_arg, cachebuilder.bindings))
            (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
            if docker_req and kwargs.get("use_container") is not False:
                dockerimg = docker_req.get("dockerImageId") or docker_req.get("dockerPull")
                cmdline = ["docker", "run", dockerimg] + cmdline
            keydict = {"cmdline": cmdline}

            for _,f in cachebuilder.pathmapper.items():
                st = os.stat(f[0])
                keydict[f[0]] = [st.st_size, int(st.st_mtime * 1000)]

            interesting = {"DockerRequirement",
                           "EnvVarRequirement",
                           "CreateFileRequirement",
                           "ShellCommandRequirement"}
            for rh in (self.requirements, self.hints):
                for r in reversed(rh):
                    if r["class"] in interesting and r["class"] not in keydict:
                        keydict[r["class"]] = r

            keydictstr = json.dumps(keydict, separators=(',',':'), sort_keys=True)
            cachekey = hashlib.md5(keydictstr).hexdigest()

            _logger.debug("[job %s] keydictstr is %s -> %s", jobname, keydictstr, cachekey)

            jobcache = os.path.join(kwargs["cachedir"], cachekey)
            jobcachepending = jobcache + ".pending"

            if os.path.isdir(jobcache) and not os.path.isfile(jobcachepending):
                if docker_req and kwargs.get("use_container") is not False:
                    cachebuilder.outdir = kwargs.get("docker_outdir") or "/var/spool/cwl"
                else:
                    cachebuilder.outdir = jobcache

                _logger.info("[job %s] Using cached output in %s", jobname, jobcache)
                yield CallbackJob(self, output_callback, cachebuilder, jobcache)
                return
            else:
                _logger.info("[job %s] Output of job will be cached in %s", jobname, jobcache)
                shutil.rmtree(jobcache, True)
                os.makedirs(jobcache)
                kwargs["outdir"] = jobcache
                open(jobcachepending, "w").close()
                def rm_pending_output_callback(output_callback, jobcachepending,
                                               outputs, processStatus):
                    if processStatus == "success":
                        os.remove(jobcachepending)
                    output_callback(outputs, processStatus)
                output_callback = cast(
                        Callable[..., Any],  # known bug in mypy
                        # https://github.com/python/mypy/issues/797
                        partial(rm_pending_output_callback, output_callback,
                            jobcachepending))

        builder = self._init_job(joborder, **kwargs)

        reffiles = set((f["path"] for f in builder.files))

        j = self.makeJobRunner()
        j.builder = builder
        j.joborder = builder.job
        j.stdin = None
        j.stdout = None
        j.successCodes = self.tool.get("successCodes")
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
        j.permanentFailCodes = self.tool.get("permanentFailCodes")
        j.requirements = self.requirements
        j.hints = self.hints
        j.name = jobname

        _logger.debug(u"[job %s] initializing from %s%s",
                     j.name,
                     self.tool.get("id", ""),
                     u" as part of %s" % kwargs["part_of"] if "part_of" in kwargs else "")
        _logger.debug(u"[job %s] %s", j.name, json.dumps(joborder, indent=4))


        builder.pathmapper = None

        if self.tool.get("stdin"):
            j.stdin = builder.do_eval(self.tool["stdin"])
            reffiles.add(j.stdin)

        if self.tool.get("stdout"):
            j.stdout = builder.do_eval(self.tool["stdout"])
            if os.path.isabs(j.stdout) or ".." in j.stdout:
                raise validate.ValidationException("stdout must be a relative path")

        builder.pathmapper = self.makePathMapper(reffiles, **kwargs)
        builder.requirements = j.requirements

        # Map files to their assigned paths inside the container. We also need to explicitly
        # walk over the input, as implicit reassignment doesn't reach everything in builder.bindings
        def _check_adjust(f):  # type: (Dict[str,Any]) -> Dict[str,Any]
            if not f.get("containerfs"):
                f["path"] = builder.pathmapper.mapper(f["path"])[1]
                f["containerfs"] = True
            return f

        _logger.debug(u"[job %s] path mappings is %s", j.name, json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4))

        adjustFileObjs(builder.files, _check_adjust)
        adjustFileObjs(builder.bindings, _check_adjust)

        _logger.debug(u"[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4))

        dockerReq = self.get_requirement("DockerRequirement")[0]
        if dockerReq and kwargs.get("use_container"):
            out_prefix = kwargs.get("tmp_outdir_prefix")
            j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix)
            tmpdir_prefix = kwargs.get('tmpdir_prefix')
            j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix)
        else:
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir

        createFiles = self.get_requirement("CreateFileRequirement")[0]
        j.generatefiles = {}
        if createFiles:
            for t in createFiles["fileDef"]:
                j.generatefiles[builder.do_eval(t["filename"])] = copy.deepcopy(builder.do_eval(t["fileContent"]))

        j.environment = {}
        evr = self.get_requirement("EnvVarRequirement")[0]
        if evr:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = builder.do_eval(t["envValue"])

        shellcmd = self.get_requirement("ShellCommandRequirement")[0]
        if shellcmd:
            cmd = []  # type: List[str]
            for b in builder.bindings:
                arg = builder.generate_arg(b)
                if b.get("shellQuote", True):
                    arg = [shellescape.quote(a) for a in aslist(arg)]
                cmd.extend(aslist(arg))
            j.command_line = ["/bin/sh", "-c", " ".join(cmd)]
        else:
            j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        j.pathmapper = builder.pathmapper
        j.collect_outputs = partial(
                self.collect_output_ports, self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
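
The caching branch near the top of this job() method derives a cache key from the command line, the size and mtime of each input file, and the "interesting" requirement classes. The condensed sketch below reproduces that derivation; the .encode() call is an adaptation so it runs on Python 3, unlike the Python 2 original, and the sample file is created with tempfile purely for the demonstration.

import hashlib
import json
import os
import tempfile


def cache_key(cmdline, input_paths, requirements):
    # The key covers the command line, each input file's size and mtime
    # (millisecond resolution), and any requirement classes not yet present,
    # serialised to canonical JSON and hashed with MD5.
    keydict = {"cmdline": cmdline}
    for path in input_paths:
        st = os.stat(path)
        keydict[path] = [st.st_size, int(st.st_mtime * 1000)]
    for r in requirements:
        keydict.setdefault(r["class"], r)
    keydictstr = json.dumps(keydict, separators=(",", ":"), sort_keys=True)
    return hashlib.md5(keydictstr.encode("utf-8")).hexdigest()


with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b"example input")
print(cache_key(["echo", "hello"], [tmp.name],
                [{"class": "EnvVarRequirement", "envDef": []}]))
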
Example #27
0
    def job(
            self,
            job_order,  # type: Dict[Text, Text]
            output_callbacks,  # type: Callable[[Any, Any], Any]
            **kwargs  # type: Any
    ):
        # type: (...) -> Generator[Union[CommandLineJob, CallbackJob], None, None]

        jobname = uniquename(
            kwargs.get("name", shortname(self.tool.get("id", "job"))))

        if kwargs.get("cachedir"):
            cacheargs = kwargs.copy()
            cacheargs["outdir"] = "/out"
            cacheargs["tmpdir"] = "/tmp"
            cacheargs["stagedir"] = "/stage"
            cachebuilder = self._init_job(job_order, **cacheargs)
            cachebuilder.pathmapper = PathMapper(cachebuilder.files,
                                                 kwargs["basedir"],
                                                 cachebuilder.stagedir,
                                                 separateDirs=False)
            _check_adjust = partial(check_adjust, cachebuilder)
            adjustFileObjs(cachebuilder.files, _check_adjust)
            adjustFileObjs(cachebuilder.bindings, _check_adjust)
            adjustDirObjs(cachebuilder.files, _check_adjust)
            adjustDirObjs(cachebuilder.bindings, _check_adjust)
            cmdline = flatten(
                map(cachebuilder.generate_arg, cachebuilder.bindings))
            (docker_req,
             docker_is_req) = self.get_requirement("DockerRequirement")
            if docker_req and kwargs.get("use_container") is not False:
                dockerimg = docker_req.get("dockerImageId") or docker_req.get(
                    "dockerPull")
                cmdline = ["docker", "run", dockerimg] + cmdline
            keydict = {u"cmdline": cmdline}

            for _, f in cachebuilder.pathmapper.items():
                if f.type == "File":
                    st = os.stat(f.resolved)
                    keydict[f.resolved] = [st.st_size, int(st.st_mtime * 1000)]

            interesting = {
                "DockerRequirement", "EnvVarRequirement",
                "CreateFileRequirement", "ShellCommandRequirement"
            }
            for rh in (self.requirements, self.hints):
                for r in reversed(rh):
                    if r["class"] in interesting and r["class"] not in keydict:
                        keydict[r["class"]] = r

            keydictstr = json.dumps(keydict,
                                    separators=(',', ':'),
                                    sort_keys=True)
            cachekey = hashlib.md5(keydictstr).hexdigest()

            _logger.debug("[job %s] keydictstr is %s -> %s", jobname,
                          keydictstr, cachekey)

            jobcache = os.path.join(kwargs["cachedir"], cachekey)
            jobcachepending = jobcache + ".pending"

            if os.path.isdir(jobcache) and not os.path.isfile(jobcachepending):
                if docker_req and kwargs.get("use_container") is not False:
                    cachebuilder.outdir = kwargs.get(
                        "docker_outdir") or "/var/spool/cwl"
                else:
                    cachebuilder.outdir = jobcache

                _logger.info("[job %s] Using cached output in %s", jobname,
                             jobcache)
                yield CallbackJob(self, output_callbacks, cachebuilder,
                                  jobcache)
                return
            else:
                _logger.info("[job %s] Output of job will be cached in %s",
                             jobname, jobcache)
                shutil.rmtree(jobcache, True)
                os.makedirs(jobcache)
                kwargs["outdir"] = jobcache
                open(jobcachepending, "w").close()

                def rm_pending_output_callback(output_callbacks,
                                               jobcachepending, outputs,
                                               processStatus):
                    if processStatus == "success":
                        os.remove(jobcachepending)
                    output_callbacks(outputs, processStatus)

                output_callbacks = cast(
                    Callable[..., Any],  # known bug in mypy
                    # https://github.com/python/mypy/issues/797
                    partial(rm_pending_output_callback, output_callbacks,
                            jobcachepending))

        builder = self._init_job(job_order, **kwargs)

        reffiles = copy.deepcopy(builder.files)

        j = self.makeJobRunner()
        j.builder = builder
        j.joborder = builder.job
        j.stdin = None
        j.stderr = None
        j.stdout = None
        j.successCodes = self.tool.get("successCodes")
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
        j.permanentFailCodes = self.tool.get("permanentFailCodes")
        j.requirements = self.requirements
        j.hints = self.hints
        j.name = jobname

        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                u"[job %s] initializing from %s%s", j.name,
                self.tool.get("id", ""), u" as part of %s" %
                kwargs["part_of"] if "part_of" in kwargs else "")
            _logger.debug(u"[job %s] %s", j.name,
                          json.dumps(job_order, indent=4))

        builder.pathmapper = None
        make_path_mapper_kwargs = kwargs
        if "stagedir" in make_path_mapper_kwargs:
            make_path_mapper_kwargs = make_path_mapper_kwargs.copy()
            del make_path_mapper_kwargs["stagedir"]
        builder.pathmapper = self.makePathMapper(reffiles, builder.stagedir,
                                                 **make_path_mapper_kwargs)
        builder.requirements = j.requirements

        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(
                u"[job %s] path mappings is %s", j.name,
                json.dumps(
                    {
                        p: builder.pathmapper.mapper(p)
                        for p in builder.pathmapper.files()
                    },
                    indent=4))

        _check_adjust = partial(check_adjust, builder)

        adjustFileObjs(builder.files, _check_adjust)
        adjustFileObjs(builder.bindings, _check_adjust)
        adjustDirObjs(builder.files, _check_adjust)
        adjustDirObjs(builder.bindings, _check_adjust)

        if self.tool.get("stdin"):
            with SourceLine(self.tool, "stdin", validate.ValidationException):
                j.stdin = builder.do_eval(self.tool["stdin"])
                reffiles.append({"class": "File", "path": j.stdin})

        if self.tool.get("stderr"):
            with SourceLine(self.tool, "stderr", validate.ValidationException):
                j.stderr = builder.do_eval(self.tool["stderr"])
                if os.path.isabs(j.stderr) or ".." in j.stderr:
                    raise validate.ValidationException(
                        "stderr must be a relative path, got '%s'" % j.stderr)

        if self.tool.get("stdout"):
            with SourceLine(self.tool, "stdout", validate.ValidationException):
                j.stdout = builder.do_eval(self.tool["stdout"])
                if os.path.isabs(j.stdout) or ".." in j.stdout or not j.stdout:
                    raise validate.ValidationException(
                        "stdout must be a relative path, got '%s'" % j.stdout)

        if _logger.isEnabledFor(logging.DEBUG):
            _logger.debug(u"[job %s] command line bindings is %s", j.name,
                          json.dumps(builder.bindings, indent=4))

        dockerReq = self.get_requirement("DockerRequirement")[0]
        if dockerReq and kwargs.get("use_container"):
            out_prefix = kwargs.get("tmp_outdir_prefix")
            j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(
                prefix=out_prefix)
            tmpdir_prefix = kwargs.get('tmpdir_prefix')
            j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(
                prefix=tmpdir_prefix)
            j.stagedir = tempfile.mkdtemp(prefix=tmpdir_prefix)
        else:
            j.outdir = builder.outdir
            j.tmpdir = builder.tmpdir
            j.stagedir = builder.stagedir

        initialWorkdir = self.get_requirement("InitialWorkDirRequirement")[0]
        j.generatefiles = {"class": "Directory", "listing": [], "basename": ""}
        if initialWorkdir:
            ls = []  # type: List[Dict[Text, Any]]
            if isinstance(initialWorkdir["listing"], (str, Text)):
                ls = builder.do_eval(initialWorkdir["listing"])
            else:
                for t in initialWorkdir["listing"]:
                    if "entry" in t:
                        et = {u"entry": builder.do_eval(t["entry"])}
                        if "entryname" in t:
                            et["entryname"] = builder.do_eval(t["entryname"])
                        else:
                            et["entryname"] = None
                        et["writable"] = t.get("writable", False)
                        ls.append(et)
                    else:
                        ls.append(builder.do_eval(t))
            for i, t in enumerate(ls):
                if "entry" in t:
                    if isinstance(t["entry"], basestring):
                        ls[i] = {
                            "class": "File",
                            "basename": t["entryname"],
                            "contents": t["entry"],
                            "writable": t.get("writable")
                        }
                    else:
                        if t["entryname"] or t["writable"]:
                            t = copy.deepcopy(t)
                            if t["entryname"]:
                                t["entry"]["basename"] = t["entryname"]
                            t["entry"]["writable"] = t.get("writable")
                        ls[i] = t["entry"]
            j.generatefiles[u"listing"] = ls

        normalizeFilesDirs(j.generatefiles)

        j.environment = {}
        evr = self.get_requirement("EnvVarRequirement")[0]
        if evr:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = builder.do_eval(t["envValue"])

        shellcmd = self.get_requirement("ShellCommandRequirement")[0]
        if shellcmd:
            cmd = []  # type: List[Text]
            for b in builder.bindings:
                arg = builder.generate_arg(b)
                if b.get("shellQuote", True):
                    arg = [shellescape.quote(a) for a in aslist(arg)]
                cmd.extend(aslist(arg))
            j.command_line = ["/bin/sh", "-c", " ".join(cmd)]
        else:
            j.command_line = flatten(
                map(builder.generate_arg, builder.bindings))

        j.pathmapper = builder.pathmapper
        j.collect_outputs = partial(self.collect_output_ports,
                                    self.tool["outputs"],
                                    builder,
                                    compute_checksum=kwargs.get(
                                        "compute_checksum", True))
        j.output_callback = output_callbacks

        yield j
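
The InitialWorkDirRequirement block above turns listing entries whose evaluated "entry" is a string into anonymous File objects with inline "contents". The sketch below shows only that string case with a stub evaluator; the dict case (copying entryname and writable onto an existing File or Directory) is omitted.

def materialize_listing(listing, do_eval=lambda expr: expr):
    # Entries whose evaluated "entry" is a string become anonymous Files
    # carrying their contents inline; anything else is passed through as-is.
    ls = []
    for t in listing:
        entry = do_eval(t["entry"])
        if isinstance(entry, str):
            ls.append({
                "class": "File",
                "basename": do_eval(t["entryname"]) if t.get("entryname") else None,
                "contents": entry,
                "writable": t.get("writable", False),
            })
        else:
            ls.append(entry)
    return ls


print(materialize_listing([{"entry": "hello\n", "entryname": "greeting.txt"}]))
# [{'class': 'File', 'basename': 'greeting.txt', 'contents': 'hello\n', 'writable': False}]
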
Example #28
0
    def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
        bindings = []
        binding = None
        if "inputBinding" in schema and isinstance(schema["inputBinding"],
                                                   dict):
            binding = copy.copy(schema["inputBinding"])

            if "position" in binding:
                binding["position"] = aslist(lead_pos) + aslist(
                    binding["position"]) + aslist(tail_pos)
            else:
                binding["position"] = aslist(lead_pos) + [0] + aslist(tail_pos)

            if "valueFrom" in binding:
                binding["do_eval"] = binding["valueFrom"]
            binding["valueFrom"] = datum

        # Handle union types
        if isinstance(schema["type"], list):
            for t in schema["type"]:
                if isinstance(t, basestring) and self.names.has_name(t, ""):
                    avsc = self.names.get_name(t, "")
                elif isinstance(t,
                                dict) and "name" in t and self.names.has_name(
                                    t["name"], ""):
                    avsc = self.names.get_name(t["name"], "")
                else:
                    avsc = avro.schema.make_avsc_object(t, self.names)
                if validate.validate(avsc, datum):
                    schema = copy.deepcopy(schema)
                    schema["type"] = t
                    return self.bind_input(schema,
                                           datum,
                                           lead_pos=lead_pos,
                                           tail_pos=tail_pos)
            raise validate.ValidationException("'%s' is not a valid union %s" %
                                               (datum, schema["type"]))
        elif isinstance(schema["type"], dict):
            st = copy.deepcopy(schema["type"])
            if binding and "inputBinding" not in st and "itemSeparator" not in binding and st[
                    "type"] in ("array", "map"):
                st["inputBinding"] = {}
            bindings.extend(
                self.bind_input(st,
                                datum,
                                lead_pos=lead_pos,
                                tail_pos=tail_pos))
        else:
            if schema["type"] in self.schemaDefs:
                schema = self.schemaDefs[schema["type"]]

            if schema["type"] == "record":
                for f in schema["fields"]:
                    if f["name"] in datum:
                        bindings.extend(
                            self.bind_input(f,
                                            datum[f["name"]],
                                            lead_pos=lead_pos,
                                            tail_pos=f["name"]))
                    else:
                        datum[f["name"]] = f.get("default")

            if schema["type"] == "map":
                for n, item in datum.items():
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = [n, item]
                    bindings.extend(
                        self.bind_input(
                            {
                                "type": schema["values"],
                                "inputBinding": b2
                            },
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos))
                binding = None

            if schema["type"] == "array":
                for n, item in enumerate(datum):
                    b2 = None
                    if binding:
                        b2 = copy.deepcopy(binding)
                        b2["valueFrom"] = item
                    bindings.extend(
                        self.bind_input(
                            {
                                "type": schema["items"],
                                "inputBinding": b2
                            },
                            item,
                            lead_pos=n,
                            tail_pos=tail_pos))
                binding = None

            if schema["type"] == "File":
                self.files.append(datum)
                if binding and binding.get("loadContents"):
                    with self.fs_access.open(datum["path"], "rb") as f:
                        datum["contents"] = f.read(CONTENT_LIMIT)

                if "secondaryFiles" in schema:
                    if "secondaryFiles" not in datum:
                        datum["secondaryFiles"] = []
                    for sf in aslist(schema["secondaryFiles"]):
                        if isinstance(sf, dict) or "$(" in sf or "${" in sf:
                            sfpath = self.do_eval(sf, context=datum)
                            if isinstance(sfpath, basestring):
                                sfpath = {"path": sfpath, "class": "File"}
                        else:
                            sfpath = {
                                "path": substitute(datum["path"], sf),
                                "class": "File"
                            }
                        if isinstance(sfpath, list):
                            datum["secondaryFiles"].extend(sfpath)
                        else:
                            datum["secondaryFiles"].append(sfpath)
                for sf in datum.get("secondaryFiles", []):
                    self.files.append(sf)

        # Position to front of the sort key
        if binding:
            for bi in bindings:
                bi["position"] = binding["position"] + bi["position"]
            bindings.append(binding)

        return bindings
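
bind_input() builds each binding's sort key by prepending the parent binding's position to the child's, so array items keep both their declared position and their index. A toy illustration of that key construction, with positions chosen arbitrarily, follows.

def aslist(value):
    return value if isinstance(value, list) else [value]


def position_key(lead_pos, declared_pos, tail_pos):
    # Combine the positions exactly as the binding["position"] assignment does.
    return aslist(lead_pos) + aslist(declared_pos) + aslist(tail_pos)


parent = position_key([], 2, [])                        # inputBinding position: 2
children = [position_key(i, 0, []) for i in range(3)]   # array items 0, 1, 2
# The final loop prepends the parent's position to every child binding.
print([parent + child for child in children])
# [[2, 0, 0], [2, 1, 0], [2, 2, 0]]
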
Example #29
0
    def __init__(self, toolpath_object, pos, **kwargs):
        # type: (Dict[Text, Any], int, **Any) -> None
        if "id" in toolpath_object:
            self.id = toolpath_object["id"]
        else:
            self.id = "#step" + Text(pos)

        kwargs["requirements"] = kwargs.get(
            "requirements", []) + toolpath_object.get("requirements", [])
        kwargs["hints"] = kwargs.get("hints", []) + toolpath_object.get(
            "hints", [])

        try:
            if isinstance(toolpath_object["run"], dict):
                self.embedded_tool = kwargs.get("makeTool")(
                    toolpath_object["run"], **kwargs)
            else:
                self.embedded_tool = load_tool(
                    toolpath_object["run"],
                    kwargs.get("makeTool"),
                    kwargs,
                    enable_dev=kwargs.get("enable_dev"),
                    strict=kwargs.get("strict"),
                    fetcher_constructor=kwargs.get("fetcher_constructor"))
        except validate.ValidationException as v:
            raise WorkflowException(
                u"Tool definition %s failed validation:\n%s" %
                (toolpath_object["run"], validate.indent(str(v))))

        validation_errors = []
        self.tool = toolpath_object = copy.deepcopy(toolpath_object)
        bound = set()
        for stepfield, toolfield in (("in", "inputs"), ("out", "outputs")):
            toolpath_object[toolfield] = []
            for n, step_entry in enumerate(toolpath_object[stepfield]):
                if isinstance(step_entry, (str, unicode)):
                    param = CommentedMap()  # type: CommentedMap
                    inputid = step_entry
                else:
                    param = CommentedMap(step_entry.iteritems())
                    inputid = step_entry["id"]

                shortinputid = shortname(inputid)
                found = False
                for tool_entry in self.embedded_tool.tool[toolfield]:
                    frag = shortname(tool_entry["id"])
                    if frag == shortinputid:
                        param.update(tool_entry)  # type: ignore
                        found = True
                        bound.add(frag)
                        break
                if not found:
                    if stepfield == "in":
                        param["type"] = "Any"
                    else:
                        validation_errors.append(
                            SourceLine(self.tool["out"], n).makeError(
                                "Workflow step output '%s' does not correspond to"
                                % shortname(step_entry)) + "\n" +
                            SourceLine(self.embedded_tool.tool, "outputs").
                            makeError("  tool output (expected '%s')" %
                                      ("', '".join([
                                          shortname(tool_entry["id"])
                                          for tool_entry in
                                          self.embedded_tool.tool[toolfield]
                                      ]))))
                param["id"] = inputid
                param.lc.line = toolpath_object[stepfield].lc.data[n][0]
                param.lc.col = toolpath_object[stepfield].lc.data[n][1]
                param.lc.filename = toolpath_object[stepfield].lc.filename
                toolpath_object[toolfield].append(param)

        missing = []
        for i, tool_entry in enumerate(self.embedded_tool.tool["inputs"]):
            if shortname(tool_entry["id"]) not in bound:
                if "null" not in tool_entry[
                        "type"] and "default" not in tool_entry:
                    missing.append(shortname(tool_entry["id"]))

        if missing:
            validation_errors.append(
                SourceLine(self.tool, "in").makeError(
                    "Step is missing required parameter%s '%s'" %
                    ("s" if len(missing) > 1 else "", "', '".join(missing))))

        if validation_errors:
            raise validate.ValidationException("\n".join(validation_errors))

        super(WorkflowStep, self).__init__(toolpath_object, **kwargs)

        if self.embedded_tool.tool["class"] == "Workflow":
            (feature,
             _) = self.get_requirement("SubworkflowFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains embedded workflow but SubworkflowFeatureRequirement not in requirements"
                )

        if "scatter" in self.tool:
            (feature, _) = self.get_requirement("ScatterFeatureRequirement")
            if not feature:
                raise WorkflowException(
                    "Workflow contains scatter but ScatterFeatureRequirement not in requirements"
                )

            inputparms = copy.deepcopy(self.tool["inputs"])
            outputparms = copy.deepcopy(self.tool["outputs"])
            scatter = aslist(self.tool["scatter"])

            method = self.tool.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise validate.ValidationException(
                    "Must specify scatterMethod when scattering over multiple inputs"
                )

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise validate.ValidationException(
                        SourceLine(self.tool, "scatter").makeError(
                            u"Scatter parameter '%s' does not correspond to an input parameter of this "
                            u"step, expecting '%s'" %
                            (shortname(s), "', '".join(
                                shortname(k) for k in inp_map.keys()))))

                inp_map[s]["type"] = {
                    "type": "array",
                    "items": inp_map[s]["type"]
                }

            if self.tool.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for op in outputparms:
                    op["type"] = {"type": "array", "items": op["type"]}
            self.tool["inputs"] = inputparms
            self.tool["outputs"] = outputparms
Example #30
0
    def __init__(self, toolpath_object, **kwargs):
        # type: (Dict[Text, Any], **Any) -> None
        """
        kwargs:

        metadata: tool document metadata
        requirements: inherited requirements
        hints: inherited hints
        loader: schema_salad.ref_resolver.Loader used to load tool document
        avsc_names: CWL Avro schema object used to validate document
        strict: flag to determine strict validation (fail on unrecognized fields)
        """

        self.metadata = kwargs.get("metadata", {})  # type: Dict[Text,Any]
        self.names = None  # type: schema.Names

        global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY  # pylint: disable=global-statement
        if SCHEMA_FILE is None:
            get_schema("v1.0")
            SCHEMA_ANY = cast(
                Dict[Text, Any],
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/salad#Any"])
            SCHEMA_FILE = cast(
                Dict[Text, Any],
                SCHEMA_CACHE["v1.0"][3].idx["https://w3id.org/cwl/cwl#File"])
            SCHEMA_DIR = cast(
                Dict[Text, Any], SCHEMA_CACHE["v1.0"]
                [3].idx["https://w3id.org/cwl/cwl#Directory"])

        names = schema.make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY],
                                        Loader({}))[0]
        if isinstance(names, schema.SchemaParseException):
            raise names
        else:
            self.names = names
        self.tool = toolpath_object
        self.requirements = (kwargs.get("requirements", []) + self.tool.get(
            "requirements", []) + get_overrides(kwargs.get(
                "overrides", []), self.tool["id"]).get("requirements", []))
        self.hints = kwargs.get("hints", []) + self.tool.get("hints", [])
        self.formatgraph = None  # type: Graph
        if "loader" in kwargs:
            self.formatgraph = kwargs["loader"].graph

        self.doc_loader = kwargs["loader"]
        self.doc_schema = kwargs["avsc_names"]

        checkRequirements(self.tool, supportedProcessRequirements)
        self.validate_hints(kwargs["avsc_names"],
                            self.tool.get("hints", []),
                            strict=kwargs.get("strict"))

        self.schemaDefs = {}  # type: Dict[Text,Dict[Text, Any]]

        sd, _ = self.get_requirement("SchemaDefRequirement")

        if sd:
            sdtypes = sd["types"]
            av = schema.make_valid_avro(
                sdtypes, {t["name"]: t
                          for t in avroize_type(sdtypes)}, set())
            for i in av:
                self.schemaDefs[i["name"]] = i  # type: ignore
            schema.AvroSchemaFromJSONData(av, self.names)  # type: ignore

        # Build record schema from inputs
        self.inputs_record_schema = {
            "name": "input_record_schema",
            "type": "record",
            "fields": []
        }  # type: Dict[Text, Any]
        self.outputs_record_schema = {
            "name": "outputs_record_schema",
            "type": "record",
            "fields": []
        }  # type: Dict[Text, Any]

        for key in ("inputs", "outputs"):
            for i in self.tool[key]:
                c = copy.copy(i)
                c["name"] = shortname(c["id"])
                del c["id"]

                if "type" not in c:
                    raise validate.ValidationException(u"Missing 'type' in "
                                                       "parameter '%s'" %
                                                       c["name"])

                if "default" in c and "null" not in aslist(c["type"]):
                    c["type"] = ["null"] + aslist(c["type"])
                else:
                    c["type"] = c["type"]
                c["type"] = avroize_type(c["type"], c["name"])
                if key == "inputs":
                    self.inputs_record_schema["fields"].append(c)
                elif key == "outputs":
                    self.outputs_record_schema["fields"].append(c)

        with SourceLine(toolpath_object, "inputs",
                        validate.ValidationException):
            self.inputs_record_schema = cast(
                Dict[six.text_type, Any],
                schema.make_valid_avro(self.inputs_record_schema, {}, set()))
            schema.AvroSchemaFromJSONData(self.inputs_record_schema,
                                          self.names)
        with SourceLine(toolpath_object, "outputs",
                        validate.ValidationException):
            self.outputs_record_schema = cast(
                Dict[six.text_type, Any],
                schema.make_valid_avro(self.outputs_record_schema, {}, set()))
            schema.AvroSchemaFromJSONData(self.outputs_record_schema,
                                          self.names)

        if toolpath_object.get("class") is not None and not kwargs.get(
                "disable_js_validation", False):
            if kwargs.get("js_hint_options_file") is not None:
                try:
                    with open(kwargs["js_hint_options_file"]) as options_file:
                        validate_js_options = json.load(options_file)
                except (OSError, ValueError) as e:
                    _logger.error("Failed to read options file %s" %
                                  kwargs["js_hint_options_file"])
                    raise e
            else:
                validate_js_options = None

            validate_js_expressions(
                cast(CommentedMap, toolpath_object),
                self.doc_schema.names[toolpath_object["class"]],
                validate_js_options)

        dockerReq, is_req = self.get_requirement("DockerRequirement")

        if dockerReq and dockerReq.get("dockerOutputDirectory") and not is_req:
            _logger.warn(
                SourceLine(item=dockerReq, raise_type=Text).makeError(
                    """When 'dockerOutputDirectory' is declared, DockerRequirement
  should go in the 'requirements' section, not 'hints'."""))

        if dockerReq and dockerReq.get(
                "dockerOutputDirectory") == "/var/spool/cwl":
            if is_req:
                # In this specific case, it is legal to have /var/spool/cwl, so skip the check.
                pass
            else:
                # Must be a requirement
                var_spool_cwl_detector(self.tool)
        else:
            var_spool_cwl_detector(self.tool)
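
The constructor ends with a placement check on DockerRequirement: dockerOutputDirectory triggers a warning when it arrives via "hints", and the reserved /var/spool/cwl path is only tolerated when the requirement form is used. A loose sketch of that decision follows; it returns human-readable strings instead of logging and instead of running var_spool_cwl_detector over the tool document.

def docker_output_dir_issues(docker_req, is_requirement):
    # Returns human-readable notes rather than logging or raising.
    issues = []
    if docker_req and docker_req.get("dockerOutputDirectory") and not is_requirement:
        issues.append("dockerOutputDirectory declared in 'hints'; "
                      "move DockerRequirement to 'requirements'")
    if (docker_req or {}).get("dockerOutputDirectory") == "/var/spool/cwl" \
            and not is_requirement:
        issues.append("reserved path /var/spool/cwl used outside 'requirements'")
    return issues


print(docker_output_dir_issues(
    {"class": "DockerRequirement",
     "dockerOutputDirectory": "/var/spool/cwl"},
    is_requirement=False))
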