Example #1
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
        try:
            obj = {"job": jobinput, "context": context, "outdir": outdir, "tmpdir": tmpdir}
            return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v,  obj))

    if ex["engine"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
        engineConfig = []
        for r in reversed(requirements):
            if r["class"] == "ExpressionEngineRequirement" and r["id"] == "https://w3id.org/cwl/cwl#JavascriptEngine":
                engineConfig = r.get("engineConfig", [])
                break
        return sandboxjs.execjs(ex["script"], jshead(engineConfig, jobinput, context, tmpdir, outdir))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            inp = {
                "script": ex["script"],
                "engineConfig": r.get("engineConfig", []),
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #2
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "https://w3id.org/cwl/cwl#JsonPointer":
        try:
            obj = {"job": jobinput, "context": context, "outdir": outdir, "tmpdir": tmpdir}
            return schema_salad.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v,  obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]:
            runtime = []

            class DR(object):
                pass
            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req, docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException("Expression engine returned non-zero exit code on evaluation of\n%s" % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #3
def get_from_requirements(requirements, hints, pull_image, dry_run=False):
    (r, req) = process.get_feature("DockerRequirement", requirements=requirements, hints=hints)

    if r:
        if get_image(r, pull_image, dry_run):
            return r["dockerImageId"]
        else:
            if req:
                raise Exception("Docker image %s not found" % r["dockerImageId"])

    return None
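
get_from_requirements delegates to process.get_feature to find a DockerRequirement among the requirements and hints. A standalone sketch of that lookup, assuming the usual CWL convention that requirements are mandatory while hints are best-effort (the implementation here is illustrative, not cwltool's):

def get_feature(feature, requirements=None, hints=None):
    # Later entries win, so search the lists in reverse.
    for r in reversed(requirements or []):
        if r["class"] == feature:
            return (r, True)   # found as a hard requirement
    for h in reversed(hints or []):
        if h["class"] == feature:
            return (h, False)  # found only as an optional hint
    return (None, None)

req = [{"class": "DockerRequirement", "dockerImageId": "debian:8"}]
print(get_feature("DockerRequirement", requirements=req, hints=[]))
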
Example #4
    def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
        #_logger.info("[job %s] starting with outdir %s", id(self), self.outdir)

        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir)

        #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
        #    json.dump(self.joborder, fp)

        runtime = []
        env = {"TMPDIR": self.tmpdir}

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")

        for f in self.pathmapper.files():
            if not os.path.exists(self.pathmapper.mapper(f)[0]):
                raise WorkflowException("Required input file %s not found" % self.pathmapper.mapper(f)[0])

        img_id = None
        if docker_req and kwargs.get("use_container") is not False:
            env = os.environ
            img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)

        if docker_is_req and img_id is None:
            raise WorkflowException("Docker is required for running this tool.")

        if img_id:
            runtime = ["docker", "run", "-i"]
            for src in self.pathmapper.files():
                vol = self.pathmapper.mapper(src)
                runtime.append("--volume=%s:%s:ro" % vol)
            runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/tmp/job_output"))
            runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp/job_tmp"))
            runtime.append("--workdir=%s" % ("/tmp/job_output"))
            runtime.append("--user=%s" % (os.geteuid()))

            if rm_container:
                runtime.append("--rm")

            runtime.append("--env=TMPDIR=/tmp/job_tmp")

            for t,v in self.environment.items():
                runtime.append("--env=%s=%s" % (t, v))

            runtime.append(img_id)
        else:
            env = self.environment
            if not os.path.exists(self.tmpdir):
                os.makedirs(self.tmpdir)
            env["TMPDIR"] = self.tmpdir

        stdin = None
        stdout = None

        _logger.info("[job %s] exec %s%s%s",
                     id(self),
                     " ".join([shellescape.quote(arg) if needs_shell_quoting(arg) else arg for arg in (runtime + self.command_line)]),
                     ' < %s' % (self.stdin) if self.stdin else '',
                     ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')

        if dry_run:
            return (self.outdir, {})

        outputs = {}

        try:
            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    os.symlink(self.generatefiles[t]["path"], os.path.join(self.outdir, t))
                else:
                    with open(os.path.join(self.outdir, t), "w") as f:
                        f.write(self.generatefiles[t])

            if self.stdin:
                stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
            else:
                stdin = subprocess.PIPE

            if self.stdout:
                absout = os.path.join(self.outdir, self.stdout)
                dn = os.path.dirname(absout)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                stdout = open(absout, "wb")
            else:
                stdout = sys.stderr

            sp = subprocess.Popen(runtime + self.command_line,
                                  shell=False,
                                  close_fds=True,
                                  stdin=stdin,
                                  stdout=stdout,
                                  env=env,
                                  cwd=self.outdir)

            if stdin == subprocess.PIPE:
                sp.stdin.close()

            rcode = sp.wait()

            if stdin != subprocess.PIPE:
                stdin.close()

            if stdout is not sys.stderr:
                stdout.close()

            if self.successCodes and rcode in self.successCodes:
                processStatus = "success"
            elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
                processStatus = "temporaryFail"
            elif self.permanentFailCodes and rcode in self.permanentFailCodes:
                processStatus = "permanentFail"
            elif rcode == 0:
                processStatus = "success"
            else:
                processStatus = "permanentFail"

            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    os.remove(os.path.join(self.outdir, t))
                    os.symlink(self.pathmapper.reversemap(self.generatefiles[t]["path"])[1], os.path.join(self.outdir, t))

            outputs = self.collect_outputs(self.outdir)

        except Exception as e:
            _logger.exception("Exception while running job")
            processStatus = "permanentFail"

        if processStatus != "success":
            _logger.warn("[job %s] completed %s", id(self), processStatus)
        else:
            _logger.debug("[job %s] completed %s", id(self), processStatus)
        _logger.debug("[job %s] %s", id(self), json.dumps(outputs, indent=4))

        self.output_callback(outputs, processStatus)

        if rm_tmpdir:
            _logger.debug("[job %s] Removing temporary directory %s", id(self), self.tmpdir)
            shutil.rmtree(self.tmpdir, True)

        if move_outputs and empty_subtree(self.outdir):
            _logger.debug("[job %s] Removing empty output directory %s", id(self), self.tmpdir)
            shutil.rmtree(self.outdir, True)
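
The exit-code handling above maps the child's return code to a process status, with the tool's explicit successCodes, temporaryFailCodes, and permanentFailCodes lists taking precedence over the default "zero means success" rule. A condensed sketch:

# Illustrative only: the classification order used in the run() method above.
def classify(rcode, success=(), temporary=(), permanent=()):
    # Explicit code lists are consulted first, then the conventional rule.
    if rcode in success:
        return "success"
    if rcode in temporary:
        return "temporaryFail"
    if rcode in permanent:
        return "permanentFail"
    return "success" if rcode == 0 else "permanentFail"

print(classify(0))                   # success
print(classify(3, temporary=(3,)))   # temporaryFail
print(classify(1))                   # permanentFail
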
Example #5
    def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs=True, **kwargs):
        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir)

        #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
        #    json.dump(self.joborder, fp)

        runtime = []
        env = {"TMPDIR": self.tmpdir}

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")

        for f in self.pathmapper.files():
            if not os.path.isfile(self.pathmapper.mapper(f)[0]):
                raise WorkflowException("Required input file %s not found or is not a regular file." % self.pathmapper.mapper(f)[0])

        img_id = None
        if docker_req and kwargs.get("use_container") is not False:
            env = os.environ
            img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)

        if docker_is_req and img_id is None:
            raise WorkflowException("Docker is required for running this tool.")

        if img_id:
            runtime = ["docker", "run", "-i"]
            for src in self.pathmapper.files():
                vol = self.pathmapper.mapper(src)
                runtime.append("--volume=%s:%s:ro" % vol)
            runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl"))
            runtime.append("--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp"))
            runtime.append("--workdir=%s" % ("/var/spool/cwl"))
            runtime.append("--read-only=true")
            runtime.append("--net=none")
            euid = docker_vm_uid() or os.geteuid()
            runtime.append("--user=%s" % (euid))

            if rm_container:
                runtime.append("--rm")

            runtime.append("--env=TMPDIR=/tmp")

            for t,v in self.environment.items():
                runtime.append("--env=%s=%s" % (t, v))

            runtime.append(img_id)
        else:
            env = self.environment
            if not os.path.exists(self.tmpdir):
                os.makedirs(self.tmpdir)
            env["TMPDIR"] = self.tmpdir
            vars_to_preserve = kwargs.get("preserve_environment")
            if vars_to_preserve is not None:
                for key, value in os.environ.items():
                    if key in vars_to_preserve and key not in env:
                        env[key] = value

        stdin = None
        stdout = None

        scr, _ = get_feature(self, "ShellCommandRequirement")

        if scr:
            shouldquote = lambda x: False
        else:
            shouldquote = needs_shell_quoting_re.search

        _logger.info("[job %s] %s$ %s%s%s",
                     self.name,
                     self.outdir,
                     " ".join([shellescape.quote(str(arg)) if shouldquote(str(arg)) else str(arg) for arg in (runtime + self.command_line)]),
                     ' < %s' % (self.stdin) if self.stdin else '',
                     ' > %s' % os.path.join(self.outdir, self.stdout) if self.stdout else '')

        if dry_run:
            return (self.outdir, {})

        outputs = {}

        try:
            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    src = self.generatefiles[t]["path"]
                    dst = os.path.join(self.outdir, t)
                    if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir:
                        _logger.debug("symlinking %s to %s", dst, src)
                        os.symlink(src, dst)
                else:
                    with open(os.path.join(self.outdir, t), "w") as f:
                        f.write(self.generatefiles[t])

            if self.stdin:
                stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
            else:
                stdin = subprocess.PIPE

            if self.stdout:
                absout = os.path.join(self.outdir, self.stdout)
                dn = os.path.dirname(absout)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                stdout = open(absout, "wb")
            else:
                stdout = sys.stderr

            sp = subprocess.Popen([str(x) for x in runtime + self.command_line],
                                  shell=False,
                                  close_fds=True,
                                  stdin=stdin,
                                  stdout=stdout,
                                  env=env,
                                  cwd=self.outdir)

            if stdin == subprocess.PIPE:
                sp.stdin.close()

            rcode = sp.wait()

            if stdin != subprocess.PIPE:
                stdin.close()

            if stdout is not sys.stderr:
                stdout.close()

            if self.successCodes and rcode in self.successCodes:
                processStatus = "success"
            elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
                processStatus = "temporaryFail"
            elif self.permanentFailCodes and rcode in self.permanentFailCodes:
                processStatus = "permanentFail"
            elif rcode == 0:
                processStatus = "success"
            else:
                processStatus = "permanentFail"

            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    src = self.generatefiles[t]["path"]
                    dst = os.path.join(self.outdir, t)
                    if os.path.dirname(self.pathmapper.reversemap(src)[1]) != self.outdir:
                        os.remove(dst)
                        os.symlink(self.pathmapper.reversemap(src)[1], dst)

            outputs = self.collect_outputs(self.outdir)

        except OSError as e:
            if e.errno == 2:
                if runtime:
                    _logger.error("'%s' not found", runtime[0])
                else:
                    _logger.error("'%s' not found", self.command_line[0])
            else:
                _logger.exception("Exception while running job")
            processStatus = "permanentFail"
        except WorkflowException as e:
            _logger.error("Error while running job: %s" % e)
            processStatus = "permanentFail"
        except Exception as e:
            _logger.exception("Exception while running job")
            processStatus = "permanentFail"

        if processStatus != "success":
            _logger.warn("[job %s] completed %s", self.name, processStatus)
        else:
            _logger.debug("[job %s] completed %s", self.name, processStatus)
        _logger.debug("[job %s] %s", self.name, json.dumps(outputs, indent=4))

        self.output_callback(outputs, processStatus)

        if rm_tmpdir:
            _logger.debug("[job %s] Removing temporary directory %s", self.name, self.tmpdir)
            shutil.rmtree(self.tmpdir, True)

        if move_outputs and empty_subtree(self.outdir):
            _logger.debug("[job %s] Removing empty output directory %s", self.name, self.outdir)
            shutil.rmtree(self.outdir, True)
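
When an image is available, this version of run assembles a locked-down docker invocation: inputs are mounted read-only, the output and temporary directories land at fixed container paths, and the container runs read-only, without networking, as a non-root user. An illustrative reconstruction of the resulting command prefix (the paths, uid, and image below are made up):

# Illustrative only: the shape of the runtime prefix built above.
runtime = ["docker", "run", "-i",
           "--volume=/data/input.txt:/var/lib/cwl/job1/input.txt:ro",
           "--volume=/home/user/out:/var/spool/cwl:rw",
           "--volume=/tmp/job_tmp:/tmp:rw",
           "--workdir=/var/spool/cwl",
           "--read-only=true",
           "--net=none",
           "--user=1000",
           "--rm",
           "--env=TMPDIR=/tmp",
           "debian:8"]
command_line = ["wc", "-l", "input.txt"]
print(" ".join(runtime + command_line))
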
Example #6
    def try_make_job(self, step, basedir, **kwargs):
        inputobj = {}

        _logger.debug("Try to make job %s", step.id)

        (scatterSpec, _) = get_feature("Scatter", requirements=step.tool.get("requirements"), hints=step.tool.get("hints"))
        if scatterSpec:
            inputparms = copy.deepcopy(step.tool["inputs"])
            outputparms = copy.deepcopy(step.tool["outputs"])
            scatter = aslist(scatterSpec["scatter"])

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" % s)

                inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

            if scatterSpec.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
        else:
            inputparms = step.tool["inputs"]
            outputparms = step.tool["outputs"]

        for inp in inputparms:
            _logger.debug("Trying input %s", inp)
            iid = inp["id"]
            if "connect" in inp:
                connections = inp["connect"]
                for connection in aslist(connections):
                    src = connection["source"]
                    if src in self.state and self.state[src] is not None:
                        if not self.match_types(inp["type"], self.state[src], inp["id"], inputobj):
                            raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, self.state[src].parameter["type"], inp["id"], inp["type"]))
                    elif src not in self.state:
                        raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
                    else:
                        return
            elif "default" in inp:
                inputobj[iid] = inp["default"]
            else:
                raise WorkflowException("Value for %s not specified" % (inp["id"]))

        _logger.info("Creating job with input: %s", pprint.pformat(inputobj))

        callback = functools.partial(self.receive_output, step, outputparms)

        if scatterSpec:
            method = scatterSpec.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
        else:
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        for j in jobs:
            yield j
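
The scatter handling above rewrites parameter schemas in place: each scattered input type is wrapped in an array schema, and every output is wrapped once per nesting level. A minimal sketch of that promotion:

# Illustrative only: array promotion for scattered inputs and outputs.
inp = {"id": "#step/pattern", "type": "string"}
inp["type"] = {"type": "array", "items": inp["type"]}
print(inp["type"])  # {'type': 'array', 'items': 'string'}

out = {"id": "#step/hits", "type": "File"}
nesting = 2  # e.g. nested_crossproduct over two scatter inputs
for _ in range(nesting):
    out["type"] = {"type": "array", "items": out["type"]}
print(out["type"])  # an array schema nested two levels deep
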
Example #7
    def run(self,
            dry_run=False,
            pull_image=True,
            rm_container=True,
            rm_tmpdir=True,
            move_outputs=True,
            **kwargs):
        if not os.path.exists(self.outdir):
            os.makedirs(self.outdir)

        #with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
        #    json.dump(self.joborder, fp)

        runtime = []
        env = {"TMPDIR": self.tmpdir}

        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")

        for f in self.pathmapper.files():
            if not os.path.exists(self.pathmapper.mapper(f)[0]):
                raise WorkflowException("Required input file %s not found" %
                                        self.pathmapper.mapper(f)[0])

        img_id = None
        if docker_req and kwargs.get("use_container") is not False:
            env = os.environ
            img_id = docker.get_from_requirements(docker_req, docker_is_req,
                                                  pull_image)

        if docker_is_req and img_id is None:
            raise WorkflowException(
                "Docker is required for running this tool.")

        if img_id:
            runtime = ["docker", "run", "-i"]
            for src in self.pathmapper.files():
                vol = self.pathmapper.mapper(src)
                runtime.append("--volume=%s:%s:ro" % vol)
            runtime.append("--volume=%s:%s:rw" %
                           (os.path.abspath(self.outdir), "/tmp/job_output"))
            runtime.append("--volume=%s:%s:rw" %
                           (os.path.abspath(self.tmpdir), "/tmp/job_tmp"))
            runtime.append("--workdir=%s" % ("/tmp/job_output"))
            euid = docker_vm_uid() or os.geteuid()
            runtime.append("--user=%s" % (euid))

            if rm_container:
                runtime.append("--rm")

            runtime.append("--env=TMPDIR=/tmp/job_tmp")

            for t, v in self.environment.items():
                runtime.append("--env=%s=%s" % (t, v))

            runtime.append(img_id)
        else:
            env = self.environment
            if not os.path.exists(self.tmpdir):
                os.makedirs(self.tmpdir)
            env["TMPDIR"] = self.tmpdir

        stdin = None
        stdout = None

        _logger.info(
            "[job %s] %s$ %s%s%s", id(self), self.outdir, " ".join([
                shellescape.quote(arg) if needs_shell_quoting(arg) else arg
                for arg in (runtime + self.command_line)
            ]), ' < %s' % (self.stdin) if self.stdin else '', ' > %s' %
            os.path.join(self.outdir, self.stdout) if self.stdout else '')

        if dry_run:
            return (self.outdir, {})

        outputs = {}

        try:
            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    os.symlink(self.generatefiles[t]["path"],
                               os.path.join(self.outdir, t))
                else:
                    with open(os.path.join(self.outdir, t), "w") as f:
                        f.write(self.generatefiles[t])

            if self.stdin:
                stdin = open(self.pathmapper.mapper(self.stdin)[0], "rb")
            else:
                stdin = subprocess.PIPE

            if self.stdout:
                absout = os.path.join(self.outdir, self.stdout)
                dn = os.path.dirname(absout)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                stdout = open(absout, "wb")
            else:
                stdout = sys.stderr

            sp = subprocess.Popen(runtime + self.command_line,
                                  shell=False,
                                  close_fds=True,
                                  stdin=stdin,
                                  stdout=stdout,
                                  env=env,
                                  cwd=self.outdir)

            if stdin == subprocess.PIPE:
                sp.stdin.close()

            rcode = sp.wait()

            if stdin != subprocess.PIPE:
                stdin.close()

            if stdout is not sys.stderr:
                stdout.close()

            if self.successCodes and rcode in self.successCodes:
                processStatus = "success"
            elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
                processStatus = "temporaryFail"
            elif self.permanentFailCodes and rcode in self.permanentFailCodes:
                processStatus = "permanentFail"
            elif rcode == 0:
                processStatus = "success"
            else:
                processStatus = "permanentFail"

            for t in self.generatefiles:
                if isinstance(self.generatefiles[t], dict):
                    os.remove(os.path.join(self.outdir, t))
                    os.symlink(
                        self.pathmapper.reversemap(
                            self.generatefiles[t]["path"])[1],
                        os.path.join(self.outdir, t))

            outputs = self.collect_outputs(self.outdir)

        except OSError as e:
            if e.errno == 2:
                if runtime:
                    _logger.error("'%s' not found", runtime[0])
                else:
                    _logger.error("'%s' not found", self.command_line[0])
            else:
                _logger.exception("Exception while running job")
            processStatus = "permanentFail"
        except WorkflowException as e:
            _logger.error("Error while running job: %s" % e)
            processStatus = "permanentFail"
        except Exception as e:
            _logger.exception("Exception while running job")
            processStatus = "permanentFail"

        if processStatus != "success":
            _logger.warn("[job %s] completed %s", id(self), processStatus)
        else:
            _logger.debug("[job %s] completed %s", id(self), processStatus)
        _logger.debug("[job %s] %s", id(self), json.dumps(outputs, indent=4))

        self.output_callback(outputs, processStatus)

        if rm_tmpdir:
            _logger.debug("[job %s] Removing temporary directory %s", id(self),
                          self.tmpdir)
            shutil.rmtree(self.tmpdir, True)

        if move_outputs and empty_subtree(self.outdir):
            _logger.debug("[job %s] Removing empty output directory %s",
                          id(self), self.outdir)
            shutil.rmtree(self.outdir, True)
Example #8
def exeval(ex, jobinput, requirements, outdir, tmpdir, context, pull_image):
    if ex["engine"] == "cwl:JsonPointer":
        try:
            obj = {
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir
            }
            return avro_ld.ref_resolver.resolve_json_pointer(obj, ex["script"])
        except ValueError as v:
            raise WorkflowException("%s in %s" % (v, obj))

    for r in reversed(requirements):
        if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex[
                "engine"]:
            runtime = []

            class DR(object):
                pass

            dr = DR()
            dr.requirements = r.get("requirements", [])
            dr.hints = r.get("hints", [])

            (docker_req,
             docker_is_req) = process.get_feature(dr, "DockerRequirement")
            img_id = None
            if docker_req:
                img_id = docker.get_from_requirements(docker_req,
                                                      docker_is_req,
                                                      pull_image)
            if img_id:
                runtime = ["docker", "run", "-i", "--rm", img_id]

            exdefs = []
            for exdef in r.get("engineConfig", []):
                if isinstance(exdef, dict) and "ref" in exdef:
                    with open(exdef["ref"][7:]) as f:
                        exdefs.append(f.read())
                elif isinstance(exdef, basestring):
                    exdefs.append(exdef)

            inp = {
                "script": ex["script"],
                "engineConfig": exdefs,
                "job": jobinput,
                "context": context,
                "outdir": outdir,
                "tmpdir": tmpdir,
            }

            _logger.debug("Invoking expression engine %s with %s",
                          runtime + aslist(r["engineCommand"]),
                          json.dumps(inp, indent=4))

            sp = subprocess.Popen(runtime + aslist(r["engineCommand"]),
                                  shell=False,
                                  close_fds=True,
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE)

            (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
            if sp.returncode != 0:
                raise WorkflowException(
                    "Expression engine returned non-zero exit code on evaluation of\n%s"
                    % json.dumps(inp, indent=4))

            return json.loads(stdoutdata)

    raise WorkflowException("Unknown expression engine '%s'" % ex["engine"])
Example #9
    def try_make_job(self, step, basedir, **kwargs):
        inputobj = {}

        (scatterSpec, _) = get_feature("Scatter", requirements=step.tool.get("requirements"), hints=step.tool.get("hints"))
        if scatterSpec:
            inputparms = copy.deepcopy(step.tool["inputs"])
            outputparms = copy.deepcopy(step.tool["outputs"])
            scatter = aslist(scatterSpec["scatter"])

            inp_map = {i["id"]: i for i in inputparms}
            for s in scatter:
                if s not in inp_map:
                    raise WorkflowException("Invalid Scatter parameter '%s'" % s)

                inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]}

            if scatterSpec.get("scatterMethod") == "nested_crossproduct":
                nesting = len(scatter)
            else:
                nesting = 1

            for r in xrange(0, nesting):
                for i in outputparms:
                    i["type"] = {"type": "array", "items": i["type"]}
        else:
            inputparms = step.tool["inputs"]
            outputparms = step.tool["outputs"]

        for inp in inputparms:
            _logger.debug("Trying input %s", inp)
            iid = idk(inp["id"])
            if "connect" in inp:
                connections = inp["connect"]
                is_array = isinstance(inp["type"], dict) and inp["type"]["type"] == "array"
                for connection in aslist(connections):
                    src = idk(connection["source"])
                    if src in self.state and self.state[src] is not None:
                        if self.state[src].parameter["type"] == inp["type"]:
                            # source and input types are the same
                            if is_array and iid in inputobj:
                                # there's already a value in the input object, so extend the existing array
                                inputobj[iid].extend(self.state[src].value)
                            else:
                                # simply assign the value from state to input
                                inputobj[iid] = copy.deepcopy(self.state[src].value)
                        elif is_array and self.state[src].parameter["type"] == inp["type"]["items"]:
                            # source type is the item type on the input array
                            # promote single item to array entry
                            if iid in inputobj:
                                inputobj[iid].append(self.state[src].value)
                            else:
                                inputobj[iid] = [self.state[src].value]
                        else:
                            raise WorkflowException("Type mismatch between source '%s' (%s) and sink '%s' (%s)" % (src, self.state[src].parameter["type"], idk(inp["id"]), inp["type"]))
                    elif src not in self.state:
                        raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"]))
                    else:
                        return
            elif "default" in inp:
                inputobj[iid] = inp["default"]
            else:
                raise WorkflowException("Value for %s not specified" % (inp["id"]))

        _logger.info("Creating job with input: %s", inputobj)

        callback = functools.partial(self.receive_output, step, outputparms)

        if scatterSpec:
            method = scatterSpec.get("scatterMethod")
            if method is None and len(scatter) != 1:
                raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs")

            if method == "dotproduct" or method is None:
                jobs = dotproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "nested_crossproduct":
                jobs = nested_crossproduct_scatter(step, inputobj, basedir, scatter, callback, **kwargs)
            elif method == "flat_crossproduct":
                jobs = flat_crossproduct_scatter(step, inputobj, basedir, scatter, callback, 0, **kwargs)
        else:
            jobs = step.job(inputobj, basedir, callback, **kwargs)

        for j in jobs:
            yield j
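
This version of try_make_job also merges links whose source type matches the sink's array item type, promoting single values to array entries and extending the array when several sources feed the same input. A sketch of the two merge cases (names illustrative):

# Illustrative only: promotion and merge behaviour for an array-typed sink.
inputobj = {}
iid = "files"

def link(value, promote):
    # promote=True: the source yields a single item for an array sink.
    incoming = [value] if promote else list(value)
    if iid in inputobj:
        inputobj[iid].extend(incoming)
    else:
        inputobj[iid] = incoming

link({"class": "File", "path": "/data/a.txt"}, promote=True)
link([{"class": "File", "path": "/data/b.txt"}], promote=False)
print([f["path"] for f in inputobj[iid]])  # ['/data/a.txt', '/data/b.txt']
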
Example #10
    def run(self, outdir, dry_run=False, pull_image=True, rm_container=True):

        with open(os.path.join(outdir, "cwl.input.json"), "w") as fp:
            json.dump(self.joborder, fp)

        runtime = []
        env = {}

        (docker_req, docker_is_req) = get_feature("DockerRequirement", requirements=self.requirements, hints=self.hints)

        for f in self.pathmapper.files():
            if not os.path.exists(self.pathmapper.mapper(f)[0]):
                raise WorkflowException("Required input file %s not found" % self.pathmapper.mapper(f)[0])

        if docker_req:
            img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
            runtime = ["docker", "run", "-i"]
            for d in self.pathmapper.dirs:
                runtime.append("--volume=%s:%s:ro" % (os.path.abspath(d), self.pathmapper.dirs[d]))
            runtime.append("--volume=%s:%s:rw" % (os.path.abspath(outdir), "/tmp/job_output"))
            runtime.append("--workdir=%s" % ("/tmp/job_output"))
            runtime.append("--user=%s" % (os.geteuid()))
            if rm_container:
                runtime.append("--rm")
            for t,v in self.environment.items():
                runtime.append("--env=%s=%s" % (t, v))
            runtime.append(img_id)
        else:
            env = self.environment

        stdin = None
        stdout = None

        _logger.info("%s%s%s",
                     " ".join(runtime + self.command_line),
                     ' < %s' % (self.stdin) if self.stdin else '',
                     ' > %s' % (self.stdout) if self.stdout else '')

        if dry_run:
            return (outdir, {})

        os.chdir(outdir)

        for t in self.generatefiles:
            with open(os.path.join(outdir, t), "w") as f:
                f.write(self.generatefiles[t])

        if self.stdin:
            stdin = open(self.stdin, "rb")
        else:
            stdin = subprocess.PIPE

        if self.stdout:
            dn = os.path.dirname(self.stdout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout = open(self.stdout, "wb")
        else:
            stdout = sys.stderr

        sp = subprocess.Popen(runtime + self.command_line,
                              shell=False,
                              close_fds=True,
                              stdin=stdin,
                              stdout=stdout,
                              env=env,
                              cwd=outdir)

        if stdin == subprocess.PIPE:
            sp.stdin.close()

        rcode = sp.wait()

        if stdin != subprocess.PIPE:
            stdin.close()

        if stdout is not sys.stderr:
            stdout.close()

        outputs = self.collect_outputs(outdir)

        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"

        self.output_callback(outputs, processStatus)
Example #11
    def job(self, joborder, input_basedir, output_callback, use_container=True, **kwargs):
        builder = self._init_job(joborder, input_basedir, **kwargs)

        if self.tool["baseCommand"]:
            for n, b in enumerate(aslist(self.tool["baseCommand"])):
                builder.bindings.append({
                    "position": [-1000000, n],
                    "valueFrom": b
                })

        if self.tool.get("arguments"):
            for i, a in enumerate(self.tool["arguments"]):
                if isinstance(a, dict):
                    a = copy.copy(a)
                    if a.get("position"):
                        a["position"] = [a["position"], i]
                    else:
                        a["position"] = [0, i]
                    a["do_eval"] = a["valueFrom"]
                    a["valueFrom"] = None
                    builder.bindings.append(a)
                else:
                    builder.bindings.append({
                        "position": [0, i],
                        "valueFrom": a
                    })

        builder.bindings.sort(key=lambda a: a["position"])

        reffiles = [f["path"] for f in builder.files]

        j = CommandLineJob()
        j.joborder = builder.job
        j.stdin = None
        j.stdout = None
        j.successCodes = self.tool.get("successCodes")
        j.temporaryFailCodes = self.tool.get("temporaryFailCodes")
        j.permanentFailCodes = self.tool.get("permanentFailCodes")
        j.requirements = self.requirements
        j.hints = self.hints

        builder.pathmapper = None

        if self.tool.get("stdin"):
            j.stdin = expression.do_eval(self.tool["stdin"], builder.job, self.requirements, self.docpath, j.stdin)
            if isinstance(j.stdin, dict) and "ref" in j.stdin:
                j.stdin = builder.job[j.stdin["ref"][1:]]["path"]
            reffiles.append(j.stdin)

        if self.tool.get("stdout"):
            j.stdout = expression.do_eval(self.tool["stdout"], builder.job, j.requirements, self.docpath)
            if os.path.isabs(j.stdout):
                raise validate.ValidationException("stdout must be a relative path")

        dockerReq, _ = get_feature("DockerRequirement", requirements=self.requirements, hints=self.hints)
        if dockerReq and use_container:
            builder.pathmapper = DockerPathMapper(reffiles, input_basedir)

        if builder.pathmapper is None:
            builder.pathmapper = PathMapper(reffiles, input_basedir)

        for f in builder.files:
            f["path"] = builder.pathmapper.mapper(f["path"])[1]

        _logger.debug("Bindings is %s", pprint.pformat(builder.bindings))
        _logger.debug("Files is %s", pprint.pformat({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}))

        builder.requirements = j.requirements

        j.generatefiles = {}
        createFiles, _ = get_feature("CreateFileRequirement", requirements=self.requirements, hints=self.hints)
        if createFiles:
            for t in createFiles["fileDef"]:
                j.generatefiles[t["filename"]] = expression.do_eval(t["fileContent"], builder.job, j.requirements, self.docpath)

        j.environment = {}
        evr, _ = get_feature("EnvVarRequirement", requirements=self.requirements, hints=self.hints)
        if evr:
            for t in evr["envDef"]:
                j.environment[t["envName"]] = expression.do_eval(t["envValue"], builder.job, j.requirements, self.docpath)

        j.command_line = flatten(map(builder.generate_arg, builder.bindings))

        if j.stdin:
            j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(input_basedir, j.stdin)

        j.pathmapper = builder.pathmapper
        j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder)
        j.output_callback = output_callback

        yield j
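
The command line is assembled by sorting bindings on their position key: baseCommand entries receive position [-1000000, n] so they always sort before arguments, which default to [0, i] and therefore keep their declaration order. A small sketch of that ordering:

# Illustrative only: how the [major, minor] position keys order the argv.
bindings = [
    {"position": [0, 0], "valueFrom": "--count"},
    {"position": [-1000000, 0], "valueFrom": "grep"},
    {"position": [0, 1], "valueFrom": "pattern.txt"},
]
bindings.sort(key=lambda b: b["position"])
print([b["valueFrom"] for b in bindings])  # ['grep', '--count', 'pattern.txt']
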