Exemplo n.º 1
0
class TESTask(JobBase):
    """Run a single CWL job as a task on a TES service.

    A task message is assembled from the job's command line, inputs and
    requirements, submitted through ``tes.HTTPClient``, polled until it
    reaches a terminal state, and the outputs are then collected from
    ``self.outdir`` and handed to ``self.output_callback``.
    """

    JobOrderType = Dict[Text, Union[Dict[Text, Any], List, Text]]

    def __init__(
            self,
            builder,  # type: Builder
            joborder,  # type: JobOrderType
            make_path_mapper,  # type: Callable[..., PathMapper]
            requirements,  # type: List[Dict[Text, Text]]
            hints,  # type: List[Dict[Text, Text]]
            name,  # type: Text
            runtime_context,
            url,
            spec):
        """Set up the job and the TES client.

        ``builder``, ``joborder``, ``make_path_mapper``, ``requirements``,
        ``hints`` and ``name`` are forwarded unchanged to ``JobBase``.

        :param runtime_context: runtime settings; ``basedir``,
            ``default_container``, the environment-preservation options
            and ``rm_tmpdir`` are read from it.
        :param url: address passed to ``tes.HTTPClient``.
        :param spec: mapping describing the tool; ``requirements``,
            ``hints``, ``doc`` and ``id`` are read from it.
        """
        super(TESTask, self).__init__(builder, joborder, make_path_mapper,
                                      requirements, hints, name)
        self.runtime_context = runtime_context
        self.spec = spec
        self.outputs = None
        self.inplace_update = False
        if runtime_context.basedir is not None:
            self.basedir = runtime_context.basedir
        else:
            self.basedir = os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)

        self.id = None
        self.docker_workdir = '/var/spool/cwl'
        self.state = "UNKNOWN"
        self.poll_interval = 1
        self.poll_retries = 10
        self.client = tes.HTTPClient(url)

    def get_container(self):
        """Return the container image the task should run in.

        Preference order: an image named by a ``DockerRequirement`` in the
        tool's requirements or hints (``dockerPull`` before
        ``dockerImageId``), then ``runtime_context.default_container``,
        then ``"python:2.7"``.
        """
        container = "python:2.7"
        if self.runtime_context.default_container:
            container = self.runtime_context.default_container

        reqs = self.spec.get("requirements", []) + self.spec.get("hints", [])
        for req in reqs:
            if req.get("class", "NA") == "DockerRequirement":
                # A DockerRequirement that names no image keeps whatever was
                # selected so far (e.g. the configured default container)
                # instead of silently reverting to the hard-coded image.
                container = req.get("dockerPull",
                                    req.get("dockerImageId", container))
        return container

    def create_input(self, name, d):
        """Build a ``tes.Input`` for the File/Directory mapping ``d``.

        When ``d`` has ``contents`` they are sent inline; otherwise the
        input is referenced by ``d["location"]``.

        :param name: name given to the TES input.
        :param d: mapping with at least ``path`` and ``class`` and either
            ``contents`` or ``location``.
        """
        kwargs = {
            "name": name,
            "description": "cwl_input:%s" % (name),
            "path": d["path"],
            "type": d["class"].upper(),
        }
        if "contents" in d:
            kwargs["content"] = d["contents"]
        else:
            kwargs["url"] = d["location"]
        return tes.Input(**kwargs)

    def parse_job_order(self, k, v, inputs):
        """Append a ``tes.Input`` to ``inputs`` for every file found in ``v``.

        A mapping that has ``location``, ``path`` and ``class`` is treated
        as a staged object and added together with its ``secondaryFiles``.
        Any other mapping or list is searched recursively; scalar values
        are ignored, so staged objects that follow a scalar member are
        still found.

        :param k: name of the job-order entry (used to name the input).
        :param v: value of the job-order entry.
        :param inputs: list that is extended in place.
        :return: ``inputs``.
        """
        if isinstance(v, dict):
            if all(key in v for key in ("location", "path", "class")):
                inputs.append(self.create_input(k, v))
                for secondary in v.get("secondaryFiles") or []:
                    self.parse_job_order(secondary["basename"], secondary,
                                         inputs)
            else:
                for sub_key, sub_value in v.items():
                    self.parse_job_order(sub_key, sub_value, inputs)

        elif isinstance(v, list):
            for index, element in enumerate(v):
                self.parse_job_order("%s[%s]" % (k, index), element, inputs)

        return inputs

    def parse_listing(self, listing, inputs):
        """Append a ``tes.Input`` for every entry of ``listing``.

        Entries with literal ``contents`` are first written to a file in
        ``self.tmpdir``; other entries are referenced by ``location``.
        Every entry is placed directly under ``self.docker_workdir``.

        :param listing: iterable of mappings with ``basename`` and
            ``class`` and either ``contents`` or ``location``.
        :param inputs: list that is extended in place.
        :return: ``inputs``.
        :raises UnsupportedRequirement: if an entry asks to be writable.
        """
        for item in listing:
            # Only a truthy flag is a real request for a writable input; a
            # present-but-false ``writable`` key is harmless.
            if item.get("writable"):
                raise UnsupportedRequirement(
                    "The TES spec does not allow for writable inputs")

            if "contents" in item:
                loc = self.fs_access.join(self.tmpdir, item["basename"])
                contents = item["contents"]
                # The file is opened in binary mode, so text has to be
                # encoded before it is written.
                if not isinstance(contents, bytes):
                    contents = contents.encode("utf-8")
                with self.fs_access.open(loc, "wb") as gen:
                    gen.write(contents)
            else:
                loc = item["location"]

            parameter = tes.Input(
                name=item["basename"],
                description="InitialWorkDirRequirement:cwl_input:%s" %
                (item["basename"]),
                url=file_uri(loc),
                path=self.fs_access.join(self.docker_workdir,
                                         item["basename"]),
                type=item["class"].upper())
            inputs.append(parameter)

        return inputs

    def get_inputs(self):
        """Return all TES inputs: job-order files plus generated files."""
        inputs = []

        # find all primary and secondary input files
        for k, v in self.joborder.items():
            self.parse_job_order(k, v, inputs)

        # manage InitialWorkDirRequirement
        self.parse_listing(self.generatefiles["listing"], inputs)

        return inputs

    def get_envvars(self):
        """Return the environment for the executor.

        ``self.environment`` is updated in place: variables selected by
        the runtime context are copied from ``os.environ`` unless already
        set, and ``HOME`` / ``TMPDIR`` are pointed at ``self.outdir`` and
        ``self.tmpdir``.
        """
        env = self.environment
        vars_to_preserve = self.runtime_context.preserve_environment
        if self.runtime_context.preserve_entire_environment:
            vars_to_preserve = os.environ
        if vars_to_preserve is not None:
            for key, value in os.environ.items():
                if key in vars_to_preserve and key not in env:
                    # On Windows, subprocess env can't handle unicode.
                    env[key] = str(value) if onWindows() else value
        env["HOME"] = str(self.outdir) if onWindows() else self.outdir
        env["TMPDIR"] = str(self.tmpdir) if onWindows() else self.tmpdir
        return env

    def create_task_msg(self):
        """Assemble the ``tes.Task`` describing this job.

        Outputs always include the working directory, plus stdout/stderr
        when they are redirected and ``dockerOutputDirectory`` when a
        ``DockerRequirement`` sets it. Resource requests are taken from a
        ``ResourceRequirement`` if one is present; fields it does not set
        are left as ``None``.

        :raises UnsupportedRequirement: if a resource value is a string
            instead of a number.
        """
        input_parameters = self.get_inputs()
        output_parameters = []

        for stream_name, target in (("stdout", self.stdout),
                                    ("stderr", self.stderr)):
            if target is not None:
                output_parameters.append(
                    tes.Output(name=stream_name,
                               url=self.output2url(target),
                               path=self.output2path(target)))

        output_parameters.append(
            tes.Output(name="workdir",
                       url=self.output2url(""),
                       path=self.docker_workdir,
                       type="DIRECTORY"))

        container = self.get_container()
        cpus = None
        ram = None
        disk = None

        for req in self.builder.requirements:
            req_class = req.get("class", "NA")
            if req_class == "ResourceRequirement":
                cpus = req.get("coresMin", req.get("coresMax"))
                ram = req.get("ramMin", req.get("ramMax"))
                disk = req.get("outdirMin", req.get("outdirMax"))

                # A string value is presumably an unevaluated expression,
                # which cannot be forwarded. A field that is simply absent
                # is a legitimate static request and stays None.
                if any(isinstance(value, (str, Text))
                       for value in (cpus, ram, disk)):
                    raise UnsupportedRequirement(
                        "cwl-tes does not support dynamic resource requests")

                # 953.674 is the number of mebibytes in 10**9 bytes; the
                # results feed the *_gb fields of tes.Resources.
                if ram is not None:
                    ram = ram / 953.674
                if disk is not None:
                    disk = disk / 953.674
            elif req_class == "DockerRequirement":
                docker_outdir = req.get("dockerOutputDirectory")
                if docker_outdir is not None:
                    output_parameters.append(
                        tes.Output(name="dockerOutputDirectory",
                                   url=self.output2url(""),
                                   path=docker_outdir,
                                   type="DIRECTORY"))

        executor = tes.Executor(
            command=self.command_line,
            image=container,
            workdir=self.docker_workdir,
            stdout=self.output2path(self.stdout),
            stderr=self.output2path(self.stderr),
            stdin=self.stdin,
            env=self.get_envvars())

        return tes.Task(name=self.name,
                        description=self.spec.get("doc", ""),
                        executors=[executor],
                        inputs=input_parameters,
                        outputs=output_parameters,
                        resources=tes.Resources(cpu_cores=cpus,
                                                ram_gb=ram,
                                                disk_gb=disk),
                        tags={"CWLDocumentId": self.spec.get("id")})

    def run(self, runtimeContext):
        """Submit the task, wait for it to finish and report the result.

        ``self.output_callback`` is called exactly once: with the
        collected outputs and ``"success"`` when the task ended in state
        ``COMPLETE`` and outputs could be collected, otherwise with
        ``"permanentFail"``. Temporary directories are cleaned up
        afterwards in every case.

        :param runtimeContext: accepted for interface compatibility; not
            used by this method.
        :raises WorkflowException: if the task cannot be submitted.
        """
        log.debug("[job %s] self.__dict__ in run() ----------------------",
                  self.name)
        log.debug(pformat(self.__dict__))

        task = self.create_task_msg()

        log.info("[job %s] CREATED TASK MSG----------------------", self.name)
        log.info(pformat(task))

        try:
            self.id = self.client.create_task(task)
            log.info("[job %s] SUBMITTED TASK ----------------------",
                     self.name)
            log.info("[job %s] task id: %s ", self.name, self.id)
        except Exception as e:
            log.error("[job %s] Failed to submit task to TES service:\n%s",
                      self.name, e)
            raise WorkflowException(e)

        max_tries = self.poll_retries
        current_try = 1
        while not self.is_done():
            # The wait only grows after polling errors: current_try is
            # incremented on failure and never reset.
            delay = 1.5 * current_try**2
            time.sleep(
                random.randint(round(delay - 0.5 * delay),
                               round(delay + 0.5 * delay)))
            log.debug("[job %s] POLLING %s", self.name, pformat(self.id))
            try:
                task = self.client.get_task(self.id, "MINIMAL")
                self.state = task.state
            except Exception as e:
                log.error("[job %s] POLLING ERROR %s", self.name, e)
                if current_try <= max_tries:
                    current_try += 1
                    continue
                log.error("[job %s] MAX POLLING RETRIES EXCEEDED",
                          self.name)
                break

        # Assume failure until outputs were collected from a task that
        # really finished in state COMPLETE.
        process_status = "permanentFail"
        callback_outputs = {}
        try:
            outputs = self.collect_outputs(self.outdir)
            cleaned_outputs = {}
            for k, v in outputs.items():
                if isinstance(k, bytes):
                    k = k.decode("utf8")
                if isinstance(v, bytes):
                    v = v.decode("utf8")
                cleaned_outputs[k] = v
            self.outputs = cleaned_outputs
            callback_outputs = cleaned_outputs
            if self.state == "COMPLETE":
                process_status = "success"
            else:
                log.error("[job %s] job error:\n%s", self.name, self.state)
        except Exception as e:
            log.error("[job %s] job error:\n%s", self.name, e)
        finally:
            try:
                self.output_callback(callback_outputs, process_status)
            finally:
                if self.outputs is not None:
                    log.info("[job %s] OUTPUTS ------------------",
                             self.name)
                    log.info(pformat(self.outputs))
                self.cleanup(self.runtime_context.rm_tmpdir)
        return

    def is_done(self):
        """Return True once ``self.state`` is a terminal TES state.

        For terminal states other than ``COMPLETE`` the full task record
        is fetched so that its logs can be written to the error log; a
        failure to fetch them is logged and otherwise ignored.
        """
        terminal_states = (
            "COMPLETE", "CANCELED", "EXECUTOR_ERROR", "SYSTEM_ERROR"
        )
        if self.state not in terminal_states:
            return False

        log.info("[job %s] FINAL JOB STATE: %s ------------------",
                 self.name, self.state)
        if self.state != "COMPLETE":
            log.error("[job %s] task id: %s", self.name, self.id)
            try:
                task_logs = self.client.get_task(self.id, "FULL").logs
            except Exception as e:
                # Diagnostics only: never let this abort run().
                log.error("[job %s] failed to retrieve task logs: %s",
                          self.name, e)
            else:
                log.error("[job %s] logs: %s", self.name, task_logs)
        return True

    def cleanup(self, rm_tmpdir):
        """Remove the staging directory and, optionally, the tmpdir.

        :param rm_tmpdir: when true, ``self.tmpdir`` is removed as well.
        """
        log.debug("[job %s] STARTING CLEAN UP ------------------", self.name)
        if self.stagedir and os.path.exists(self.stagedir):
            log.debug("[job %s] Removing input staging directory %s",
                      self.name, self.stagedir)
            shutil.rmtree(self.stagedir, True)

        if rm_tmpdir:
            log.debug("[job %s] Removing temporary directory %s", self.name,
                      self.tmpdir)
            shutil.rmtree(self.tmpdir, True)

    def output2url(self, path):
        """Return the file URI in ``self.outdir`` for ``path``'s basename.

        Returns ``None`` when ``path`` is ``None``.
        """
        if path is None:
            return None
        return file_uri(
            self.fs_access.join(self.outdir, os.path.basename(path)))

    def output2path(self, path):
        """Return ``path`` joined onto ``self.docker_workdir``.

        Returns ``None`` when ``path`` is ``None``.
        """
        if path is None:
            return None
        return self.fs_access.join(self.docker_workdir, path)
Exemplo n.º 2
0
class TESTask(JobBase):
    """Run a single CWL job as a task on a TES service.

    A task message is assembled from the job's command line, inputs and
    resource needs, submitted through ``tes.HTTPClient``, polled until it
    reaches a terminal state, and the outputs are then collected either
    from ``self.outdir`` or from ``remote_storage_url`` when one is set.
    """

    JobOrderType = Dict[Text, Union[Dict[Text, Any], List, Text]]

    def __init__(
            self,
            builder,  # type: Builder
            joborder,  # type: JSON
            make_path_mapper,  # type: Callable[..., PathMapper]
            requirements,  # type: List[Dict[Text, Text]]
            hints,  # type: List[Dict[Text, Text]]
            name,  # type: Text
            runtime_context,
            url,
            spec,
            remote_storage_url=None,
            token=None):
        """Set up the job and the TES client.

        ``builder``, ``joborder``, ``make_path_mapper``, ``requirements``,
        ``hints`` and ``name`` are forwarded unchanged to ``JobBase``.

        :param runtime_context: runtime settings; ``basedir``,
            ``default_container``, the environment-preservation options,
            ``workflow_eval_lock`` and ``rm_tmpdir`` are read from it.
        :param url: address passed to ``tes.HTTPClient``.
        :param spec: mapping describing the tool; ``doc`` and ``id`` are
            read from it.
        :param remote_storage_url: optional location used as the target of
            task outputs instead of the local output directory.
        :param token: optional token passed to ``tes.HTTPClient``.
        """
        super(TESTask, self).__init__(builder, joborder, make_path_mapper,
                                      requirements, hints, name)
        self.runtime_context = runtime_context
        self.spec = spec
        self.outputs = None
        self.inplace_update = False
        self.basedir = runtime_context.basedir or os.getcwd()
        self.fs_access = StdFsAccess(self.basedir)

        self.id = None
        self.state = "UNKNOWN"
        self.exit_code = None
        self.poll_interval = 1
        self.poll_retries = 10
        self.client = tes.HTTPClient(url, token=token)
        self.remote_storage_url = remote_storage_url
        self.token = token

    def get_container(self):
        """Return the container image the task should run in.

        Preference order: ``dockerPull`` then ``dockerImageId`` of the
        job's ``DockerRequirement``, then
        ``runtime_context.default_container``, then ``"python:2.7"``.
        """
        fallback = self.runtime_context.default_container or "python:2.7"

        docker_req, _ = self.get_requirement("DockerRequirement")
        if not docker_req:
            return fallback
        return docker_req.get(
            "dockerPull", docker_req.get("dockerImageId", fallback))

    def create_input(self, name, d):
        """Build a ``tes.Input`` for the File/Directory mapping ``d``.

        When ``d`` has ``contents`` they are sent inline; otherwise the
        input is referenced by ``d["location"]``.

        :param name: name given to the TES input.
        :param d: mapping with at least ``path`` and ``class`` and either
            ``contents`` or ``location``.
        """
        kwargs = {
            "name": name,
            "description": "cwl_input:%s" % (name),
            "path": d["path"],
            "type": d["class"].upper(),
        }
        if "contents" in d:
            kwargs["content"] = d["contents"]
        else:
            kwargs["url"] = d["location"]
        return tes.Input(**kwargs)

    def parse_job_order(self, k, v, inputs):
        """Append a ``tes.Input`` to ``inputs`` for every file found in ``v``.

        A mapping that has ``location``, ``path`` and ``class`` is treated
        as a staged object and added together with its ``secondaryFiles``.
        Any other mapping or sequence is searched recursively; scalar
        values are ignored, so staged objects that follow a scalar member
        are still found.

        :param k: name of the job-order entry (used to name the input).
        :param v: value of the job-order entry.
        :param inputs: list that is extended in place.
        :return: ``inputs``.
        """
        if isinstance(v, MutableMapping):
            if all(key in v for key in ("location", "path", "class")):
                inputs.append(self.create_input(k, v))
                for secondary in v.get("secondaryFiles") or []:
                    self.parse_job_order(secondary["basename"], secondary,
                                         inputs)
            else:
                for sub_key, sub_value in v.items():
                    self.parse_job_order(sub_key, sub_value, inputs)

        elif isinstance(v, MutableSequence):
            for index, element in enumerate(v):
                self.parse_job_order("%s[%s]" % (k, index), element, inputs)

        return inputs

    def parse_listing(self, listing, inputs):
        """Append a ``tes.Input`` for every entry of ``listing``.

        Entries with literal ``contents`` are first written to a file in
        ``self.tmpdir``; other entries are referenced by ``location``.
        Locations that already carry a URL scheme are passed through,
        anything else is converted with ``file_uri``. Every entry is
        placed directly under ``self.builder.outdir``.

        :param listing: iterable of mappings with ``basename`` and
            ``class`` and either ``contents`` or ``location``.
        :param inputs: list that is extended in place.
        :return: ``inputs``.
        :raises UnsupportedRequirement: if an entry asks to be writable.
        """
        for item in listing:
            # Only a truthy flag is a real request for a writable input; a
            # present-but-false ``writable`` key is harmless.
            if item.get("writable"):
                raise UnsupportedRequirement(
                    "The TES spec does not allow for writable inputs")

            if "contents" in item:
                loc = self.fs_access.join(self.tmpdir, item["basename"])
                contents = item["contents"]
                # The file is opened in binary mode, so text has to be
                # encoded before it is written.
                if not isinstance(contents, bytes):
                    contents = contents.encode("utf-8")
                with self.fs_access.open(loc, "wb") as gen:
                    gen.write(contents)
            else:
                loc = item["location"]

            if urllib.parse.urlparse(loc).scheme:
                url = loc
            else:
                url = file_uri(loc)
            parameter = tes.Input(
                name=item["basename"],
                description="InitialWorkDirRequirement:cwl_input:%s" %
                (item["basename"]),
                url=url,
                path=self.fs_access.join(self.builder.outdir,
                                         item["basename"]),
                type=item["class"].upper())
            inputs.append(parameter)

        return inputs

    def get_inputs(self):
        """Return all TES inputs: job-order files plus generated files."""
        inputs = []

        # find all primary and secondary input files
        for k, v in self.joborder.items():
            self.parse_job_order(k, v, inputs)

        # manage InitialWorkDirRequirement
        self.parse_listing(self.generatefiles["listing"], inputs)

        return inputs

    def get_envvars(self):
        """Return the environment for the executor.

        ``self.environment`` is updated in place: variables selected by
        the runtime context are copied from ``os.environ`` unless already
        set, and ``HOME`` / ``TMPDIR`` are pointed at the builder's
        ``outdir`` and ``tmpdir``.
        """
        env = self.environment
        vars_to_preserve = self.runtime_context.preserve_environment
        if self.runtime_context.preserve_entire_environment:
            vars_to_preserve = os.environ
        if vars_to_preserve is not None:
            for key, value in os.environ.items():
                if key in vars_to_preserve and key not in env:
                    # On Windows, subprocess env can't handle unicode.
                    env[key] = str(value) if onWindows() else value
        env["HOME"] = str(self.builder.outdir) if onWindows() \
            else self.builder.outdir
        env["TMPDIR"] = str(self.builder.tmpdir) if onWindows() \
            else self.builder.tmpdir
        return env

    def create_task_msg(self):
        """Assemble the ``tes.Task`` describing this job.

        Outputs always include the working directory, plus stdout/stderr
        when they are redirected and ``dockerOutputDirectory`` when the
        ``DockerRequirement`` sets it. Resource requests come from
        ``self.builder.resources``.
        """
        input_parameters = self.get_inputs()
        output_parameters = []

        for stream_name, target in (("stdout", self.stdout),
                                    ("stderr", self.stderr)):
            if target is not None:
                output_parameters.append(
                    tes.Output(name=stream_name,
                               url=self.output2url(target),
                               path=self.output2path(target)))

        output_parameters.append(
            tes.Output(name="workdir",
                       url=self.output2url(""),
                       path=self.builder.outdir,
                       type="DIRECTORY"))

        container = self.get_container()

        # 953.674 is the number of mebibytes in 10**9 bytes; the results
        # feed the *_gb fields of tes.Resources.
        res_reqs = self.builder.resources
        ram = res_reqs['ram'] / 953.674
        disk = (res_reqs['outdirSize'] + res_reqs['tmpdirSize']) / 953.674
        cpus = res_reqs['cores']

        # The requirement is a mapping (get_container reads it with
        # .get()), so the field must be looked up as a key; an attribute
        # test would never find it.
        docker_req, _ = self.get_requirement("DockerRequirement")
        docker_outdir = None
        if docker_req:
            docker_outdir = docker_req.get("dockerOutputDirectory")
        if docker_outdir is not None:
            output_parameters.append(
                tes.Output(name="dockerOutputDirectory",
                           url=self.output2url(""),
                           path=docker_outdir,
                           type="DIRECTORY"))

        executor = tes.Executor(
            command=self.command_line,
            image=container,
            workdir=self.builder.outdir,
            stdout=self.output2path(self.stdout),
            stderr=self.output2path(self.stderr),
            stdin=self.stdin,
            env=self.get_envvars())

        return tes.Task(name=self.name,
                        description=self.spec.get("doc", ""),
                        executors=[executor],
                        inputs=input_parameters,
                        outputs=output_parameters,
                        resources=tes.Resources(cpu_cores=cpus,
                                                ram_gb=ram,
                                                disk_gb=disk),
                        tags={"CWLDocumentId": self.spec.get("id")})

    def run(
        self,
        runtimeContext,  # type: RuntimeContext
        tmpdir_lock=None  # type: Optional[threading.Lock]
    ):  # type: (...) -> None
        """Submit the task, wait for it to finish and report the result.

        ``self.output_callback`` is called once, under
        ``runtime_context.workflow_eval_lock``, with the collected outputs
        (or ``{}``) and either ``"success"`` or ``"permanentFail"``.
        Temporary directories are cleaned up afterwards in every case.

        :param runtimeContext: used to create the file-system accessor
            for ``remote_storage_url``.
        :param tmpdir_lock: accepted for interface compatibility; not
            used by this method.
        :raises WorkflowException: if the task cannot be submitted.
        """
        log.debug("[job %s] self.__dict__ in run() ----------------------",
                  self.name)
        log.debug(pformat(self.__dict__))
        if not self.successCodes:
            self.successCodes = [0]

        task = self.create_task_msg()

        log.info("[job %s] CREATED TASK MSG----------------------", self.name)
        log.info(pformat(task))

        try:
            self.id = self.client.create_task(task)
            log.info("[job %s] SUBMITTED TASK ----------------------",
                     self.name)
            log.info("[job %s] task id: %s ", self.name, self.id)
        except Exception as e:
            log.error("[job %s] Failed to submit task to TES service:\n%s",
                      self.name, e)
            raise WorkflowException(e)

        max_tries = self.poll_retries
        current_try = 1
        self.exit_code = None
        while not self.is_done():
            # The wait only grows after polling errors: current_try is
            # incremented on failure and never reset.
            delay = 1.5 * current_try**2
            time.sleep(
                random.randint(round(delay - 0.5 * delay),
                               round(delay + 0.5 * delay)))
            try:
                task = self.client.get_task(self.id, "MINIMAL")
                self.state = task.state
                log.debug("[job %s] POLLING %s, result: %s", self.name,
                          pformat(self.id), task.state)
            except Exception as e:
                log.error("[job %s] POLLING ERROR %s", self.name, e)
                if current_try <= max_tries:
                    current_try += 1
                    continue
                log.error("[job %s] MAX POLLING RETRIES EXCEEDED",
                          self.name)
                break

        process_status = None
        try:
            if self.state != "COMPLETE" \
                    and self.exit_code not in self.successCodes:
                process_status = "permanentFail"
                log.error("[job %s] job error:\n%s", self.name, self.state)

            if self.remote_storage_url:
                remote_fs_access = runtimeContext.make_fs_access(
                    self.remote_storage_url)
                remote_cwl_output_json = remote_fs_access.exists(
                    remote_fs_access.join(self.remote_storage_url,
                                          "cwl.output.json"))
                original_outdir = self.builder.outdir
                if not remote_cwl_output_json:
                    self.builder.outdir = self.remote_storage_url
                try:
                    outputs = self.collect_outputs(self.remote_storage_url,
                                                   self.exit_code)
                finally:
                    # Put the builder back even when collection fails so
                    # later users of it do not see the remote location.
                    self.builder.outdir = original_outdir
            else:
                outputs = self.collect_outputs(self.outdir, self.exit_code)

            cleaned_outputs = {}
            for k, v in outputs.items():
                if isinstance(k, bytes):
                    k = k.decode("utf8")
                if isinstance(v, bytes):
                    v = v.decode("utf8")
                cleaned_outputs[k] = v
            self.outputs = cleaned_outputs
            if not process_status:
                process_status = "success"
        except Exception as err:
            log.error("[job %s] job error:\n%s", self.name, err)
            if log.isEnabledFor(logging.DEBUG):
                log.exception(err)
            process_status = "permanentFail"
        finally:
            if self.outputs is None:
                self.outputs = {}
            try:
                with self.runtime_context.workflow_eval_lock:
                    self.output_callback(self.outputs, process_status)
                log.info("[job %s] OUTPUTS ------------------", self.name)
                log.info(pformat(self.outputs))
            finally:
                # Clean up even if the callback itself raises.
                self.cleanup(self.runtime_context.rm_tmpdir)
        return

    def is_done(self):
        """Return True once ``self.state`` is a terminal TES state.

        For terminal states other than ``COMPLETE`` the full task record
        is fetched, its logs are written to the error log and, when
        available, the exit code of the last executor log is stored in
        ``self.exit_code``. A failure to fetch the record is logged and
        otherwise ignored.
        """
        terminal_states = (
            "COMPLETE", "CANCELED", "EXECUTOR_ERROR", "SYSTEM_ERROR"
        )
        if self.state not in terminal_states:
            return False

        log.info("[job %s] FINAL JOB STATE: %s ------------------",
                 self.name, self.state)
        if self.state != "COMPLETE":
            log.error("[job %s] task id: %s", self.name, self.id)
            try:
                logs = self.client.get_task(self.id, "FULL").logs
            except Exception as err:
                # Diagnostics only: never let this abort run().
                log.error("[job %s] failed to retrieve task logs: %s",
                          self.name, err)
                logs = None
            else:
                log.error("[job %s] logs: %s", self.name, logs)
            # An empty list has no last element to inspect.
            if isinstance(logs, MutableSequence) and logs:
                last_log = logs[-1]
                if isinstance(last_log, tes.TaskLog) and last_log.logs:
                    self.exit_code = last_log.logs[-1].exit_code
        return True

    def cleanup(self, rm_tmpdir):
        """Remove the staging directory and, optionally, the tmpdir.

        :param rm_tmpdir: when true, ``self.tmpdir`` is removed as well.
        """
        log.debug("[job %s] STARTING CLEAN UP ------------------", self.name)
        if self.stagedir and os.path.exists(self.stagedir):
            log.debug("[job %s] Removing input staging directory %s",
                      self.name, self.stagedir)
            shutil.rmtree(self.stagedir, True)

        if rm_tmpdir:
            log.debug("[job %s] Removing temporary directory %s", self.name,
                      self.tmpdir)
            shutil.rmtree(self.tmpdir, True)

    def output2url(self, path):
        """Return the destination URL for the output named ``path``.

        Only the basename of ``path`` is used. It is joined onto
        ``remote_storage_url`` when that is set, otherwise onto
        ``self.outdir`` and converted to a file URI. Returns ``None``
        when ``path`` is ``None``.
        """
        if path is None:
            return None
        basename = os.path.basename(path)
        if self.remote_storage_url:
            return self.fs_access.join(self.remote_storage_url, basename)
        return file_uri(self.fs_access.join(self.outdir, basename))

    def output2path(self, path):
        """Return ``path`` joined onto ``self.builder.outdir``.

        Returns ``None`` when ``path`` is ``None``.
        """
        if path is None:
            return None
        return self.fs_access.join(self.builder.outdir, path)