Example #1
    def arvExecutor(self, tool, job_order, **kwargs):
        self.debug = kwargs.get("debug")

        tool.visit(self.check_writable)

        if kwargs.get("quiet"):
            logger.setLevel(logging.WARN)
            logging.getLogger('arvados.arv-run').setLevel(logging.WARN)

        useruuid = self.api.users().current().execute()["uuid"]
        self.project_uuid = kwargs.get("project_uuid") or useruuid
        self.pipeline = None
        make_fs_access = kwargs.get("make_fs_access") or partial(
            CollectionFsAccess, api_client=self.api)
        self.fs_access = make_fs_access(kwargs["basedir"])

        if kwargs.get("create_template"):
            tmpl = RunnerTemplate(self, tool, job_order,
                                  kwargs.get("enable_reuse"))
            tmpl.save()
            # cwltool.main will write our return value to stdout.
            return tmpl.uuid

        self.debug = kwargs.get("debug")
        self.ignore_docker_for_reuse = kwargs.get("ignore_docker_for_reuse")

        kwargs["make_fs_access"] = make_fs_access
        kwargs["enable_reuse"] = kwargs.get("enable_reuse")
        kwargs["use_container"] = True
        kwargs["tmpdir_prefix"] = "tmp"
        kwargs["on_error"] = "continue"
        kwargs["compute_checksum"] = kwargs.get("compute_checksum")

        if self.work_api == "containers":
            kwargs["outdir"] = "/var/spool/cwl"
            kwargs["docker_outdir"] = "/var/spool/cwl"
            kwargs["tmpdir"] = "/tmp"
            kwargs["docker_tmpdir"] = "/tmp"
        elif self.work_api == "jobs":
            kwargs["outdir"] = "$(task.outdir)"
            kwargs["docker_outdir"] = "$(task.outdir)"
            kwargs["tmpdir"] = "$(task.tmpdir)"

        runnerjob = None
        if kwargs.get("submit"):
            if self.work_api == "containers":
                if tool.tool["class"] == "CommandLineTool":
                    runnerjob = tool.job(job_order, self.output_callback,
                                         **kwargs).next()
                else:
                    runnerjob = RunnerContainer(self, tool, job_order,
                                                kwargs.get("enable_reuse"))
            else:
                runnerjob = RunnerJob(self, tool, job_order,
                                      kwargs.get("enable_reuse"))

        if (not kwargs.get("submit") and "cwl_runner_job" not in kwargs
                and self.work_api != "containers"):
            # Create pipeline for local run
            self.pipeline = self.api.pipeline_instances().create(
                body={
                    "owner_uuid": self.project_uuid,
                    "name": shortname(tool.tool["id"]),
                    "components": {},
                    "state": "RunningOnClient"
                }).execute(num_retries=self.num_retries)
            logger.info("Pipeline instance %s", self.pipeline["uuid"])

        if runnerjob and not kwargs.get("wait"):
            runnerjob.run()
            return runnerjob.uuid

        self.poll_api = arvados.api('v1')
        self.polling_thread = threading.Thread(target=self.poll_states)
        self.polling_thread.start()

        if runnerjob:
            jobiter = iter((runnerjob, ))
        else:
            if "cwl_runner_job" in kwargs:
                self.uuid = kwargs.get("cwl_runner_job").get('uuid')
            jobiter = tool.job(job_order, self.output_callback, **kwargs)

        try:
            self.cond.acquire()
            # Will continue to hold the lock for the duration of this code
            # except when in cond.wait(), at which point on_message can update
            # job state and process output callbacks.

            for runnable in jobiter:
                if runnable:
                    runnable.run(**kwargs)
                else:
                    if self.processes:
                        self.cond.wait(1)
                    else:
                        logger.error(
                            "Workflow is deadlocked, no runnable jobs and not waiting on any pending jobs."
                        )
                        break

            while self.processes:
                self.cond.wait(1)

        except UnsupportedRequirement:
            raise
        except:
            if sys.exc_info()[0] is KeyboardInterrupt:
                logger.error("Interrupted, marking pipeline as failed")
            else:
                logger.error(
                    "Caught unhandled exception, marking pipeline as failed.  Error was: %s",
                    sys.exc_info()[1],
                    exc_info=(sys.exc_info()[1] if self.debug else False))
            if self.pipeline:
                self.api.pipeline_instances().update(
                    uuid=self.pipeline["uuid"], body={
                        "state": "Failed"
                    }).execute(num_retries=self.num_retries)
            if runnerjob and runnerjob.uuid and self.work_api == "containers":
                self.api.container_requests().update(
                    uuid=runnerjob.uuid, body={
                        "priority": "0"
                    }).execute(num_retries=self.num_retries)
        finally:
            self.cond.release()
            self.stop_polling.set()
            self.polling_thread.join()

        if self.final_status == "UnsupportedRequirement":
            raise UnsupportedRequirement("Check log for details.")

        if self.final_status != "success":
            raise WorkflowException("Workflow failed.")

        if self.final_output is None:
            raise WorkflowException("Workflow did not return a result.")

        if kwargs.get("compute_checksum"):
            adjustFileObjs(self.final_output,
                           partial(compute_checksums, self.fs_access))

        return self.final_output
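
The executor above coordinates a polling thread and the job iterator through a shared condition variable: the main loop holds the lock except while in cond.wait(), and the polling side notifies after updating shared state. A minimal, self-contained sketch of that handoff pattern (hypothetical names, not the Arvados API):

import threading

cond = threading.Condition()
processes = {}  # shared state guarded by cond


def on_message(uuid, done):
    # Called from the polling thread when a job changes state.
    with cond:
        if done:
            processes.pop(uuid, None)
        cond.notify_all()


def executor_loop(jobiter):
    # Hold the lock except while in wait(), mirroring arvExecutor above.
    with cond:
        for runnable in jobiter:
            if runnable is not None:
                processes[id(runnable)] = runnable  # runnable.run() would go here
            elif processes:
                cond.wait(1)  # releases the lock so on_message can update state
            else:
                break  # deadlocked: nothing runnable, nothing pending
        while processes:
            cond.wait(1)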
Example #2
File: tes.py  Project: uniqueg/cwl-tes
    def run(self,
            runtimeContext,   # type: RuntimeContext
            tmpdir_lock=None  # type: Optional[threading.Lock]
            ):  # type: (...) -> None
        log.debug(
            "[job %s] self.__dict__ in run() ----------------------",
            self.name
        )
        log.debug(pformat(self.__dict__))
        if not self.successCodes:
            self.successCodes = [0]

        task = self.create_task_msg()

        log.info(
            "[job %s] CREATED TASK MSG----------------------",
            self.name
        )
        log.info(pformat(task))

        try:
            self.id = self.client.create_task(task)
            log.info(
                "[job %s] SUBMITTED TASK ----------------------",
                self.name
            )
            log.info("[job %s] task id: %s ", self.name, self.id)
        except Exception as e:
            log.error(
                "[job %s] Failed to submit task to TES service:\n%s",
                self.name, e
            )
            raise WorkflowException(e)

        max_tries = 10
        current_try = 1
        self.exit_code = None
        while not self.is_done():
            delay = 1.5 * current_try**2
            time.sleep(
                random.randint(round(delay - 0.5 * delay),
                               round(delay + 0.5 * delay)))
            try:
                task = self.client.get_task(self.id, "MINIMAL")
                self.state = task.state
                log.debug(
                    "[job %s] POLLING %s, result: %s", self.name,
                    pformat(self.id), task.state
                )
            except Exception as e:
                log.error("[job %s] POLLING ERROR %s", self.name, e)
                if current_try <= max_tries:
                    current_try += 1
                    continue
                else:
                    log.error("[job %s] MAX POLLING RETRIES EXCEEDED",
                              self.name)
                    break

        try:
            process_status = None
            if self.state != "COMPLETE" \
                    and self.exit_code not in self.successCodes:
                process_status = "permanentFail"
                log.error("[job %s] job error:\n%s", self.name, self.state)
            remote_cwl_output_json = False
            if self.remote_storage_url:
                remote_fs_access = runtimeContext.make_fs_access(
                    self.remote_storage_url)
                remote_cwl_output_json = remote_fs_access.exists(
                    remote_fs_access.join(
                        self.remote_storage_url, "cwl.output.json"))
            if self.remote_storage_url:
                original_outdir = self.builder.outdir
                if not remote_cwl_output_json:
                    self.builder.outdir = self.remote_storage_url
                outputs = self.collect_outputs(self.remote_storage_url,
                                               self.exit_code)
                self.builder.outdir = original_outdir
            else:
                outputs = self.collect_outputs(self.outdir, self.exit_code)
            cleaned_outputs = {}
            for k, v in outputs.items():
                if isinstance(k, bytes):
                    k = k.decode("utf8")
                if isinstance(v, bytes):
                    v = v.decode("utf8")
                cleaned_outputs[k] = v
            self.outputs = cleaned_outputs
            if not process_status:
                process_status = "success"
        except (WorkflowException, Exception) as err:
            log.error("[job %s] job error:\n%s", self.name, err)
            if log.isEnabledFor(logging.DEBUG):
                log.exception(err)
            process_status = "permanentFail"
        finally:
            if self.outputs is None:
                self.outputs = {}
            with self.runtime_context.workflow_eval_lock:
                self.output_callback(self.outputs, process_status)
            log.info(
                "[job %s] OUTPUTS ------------------",
                self.name
            )
            log.info(pformat(self.outputs))
            self.cleanup(self.runtime_context.rm_tmpdir)
        return
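
The polling loop in this example backs off quadratically with jitter; note that in the code above current_try only advances after polling errors, so the delay grows with consecutive failures. Isolated from the task-state handling, the delay schedule amounts to the following sketch (not part of cwl-tes):

import random

def poll_delay(current_try):
    # Quadratic base delay: 1.5s, 6s, 13.5s, 24s, ... with +/-50% jitter
    # so concurrent jobs do not poll the TES endpoint in lockstep.
    delay = 1.5 * current_try ** 2
    return random.randint(round(delay - 0.5 * delay), round(delay + 0.5 * delay))

for attempt in range(1, 5):
    print(attempt, poll_delay(attempt))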
Example #3
    def run(self, runtimeContext):  # noqa: C901
        """Run a job."""
        self._setup(runtimeContext)

        env = self.environment
        if not os.path.exists(self.tmpdir):
            os.makedirs(self.tmpdir)
        vars_to_preserve = runtimeContext.preserve_environment
        if runtimeContext.preserve_entire_environment:
            vars_to_preserve = os.environ
        if vars_to_preserve is not None:
            for key, value in os.environ.items():
                if key in vars_to_preserve and key not in env:
                    env[key] = value
        env["HOME"] = self.builder.outdir
        env["TMPDIR"] = self.tmpdir
        if "PATH" not in env:
            env["PATH"] = os.environ["PATH"]
        if "SYSTEMROOT" not in env and "SYSTEMROOT" in os.environ:
            env["SYSTEMROOT"] = os.environ["SYSTEMROOT"]

        try:
            stage_files(self.pathmapper, ignore_writable=True, symlink=False)
            if getattr(self, "generatemapper", ""):
                stage_files(
                    self.generatemapper,
                    ignore_writable=self.inplace_update,
                    symlink=False,
                )
                relink_initialworkdir(
                    self.generatemapper,
                    self.outdir,
                    self.builder.outdir,
                    inplace_update=self.inplace_update,
                )
        except OSError:
            # cwltool/process.py, line 239, in stage_files
            # shutil.copytree(p.resolved, p.target)
            pass
        self.add_volumes(self.pathmapper)
        if getattr(self, "generatemapper", ""):
            self.add_volumes(self.generatemapper)

        # useful for debugging
        log.debug(f"[job {self.name}] self.__dict__ in run() ---------------")
        log.debug(pformat(self.__dict__))

        task = self.create_task_msg(
            runtimeContext.working_dir, runtimeContext.workflow_uuid
        )

        log.info(f"[job {self.name}] CREATED TASK MSG----------------------")
        log.info(pformat(task))

        try:
            # task_id = job_id received from job-controller
            task_id = runtimeContext.pipeline.service.submit(**task)
            task_id = str(task_id["job_id"])
            running_jobs = {"total": 1, "job_ids": [task_id]}
            runtimeContext.publisher.publish_workflow_status(
                runtimeContext.workflow_uuid,
                1,
                message={"progress": {"running": running_jobs,}},
            )
            log.info(f"[job {self.name}] SUBMITTED TASK --------------------")
            log.info(f"[job {self.name}] task id: {task_id} ")
            self.task_name_map[self.name] = task_id
            operation = runtimeContext.pipeline.service.check_status(task_id)
        except Exception as e:
            log.error(
                f"[job {self.name}] " f"Failed to submit task to job controller:\n{e}"
            )
            raise WorkflowException(e)

        def callback(rcode):
            try:
                outputs = self.collect_outputs(self.outdir, rcode=rcode)
                cleaned_outputs = {}
                for k, v in outputs.items():
                    if isinstance(k, bytes):
                        k = k.decode("utf8")
                    if isinstance(v, bytes):
                        v = v.decode("utf8")
                    cleaned_outputs[k] = v
                self.outputs = cleaned_outputs
                self.output_callback(self.outputs, "success")
            except WorkflowException as e:
                log.error(f"[job {self.name}] workflow job error:\n{e}")
                self.output_callback({}, "permanentFail")
            except Exception as e:
                log.error(f"[job {self.name}] job error:\n{e}")
                self.output_callback({}, "permanentFail")
            finally:
                if self.outputs is not None:
                    log.info(f"[job {self.name}] OUTPUTS ------------------")
                    log.info(pformat(self.outputs))
                self.cleanup(runtimeContext.rm_tmpdir)

        poll = ReanaPipelinePoll(
            workflow_uuid=runtimeContext.workflow_uuid,
            task_id=self.task_name_map.get(self.name),
            jobname=self.name,
            service=runtimeContext.pipeline.service,
            operation=operation,
            callback=callback,
            publisher=runtimeContext.publisher,
        )

        runtimeContext.pipeline.add_thread(poll)
        poll.start()
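
Examples #2 and #3 both normalize collected outputs by decoding any bytes keys or values before handing them to the output callback. As a standalone helper, that cleanup might look like this (a sketch, not part of either project):

def clean_outputs(outputs):
    """Return a copy of a CWL outputs dict with bytes keys/values decoded to text."""
    cleaned = {}
    for k, v in outputs.items():
        if isinstance(k, bytes):
            k = k.decode("utf8")
        if isinstance(v, bytes):
            v = v.decode("utf8")
        cleaned[k] = v
    return cleaned

print(clean_outputs({b"out": b"keep:abc+1/result.txt"}))
# {'out': 'keep:abc+1/result.txt'}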
Example #4
    def arv_executor(self,
                     updated_tool,
                     job_order,
                     runtimeContext,
                     logger=None):
        self.debug = runtimeContext.debug

        workbench1 = self.api.config()["Services"]["Workbench1"]["ExternalURL"]
        workbench2 = self.api.config()["Services"]["Workbench2"]["ExternalURL"]
        controller = self.api.config()["Services"]["Controller"]["ExternalURL"]
        logger.info("Using cluster %s (%s)",
                    self.api.config()["ClusterID"], workbench2 or workbench1
                    or controller)

        updated_tool.visit(self.check_features)

        self.project_uuid = runtimeContext.project_uuid
        self.pipeline = None
        self.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
        self.secret_store = runtimeContext.secret_store

        self.trash_intermediate = runtimeContext.trash_intermediate
        if self.trash_intermediate and self.work_api != "containers":
            raise Exception(
                "--trash-intermediate is only supported with --api=containers."
            )

        self.intermediate_output_ttl = runtimeContext.intermediate_output_ttl
        if self.intermediate_output_ttl and self.work_api != "containers":
            raise Exception(
                "--intermediate-output-ttl is only supported with --api=containers."
            )
        if self.intermediate_output_ttl < 0:
            raise Exception(
                "Invalid value %d for --intermediate-output-ttl, cannot be less than zero"
                % self.intermediate_output_ttl)

        if runtimeContext.submit_request_uuid and self.work_api != "containers":
            raise Exception(
                "--submit-request-uuid requires containers API, but using '{}' api"
                .format(self.work_api))

        default_storage_classes = ",".join([
            k for k, v in self.api.config().get("StorageClasses", {
                "default": {
                    "Default": True
                }
            }).items() if v.get("Default") is True
        ])
        if runtimeContext.storage_classes == "default":
            runtimeContext.storage_classes = default_storage_classes
        if runtimeContext.intermediate_storage_classes == "default":
            runtimeContext.intermediate_storage_classes = default_storage_classes

        if not runtimeContext.name:
            runtimeContext.name = self.name = updated_tool.tool.get(
                "label") or updated_tool.metadata.get(
                    "label") or os.path.basename(updated_tool.tool["id"])

        # Upload local file references in the job order.
        job_order = upload_job_order(self, "%s input" % runtimeContext.name,
                                     updated_tool, job_order)

        # the last clause means: if it is a command line tool, and we
        # are going to wait for the result, and always_submit_runner
        # is false, then we don't submit a runner process.

        submitting = (runtimeContext.update_workflow
                      or runtimeContext.create_workflow or
                      (runtimeContext.submit
                       and not (updated_tool.tool["class"] == "CommandLineTool"
                                and runtimeContext.wait
                                and not runtimeContext.always_submit_runner)))

        loadingContext = self.loadingContext.copy()
        loadingContext.do_validate = False
        if submitting:
            loadingContext.do_update = False
            # Document may have been auto-updated. Reload the original
            # document with updating disabled because we want to
            # submit the document with its original CWL version, not
            # the auto-updated one.
            tool = load_tool(updated_tool.tool["id"], loadingContext)
        else:
            tool = updated_tool

        # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
        # Also uploads docker images.
        merged_map = upload_workflow_deps(self, tool)

        # Recreate process object (ArvadosWorkflow or
        # ArvadosCommandTool) because tool document may have been
        # updated by upload_workflow_deps in ways that modify
        # inheritance of hints or requirements.
        loadingContext.loader = tool.doc_loader
        loadingContext.avsc_names = tool.doc_schema
        loadingContext.metadata = tool.metadata
        tool = load_tool(tool.tool, loadingContext)

        existing_uuid = runtimeContext.update_workflow
        if existing_uuid or runtimeContext.create_workflow:
            # Create a pipeline template or workflow record and exit.
            if self.work_api == "containers":
                uuid = upload_workflow(
                    self,
                    tool,
                    job_order,
                    self.project_uuid,
                    uuid=existing_uuid,
                    submit_runner_ram=runtimeContext.submit_runner_ram,
                    name=runtimeContext.name,
                    merged_map=merged_map,
                    submit_runner_image=runtimeContext.submit_runner_image)
                self.stdout.write(uuid + "\n")
                return (None, "success")

        self.apply_reqs(job_order, tool)

        self.ignore_docker_for_reuse = runtimeContext.ignore_docker_for_reuse
        self.eval_timeout = runtimeContext.eval_timeout

        runtimeContext = runtimeContext.copy()
        runtimeContext.use_container = True
        runtimeContext.tmpdir_prefix = "tmp"
        runtimeContext.work_api = self.work_api

        if self.work_api == "containers":
            if self.ignore_docker_for_reuse:
                raise Exception(
                    "--ignore-docker-for-reuse not supported with containers API."
                )
            runtimeContext.outdir = "/var/spool/cwl"
            runtimeContext.docker_outdir = "/var/spool/cwl"
            runtimeContext.tmpdir = "/tmp"
            runtimeContext.docker_tmpdir = "/tmp"

        if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
            raise Exception("--priority must be in the range 1..1000.")

        if self.should_estimate_cache_size:
            visited = set()
            estimated_size = [0]

            def estimate_collection_cache(obj):
                if obj.get("location", "").startswith("keep:"):
                    m = pdh_size.match(obj["location"][5:])
                    if m and m.group(1) not in visited:
                        visited.add(m.group(1))
                        estimated_size[0] += int(m.group(2))

            visit_class(job_order, ("File", "Directory"),
                        estimate_collection_cache)
            runtimeContext.collection_cache_size = max(
                ((estimated_size[0] * 192) // (1024 * 1024)) + 1, 256)
            self.collection_cache.set_cap(
                runtimeContext.collection_cache_size * 1024 * 1024)

        logger.info("Using collection cache size %s MiB",
                    runtimeContext.collection_cache_size)

        runnerjob = None
        if runtimeContext.submit:
            # Submit a runner job to run the workflow for us.
            if self.work_api == "containers":
                if submitting:
                    tool = RunnerContainer(
                        self,
                        updated_tool,
                        tool,
                        loadingContext,
                        runtimeContext.enable_reuse,
                        self.output_name,
                        self.output_tags,
                        submit_runner_ram=runtimeContext.submit_runner_ram,
                        name=runtimeContext.name,
                        on_error=runtimeContext.on_error,
                        submit_runner_image=runtimeContext.submit_runner_image,
                        intermediate_output_ttl=runtimeContext.
                        intermediate_output_ttl,
                        merged_map=merged_map,
                        priority=runtimeContext.priority,
                        secret_store=self.secret_store,
                        collection_cache_size=runtimeContext.
                        collection_cache_size,
                        collection_cache_is_default=self.
                        should_estimate_cache_size)
                else:
                    runtimeContext.runnerjob = tool.tool["id"]

        if runtimeContext.cwl_runner_job is not None:
            self.uuid = runtimeContext.cwl_runner_job.get('uuid')

        jobiter = tool.job(job_order, self.output_callback, runtimeContext)

        if runtimeContext.submit and not runtimeContext.wait:
            runnerjob = next(jobiter)
            runnerjob.run(runtimeContext)
            self.stdout.write(runnerjob.uuid + "\n")
            return (None, "success")

        current_container = arvados_cwl.util.get_current_container(
            self.api, self.num_retries, logger)
        if current_container:
            logger.info("Running inside container %s",
                        current_container.get("uuid"))

        self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
        self.polling_thread = threading.Thread(target=self.poll_states)
        self.polling_thread.start()

        self.task_queue = TaskQueue(self.workflow_eval_lock, self.thread_count)

        try:
            self.workflow_eval_lock.acquire()

            # Holds the lock while this code runs and releases it when
            # it is safe to do so in self.workflow_eval_lock.wait(),
            # at which point on_message can update job state and
            # process output callbacks.

            loopperf = Perf(metrics, "jobiter")
            loopperf.__enter__()
            for runnable in jobiter:
                loopperf.__exit__()

                if self.stop_polling.is_set():
                    break

                if self.task_queue.error is not None:
                    raise self.task_queue.error

                if runnable:
                    with Perf(metrics, "run"):
                        self.start_run(runnable, runtimeContext)
                else:
                    if (self.task_queue.in_flight + len(self.processes)) > 0:
                        self.workflow_eval_lock.wait(3)
                    else:
                        logger.error(
                            "Workflow is deadlocked, no runnable processes and not waiting on any pending processes."
                        )
                        break

                if self.stop_polling.is_set():
                    break

                loopperf.__enter__()
            loopperf.__exit__()

            while (self.task_queue.in_flight + len(self.processes)) > 0:
                if self.task_queue.error is not None:
                    raise self.task_queue.error
                self.workflow_eval_lock.wait(3)

        except UnsupportedRequirement:
            raise
        except:
            if sys.exc_info()[0] in (KeyboardInterrupt, SystemExit):
                logger.error("Interrupted, workflow will be cancelled")
            elif isinstance(sys.exc_info()[1], WorkflowException):
                logger.error(
                    "Workflow execution failed:\n%s",
                    sys.exc_info()[1],
                    exc_info=(sys.exc_info()[1] if self.debug else False))
            else:
                logger.exception("Workflow execution failed")

            if self.pipeline:
                self.api.pipeline_instances().update(
                    uuid=self.pipeline["uuid"], body={
                        "state": "Failed"
                    }).execute(num_retries=self.num_retries)

            if self.work_api == "containers" and not current_container:
                # Not running in a crunch container, so cancel any outstanding processes.
                for p in self.processes:
                    try:
                        self.api.container_requests().update(
                            uuid=p, body={
                                "priority": "0"
                            }).execute(num_retries=self.num_retries)
                    except Exception:
                        pass
        finally:
            self.workflow_eval_lock.release()
            self.task_queue.drain()
            self.stop_polling.set()
            self.polling_thread.join()
            self.task_queue.join()

        if self.final_status == "UnsupportedRequirement":
            raise UnsupportedRequirement("Check log for details.")

        if self.final_output is None:
            raise WorkflowException("Workflow did not return a result.")

        if runtimeContext.submit and isinstance(tool, Runner):
            logger.info("Final output collection %s", tool.final_output)
            if workbench2 or workbench1:
                logger.info("Output at %scollections/%s", workbench2
                            or workbench1, tool.final_output)
        else:
            if self.output_name is None:
                self.output_name = "Output of %s" % (shortname(
                    tool.tool["id"]))
            if self.output_tags is None:
                self.output_tags = ""

            storage_classes = ""
            storage_class_req, _ = tool.get_requirement(
                "http://arvados.org/cwl#OutputStorageClass")
            if storage_class_req and storage_class_req.get(
                    "finalStorageClass"):
                storage_classes = aslist(
                    storage_class_req["finalStorageClass"])
            else:
                storage_classes = runtimeContext.storage_classes.strip().split(
                    ",")

            self.final_output, self.final_output_collection = self.make_output_collection(
                self.output_name, storage_classes, self.output_tags,
                self.final_output)
            self.set_crunch_output()

        if runtimeContext.compute_checksum:
            adjustDirObjs(self.final_output,
                          partial(get_listing, self.fs_access))
            adjustFileObjs(self.final_output,
                           partial(compute_checksums, self.fs_access))

        if self.trash_intermediate and self.final_status == "success":
            self.trash_intermediate_output()

        return (self.final_output, self.final_status)
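
The collection cache sizing above sums the manifest sizes embedded in each distinct keep: portable data hash, budgets 192 bytes of cache per manifest byte, and floors the result at 256 MiB. A standalone sketch of that arithmetic (the pdh_size pattern here is an assumption, not the module's actual regex):

import re

# A portable data hash looks like "<md5 hex>+<manifest size in bytes>".
pdh_size = re.compile(r"([0-9a-f]{32})\+(\d+)")

def estimate_cache_mib(locations):
    visited, total = set(), 0
    for loc in locations:
        if not loc.startswith("keep:"):
            continue
        m = pdh_size.match(loc[5:])
        if m and m.group(1) not in visited:
            visited.add(m.group(1))
            total += int(m.group(2))
    return max((total * 192) // (1024 * 1024) + 1, 256)

# 2 MiB of manifest data -> 385 MiB cache; small inputs fall back to 256 MiB.
print(estimate_cache_mib(["keep:0123456789abcdef0123456789abcdef+2097152/input.fa"]))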
Example #5
    def run(self, runtimeContext):
        # ArvadosCommandTool subclasses from cwltool.CommandLineTool,
        # which calls makeJobRunner() to get a new ArvadosContainer
        # object.  The fields that define execution such as
        # command_line, environment, etc are set on the
        # ArvadosContainer object by CommandLineTool.job() before
        # run() is called.

        runtimeContext = self.job_runtime

        container_request = {
            "command": self.command_line,
            "name": self.name,
            "output_path": self.outdir,
            "cwd": self.outdir,
            "priority": runtimeContext.priority,
            "state": "Committed",
            "properties": {},
        }
        runtime_constraints = {}

        if runtimeContext.project_uuid:
            container_request["owner_uuid"] = runtimeContext.project_uuid

        if self.arvrunner.secret_store.has_secret(self.command_line):
            raise WorkflowException(
                "Secret material leaked on command line, only file literals may contain secrets"
            )

        if self.arvrunner.secret_store.has_secret(self.environment):
            raise WorkflowException(
                "Secret material leaked in environment, only file literals may contain secrets"
            )

        resources = self.builder.resources
        if resources is not None:
            runtime_constraints["vcpus"] = math.ceil(resources.get("cores", 1))
            runtime_constraints["ram"] = math.ceil(
                resources.get("ram") * 2**20)

        mounts = {
            self.outdir: {
                "kind": "tmp",
                "capacity": math.ceil(resources.get("outdirSize", 0) * 2**20)
            },
            self.tmpdir: {
                "kind": "tmp",
                "capacity": math.ceil(resources.get("tmpdirSize", 0) * 2**20)
            }
        }
        secret_mounts = {}
        scheduling_parameters = {}

        rf = [
            self.pathmapper.mapper(f) for f in self.pathmapper.referenced_files
        ]
        rf.sort(key=lambda k: k.resolved)
        prevdir = None
        for resolved, target, tp, stg in rf:
            if not stg:
                continue
            if prevdir and target.startswith(prevdir):
                continue
            if tp == "Directory":
                targetdir = target
            else:
                targetdir = os.path.dirname(target)
            sp = resolved.split("/", 1)
            pdh = sp[0][5:]  # remove "keep:"
            mounts[targetdir] = {
                "kind": "collection",
                "portable_data_hash": pdh
            }
            if pdh in self.pathmapper.pdh_to_uuid:
                mounts[targetdir]["uuid"] = self.pathmapper.pdh_to_uuid[pdh]
            if len(sp) == 2:
                if tp == "Directory":
                    path = sp[1]
                else:
                    path = os.path.dirname(sp[1])
                if path and path != "/":
                    mounts[targetdir]["path"] = path
            prevdir = targetdir + "/"

        with Perf(metrics, "generatefiles %s" % self.name):
            if self.generatefiles["listing"]:
                vwd = arvados.collection.Collection(
                    api_client=self.arvrunner.api,
                    keep_client=self.arvrunner.keep_client,
                    num_retries=self.arvrunner.num_retries)
                generatemapper = NoFollowPathMapper(
                    self.generatefiles["listing"], "", "", separateDirs=False)

                sorteditems = sorted(generatemapper.items(),
                                     key=lambda n: n[1].target)

                logger.debug("generatemapper is %s", sorteditems)

                with Perf(metrics, "createfiles %s" % self.name):
                    for f, p in sorteditems:
                        if not p.target:
                            pass
                        elif p.type in ("File", "Directory", "WritableFile",
                                        "WritableDirectory"):
                            if p.resolved.startswith("_:"):
                                vwd.mkdirs(p.target)
                            else:
                                source, path = self.arvrunner.fs_access.get_collection(
                                    p.resolved)
                                vwd.copy(path or ".",
                                         p.target,
                                         source_collection=source)
                        elif p.type == "CreateFile":
                            if self.arvrunner.secret_store.has_secret(
                                    p.resolved):
                                secret_mounts["%s/%s" % (self.outdir, p.target)] = {
                                    "kind": "text",
                                    "content": self.arvrunner.secret_store.retrieve(p.resolved)
                                }
                            else:
                                with vwd.open(p.target, "w") as n:
                                    n.write(p.resolved)

                def keepemptydirs(p):
                    if isinstance(p, arvados.collection.RichCollectionBase):
                        if len(p) == 0:
                            p.open(".keep", "w").close()
                        else:
                            for c in p:
                                keepemptydirs(p[c])

                keepemptydirs(vwd)

                if not runtimeContext.current_container:
                    runtimeContext.current_container = arvados_cwl.util.get_current_container(
                        self.arvrunner.api, self.arvrunner.num_retries, logger)
                info = arvados_cwl.util.get_intermediate_collection_info(
                    self.name, runtimeContext.current_container,
                    runtimeContext.intermediate_output_ttl)
                vwd.save_new(name=info["name"],
                             owner_uuid=runtimeContext.project_uuid,
                             ensure_unique_name=True,
                             trash_at=info["trash_at"],
                             properties=info["properties"])

                prev = None
                for f, p in sorteditems:
                    if (not p.target or self.arvrunner.secret_store.has_secret(
                            p.resolved) or
                        (prev is not None and p.target.startswith(prev))):
                        continue
                    mountpoint = "%s/%s" % (self.outdir, p.target)
                    mounts[mountpoint] = {
                        "kind": "collection",
                        "portable_data_hash": vwd.portable_data_hash(),
                        "path": p.target
                    }
                    if p.type.startswith("Writable"):
                        mounts[mountpoint]["writable"] = True
                    prev = p.target + "/"

        container_request["environment"] = {
            "TMPDIR": self.tmpdir,
            "HOME": self.outdir
        }
        if self.environment:
            container_request["environment"].update(self.environment)

        if self.stdin:
            sp = self.stdin[6:].split("/", 1)
            mounts["stdin"] = {
                "kind": "collection",
                "portable_data_hash": sp[0],
                "path": sp[1]
            }

        if self.stderr:
            mounts["stderr"] = {
                "kind": "file",
                "path": "%s/%s" % (self.outdir, self.stderr)
            }

        if self.stdout:
            mounts["stdout"] = {
                "kind": "file",
                "path": "%s/%s" % (self.outdir, self.stdout)
            }

        (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
        if not docker_req:
            docker_req = {"dockerImageId": "arvados/jobs"}

        container_request["container_image"] = arv_docker_get_image(
            self.arvrunner.api, docker_req, runtimeContext.pull_image,
            runtimeContext.project_uuid)

        network_req, _ = self.get_requirement("NetworkAccess")
        if network_req:
            runtime_constraints["API"] = network_req["networkAccess"]

        api_req, _ = self.get_requirement(
            "http://arvados.org/cwl#APIRequirement")
        if api_req:
            runtime_constraints["API"] = True

        runtime_req, _ = self.get_requirement(
            "http://arvados.org/cwl#RuntimeConstraints")
        if runtime_req:
            if "keep_cache" in runtime_req:
                runtime_constraints["keep_cache_ram"] = math.ceil(
                    runtime_req["keep_cache"] * 2**20)
            if "outputDirType" in runtime_req:
                if runtime_req["outputDirType"] == "local_output_dir":
                    # Currently the default behavior.
                    pass
                elif runtime_req["outputDirType"] == "keep_output_dir":
                    mounts[self.outdir] = {
                        "kind": "collection",
                        "writable": True
                    }

        partition_req, _ = self.get_requirement(
            "http://arvados.org/cwl#PartitionRequirement")
        if partition_req:
            scheduling_parameters["partitions"] = aslist(
                partition_req["partition"])

        intermediate_output_req, _ = self.get_requirement(
            "http://arvados.org/cwl#IntermediateOutput")
        if intermediate_output_req:
            self.output_ttl = intermediate_output_req["outputTTL"]
        else:
            self.output_ttl = self.arvrunner.intermediate_output_ttl

        if self.output_ttl < 0:
            raise WorkflowException(
                "Invalid value %d for output_ttl, cannot be less than zero" %
                self.output_ttl)

        if self.timelimit is not None and self.timelimit > 0:
            scheduling_parameters["max_run_time"] = self.timelimit

        extra_submit_params = {}
        if runtimeContext.submit_runner_cluster:
            extra_submit_params[
                "cluster_id"] = runtimeContext.submit_runner_cluster

        container_request["output_name"] = "Output for step %s" % (self.name)
        container_request["output_ttl"] = self.output_ttl
        container_request["mounts"] = mounts
        container_request["secret_mounts"] = secret_mounts
        container_request["runtime_constraints"] = runtime_constraints
        container_request["scheduling_parameters"] = scheduling_parameters

        enable_reuse = runtimeContext.enable_reuse
        if enable_reuse:
            reuse_req, _ = self.get_requirement("WorkReuse")
            if reuse_req:
                enable_reuse = reuse_req["enableReuse"]
            reuse_req, _ = self.get_requirement(
                "http://arvados.org/cwl#ReuseRequirement")
            if reuse_req:
                enable_reuse = reuse_req["enableReuse"]
        container_request["use_existing"] = enable_reuse

        if runtimeContext.runnerjob.startswith("arvwf:"):
            wfuuid = runtimeContext.runnerjob[6:runtimeContext.runnerjob.index("#")]
            wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(
                num_retries=self.arvrunner.num_retries)
            if container_request["name"] == "main":
                container_request["name"] = wfrecord["name"]
            container_request["properties"]["template_uuid"] = wfuuid

        self.output_callback = self.arvrunner.get_wrapped_callback(
            self.output_callback)

        try:
            if runtimeContext.submit_request_uuid:
                response = self.arvrunner.api.container_requests().update(
                    uuid=runtimeContext.submit_request_uuid,
                    body=container_request,
                    **extra_submit_params).execute(
                        num_retries=self.arvrunner.num_retries)
            else:
                response = self.arvrunner.api.container_requests().create(
                    body=container_request, **extra_submit_params).execute(
                        num_retries=self.arvrunner.num_retries)

            self.uuid = response["uuid"]
            self.arvrunner.process_submitted(self)

            if response["state"] == "Final":
                logger.info("%s reused container %s",
                            self.arvrunner.label(self),
                            response["container_uuid"])
            else:
                logger.info("%s %s state is %s", self.arvrunner.label(self),
                            response["uuid"], response["state"])
        except Exception:
            logger.exception("%s got an error", self.arvrunner.label(self))
            self.output_callback({}, "permanentFail")
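
The mount-building loop near the top of this example turns each staged "keep:<pdh>/<path>" reference into a collection mount keyed by the target directory inside the container. Reduced to a single reference, the mapping looks roughly like this (hypothetical helper mirroring the logic above, not Arvados library code):

import os

def keep_ref_to_mount(resolved, target, tp="File"):
    """Map one resolved keep: reference to an Arvados-style collection mount."""
    targetdir = target if tp == "Directory" else os.path.dirname(target)
    sp = resolved.split("/", 1)
    mount = {"kind": "collection", "portable_data_hash": sp[0][5:]}  # strip "keep:"
    if len(sp) == 2:
        path = sp[1] if tp == "Directory" else os.path.dirname(sp[1])
        if path and path != "/":
            mount["path"] = path
    return targetdir, mount

print(keep_ref_to_mount("keep:abc123+45/data/input.txt",
                        "/keep/abc123+45/data/input.txt"))
# ('/keep/abc123+45/data', {'kind': 'collection',
#                           'portable_data_hash': 'abc123+45', 'path': 'data'})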
Example #6
    def arv_executor(self, tool, job_order, runtimeContext, logger=None):
        self.debug = runtimeContext.debug

        tool.visit(self.check_features)

        self.project_uuid = runtimeContext.project_uuid
        self.pipeline = None
        self.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
        self.secret_store = runtimeContext.secret_store

        self.trash_intermediate = runtimeContext.trash_intermediate
        if self.trash_intermediate and self.work_api != "containers":
            raise Exception(
                "--trash-intermediate is only supported with --api=containers."
            )

        self.intermediate_output_ttl = runtimeContext.intermediate_output_ttl
        if self.intermediate_output_ttl and self.work_api != "containers":
            raise Exception(
                "--intermediate-output-ttl is only supported with --api=containers."
            )
        if self.intermediate_output_ttl < 0:
            raise Exception(
                "Invalid value %d for --intermediate-output-ttl, cannot be less than zero"
                % self.intermediate_output_ttl)

        if runtimeContext.submit_request_uuid and self.work_api != "containers":
            raise Exception(
                "--submit-request-uuid requires containers API, but using '{}' api"
                .format(self.work_api))

        if not runtimeContext.name:
            runtimeContext.name = self.name = tool.tool.get(
                "label") or tool.metadata.get("label") or os.path.basename(
                    tool.tool["id"])

        # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
        # Also uploads docker images.
        merged_map = upload_workflow_deps(self, tool)

        # Reload tool object which may have been updated by
        # upload_workflow_deps
        # Don't validate this time because it will just print redundant errors.
        loadingContext = self.loadingContext.copy()
        loadingContext.loader = tool.doc_loader
        loadingContext.avsc_names = tool.doc_schema
        loadingContext.metadata = tool.metadata
        loadingContext.do_validate = False

        tool = self.arv_make_tool(tool.doc_loader.idx[tool.tool["id"]],
                                  loadingContext)

        # Upload local file references in the job order.
        job_order = upload_job_order(self, "%s input" % runtimeContext.name,
                                     tool, job_order)

        existing_uuid = runtimeContext.update_workflow
        if existing_uuid or runtimeContext.create_workflow:
            # Create a pipeline template or workflow record and exit.
            if self.work_api == "jobs":
                tmpl = RunnerTemplate(
                    self,
                    tool,
                    job_order,
                    runtimeContext.enable_reuse,
                    uuid=existing_uuid,
                    submit_runner_ram=runtimeContext.submit_runner_ram,
                    name=runtimeContext.name,
                    merged_map=merged_map,
                    loadingContext=loadingContext)
                tmpl.save()
                # cwltool.main will write our return value to stdout.
                return (tmpl.uuid, "success")
            elif self.work_api == "containers":
                return (upload_workflow(
                    self,
                    tool,
                    job_order,
                    self.project_uuid,
                    uuid=existing_uuid,
                    submit_runner_ram=runtimeContext.submit_runner_ram,
                    name=runtimeContext.name,
                    merged_map=merged_map), "success")

        self.ignore_docker_for_reuse = runtimeContext.ignore_docker_for_reuse
        self.eval_timeout = runtimeContext.eval_timeout

        runtimeContext = runtimeContext.copy()
        runtimeContext.use_container = True
        runtimeContext.tmpdir_prefix = "tmp"
        runtimeContext.work_api = self.work_api

        if self.work_api == "containers":
            if self.ignore_docker_for_reuse:
                raise Exception(
                    "--ignore-docker-for-reuse not supported with containers API."
                )
            runtimeContext.outdir = "/var/spool/cwl"
            runtimeContext.docker_outdir = "/var/spool/cwl"
            runtimeContext.tmpdir = "/tmp"
            runtimeContext.docker_tmpdir = "/tmp"
        elif self.work_api == "jobs":
            if runtimeContext.priority != DEFAULT_PRIORITY:
                raise Exception("--priority not implemented for jobs API.")
            runtimeContext.outdir = "$(task.outdir)"
            runtimeContext.docker_outdir = "$(task.outdir)"
            runtimeContext.tmpdir = "$(task.tmpdir)"

        if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
            raise Exception("--priority must be in the range 1..1000.")

        if self.should_estimate_cache_size:
            visited = set()
            estimated_size = [0]

            def estimate_collection_cache(obj):
                if obj.get("location", "").startswith("keep:"):
                    m = pdh_size.match(obj["location"][5:])
                    if m and m.group(1) not in visited:
                        visited.add(m.group(1))
                        estimated_size[0] += int(m.group(2))

            visit_class(job_order, ("File", "Directory"),
                        estimate_collection_cache)
            runtimeContext.collection_cache_size = max(
                ((estimated_size[0] * 192) / (1024 * 1024)) + 1, 256)
            self.collection_cache.set_cap(
                runtimeContext.collection_cache_size * 1024 * 1024)

        logger.info("Using collection cache size %s MiB",
                    runtimeContext.collection_cache_size)

        runnerjob = None
        if runtimeContext.submit:
            # Submit a runner job to run the workflow for us.
            if self.work_api == "containers":
                if (tool.tool["class"] == "CommandLineTool"
                        and runtimeContext.wait
                        and not runtimeContext.always_submit_runner):
                    runtimeContext.runnerjob = tool.tool["id"]
                else:
                    tool = RunnerContainer(
                        self,
                        tool,
                        loadingContext,
                        runtimeContext.enable_reuse,
                        self.output_name,
                        self.output_tags,
                        submit_runner_ram=runtimeContext.submit_runner_ram,
                        name=runtimeContext.name,
                        on_error=runtimeContext.on_error,
                        submit_runner_image=runtimeContext.submit_runner_image,
                        intermediate_output_ttl=runtimeContext.
                        intermediate_output_ttl,
                        merged_map=merged_map,
                        priority=runtimeContext.priority,
                        secret_store=self.secret_store,
                        collection_cache_size=runtimeContext.
                        collection_cache_size,
                        collection_cache_is_default=self.
                        should_estimate_cache_size)
            elif self.work_api == "jobs":
                tool = RunnerJob(
                    self,
                    tool,
                    loadingContext,
                    runtimeContext.enable_reuse,
                    self.output_name,
                    self.output_tags,
                    submit_runner_ram=runtimeContext.submit_runner_ram,
                    name=runtimeContext.name,
                    on_error=runtimeContext.on_error,
                    submit_runner_image=runtimeContext.submit_runner_image,
                    merged_map=merged_map)
        elif runtimeContext.cwl_runner_job is None and self.work_api == "jobs":
            # Create pipeline for local run
            self.pipeline = self.api.pipeline_instances().create(
                body={
                    "owner_uuid": self.project_uuid,
                    "name": runtimeContext.name or shortname(tool.tool["id"]),
                    "components": {},
                    "state": "RunningOnClient"
                }).execute(num_retries=self.num_retries)
            logger.info("Pipeline instance %s", self.pipeline["uuid"])

        if runtimeContext.cwl_runner_job is not None:
            self.uuid = runtimeContext.cwl_runner_job.get('uuid')

        jobiter = tool.job(job_order, self.output_callback, runtimeContext)

        if runtimeContext.submit and not runtimeContext.wait:
            runnerjob = jobiter.next()
            runnerjob.run(runtimeContext)
            return (runnerjob.uuid, "success")

        current_container = arvados_cwl.util.get_current_container(
            self.api, self.num_retries, logger)
        if current_container:
            logger.info("Running inside container %s",
                        current_container.get("uuid"))

        self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
        self.polling_thread = threading.Thread(target=self.poll_states)
        self.polling_thread.start()

        self.task_queue = TaskQueue(self.workflow_eval_lock, self.thread_count)

        try:
            self.workflow_eval_lock.acquire()

            # Holds the lock while this code runs and releases it when
            # it is safe to do so in self.workflow_eval_lock.wait(),
            # at which point on_message can update job state and
            # process output callbacks.

            loopperf = Perf(metrics, "jobiter")
            loopperf.__enter__()
            for runnable in jobiter:
                loopperf.__exit__()

                if self.stop_polling.is_set():
                    break

                if self.task_queue.error is not None:
                    raise self.task_queue.error

                if runnable:
                    with Perf(metrics, "run"):
                        self.start_run(runnable, runtimeContext)
                else:
                    if (self.task_queue.in_flight + len(self.processes)) > 0:
                        self.workflow_eval_lock.wait(3)
                    else:
                        logger.error(
                            "Workflow is deadlocked, no runnable processes and not waiting on any pending processes."
                        )
                        break

                if self.stop_polling.is_set():
                    break

                loopperf.__enter__()
            loopperf.__exit__()

            while (self.task_queue.in_flight + len(self.processes)) > 0:
                if self.task_queue.error is not None:
                    raise self.task_queue.error
                self.workflow_eval_lock.wait(3)

        except UnsupportedRequirement:
            raise
        except:
            if sys.exc_info()[0] in (KeyboardInterrupt, SystemExit):
                logger.error("Interrupted, workflow will be cancelled")
            else:
                logger.error(
                    "Execution failed:\n%s",
                    sys.exc_info()[1],
                    exc_info=(sys.exc_info()[1] if self.debug else False))
            if self.pipeline:
                self.api.pipeline_instances().update(
                    uuid=self.pipeline["uuid"], body={
                        "state": "Failed"
                    }).execute(num_retries=self.num_retries)
            if runtimeContext.submit and isinstance(tool, Runner):
                runnerjob = tool
                if runnerjob.uuid and self.work_api == "containers":
                    self.api.container_requests().update(
                        uuid=runnerjob.uuid, body={
                            "priority": "0"
                        }).execute(num_retries=self.num_retries)
        finally:
            self.workflow_eval_lock.release()
            self.task_queue.drain()
            self.stop_polling.set()
            self.polling_thread.join()
            self.task_queue.join()

        if self.final_status == "UnsupportedRequirement":
            raise UnsupportedRequirement("Check log for details.")

        if self.final_output is None:
            raise WorkflowException("Workflow did not return a result.")

        if runtimeContext.submit and isinstance(tool, Runner):
            logger.info("Final output collection %s", tool.final_output)
        else:
            if self.output_name is None:
                self.output_name = "Output of %s" % (shortname(
                    tool.tool["id"]))
            if self.output_tags is None:
                self.output_tags = ""

            storage_classes = runtimeContext.storage_classes.strip().split(",")
            self.final_output, self.final_output_collection = self.make_output_collection(
                self.output_name, storage_classes, self.output_tags,
                self.final_output)
            self.set_crunch_output()

        if runtimeContext.compute_checksum:
            adjustDirObjs(self.final_output,
                          partial(get_listing, self.fs_access))
            adjustFileObjs(self.final_output,
                           partial(compute_checksums, self.fs_access))

        if self.trash_intermediate and self.final_status == "success":
            self.trash_intermediate_output()

        return (self.final_output, self.final_status)
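
Examples #4 and #6 make the same decision about whether to wrap the workflow in a runner process on the containers API; stripped of the surrounding setup, the submit clause of that decision reduces to a predicate like this (a sketch of the logic, not library code):

def needs_runner(tool_class, submit, wait, always_submit_runner):
    """True when the workflow should be wrapped in a RunnerContainer/RunnerJob."""
    # A bare CommandLineTool that we are going to wait on can run directly,
    # unless --always-submit-runner forces the wrapper anyway.
    return submit and not (tool_class == "CommandLineTool"
                           and wait
                           and not always_submit_runner)

print(needs_runner("Workflow", submit=True, wait=True, always_submit_runner=False))         # True
print(needs_runner("CommandLineTool", submit=True, wait=True, always_submit_runner=False))  # False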
Example #7
    def collect_output(
            self,
            schema,  # type: Dict[Text, Any]
            builder,  # type: Builder
            outdir,  # type: Text
            fs_access,  # type: StdFsAccess
            compute_checksum=True  # type: bool
    ):
        # type: (...) -> Optional[Union[Dict[Text, Any], List[Union[Dict[Text, Any], Text]]]]
        """
        Collect outputs from the step :term:`Process` following its execution.

        .. note::
            When the :term:`CWL` runner tries to forward ``step(i) outputs -> step(i+1) inputs``
            using :meth:`collect_outputs`, it expects the exact ``outputBindings`` locations to match.
            In other words, a definition like ``outputBindings: {glob: outputs/*.txt}`` will generate results located
            in ``step(i)`` as ``"<tmp-workdir>/outputs/file.txt"``, and ``step(i+1)`` will look explicitly
            in ``"<tmp-workdir>/outputs"`` using the ``glob`` pattern. Because each :term:`Process` in
            the workflow is a distinct/remote entity, each one stages its outputs at a different URL location,
            not sharing the same *root directory*. When we stage intermediate results locally, the sub-dirs are lost.
            Therefore, they act like individual :term:`CWL` runner calls where the *final results* are moved back
            to the local directory for convenient access, but our *local directory* is the URL WPS-outputs location.
            To let :term:`CWL` :term:`Workflow` inter-step mapping work as intended, we must remap the locations,
            ignoring any nested dirs, so that the modified *outputBindings* definition can match as if each
            step :term:`Process` output were generated locally.
        """
        result = []  # type: List[Any]
        empty_and_optional = False
        debug = LOGGER.isEnabledFor(logging.DEBUG)
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            globpatterns = []  # type: List[Text]

            revmap = partial(command_line_tool.revmap_file, builder, outdir)

            if "glob" in binding:
                with SourceLine(binding, "glob", WorkflowException, debug):
                    for glob in aslist(binding["glob"]):
                        glob = builder.do_eval(glob)
                        if glob:
                            globpatterns.extend(aslist(glob))

                    # rebase glob pattern as applicable (see note)
                    for glob in list(globpatterns):
                        if not any(
                                glob.startswith(part)
                                for part in [".", "/", "~"]) and "/" in glob:
                            glob = builder.do_eval(glob.split("/")[-1])
                            if glob:
                                globpatterns.extend(aslist(glob))

                    for glob in globpatterns:
                        if glob.startswith(outdir):
                            glob = glob[len(outdir) + 1:]
                        elif glob == ".":
                            glob = outdir
                        elif glob.startswith("/"):
                            raise WorkflowException(
                                "glob patterns must not start with '/'")
                        try:
                            prefix = fs_access.glob(outdir)
                            key = cmp_to_key(
                                cast(Callable[[Text, Text], int],
                                     locale.strcoll))

                            # In case the stdout/stderr file was not created,
                            # touch an empty one so the glob still matches.
                            if "stdout" in self.tool and "stderr" in self.tool \
                                    and glob in (self.tool["stdout"], self.tool["stderr"]):
                                filepath = Path(fs_access.join(outdir, glob))
                                if not filepath.is_file():
                                    Path(filepath).touch()

                            result.extend([{
                                "location":
                                g,
                                "path":
                                fs_access.join(builder.outdir,
                                               g[len(prefix[0]) + 1:]),
                                "basename":
                                os.path.basename(g),
                                "nameroot":
                                os.path.splitext(os.path.basename(g))[0],
                                "nameext":
                                os.path.splitext(os.path.basename(g))[1],
                                "class":
                                "File" if fs_access.isfile(g) else "Directory"
                            } for g in sorted(fs_access.glob(
                                fs_access.join(outdir, glob)),
                                              key=key)])
                        except (OSError, IOError) as exc:
                            LOGGER.warning(Text(exc))
                        except Exception:
                            LOGGER.exception("Unexpected error from fs_access")
                            raise

                for files in result:
                    rfile = files.copy()
                    # TODO: This function raises an exception and seems to be
                    # related to Docker (which is not used here)
                    # revmap(rfile)
                    if files["class"] == "Directory":
                        load_listing = builder.loadListing or (
                            binding and binding.get("loadListing"))
                        if load_listing and load_listing != "no_listing":
                            get_listing(fs_access, files,
                                        (load_listing == "deep_listing"))
                    else:
                        with fs_access.open(rfile["location"], "rb") as f:
                            contents = b""
                            if binding.get("loadContents") or compute_checksum:
                                contents = f.read(CONTENT_LIMIT)
                            if binding.get("loadContents"):
                                files["contents"] = contents.decode("utf-8")
                            if compute_checksum:
                                checksum = hashlib.sha1()  # nosec: B303
                                while contents != b"":
                                    checksum.update(contents)
                                    contents = f.read(1024 * 1024)
                                files[
                                    "checksum"] = f"sha1${checksum.hexdigest()}"
                            f.seek(0, 2)
                            file_size = f.tell()
                        files["size"] = file_size

            optional = False
            single = False
            if isinstance(schema["type"], list):
                if "null" in schema["type"]:
                    optional = True
                if "File" in schema["type"] or "Directory" in schema["type"]:
                    single = True
            elif schema["type"] == "File" or schema["type"] == "Directory":
                single = True

            if "outputEval" in binding:
                with SourceLine(binding, "outputEval", WorkflowException,
                                debug):
                    result = builder.do_eval(binding["outputEval"],
                                             context=result)

            if single:
                if not result and not optional:
                    with SourceLine(binding, "glob", WorkflowException, debug):
                        raise WorkflowException(
                            f"Did not find output file with glob pattern: '{globpatterns}'"
                        )
                elif not result and optional:
                    pass
                elif isinstance(result, list):
                    if len(result) > 1:
                        raise WorkflowException(
                            "Multiple matches for output item that is a single file."
                        )
                    result = result[0]

            if "secondaryFiles" in schema:
                with SourceLine(schema, "secondaryFiles", WorkflowException,
                                debug):
                    for primary in aslist(result):
                        if isinstance(primary, dict):
                            primary.setdefault("secondaryFiles", [])
                            pathprefix = primary["path"][0:primary["path"].
                                                         rindex("/") + 1]
                            for file in aslist(schema["secondaryFiles"]):
                                if isinstance(
                                        file,
                                        dict) or "$(" in file or "${" in file:
                                    sfpath = builder.do_eval(file,
                                                             context=primary)
                                    subst = False
                                else:
                                    sfpath = file
                                    subst = True
                                for sfitem in aslist(sfpath):
                                    if isinstance(sfitem, str):
                                        if subst:
                                            sfitem = {
                                                "path":
                                                substitute(
                                                    primary["path"], sfitem)
                                            }
                                        else:
                                            sfitem = {
                                                "path": pathprefix + sfitem
                                            }
                                    if "path" in sfitem and "location" not in sfitem:
                                        revmap(sfitem)
                                    if fs_access.isfile(sfitem["location"]):
                                        sfitem["class"] = "File"
                                        primary["secondaryFiles"].append(
                                            sfitem)
                                    elif fs_access.isdir(sfitem["location"]):
                                        sfitem["class"] = "Directory"
                                        primary["secondaryFiles"].append(
                                            sfitem)

            if "format" in schema:
                for primary in aslist(result):
                    primary["format"] = builder.do_eval(schema["format"],
                                                        context=primary)

            # Ensure files point to local references outside of the run environment
            # TODO: Again removing revmap....
            # adjustFileObjs(result, revmap)

            if not result and optional:
                return None

        if not empty_and_optional and isinstance(
                schema["type"], dict) and schema["type"]["type"] == "record":
            out = {}
            for f in schema["type"]["fields"]:
                out[shortname(
                    f["name"])] = self.collect_output(  # type: ignore
                        f,
                        builder,
                        outdir,
                        fs_access,
                        compute_checksum=compute_checksum)
            return out
        return result
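
The docstring's note above motivates the glob-rebasing loop: sub-directories are dropped from relative patterns so that a pattern such as "outputs/*.txt" can still match files staged flat in the step's working directory. A minimal, self-contained sketch of that idea (function names here are illustrative, not this module's API):

import fnmatch
import os

def rebase_globs(globpatterns):
    """For every relative pattern that contains a sub-directory, also try the
    basename-only pattern, mirroring the rebasing loop above."""
    rebased = list(globpatterns)
    for pattern in globpatterns:
        if "/" in pattern and not pattern.startswith((".", "/", "~")):
            rebased.append(pattern.split("/")[-1])
    return rebased

def match_staged_outputs(outdir, globpatterns):
    """Match staged (flattened) output files against the rebased patterns."""
    staged = os.listdir(outdir)
    patterns = rebase_globs(globpatterns)
    return sorted({name for name in staged
                   for pattern in patterns if fnmatch.fnmatch(name, pattern)})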
Example #8
    def add_volumes(self, pathmapper, mounts, secret_store=None):
        # type: (PathMapper, List[Text], SecretStore) -> None

        host_outdir = self.outdir
        container_outdir = self.builder.outdir

        for src, vol in pathmapper.items():
            if not vol.staged:
                continue
            host_outdir_tgt = None  # type: Optional[Text]
            if vol.target.startswith(container_outdir+"/"):
                host_outdir_tgt = os.path.join(
                    host_outdir, vol.target[len(container_outdir)+1:])
            if vol.type in ("File", "Directory"):
                if not vol.resolved.startswith("_:"):
                    mounts.append(u"%s:%s" % (
                        docker_windows_path_adjust(vol.resolved),
                        docker_windows_path_adjust(vol.target)))
            elif vol.type == "WritableFile":
                if self.inplace_update:
                    mounts.append(u"%s:%s" % (
                        docker_windows_path_adjust(vol.resolved),
                        docker_windows_path_adjust(vol.target)))
                else:
                    if host_outdir_tgt:
                        shutil.copy(vol.resolved, host_outdir_tgt)
                        ensure_writable(host_outdir_tgt)
                    else:
                        raise WorkflowException(
                            "Unable to compute host_outdir_tgt for "
                            "WriteableFile.")
            elif vol.type == "WritableDirectory":
                if vol.resolved.startswith("_:"):
                    if host_outdir_tgt:
                        os.makedirs(host_outdir_tgt, 0o0755)
                    else:
                        raise WorkflowException(
                            "Unable to compute host_outdir_tgt for "
                            "WritableDirectory.")
                else:
                    if self.inplace_update:
                        mounts.append(u"%s:%s" % (
                            docker_windows_path_adjust(vol.resolved),
                            docker_windows_path_adjust(vol.target)))
                    else:
                        if host_outdir_tgt:
                            shutil.copytree(vol.resolved, host_outdir_tgt)
                            ensure_writable(host_outdir_tgt)
                        else:
                            raise WorkflowException(
                                "Unable to compute host_outdir_tgt for "
                                "WritableDirectory.")
            elif vol.type == "CreateFile":
                if secret_store:
                    contents = secret_store.retrieve(vol.resolved)
                else:
                    contents = vol.resolved
                if host_outdir_tgt:
                    with open(host_outdir_tgt, "wb") as f:
                        f.write(contents.encode("utf-8"))
                else:
                    fd, createtmp = tempfile.mkstemp(dir=self.tmpdir)
                    with os.fdopen(fd, "wb") as f:
                        f.write(contents.encode("utf-8"))
                    mounts.append(u"%s:%s" % (
                        docker_windows_path_adjust(createtmp),
                        docker_windows_path_adjust(vol.target)))
        return mounts
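
The mounts list assembled above holds "host:container" pairs in the syntax that Docker's -v option expects. A small sketch of how such a list might be expanded into command-line arguments (the helper is hypothetical, not part of this module):

def docker_volume_args(mounts):
    """Expand ["host:container", ...] pairs into ["-v", "host:container", ...]."""
    args = []
    for mount in mounts:
        args.extend(["-v", mount])
    return args

# docker_volume_args(["/tmp/out:/var/spool/cwl"])
# -> ["-v", "/tmp/out:/var/spool/cwl"]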
Example #9
    def executor(self, tool, job_order, **kwargs):
        final_output = []
        final_status = []

        def output_callback(out, status):
            final_status.append(status)
            final_output.append(out)

        if "basedir" not in kwargs:
            raise WorkflowException("Must provide 'basedir' in kwargs")

        output_dirs = set()

        if kwargs.get("outdir"):
            finaloutdir = os.path.abspath(kwargs.get("outdir"))
        else:
            finaloutdir = None

        if kwargs.get("tmp_outdir_prefix"):
            kwargs["outdir"] = tempfile.mkdtemp(
                prefix=kwargs["tmp_outdir_prefix"]
            )
        else:
            kwargs["outdir"] = tempfile.mkdtemp()

        output_dirs.add(kwargs["outdir"])
        kwargs["mutation_manager"] = MutationManager()

        jobReqs = None
        if "cwl:requirements" in job_order:
            jobReqs = job_order["cwl:requirements"]
        elif ("cwl:defaults" in tool.metadata and
              "cwl:requirements" in tool.metadata["cwl:defaults"]):
            jobReqs = tool.metadata["cwl:defaults"]["cwl:requirements"]

        if jobReqs:
            for req in jobReqs:
                tool.requirements.append(req)

        if kwargs.get("default_container"):
            tool.requirements.insert(0, {
                "class": "DockerRequirement",
                "dockerPull": kwargs["default_container"]
            })

        jobs = tool.job(job_order, output_callback, **kwargs)

        try:
            for runnable in jobs:
                if runnable:
                    builder = kwargs.get("builder", None)
                    if builder is not None:
                        runnable.builder = builder
                    if runnable.outdir:
                        output_dirs.add(runnable.outdir)
                    runnable.run(**kwargs)
                else:
                    time.sleep(1)
        except WorkflowException as e:
            raise e
        except Exception as e:
            log.error('Workflow error')
            raise WorkflowException(str(e))

        self.wait()
        log.info('All processes have joined')

        if final_output and final_output[0] and finaloutdir:
            final_output[0] = relocateOutputs(
                final_output[0], finaloutdir,
                output_dirs, kwargs.get("move_outputs"),
                kwargs["make_fs_access"](""))

        if kwargs.get("rm_tmpdir"):
            cleanIntermediate(output_dirs)

        if final_output and final_status:
            return (final_output[0], final_status[0])
        else:
            return (None, "permanentFail")
Example #10
def make_workflow_exception(msg):
    name = shortname(port["id"])
    return WorkflowException(
        f"Error collecting output for parameter '{name}':\n{msg}"
    )
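
This closure is the kind of exception factory that SourceLine accepts in place of an exception class; Example #15 below passes a very similar one as its third argument. A self-contained sketch of the same factory with a stubbed port record (the port value and the import path are assumptions):

from cwltool.errors import WorkflowException  # assumption: cwltool-style import

port = {"id": "#main/step/out_file"}  # illustrative output port record

def make_workflow_exception(msg):
    name = port["id"].split("/")[-1]  # stand-in for shortname()
    return WorkflowException(
        f"Error collecting output for parameter '{name}':\n{msg}")

# SourceLine(ports, i, make_workflow_exception, debug) calls this factory to
# build the exception it raises when the wrapped block fails.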
Example #11
def arv_docker_get_image(api_client, dockerRequirement, pull_image,
                         project_uuid, force_pull, tmp_outdir_prefix,
                         match_local_docker):
    """Check if a Docker image is available in Keep, if not, upload it using arv-keepdocker."""

    if "http://arvados.org/cwl#dockerCollectionPDH" in dockerRequirement:
        return dockerRequirement["http://arvados.org/cwl#dockerCollectionPDH"]

    if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
        dockerRequirement = copy.deepcopy(dockerRequirement)
        dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
        if hasattr(dockerRequirement, 'lc'):
            dockerRequirement.lc.data[
                "dockerImageId"] = dockerRequirement.lc.data["dockerPull"]

    global cached_lookups
    global cached_lookups_lock
    with cached_lookups_lock:
        if dockerRequirement["dockerImageId"] in cached_lookups:
            return cached_lookups[dockerRequirement["dockerImageId"]]

    with SourceLine(dockerRequirement, "dockerImageId", WorkflowException,
                    logger.isEnabledFor(logging.DEBUG)):
        sp = dockerRequirement["dockerImageId"].split(":")
        image_name = sp[0]
        image_tag = sp[1] if len(sp) > 1 else "latest"

        images = arvados.commands.keepdocker.list_images_in_arv(
            api_client, 3, image_name=image_name, image_tag=image_tag)

        if images and match_local_docker:
            local_image_id = determine_image_id(
                dockerRequirement["dockerImageId"])
            if local_image_id:
                # find it in the list
                found = False
                for i in images:
                    if i[1]["dockerhash"] == local_image_id:
                        found = True
                        images = [i]
                        break
                if not found:
                    # force re-upload.
                    images = []

        if not images:
            # Fetch Docker image if necessary.
            try:
                result = cwltool.docker.DockerCommandLineJob.get_image(
                    dockerRequirement, pull_image, force_pull,
                    tmp_outdir_prefix)
                if not result:
                    raise WorkflowException("Docker image '%s' not available" %
                                            dockerRequirement["dockerImageId"])
            except OSError as e:
                raise WorkflowException(
                    "While trying to get Docker image '%s', failed to execute 'docker': %s"
                    % (dockerRequirement["dockerImageId"], e))

            # Upload image to Arvados
            args = []
            if project_uuid:
                args.append("--project-uuid=" + project_uuid)
            args.append(image_name)
            args.append(image_tag)
            logger.info("Uploading Docker image %s:%s", image_name, image_tag)
            try:
                arvados.commands.put.api_client = api_client
                arvados.commands.keepdocker.main(args,
                                                 stdout=sys.stderr,
                                                 install_sig_handlers=False,
                                                 api=api_client)
            except SystemExit as e:
                # If e.code is None or zero, then keepdocker exited normally and we can continue
                if e.code:
                    raise WorkflowException("keepdocker exited with code %s" %
                                            e.code)

            images = arvados.commands.keepdocker.list_images_in_arv(
                api_client, 3, image_name=image_name, image_tag=image_tag)

        if not images:
            raise WorkflowException("Could not find Docker image %s:%s" %
                                    (image_name, image_tag))

        pdh = api_client.collections().get(
            uuid=images[0][0]).execute()["portable_data_hash"]

        with cached_lookups_lock:
            cached_lookups[dockerRequirement["dockerImageId"]] = pdh

    return pdh
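
The function consults a module-level cache guarded by a lock so that concurrent steps do not repeat the Keep lookup for the same image. A sketch of how those globals are presumably declared elsewhere in the module (an assumption; the actual definitions may differ):

import threading

cached_lookups = {}                      # dockerImageId -> portable_data_hash
cached_lookups_lock = threading.Lock()   # guards cached_lookups across threads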
Example #12
def setUp(self):
    self.executor = ThreadPoolJobExecutor(1000, 2)
    self.workflow_exception = WorkflowException('workflow exception')
    self.logger = Mock()
Example #13
def validate_cluster_target(arvrunner, runtimeContext):
    if (runtimeContext.submit_runner_cluster and
        runtimeContext.submit_runner_cluster not in arvrunner.api._rootDesc["remoteHosts"] and
        runtimeContext.submit_runner_cluster != arvrunner.api._rootDesc["uuidPrefix"]):
        raise WorkflowException("Unknown or invalid cluster id '%s' known remote clusters are %s" % (runtimeContext.submit_runner_cluster,
                                                                                                  ", ".join(list(arvrunner.api._rootDesc["remoteHosts"].keys()))))
Example #14
def _parsl_execute(
    self,
    runtime,  # type: List[Text]
    env,  # type: MutableMapping[Text, Text]
    runtimeContext  # type: RuntimeContext
):  # type: (...) -> None

    scr, _ = self.get_requirement("ShellCommandRequirement")

    shouldquote = needs_shell_quoting_re.search  # type: Callable[[Any], Any]
    if scr:
        shouldquote = lambda x: False

    _logger.info(
        u"[job %s] %s$ %s%s%s%s", self.name, self.outdir, " \\\n    ".join([
            shellescape.quote(Text(arg))
            if shouldquote(Text(arg)) else Text(arg)
            for arg in (runtime + self.command_line)
        ]), u' < %s' % self.stdin if self.stdin else '', u' > %s' %
        os.path.join(self.outdir, self.stdout) if self.stdout else '',
        u' 2> %s' %
        os.path.join(self.outdir, self.stderr) if self.stderr else '')
    if self.joborder and runtimeContext.research_obj:
        job_order = self.joborder
        assert runtimeContext.prov_obj
        runtimeContext.prov_obj.used_artefacts(
            job_order, runtimeContext.process_run_id,
            runtimeContext.reference_locations, str(self.name))
    outputs = {}  # type: Dict[Text,Text]
    try:
        stdin_path = None
        if self.stdin:
            rmap = self.pathmapper.reversemap(self.stdin)
            if not rmap:
                raise WorkflowException("{} missing from pathmapper".format(
                    self.stdin))
            else:
                stdin_path = rmap[1]

        stderr_path = None
        if self.stderr:
            abserr = os.path.join(self.outdir, self.stderr)
            dnerr = os.path.dirname(abserr)
            if dnerr and not os.path.exists(dnerr):
                os.makedirs(dnerr)
            stderr_path = abserr

        stdout_path = None
        if self.stdout:
            absout = os.path.join(self.outdir, self.stdout)
            dn = os.path.dirname(absout)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            stdout_path = absout

        commands = [Text(x) for x in (runtime + self.command_line)]
        if runtimeContext.secret_store:
            commands = runtimeContext.secret_store.retrieve(commands)
            env = runtimeContext.secret_store.retrieve(env)

        job_script_contents = None  # type: Optional[Text]
        builder = getattr(self, "builder", None)  # type: Builder
        if builder is not None:
            job_script_contents = builder.build_job_script(commands)

        print("Running my own execution layer")
        rcode = _job_popen(
            commands,
            stdin_path=stdin_path,
            stdout_path=stdout_path,
            stderr_path=stderr_path,
            env=env,
            cwd=self.outdir,
            job_dir=tempfile.mkdtemp(prefix=getdefault(
                runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)),
            job_script_contents=job_script_contents,
            timelimit=self.timelimit,
            name=self.name)

        if self.successCodes and rcode in self.successCodes:
            processStatus = "success"
        elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
            processStatus = "temporaryFail"
        elif self.permanentFailCodes and rcode in self.permanentFailCodes:
            processStatus = "permanentFail"
        elif rcode == 0:
            processStatus = "success"
        else:
            processStatus = "permanentFail"

        if self.generatefiles["listing"]:
            assert self.generatemapper is not None
            relink_initialworkdir(self.generatemapper,
                                  self.outdir,
                                  self.builder.outdir,
                                  inplace_update=self.inplace_update)

        outputs = self.collect_outputs(self.outdir)
        outputs = bytes2str_in_dicts(outputs)  # type: ignore
    except OSError as e:
        if e.errno == 2:
            if runtime:
                _logger.error(u"'%s' not found", runtime[0])
            else:
                _logger.error(u"'%s' not found", self.command_line[0])
        else:
            _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    except WorkflowException as e:
        _logger.error(u"[job %s] Job error:\n%s" % (self.name, e))
        processStatus = "permanentFail"
    except Exception as e:
        _logger.exception("Exception while running job")
        processStatus = "permanentFail"
    if runtimeContext.research_obj and self.prov_obj and \
            runtimeContext.process_run_id:
        #creating entities for the outputs produced by each step (in the provenance document)
        self.prov_obj.generate_output_prov(outputs,
                                           runtimeContext.process_run_id,
                                           str(self.name))
        self.prov_obj.document.wasEndedBy(runtimeContext.process_run_id, None,
                                          self.prov_obj.workflow_run_uri,
                                          datetime.datetime.now())
    if processStatus != "success":
        _logger.warning(u"[job %s] completed %s", self.name, processStatus)
    else:
        _logger.info(u"[job %s] completed %s", self.name, processStatus)

    if _logger.isEnabledFor(logging.DEBUG):
        _logger.debug(u"[job %s] %s", self.name, json_dumps(outputs, indent=4))

    if self.generatemapper and runtimeContext.secret_store:
        # Delete any runtime-generated files containing secrets.
        for f, p in self.generatemapper.items():
            if p.type == "CreateFile":
                if runtimeContext.secret_store.has_secret(p.resolved):
                    host_outdir = self.outdir
                    container_outdir = self.builder.outdir
                    host_outdir_tgt = p.target
                    if p.target.startswith(container_outdir + "/"):
                        host_outdir_tgt = os.path.join(
                            host_outdir, p.target[len(container_outdir) + 1:])
                    os.remove(host_outdir_tgt)

    if runtimeContext.workflow_eval_lock is None:
        raise WorkflowException(
            "runtimeContext.workflow_eval_lock must not be None")

    with runtimeContext.workflow_eval_lock:
        self.output_callback(outputs, processStatus)

    if self.stagedir and os.path.exists(self.stagedir):
        _logger.debug(u"[job %s] Removing input staging directory %s",
                      self.name, self.stagedir)
        shutil.rmtree(self.stagedir, True)

    if runtimeContext.rm_tmpdir:
        _logger.debug(u"[job %s] Removing temporary directory %s", self.name,
                      self.tmpdir)
        shutil.rmtree(self.tmpdir, True)
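
The mapping from the child process exit code to a CWL process status ("success" / "temporaryFail" / "permanentFail") recurs in several examples here. Pulled out as a standalone helper it looks roughly like this (a sketch, not this module's actual API):

def status_from_exit_code(rcode, success_codes=(), temporary_fail_codes=(),
                          permanent_fail_codes=()):
    """Replicate the exit-code classification used in _parsl_execute above."""
    if success_codes and rcode in success_codes:
        return "success"
    if temporary_fail_codes and rcode in temporary_fail_codes:
        return "temporaryFail"
    if permanent_fail_codes and rcode in permanent_fail_codes:
        return "permanentFail"
    if rcode == 0:
        return "success"
    return "permanentFail"

# status_from_exit_code(0) -> "success"
# status_from_exit_code(3, success_codes=(3,)) -> "success"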
Example #15
    def collect_output_ports(
        self,
        ports,  # type: Set[Dict[Text, Any]]
        builder,  # type: Builder
        outdir,  # type: Text
        compute_checksum=True,  # type: bool
        jobname="",  # type: Text
        readers=None  # type: Dict[Text, Any]
    ):  # type: (...) -> OutputPorts
        ret = {}  # type: OutputPorts
        debug = LOGGER.isEnabledFor(logging.DEBUG)
        try:
            fs_access = builder.make_fs_access(outdir)
            custom_output = fs_access.join(outdir, "cwl.output.json")
            if fs_access.exists(custom_output):
                with fs_access.open(custom_output, "r") as f:
                    ret = json.load(f)
                if debug:
                    LOGGER.debug(u"Raw output from %s: %s", custom_output,
                                 json.dumps(ret, indent=4))
            else:
                for i, port in enumerate(ports):

                    def make_workflow_exception(msg):
                        return WorkflowException(
                            u"Error collecting output for parameter '%s':\n%s"
                            % (shortname(port["id"]), msg))

                    with SourceLine(ports, i, make_workflow_exception, debug):
                        fragment = shortname(port["id"])
                        ret[fragment] = self.collect_output(
                            port,
                            builder,
                            outdir,
                            fs_access,
                            compute_checksum=compute_checksum)
            if ret:
                # revmap = partial(command_line_tool.revmap_file, builder, outdir)
                adjustDirObjs(ret, trim_listing)

                # TODO: Attempt to avoid a crash because the revmap fct is not functional
                #       (intended for docker usage only?)
                # visit_class(ret, ("File", "Directory"), cast(Callable[[Any], Any], revmap))
                visit_class(ret, ("File", "Directory"),
                            command_line_tool.remove_path)
                normalizeFilesDirs(ret)
                visit_class(
                    ret, ("File", "Directory"),
                    partial(command_line_tool.check_valid_locations,
                            fs_access))

                if compute_checksum:
                    adjustFileObjs(ret, partial(compute_checksums, fs_access))

            validate.validate_ex(self.names.get_name("outputs_record_schema",
                                                     ""),
                                 ret,
                                 strict=False,
                                 logger=LOGGER)
            if ret is not None and builder.mutation_manager is not None:
                adjustFileObjs(ret, builder.mutation_manager.set_generation)
            return ret if ret is not None else {}
        except validate.ValidationException as exc:
            raise WorkflowException(
                "Error validating output record: {!s}\nIn:\n{}".format(
                    exc, json.dumps(ret, indent=4)))
        finally:
            if builder.mutation_manager and readers:
                for reader in readers.values():
                    builder.mutation_manager.release_reader(jobname, reader)
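
The first branch above bypasses per-port collection entirely when the tool wrote a cwl.output.json file into its output directory, which is standard CWL behaviour. A local-filesystem sketch of that shortcut (paths and content are illustrative):

import json
import os

def read_custom_output(outdir):
    """Return the parsed cwl.output.json if the tool produced one, else None."""
    custom_output = os.path.join(outdir, "cwl.output.json")
    if not os.path.exists(custom_output):
        return None
    with open(custom_output, "r") as f:
        return json.load(f)

# A tool might write, for example:
# {"report": {"class": "File", "path": "outputs/report.txt"}}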
Example #16
    def done(self, record):
        try:
            self.update_pipeline_component(record)
        except:
            pass

        try:
            if record["state"] == "Complete":
                processStatus = "success"
            else:
                processStatus = "permanentFail"

            try:
                outputs = {}
                if record["output"]:
                    logc = arvados.collection.Collection(record["log"])
                    log = logc.open(logc.keys()[0])
                    tmpdir = None
                    outdir = None
                    keepdir = None
                    for l in log:
                        # Determine the tmpdir, outdir and keepdir paths from
                        # the job run.  Unfortunately, we can't take the first
                        # values we find (which are expected to be near the
                        # top) and stop scanning because if the node fails and
                        # the job restarts on a different node these values
                        # will differ between runs, and we need to know about the
                        # final run that actually produced output.

                        g = tmpdirre.match(l)
                        if g:
                            tmpdir = g.group(1)
                        g = outdirre.match(l)
                        if g:
                            outdir = g.group(1)
                        g = keepre.match(l)
                        if g:
                            keepdir = g.group(1)

                    colname = "Output %s of %s" % (record["output"][0:7],
                                                   self.name)

                    # check if collection already exists with same owner, name and content
                    collection_exists = self.arvrunner.api.collections().list(
                        filters=[[
                            "owner_uuid", "=", self.arvrunner.project_uuid
                        ], ['portable_data_hash', '=', record["output"]],
                                 ["name", "=", colname]]).execute(
                                     num_retries=self.arvrunner.num_retries)

                    if not collection_exists["items"]:
                        # Create a collection located in the same project as the
                        # pipeline with the contents of the output.
                        # First, get output record.
                        collections = self.arvrunner.api.collections().list(
                            limit=1,
                            filters=[[
                                'portable_data_hash', '=', record["output"]
                            ]],
                            select=[
                                "manifest_text"
                            ]).execute(num_retries=self.arvrunner.num_retries)

                        if not collections["items"]:
                            raise WorkflowException(
                                "Job output '%s' cannot be found on API server"
                                % (record["output"]))

                        # Create new collection in the parent project
                        # with the output contents.
                        self.arvrunner.api.collections().create(
                            body={
                                "owner_uuid":
                                self.arvrunner.project_uuid,
                                "name":
                                colname,
                                "portable_data_hash":
                                record["output"],
                                "manifest_text":
                                collections["items"][0]["manifest_text"]
                            },
                            ensure_unique_name=True).execute(
                                num_retries=self.arvrunner.num_retries)

                    self.builder.outdir = outdir
                    self.builder.pathmapper.keepdir = keepdir
                    outputs = self.collect_outputs("keep:" + record["output"])
            except WorkflowException as e:
                logger.error("Error while collecting job outputs:\n%s",
                             e,
                             exc_info=(e if self.arvrunner.debug else False))
                processStatus = "permanentFail"
            except Exception as e:
                logger.exception(
                    "Got unknown exception while collecting job outputs:")
                processStatus = "permanentFail"

            self.output_callback(outputs, processStatus)
        finally:
            del self.arvrunner.jobs[record["uuid"]]
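
The output-collection branch above creates a named collection in the pipeline's project only when no collection with the same owner, name and content already exists. That create-if-missing step, condensed into a sketch that assumes an Arvados API client (the helper itself is hypothetical):

def ensure_output_collection(api, project_uuid, name, portable_data_hash,
                             manifest_text, num_retries=3):
    """Create the named output collection in the project unless an identical
    one (same owner, name and content) is already there, as done() does."""
    existing = api.collections().list(filters=[
        ["owner_uuid", "=", project_uuid],
        ["portable_data_hash", "=", portable_data_hash],
        ["name", "=", name],
    ]).execute(num_retries=num_retries)
    if existing["items"]:
        return existing["items"][0]
    return api.collections().create(body={
        "owner_uuid": project_uuid,
        "name": name,
        "portable_data_hash": portable_data_hash,
        "manifest_text": manifest_text,
    }, ensure_unique_name=True).execute(num_retries=num_retries)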
Example #17
def make_workflow_exception(msg):
    return WorkflowException(
        u"Error collecting output for parameter '%s':\n%s"
        % (shortname(port["id"]), msg))
Example #18
    def run(self, runtimeContext):
        # type: (RuntimeContext) -> None

        (docker_req, docker_is_req) = self.get_requirement("DockerRequirement")
        self.prov_obj = runtimeContext.prov_obj
        img_id = None
        env = cast(MutableMapping[Text, Text], os.environ)
        user_space_docker_cmd = runtimeContext.user_space_docker_cmd
        if docker_req and user_space_docker_cmd:
            # For user-space docker implementations, a local image name or ID
            # takes precedence over a network pull
            if 'dockerImageId' in docker_req:
                img_id = str(docker_req["dockerImageId"])
            elif 'dockerPull' in docker_req:
                img_id = str(docker_req["dockerPull"])
            # else:
            #     raise WorkflowException(SourceLine(docker_req).makeError(
            #         "Docker image must be specified as 'dockerImageId' or "
            #         "'dockerPull' when using user space implementations of "
            #         "Docker"))
        else:
            try:
                if docker_req and runtimeContext.use_container:
                    img_id = str(
                        self.get_from_requirements(
                            docker_req, True, runtimeContext.pull_image,
                            getdefault(runtimeContext.force_docker_pull, False),
                            getdefault(runtimeContext.tmp_outdir_prefix, DEFAULT_TMP_PREFIX)))
                if img_id is None:
                    if self.builder.find_default_container:
                        default_container = self.builder.find_default_container()
                        if default_container:
                            img_id = str(default_container)

                if docker_req and img_id is None and runtimeContext.use_container:
                    raise Exception("Docker image not available")

                if self.prov_obj and img_id and runtimeContext.process_run_id:
                    # TODO: Integrate with record_container_id
                    container_agent = self.prov_obj.document.agent(
                        uuid.uuid4().urn,
                        {"prov:type": PROV["SoftwareAgent"],
                         "cwlprov:image": img_id,
                         "prov:label": "Container execution of image %s" % img_id})
                    # FIXME: img_id is not a sha256 id, it might just be "debian:8"
                    #img_entity = document.entity("nih:sha-256;%s" % img_id,
                    #                  {"prov:label": "Container image %s" % img_id} )
                    # The image is the plan for this activity-agent association
                    #document.wasAssociatedWith(process_run_ID, container_agent, img_entity)
                    self.prov_obj.document.wasAssociatedWith(
                        runtimeContext.process_run_id, container_agent)
            except Exception as err:
                container = "Shifter"
                _logger.debug("%s error", container, exc_info=True)
                if docker_is_req:
                    raise UnsupportedRequirement(
                        "%s is required to run this tool: %s" % (container, err))
                else:
                    raise WorkflowException(
                        "{0} is not available for this tool, try "
                        "--no-container to disable {0}, or install "
                        "a user space Docker replacement like uDocker with "
                        "--user-space-docker-cmd.: {1}".format(container, err))

        self._setup(runtimeContext)
        stageFiles(self.pathmapper, ignoreWritable=True, symLink=True,
            secret_store=runtimeContext.secret_store)
        runtime = self.create_runtime(env, runtimeContext, img_id)
        self._execute(runtime, env, runtimeContext)
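
For user-space Docker replacements, the image reference is taken from dockerImageId first and dockerPull second. That precedence, isolated into a tiny helper (a sketch; the real code keeps it inline):

def pick_user_space_image(docker_req):
    """Return the image reference for user-space docker tools (e.g. uDocker,
    Shifter), preferring a local image id over a network pull reference."""
    if not docker_req:
        return None
    if "dockerImageId" in docker_req:
        return str(docker_req["dockerImageId"])
    if "dockerPull" in docker_req:
        return str(docker_req["dockerPull"])
    return None

# pick_user_space_image({"dockerPull": "debian:8"}) -> "debian:8"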
Example #19
    def collect_output(
            self,
            schema,  # type: Dict[Text, Any]
            builder,  # type: Builder
            outdir,  # type: Text
            fs_access,  # type: StdFsAccess
            compute_checksum=True  # type: bool
    ):
        # type: (...) -> Optional[Union[Dict[Text, Any], List[Union[Dict[Text, Any], Text]]]]
        result = []  # type: List[Any]
        empty_and_optional = False
        debug = LOGGER.isEnabledFor(logging.DEBUG)
        if "outputBinding" in schema:
            binding = schema["outputBinding"]
            globpatterns = []  # type: List[Text]

            revmap = partial(command_line_tool.revmap_file, builder, outdir)

            if "glob" in binding:
                with SourceLine(binding, "glob", WorkflowException, debug):
                    for glob in aslist(binding["glob"]):
                        glob = builder.do_eval(glob)
                        if glob:
                            globpatterns.extend(aslist(glob))

                    for glob in globpatterns:
                        if glob.startswith(outdir):
                            glob = glob[len(outdir) + 1:]
                        elif glob == ".":
                            glob = outdir
                        elif glob.startswith("/"):
                            raise WorkflowException(
                                "glob patterns must not start with '/'")
                        try:
                            prefix = fs_access.glob(outdir)
                            key = cmp_to_key(
                                cast(Callable[[Text, Text], int],
                                     locale.strcoll))

                            # In case the stdout/stderr file was not created,
                            # touch an empty one so the glob still matches.
                            if "stdout" in self.tool and "stderr" in self.tool \
                                    and glob in (self.tool["stdout"], self.tool["stderr"]):
                                filepath = Path(fs_access.join(outdir, glob))
                                if not filepath.is_file():
                                    Path(filepath).touch()

                            result.extend([{
                                "location":
                                g,
                                "path":
                                fs_access.join(builder.outdir,
                                               g[len(prefix[0]) + 1:]),
                                "basename":
                                os.path.basename(g),
                                "nameroot":
                                os.path.splitext(os.path.basename(g))[0],
                                "nameext":
                                os.path.splitext(os.path.basename(g))[1],
                                "class":
                                "File" if fs_access.isfile(g) else "Directory"
                            } for g in sorted(fs_access.glob(
                                fs_access.join(outdir, glob)),
                                              key=key)])
                        except (OSError, IOError) as exc:
                            LOGGER.warning(Text(exc))
                        except Exception:
                            LOGGER.exception("Unexpected error from fs_access")
                            raise

                for files in result:
                    rfile = files.copy()
                    # TODO: This function raises an exception and seems to be
                    # related to Docker (which is not used here)
                    # revmap(rfile)
                    if files["class"] == "Directory":
                        load_listing = builder.loadListing or (
                            binding and binding.get("loadListing"))
                        if load_listing and load_listing != "no_listing":
                            get_listing(fs_access, files,
                                        (load_listing == "deep_listing"))
                    else:
                        with fs_access.open(rfile["location"], "rb") as f:
                            contents = b""
                            if binding.get("loadContents") or compute_checksum:
                                contents = f.read(CONTENT_LIMIT)
                            if binding.get("loadContents"):
                                files["contents"] = contents.decode("utf-8")
                            if compute_checksum:
                                checksum = hashlib.sha1()  # nosec: B303
                                while contents != b"":
                                    checksum.update(contents)
                                    contents = f.read(1024 * 1024)
                                files[
                                    "checksum"] = "sha1$%s" % checksum.hexdigest(
                                    )
                            f.seek(0, 2)
                            file_size = f.tell()
                        files["size"] = file_size

            optional = False
            single = False
            if isinstance(schema["type"], list):
                if "null" in schema["type"]:
                    optional = True
                if "File" in schema["type"] or "Directory" in schema["type"]:
                    single = True
            elif schema["type"] == "File" or schema["type"] == "Directory":
                single = True

            if "outputEval" in binding:
                with SourceLine(binding, "outputEval", WorkflowException,
                                debug):
                    result = builder.do_eval(binding["outputEval"],
                                             context=result)

            if single:
                if not result and not optional:
                    with SourceLine(binding, "glob", WorkflowException, debug):
                        raise WorkflowException(
                            "Did not find output file with glob pattern: '{}'".
                            format(globpatterns))
                elif not result and optional:
                    pass
                elif isinstance(result, list):
                    if len(result) > 1:
                        raise WorkflowException(
                            "Multiple matches for output item that is a single file."
                        )
                    result = result[0]

            if "secondaryFiles" in schema:
                with SourceLine(schema, "secondaryFiles", WorkflowException,
                                debug):
                    for primary in aslist(result):
                        if isinstance(primary, dict):
                            primary.setdefault("secondaryFiles", [])
                            pathprefix = primary["path"][0:primary["path"].
                                                         rindex("/") + 1]
                            for file in aslist(schema["secondaryFiles"]):
                                if isinstance(
                                        file,
                                        dict) or "$(" in file or "${" in file:
                                    sfpath = builder.do_eval(file,
                                                             context=primary)
                                    subst = False
                                else:
                                    sfpath = file
                                    subst = True
                                for sfitem in aslist(sfpath):
                                    if isinstance(sfitem, str):
                                        if subst:
                                            sfitem = {
                                                "path":
                                                substitute(
                                                    primary["path"], sfitem)
                                            }
                                        else:
                                            sfitem = {
                                                "path": pathprefix + sfitem
                                            }
                                    if "path" in sfitem and "location" not in sfitem:
                                        revmap(sfitem)
                                    if fs_access.isfile(sfitem["location"]):
                                        sfitem["class"] = "File"
                                        primary["secondaryFiles"].append(
                                            sfitem)
                                    elif fs_access.isdir(sfitem["location"]):
                                        sfitem["class"] = "Directory"
                                        primary["secondaryFiles"].append(
                                            sfitem)

            if "format" in schema:
                for primary in aslist(result):
                    primary["format"] = builder.do_eval(schema["format"],
                                                        context=primary)

            # Ensure files point to local references outside of the run environment
            # TODO: Again removing revmap....
            # adjustFileObjs(result, revmap)

            if not result and optional:
                return None

        if not empty_and_optional and isinstance(
                schema["type"], dict) and schema["type"]["type"] == "record":
            out = {}
            for f in schema["type"]["fields"]:
                out[shortname(
                    f["name"])] = self.collect_output(  # type: ignore
                        f,
                        builder,
                        outdir,
                        fs_access,
                        compute_checksum=compute_checksum)
            return out
        return result
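
Both collect_output variants compute the CWL-style checksum ("sha1$" followed by the hex digest) by streaming the file in chunks after the optional loadContents read. The same idea as a standalone helper for a local file path (a sketch, not this module's API):

import hashlib

def cwl_sha1_checksum(path, chunk_size=1024 * 1024):
    """Return the "sha1$<hexdigest>" string used for CWL File checksums."""
    checksum = hashlib.sha1()  # nosec: B303 - sha1 is what CWL records
    with open(path, "rb") as f:
        contents = f.read(chunk_size)
        while contents != b"":
            checksum.update(contents)
            contents = f.read(chunk_size)
    return "sha1$%s" % checksum.hexdigest()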
Example #20
    def executor(self, tool, job_order, runtimeContext, **kwargs):
        """Executor method."""
        final_output = []
        final_status = []

        def output_callback(out, processStatus):
            final_status.append(processStatus)
            final_output.append(out)

        if not runtimeContext.basedir:
            raise WorkflowException('`runtimeContext` should contain a '
                                    '`basedir`')

        output_dirs = set()

        if runtimeContext.outdir:
            finaloutdir = os.path.abspath(runtimeContext.outdir)
        else:
            finaloutdir = None
        if runtimeContext.tmp_outdir_prefix:
            runtimeContext.outdir = tempfile.mkdtemp(
                prefix=runtimeContext.tmp_outdir_prefix)
        else:
            runtimeContext.outdir = tempfile.mkdtemp()

        output_dirs.add(runtimeContext.outdir)
        runtimeContext.mutation_manager = MutationManager()

        jobReqs = None
        if "cwl:requirements" in job_order:
            jobReqs = job_order["cwl:requirements"]
        elif ("cwl:defaults" in tool.metadata
              and "cwl:requirements" in tool.metadata["cwl:defaults"]):
            jobReqs = tool.metadata["cwl:defaults"]["cwl:requirements"]
        if jobReqs:
            for req in jobReqs:
                tool.requirements.append(req)

        if not runtimeContext.default_container:
            runtimeContext.default_container = 'frolvlad/alpine-bash'
        runtimeContext.docker_outdir = os.path.join(runtimeContext.working_dir,
                                                    "cwl/docker_outdir")
        runtimeContext.docker_tmpdir = os.path.join(runtimeContext.working_dir,
                                                    "cwl/docker_tmpdir")
        runtimeContext.docker_stagedir = os.path.join(
            runtimeContext.working_dir, "cwl/docker_stagedir")

        jobs = tool.job(job_order, output_callback, runtimeContext)
        try:
            for runnable in jobs:
                if runnable:
                    if runtimeContext.builder:
                        runnable.builder = runtimeContext.builder
                    if runnable.outdir:
                        output_dirs.add(runnable.outdir)
                    runnable.run(runtimeContext)
                else:
                    # log.error(
                    #     "Workflow cannot make any more progress"
                    # )
                    # break
                    time.sleep(1)

        except WorkflowException as e:
            traceback.print_exc()
            raise e
        except Exception as e:
            traceback.print_exc()
            raise WorkflowException(str(e))

        # wait for all processes to finish
        self.wait()

        if final_output and final_output[0] and finaloutdir:
            final_output[0] = relocateOutputs(
                final_output[0], finaloutdir, output_dirs,
                runtimeContext.move_outputs, runtimeContext.make_fs_access(""))

        if runtimeContext.rm_tmpdir:
            cleanIntermediate(output_dirs)

        if final_output and final_status:
            return str(final_output[0]), str(final_status[0])
        else:
            return None, "permanentFail"
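
When no container is requested, this executor falls back to the frolvlad/alpine-bash image by setting runtimeContext.default_container; Example #9 above shows the other common approach of injecting a DockerRequirement directly. That injection, as a sketch for comparison:

def ensure_default_container(tool, default_container="frolvlad/alpine-bash"):
    """Prepend a DockerRequirement when a default container is configured,
    mirroring the kwargs-based executor in Example #9."""
    if default_container:
        tool.requirements.insert(0, {
            "class": "DockerRequirement",
            "dockerPull": default_container,
        })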
Example #21
    def execute(self, runtime, env, runtime_context):  # noqa: E811
        # type: (List[Text], MutableMapping[Text, Text], RuntimeContext) -> None

        self.results = self.wps_process.execute(self.builder.job, self.outdir,
                                                self.expected_outputs)

        if self.joborder and runtime_context.research_obj:
            job_order = self.joborder
            assert runtime_context.prov_obj
            assert runtime_context.process_run_id
            runtime_context.prov_obj.used_artefacts(
                job_order, runtime_context.process_run_id, str(self.name))
        outputs = {}  # type: Dict[Text, Text]
        try:
            rcode = 0

            if self.successCodes:
                process_status = "success"
            elif self.temporaryFailCodes:
                process_status = "temporaryFail"
            elif self.permanentFailCodes:
                process_status = "permanentFail"
            elif rcode == 0:
                process_status = "success"
            else:
                process_status = "permanentFail"

            if self.generatefiles["listing"]:
                assert self.generatemapper is not None
                relink_initialworkdir(self.generatemapper,
                                      self.outdir,
                                      self.builder.outdir,
                                      inplace_update=self.inplace_update)

            outputs = self.collect_outputs(self.outdir)
            outputs = bytes2str_in_dicts(outputs)  # type: ignore
        except OSError as exc:
            if exc.errno == 2:
                if runtime:
                    LOGGER.exception(u"'%s' not found", runtime[0])
                else:
                    LOGGER.exception(u"'%s' not found", self.command_line[0])
            else:
                LOGGER.exception("Exception while running job")
            process_status = "permanentFail"
        except WorkflowException as err:
            LOGGER.exception(u"[job %s] Job error:\n%s", self.name, err)
            process_status = "permanentFail"
        except Exception:  # noqa: W0703 # nosec: B110
            LOGGER.exception("Exception while running job")
            process_status = "permanentFail"
        if runtime_context.research_obj and self.prov_obj and \
                runtime_context.process_run_id:
            # creating entities for the outputs produced by each step (in the provenance document)
            self.prov_obj.generate_output_prov(outputs,
                                               runtime_context.process_run_id,
                                               str(self.name))
            self.prov_obj.document.wasEndedBy(runtime_context.process_run_id,
                                              None,
                                              self.prov_obj.workflow_run_uri,
                                              now())
        if process_status != "success":
            LOGGER.warning(u"[job %s] completed %s", self.name, process_status)
        else:
            LOGGER.info(u"[job %s] completed %s", self.name, process_status)

        if LOGGER.isEnabledFor(logging.DEBUG):
            LOGGER.debug(u"[job %s] %s", self.name,
                         json.dumps(outputs, indent=4))

        if self.generatemapper and runtime_context.secret_store:
            # Delete any runtime-generated files containing secrets.
            for _, path_item in self.generatemapper.items():
                if path_item.type == "CreateFile":
                    if runtime_context.secret_store.has_secret(
                            path_item.resolved):
                        host_outdir = self.outdir
                        container_outdir = self.builder.outdir
                        host_outdir_tgt = path_item.target
                        if path_item.target.startswith(container_outdir + "/"):
                            host_outdir_tgt = os.path.join(
                                host_outdir,
                                path_item.target[len(container_outdir) + 1:])
                        os.remove(host_outdir_tgt)

        if runtime_context.workflow_eval_lock is None:
            raise WorkflowException(
                "runtime_context.workflow_eval_lock must not be None")

        with runtime_context.workflow_eval_lock:
            self.output_callback(outputs, process_status)

        if self.stagedir and os.path.exists(self.stagedir):
            LOGGER.debug(u"[job %s] Removing input staging directory %s",
                         self.name, self.stagedir)
            shutil.rmtree(self.stagedir, True)

        if runtime_context.rm_tmpdir:
            LOGGER.debug(u"[job %s] Removing temporary directory %s",
                         self.name, self.tmpdir)
            shutil.rmtree(self.tmpdir, True)
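
One detail worth calling out in the example above is the secret-file cleanup: the CreateFile target is expressed against the container's output directory, so it has to be remapped onto the host output directory before os.remove() can delete it. A minimal sketch of that remapping, with purely illustrative directory names:

import os


def container_to_host(target, container_outdir, host_outdir):
    # Map a container-side path onto the host-side output directory; paths
    # that are not under container_outdir are returned unchanged.
    if target.startswith(container_outdir + "/"):
        return os.path.join(host_outdir, target[len(container_outdir) + 1:])
    return target


# container_to_host("/var/spool/cwl/secret.txt", "/var/spool/cwl", "/tmp/out1")
# -> "/tmp/out1/secret.txt"
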
Example #22
    def arv_executor(self, tool, job_order, **kwargs):
        self.debug = kwargs.get("debug")

        tool.visit(self.check_features)

        self.project_uuid = kwargs.get("project_uuid")
        self.pipeline = None
        make_fs_access = kwargs.get("make_fs_access") or partial(
            CollectionFsAccess, collection_cache=self.collection_cache)
        self.fs_access = make_fs_access(kwargs["basedir"])

        self.trash_intermediate = kwargs["trash_intermediate"]
        if self.trash_intermediate and self.work_api != "containers":
            raise Exception(
                "--trash-intermediate is only supported with --api=containers."
            )

        self.intermediate_output_ttl = kwargs["intermediate_output_ttl"]
        if self.intermediate_output_ttl and self.work_api != "containers":
            raise Exception(
                "--intermediate-output-ttl is only supported with --api=containers."
            )
        if self.intermediate_output_ttl < 0:
            raise Exception(
                "Invalid value %d for --intermediate-output-ttl, cannot be less than zero"
                % self.intermediate_output_ttl)

        if not kwargs.get("name"):
            kwargs["name"] = self.name = tool.tool.get(
                "label") or tool.metadata.get("label") or os.path.basename(
                    tool.tool["id"])

        # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
        # Also uploads docker images.
        override_tools = {}
        upload_workflow_deps(self, tool, override_tools)

        # Reload tool object which may have been updated by
        # upload_workflow_deps
        tool = self.arv_make_tool(tool.doc_loader.idx[tool.tool["id"]],
                                  makeTool=self.arv_make_tool,
                                  loader=tool.doc_loader,
                                  avsc_names=tool.doc_schema,
                                  metadata=tool.metadata,
                                  override_tools=override_tools)

        # Upload local file references in the job order.
        job_order = upload_job_order(self, "%s input" % kwargs["name"], tool,
                                     job_order)

        existing_uuid = kwargs.get("update_workflow")
        if existing_uuid or kwargs.get("create_workflow"):
            # Create a pipeline template or workflow record and exit.
            if self.work_api == "jobs":
                tmpl = RunnerTemplate(
                    self,
                    tool,
                    job_order,
                    kwargs.get("enable_reuse"),
                    uuid=existing_uuid,
                    submit_runner_ram=kwargs.get("submit_runner_ram"),
                    name=kwargs["name"])
                tmpl.save()
                # cwltool.main will write our return value to stdout.
                return (tmpl.uuid, "success")
            elif self.work_api == "containers":
                return (upload_workflow(
                    self,
                    tool,
                    job_order,
                    self.project_uuid,
                    uuid=existing_uuid,
                    submit_runner_ram=kwargs.get("submit_runner_ram"),
                    name=kwargs["name"]), "success")

        self.ignore_docker_for_reuse = kwargs.get("ignore_docker_for_reuse")

        kwargs["make_fs_access"] = make_fs_access
        kwargs["enable_reuse"] = kwargs.get("enable_reuse")
        kwargs["use_container"] = True
        kwargs["tmpdir_prefix"] = "tmp"
        kwargs["compute_checksum"] = kwargs.get("compute_checksum")

        if self.work_api == "containers":
            kwargs["outdir"] = "/var/spool/cwl"
            kwargs["docker_outdir"] = "/var/spool/cwl"
            kwargs["tmpdir"] = "/tmp"
            kwargs["docker_tmpdir"] = "/tmp"
        elif self.work_api == "jobs":
            kwargs["outdir"] = "$(task.outdir)"
            kwargs["docker_outdir"] = "$(task.outdir)"
            kwargs["tmpdir"] = "$(task.tmpdir)"

        runnerjob = None
        if kwargs.get("submit"):
            # Submit a runner job to run the workflow for us.
            if self.work_api == "containers":
                if tool.tool["class"] == "CommandLineTool" and kwargs.get(
                        "wait"):
                    kwargs["runnerjob"] = tool.tool["id"]
                    runnerjob = tool.job(job_order, self.output_callback,
                                         **kwargs).next()
                else:
                    runnerjob = RunnerContainer(
                        self,
                        tool,
                        job_order,
                        kwargs.get("enable_reuse"),
                        self.output_name,
                        self.output_tags,
                        submit_runner_ram=kwargs.get("submit_runner_ram"),
                        name=kwargs.get("name"),
                        on_error=kwargs.get("on_error"),
                        submit_runner_image=kwargs.get("submit_runner_image"),
                        intermediate_output_ttl=kwargs.get(
                            "intermediate_output_ttl"))
            elif self.work_api == "jobs":
                runnerjob = RunnerJob(
                    self,
                    tool,
                    job_order,
                    kwargs.get("enable_reuse"),
                    self.output_name,
                    self.output_tags,
                    submit_runner_ram=kwargs.get("submit_runner_ram"),
                    name=kwargs.get("name"),
                    on_error=kwargs.get("on_error"),
                    submit_runner_image=kwargs.get("submit_runner_image"))
        elif "cwl_runner_job" not in kwargs and self.work_api == "jobs":
            # Create pipeline for local run
            self.pipeline = self.api.pipeline_instances().create(
                body={
                    "owner_uuid": self.project_uuid,
                    "name": (kwargs["name"] if kwargs.get("name")
                             else shortname(tool.tool["id"])),
                    "components": {},
                    "state": "RunningOnClient"
                }).execute(num_retries=self.num_retries)
            logger.info("Pipeline instance %s", self.pipeline["uuid"])

        if runnerjob and not kwargs.get("wait"):
            runnerjob.run(wait=kwargs.get("wait"))
            return (runnerjob.uuid, "success")

        self.poll_api = arvados.api('v1')
        self.polling_thread = threading.Thread(target=self.poll_states)
        self.polling_thread.start()

        if runnerjob:
            jobiter = iter((runnerjob, ))
        else:
            if "cwl_runner_job" in kwargs:
                self.uuid = kwargs.get("cwl_runner_job").get('uuid')
            jobiter = tool.job(job_order, self.output_callback, **kwargs)

        try:
            self.cond.acquire()
            # Will continue to hold the lock for the duration of this code
            # except when in cond.wait(), at which point on_message can update
            # job state and process output callbacks.

            loopperf = Perf(metrics, "jobiter")
            loopperf.__enter__()
            for runnable in jobiter:
                loopperf.__exit__()

                if self.stop_polling.is_set():
                    break

                if runnable:
                    with Perf(metrics, "run"):
                        runnable.run(**kwargs)
                else:
                    if self.processes:
                        self.cond.wait(1)
                    else:
                        logger.error(
                            "Workflow is deadlocked, no runnable jobs and not waiting on any pending jobs."
                        )
                        break
                loopperf.__enter__()
            loopperf.__exit__()

            while self.processes:
                self.cond.wait(1)

        except UnsupportedRequirement:
            raise
        except:
            if sys.exc_info()[0] is KeyboardInterrupt:
                logger.error("Interrupted, marking pipeline as failed")
            else:
                logger.error(
                    "Execution failed: %s",
                    sys.exc_info()[1],
                    exc_info=(sys.exc_info()[1] if self.debug else False))
            if self.pipeline:
                self.api.pipeline_instances().update(
                    uuid=self.pipeline["uuid"], body={
                        "state": "Failed"
                    }).execute(num_retries=self.num_retries)
            if runnerjob and runnerjob.uuid and self.work_api == "containers":
                self.api.container_requests().update(
                    uuid=runnerjob.uuid, body={
                        "priority": "0"
                    }).execute(num_retries=self.num_retries)
        finally:
            self.cond.release()
            self.stop_polling.set()
            self.polling_thread.join()

        if self.final_status == "UnsupportedRequirement":
            raise UnsupportedRequirement("Check log for details.")

        if self.final_output is None:
            raise WorkflowException("Workflow did not return a result.")

        if kwargs.get("submit") and isinstance(runnerjob, Runner):
            logger.info("Final output collection %s", runnerjob.final_output)
        else:
            if self.output_name is None:
                self.output_name = "Output of %s" % (shortname(
                    tool.tool["id"]))
            if self.output_tags is None:
                self.output_tags = ""
            self.final_output, self.final_output_collection = self.make_output_collection(
                self.output_name, self.output_tags, self.final_output)
            self.set_crunch_output()

        if kwargs.get("compute_checksum"):
            adjustDirObjs(self.final_output,
                          partial(get_listing, self.fs_access))
            adjustFileObjs(self.final_output,
                           partial(compute_checksums, self.fs_access))

        if self.trash_intermediate and self.final_status == "success":
            self.trash_intermediate_output()

        return (self.final_output, self.final_status)
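
The core of this executor is the loop over jobiter: run whatever is runnable, and when the iterator yields nothing runnable, block on the condition variable until the background polling thread reports a state change. The sketch below strips that producer/consumer pattern down to its essentials; MiniScheduler, the processes dict, and on_state_change are illustrative stand-ins rather than the actual Arvados classes.

import logging
import threading

logger = logging.getLogger("sketch")


class MiniScheduler:
    def __init__(self):
        self.cond = threading.Condition()
        self.processes = {}              # uuid -> job still running remotely

    def on_state_change(self, uuid, done):
        # Called from the polling thread; wakes up run_all().
        with self.cond:
            if done:
                self.processes.pop(uuid, None)
            self.cond.notify_all()

    def run_all(self, jobiter):
        with self.cond:
            for runnable in jobiter:
                if runnable:
                    runnable.run()       # expected to register itself in processes
                elif self.processes:
                    self.cond.wait(1)    # nothing runnable yet; wait for updates
                else:
                    logger.error("Workflow is deadlocked")
                    break
            while self.processes:        # drain jobs that are still running
                self.cond.wait(1)
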
Example #23
    def arv_executor(self, tool, job_order, **kwargs):
        self.debug = kwargs.get("debug")

        tool.visit(self.check_writable)

        self.project_uuid = kwargs.get("project_uuid")
        self.pipeline = None
        make_fs_access = kwargs.get("make_fs_access") or partial(
            CollectionFsAccess,
            api_client=self.api,
            keep_client=self.keep_client)
        self.fs_access = make_fs_access(kwargs["basedir"])

        existing_uuid = kwargs.get("update_workflow")
        if existing_uuid or kwargs.get("create_workflow"):
            if self.work_api == "jobs":
                tmpl = RunnerTemplate(
                    self,
                    tool,
                    job_order,
                    kwargs.get("enable_reuse"),
                    uuid=existing_uuid,
                    submit_runner_ram=kwargs.get("submit_runner_ram"),
                    name=kwargs.get("name"))
                tmpl.save()
                # cwltool.main will write our return value to stdout.
                return tmpl.uuid
            else:
                return upload_workflow(
                    self,
                    tool,
                    job_order,
                    self.project_uuid,
                    uuid=existing_uuid,
                    submit_runner_ram=kwargs.get("submit_runner_ram"),
                    name=kwargs.get("name"))

        self.ignore_docker_for_reuse = kwargs.get("ignore_docker_for_reuse")

        kwargs["make_fs_access"] = make_fs_access
        kwargs["enable_reuse"] = kwargs.get("enable_reuse")
        kwargs["use_container"] = True
        kwargs["tmpdir_prefix"] = "tmp"
        kwargs["on_error"] = "continue"
        kwargs["compute_checksum"] = kwargs.get("compute_checksum")

        if not kwargs["name"]:
            del kwargs["name"]

        if self.work_api == "containers":
            kwargs["outdir"] = "/var/spool/cwl"
            kwargs["docker_outdir"] = "/var/spool/cwl"
            kwargs["tmpdir"] = "/tmp"
            kwargs["docker_tmpdir"] = "/tmp"
        elif self.work_api == "jobs":
            kwargs["outdir"] = "$(task.outdir)"
            kwargs["docker_outdir"] = "$(task.outdir)"
            kwargs["tmpdir"] = "$(task.tmpdir)"

        upload_instance(self, shortname(tool.tool["id"]), tool, job_order)

        runnerjob = None
        if kwargs.get("submit"):
            if self.work_api == "containers":
                if tool.tool["class"] == "CommandLineTool":
                    kwargs["runnerjob"] = tool.tool["id"]
                    runnerjob = tool.job(job_order, self.output_callback,
                                         **kwargs).next()
                else:
                    runnerjob = RunnerContainer(
                        self,
                        tool,
                        job_order,
                        kwargs.get("enable_reuse"),
                        self.output_name,
                        self.output_tags,
                        submit_runner_ram=kwargs.get("submit_runner_ram"),
                        name=kwargs.get("name"))
            else:
                runnerjob = RunnerJob(
                    self,
                    tool,
                    job_order,
                    kwargs.get("enable_reuse"),
                    self.output_name,
                    self.output_tags,
                    submit_runner_ram=kwargs.get("submit_runner_ram"),
                    name=kwargs.get("name"))

        if (not kwargs.get("submit") and "cwl_runner_job" not in kwargs
                and self.work_api != "containers"):
            # Create pipeline for local run
            self.pipeline = self.api.pipeline_instances().create(
                body={
                    "owner_uuid": self.project_uuid,
                    "name": (kwargs["name"] if kwargs.get("name")
                             else shortname(tool.tool["id"])),
                    "components": {},
                    "state": "RunningOnClient"
                }).execute(num_retries=self.num_retries)
            logger.info("Pipeline instance %s", self.pipeline["uuid"])

        if runnerjob and not kwargs.get("wait"):
            runnerjob.run(wait=kwargs.get("wait"))
            return runnerjob.uuid

        self.poll_api = arvados.api('v1')
        self.polling_thread = threading.Thread(target=self.poll_states)
        self.polling_thread.start()

        if runnerjob:
            jobiter = iter((runnerjob, ))
        else:
            if "cwl_runner_job" in kwargs:
                self.uuid = kwargs.get("cwl_runner_job").get('uuid')
            jobiter = tool.job(job_order, self.output_callback, **kwargs)

        try:
            self.cond.acquire()
            # Will continue to hold the lock for the duration of this code
            # except when in cond.wait(), at which point on_message can update
            # job state and process output callbacks.

            loopperf = Perf(metrics, "jobiter")
            loopperf.__enter__()
            for runnable in jobiter:
                loopperf.__exit__()

                if self.stop_polling.is_set():
                    break

                if runnable:
                    with Perf(metrics, "run"):
                        runnable.run(**kwargs)
                else:
                    if self.processes:
                        self.cond.wait(1)
                    else:
                        logger.error(
                            "Workflow is deadlocked, no runnable jobs and not waiting on any pending jobs."
                        )
                        break
                loopperf.__enter__()
            loopperf.__exit__()

            while self.processes:
                self.cond.wait(1)

        except UnsupportedRequirement:
            raise
        except:
            if sys.exc_info()[0] is KeyboardInterrupt:
                logger.error("Interrupted, marking pipeline as failed")
            else:
                logger.error(
                    "Execution failed: %s",
                    sys.exc_info()[1],
                    exc_info=(sys.exc_info()[1] if self.debug else False))
            if self.pipeline:
                self.api.pipeline_instances().update(
                    uuid=self.pipeline["uuid"], body={
                        "state": "Failed"
                    }).execute(num_retries=self.num_retries)
            if runnerjob and runnerjob.uuid and self.work_api == "containers":
                self.api.container_requests().update(
                    uuid=runnerjob.uuid, body={
                        "priority": "0"
                    }).execute(num_retries=self.num_retries)
        finally:
            self.cond.release()
            self.stop_polling.set()
            self.polling_thread.join()

        if self.final_status == "UnsupportedRequirement":
            raise UnsupportedRequirement("Check log for details.")

        if self.final_output is None:
            raise WorkflowException("Workflow did not return a result.")

        if kwargs.get("submit") and isinstance(runnerjob, Runner):
            logger.info("Final output collection %s", runnerjob.final_output)
        else:
            if self.output_name is None:
                self.output_name = "Output of %s" % (shortname(
                    tool.tool["id"]))
            if self.output_tags is None:
                self.output_tags = ""
            self.final_output, self.final_output_collection = self.make_output_collection(
                self.output_name, self.output_tags, self.final_output)
            self.set_crunch_output()

        if self.final_status != "success":
            raise WorkflowException("Workflow failed.")

        if kwargs.get("compute_checksum"):
            adjustDirObjs(self.final_output, partial(getListing,
                                                     self.fs_access))
            adjustFileObjs(self.final_output,
                           partial(compute_checksums, self.fs_access))

        return self.final_output
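
Finally, the compute_checksum branch walks the final output object and fills in checksum and size for every File entry. A rough sketch of that visitor pattern follows, hashing local files with hashlib; the real adjustFileObjs/compute_checksums pair reads file contents through fs_access (i.e. from Keep) rather than the local filesystem.

import hashlib
import os


def visit_files(obj, op):
    # Apply op() to every CWL File object nested inside obj.
    if isinstance(obj, dict):
        if obj.get("class") == "File":
            op(obj)
        for value in obj.values():
            visit_files(value, op)
    elif isinstance(obj, list):
        for item in obj:
            visit_files(item, op)


def add_checksum(fileobj):
    path = fileobj.get("path")
    if path and os.path.isfile(path):
        with open(path, "rb") as handle:
            fileobj["checksum"] = "sha1$" + hashlib.sha1(handle.read()).hexdigest()
        fileobj["size"] = os.path.getsize(path)


# visit_files(final_output, add_checksum)  # roughly what the checksum pass does
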