Example #1
def test_pack_input_named_name() -> None:
    loadingContext, workflowobj, uri = fetch_document(
        get_data("tests/wf/trick_revsort.cwl")
    )
    loadingContext.do_update = False
    loadingContext, uri = resolve_and_validate_document(
        loadingContext, workflowobj, uri
    )
    loader = loadingContext.loader
    assert loader
    loader.resolve_ref(uri)[0]

    with open(get_data("tests/wf/expect_trick_packed.cwl")) as packed_file:
        expect_packed = yaml.main.round_trip_load(packed_file)

    packed = cwltool.pack.pack(loadingContext, uri)
    adjustFileObjs(
        packed, partial(make_relative, os.path.abspath(get_data("tests/wf")))
    )
    adjustDirObjs(packed, partial(make_relative, os.path.abspath(get_data("tests/wf"))))

    assert "$schemas" in packed
    assert len(packed["$schemas"]) == len(expect_packed["$schemas"])
    del packed["$schemas"]
    del expect_packed["$schemas"]

    assert packed == expect_packed
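
The test above relies on adjustFileObjs/adjustDirObjs plus make_relative to rewrite every File and Directory location in the packed document relative to the test data directory. Below is a minimal self-contained sketch of that visitor pattern, with illustrative helper names (walk_class, rel_location) rather than cwltool's actual implementation:

import os
from collections.abc import MutableMapping
from functools import partial
from typing import Any, Callable

def walk_class(rec: Any, cls: str, op: Callable[[dict], None]) -> None:
    # Recursively visit dicts and lists, applying op to every mapping whose
    # "class" field matches cls.
    if isinstance(rec, MutableMapping):
        if rec.get("class") == cls:
            op(rec)
        for value in rec.values():
            walk_class(value, cls, op)
    elif isinstance(rec, list):
        for item in rec:
            walk_class(item, cls, op)

def rel_location(base: str, obj: dict) -> None:
    # Rewrite an absolute file:// location into a path relative to base.
    loc = obj.get("location", "")
    if loc.startswith("file://"):
        obj["location"] = os.path.relpath(loc[len("file://"):], base)

# Toy usage playing the role of adjustFileObjs(packed, partial(make_relative, base)):
packed_doc = {"$graph": [{"class": "File", "location": "file:///data/wf/input.txt"}]}
walk_class(packed_doc, "File", partial(rel_location, "/data/wf"))
assert packed_doc["$graph"][0]["location"] == "input.txt"
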
Example #2
def test_packing(unpacked: str, expected: str) -> None:
    """Compare expected version reality with various workflows and --pack."""
    loadingContext, workflowobj, uri = fetch_document(get_data(unpacked))
    loadingContext.do_update = False
    loadingContext, uri = resolve_and_validate_document(
        loadingContext, workflowobj, uri
    )

    packed = json.loads(print_pack(loadingContext, uri))
    context_dir = os.path.abspath(os.path.dirname(get_data(unpacked)))
    adjustFileObjs(packed, partial(make_relative, context_dir))
    adjustDirObjs(packed, partial(make_relative, context_dir))

    with open(get_data(expected)) as packed_file:
        expect_packed = json.load(packed_file)

    if "$schemas" in expect_packed:
        assert "$schemas" in packed
        packed_schemas = packed["$schemas"]
        assert isinstance(packed_schemas, Sized)
        assert len(packed_schemas) == len(expect_packed["$schemas"])
        del packed["$schemas"]
        del expect_packed["$schemas"]

    assert packed == expect_packed
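
The unpacked and expected parameters imply this test is driven by a pytest parametrization; a sketch of what such a decorator could look like, with placeholder workflow paths borrowed from the other examples rather than cwltool's actual parameter list:

import pytest

@pytest.mark.parametrize(
    "unpacked,expected",
    [
        ("tests/wf/revsort.cwl", "tests/wf/expect_packed.cwl"),
        # ... further (workflow, expected packed result) pairs
    ],
)
def test_packing(unpacked: str, expected: str) -> None:
    ...
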
Example #3
    def done(self, record):
        """Base method for handling a completed runner."""

        try:
            if record["state"] == "Complete":
                if record.get("exit_code") is not None:
                    if record["exit_code"] == 33:
                        processStatus = "UnsupportedRequirement"
                    elif record["exit_code"] == 0:
                        processStatus = "success"
                    else:
                        processStatus = "permanentFail"
                else:
                    processStatus = "success"
            else:
                processStatus = "permanentFail"

            outputs = {}

            if processStatus == "permanentFail":
                logc = arvados.collection.CollectionReader(
                    record["log"],
                    api_client=self.arvrunner.api,
                    keep_client=self.arvrunner.keep_client,
                    num_retries=self.arvrunner.num_retries)
                done.logtail(logc,
                             logger.error,
                             "%s (%s) error log:" %
                             (self.arvrunner.label(self), record["uuid"]),
                             maxlen=40)

            self.final_output = record["output"]
            outc = arvados.collection.CollectionReader(
                self.final_output,
                api_client=self.arvrunner.api,
                keep_client=self.arvrunner.keep_client,
                num_retries=self.arvrunner.num_retries)
            if "cwl.output.json" in outc:
                with outc.open("cwl.output.json", "rb") as f:
                    if f.size() > 0:
                        outputs = json.loads(f.read().decode())

            def keepify(fileobj):
                path = fileobj["location"]
                if not path.startswith("keep:"):
                    fileobj["location"] = "keep:%s/%s" % (record["output"],
                                                          path)

            adjustFileObjs(outputs, keepify)
            adjustDirObjs(outputs, keepify)
        except Exception:
            logger.exception("[%s] While getting final output object",
                             self.name)
            self.arvrunner.output_callback({}, "permanentFail")
        else:
            self.arvrunner.output_callback(outputs, processStatus)
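
A small worked example of the keepify rewrite defined above, using a made-up collection reference: relative output paths are anchored to the runner's output collection, while locations that already start with keep: are left untouched.

record_output = "99999-4zz18-xxxxxxxxxxxxxxx"  # hypothetical record["output"]
fileobj = {"class": "File", "location": "foo/result.txt"}

if not fileobj["location"].startswith("keep:"):
    fileobj["location"] = "keep:%s/%s" % (record_output, fileobj["location"])

assert fileobj["location"] == "keep:99999-4zz18-xxxxxxxxxxxxxxx/foo/result.txt"
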
Example #4
def upload_workflow(arvRunner, tool, job_order, project_uuid, uuid=None,
                    submit_runner_ram=0, name=None, merged_map=None,
                    submit_runner_image=None):

    packed = packed_workflow(arvRunner, tool, merged_map)

    adjustDirObjs(job_order, trim_listing)
    adjustFileObjs(job_order, trim_anonymous_location)
    adjustDirObjs(job_order, trim_anonymous_location)

    main = [p for p in packed["$graph"] if p["id"] == "#main"][0]
    for inp in main["inputs"]:
        sn = shortname(inp["id"])
        if sn in job_order:
            inp["default"] = job_order[sn]

    if not name:
        name = tool.tool.get("label", os.path.basename(tool.tool["id"]))

    upload_dependencies(arvRunner, name, tool.doc_loader,
                        packed, tool.tool["id"], False)

    wf_runner_resources = None

    hints = main.get("hints", [])
    found = False
    for h in hints:
        if h["class"] == "http://arvados.org/cwl#WorkflowRunnerResources":
            wf_runner_resources = h
            found = True
            break
    if not found:
        wf_runner_resources = {"class": "http://arvados.org/cwl#WorkflowRunnerResources"}
        hints.append(wf_runner_resources)

    wf_runner_resources["acrContainerImage"] = arvados_jobs_image(arvRunner, submit_runner_image or "arvados/jobs:"+__version__)

    if submit_runner_ram:
        wf_runner_resources["ramMin"] = submit_runner_ram

    main["hints"] = hints

    body = {
        "workflow": {
            "name": name,
            "description": tool.tool.get("doc", ""),
            "definition":json.dumps(packed, sort_keys=True, indent=4, separators=(',',': '))
        }}
    if project_uuid:
        body["workflow"]["owner_uuid"] = project_uuid

    if uuid:
        call = arvRunner.api.workflows().update(uuid=uuid, body=body)
    else:
        call = arvRunner.api.workflows().create(body=body)
    return call.execute(num_retries=arvRunner.num_retries)["uuid"]
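
For illustration, when the packed #main process has no WorkflowRunnerResources hint and submit_runner_ram=2048 is passed, the hint appended above ends up shaped roughly like this (the container image tag is a hypothetical value returned by arvados_jobs_image):

wf_runner_resources = {
    "class": "http://arvados.org/cwl#WorkflowRunnerResources",
    "acrContainerImage": "arvados/jobs:2.4.0",  # hypothetical resolved image
    "ramMin": 2048,
}
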
Example #5
def test_pack_fragment() -> None:
    with open(get_data("tests/wf/scatter2_subwf.cwl")) as packed_file:
        expect_packed = yaml.main.safe_load(packed_file)

    loadingContext, workflowobj, uri = fetch_document(get_data("tests/wf/scatter2.cwl"))
    packed = cwltool.pack.pack(loadingContext, uri + "#scatterstep/mysub")
    adjustFileObjs(
        packed, partial(make_relative, os.path.abspath(get_data("tests/wf")))
    )
    adjustDirObjs(packed, partial(make_relative, os.path.abspath(get_data("tests/wf"))))

    assert json.dumps(packed, sort_keys=True, indent=2) == json.dumps(
        expect_packed, sort_keys=True, indent=2
    )
Example #6
def test_pack() -> None:
    loadingContext, workflowobj, uri = fetch_document(get_data("tests/wf/revsort.cwl"))

    with open(get_data("tests/wf/expect_packed.cwl")) as packed_file:
        expect_packed = yaml.main.safe_load(packed_file)

    packed = cwltool.pack.pack(loadingContext, uri)
    adjustFileObjs(
        packed, partial(make_relative, os.path.abspath(get_data("tests/wf")))
    )
    adjustDirObjs(packed, partial(make_relative, os.path.abspath(get_data("tests/wf"))))

    assert "$schemas" in packed
    assert len(packed["$schemas"]) == len(expect_packed["$schemas"])
    del packed["$schemas"]
    del expect_packed["$schemas"]

    assert packed == expect_packed
Example #7
    def job(self, joborder, output_callback, runtimeContext):

        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext, self.metadata)
        runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)

        req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
        if not req:
            return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)

        # RunInSingleContainer is true

        with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
            if "id" not in self.tool:
                raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))

        discover_secondary_files(self.arvrunner.fs_access, builder,
                                 self.tool["inputs"], joborder)

        with Perf(metrics, "subworkflow upload_deps"):
            upload_dependencies(self.arvrunner,
                                os.path.basename(joborder.get("id", "#")),
                                self.doc_loader,
                                joborder,
                                joborder.get("id", "#"),
                                False)

            if self.wf_pdh is None:
                packed = pack(self.loadingContext, self.tool["id"], loader=self.doc_loader)

                for p in packed["$graph"]:
                    if p["id"] == "#main":
                        p["requirements"] = dedup_reqs(self.requirements)
                        p["hints"] = dedup_reqs(self.hints)

                def visit(item):
                    if "requirements" in item:
                        item["requirements"] = [i for i in item["requirements"] if i["class"] != "DockerRequirement"]
                    for t in ("hints", "requirements"):
                        if t not in item:
                            continue
                        for req in item[t]:
                            if req["class"] == "ResourceRequirement":
                                dyn = False
                                for k in max_res_pars + sum_res_pars:
                                    if k in req:
                                        if isinstance(req[k], basestring):
                                            if item["id"] == "#main":
                                                # only the top-level requirements/hints may contain expressions
                                                self.dynamic_resource_req.append(req)
                                                dyn = True
                                                break
                                            else:
                                                with SourceLine(req, k, WorkflowException):
                                                    raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
                                if not dyn:
                                    self.static_resource_req.append(req)

                visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)

                if self.static_resource_req:
                    self.static_resource_req = [get_overall_res_req(self.static_resource_req)]

                upload_dependencies(self.arvrunner,
                                    runtimeContext.name,
                                    self.doc_loader,
                                    packed,
                                    self.tool["id"],
                                    False)

                # Discover files/directories referenced by the
                # workflow (mainly "default" values)
                visit_class(packed, ("File", "Directory"), self.wf_reffiles.append)


        if self.dynamic_resource_req:
            # Evaluate dynamic resource requirements using current builder
            rs = copy.copy(self.static_resource_req)
            for dyn_rs in self.dynamic_resource_req:
                eval_req = {"class": "ResourceRequirement"}
                for a in max_res_pars + sum_res_pars:
                    if a in dyn_rs:
                        eval_req[a] = builder.do_eval(dyn_rs[a])
                rs.append(eval_req)
            job_res_reqs = [get_overall_res_req(rs)]
        else:
            job_res_reqs = self.static_resource_req

        with Perf(metrics, "subworkflow adjust"):
            joborder_resolved = copy.deepcopy(joborder)
            joborder_keepmount = copy.deepcopy(joborder)

            reffiles = []
            visit_class(joborder_keepmount, ("File", "Directory"), reffiles.append)

            mapper = ArvPathMapper(self.arvrunner, reffiles+self.wf_reffiles, runtimeContext.basedir,
                                   "/keep/%s",
                                   "/keep/%s/%s")

            # For containers API, we need to make sure any extra
            # referenced files (ie referenced by the workflow but
            # not in the inputs) are included in the mounts.
            if self.wf_reffiles:
                runtimeContext = runtimeContext.copy()
                runtimeContext.extra_reffiles = copy.deepcopy(self.wf_reffiles)

            def keepmount(obj):
                remove_redundant_fields(obj)
                with SourceLine(obj, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
                    if "location" not in obj:
                        raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
                with SourceLine(obj, "location", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
                    if obj["location"].startswith("keep:"):
                        obj["location"] = mapper.mapper(obj["location"]).target
                        if "listing" in obj:
                            del obj["listing"]
                    elif obj["location"].startswith("_:"):
                        del obj["location"]
                    else:
                        raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])

            visit_class(joborder_keepmount, ("File", "Directory"), keepmount)

            def resolved(obj):
                if obj["location"].startswith("keep:"):
                    obj["location"] = mapper.mapper(obj["location"]).resolved

            visit_class(joborder_resolved, ("File", "Directory"), resolved)

            if self.wf_pdh is None:
                adjustFileObjs(packed, keepmount)
                adjustDirObjs(packed, keepmount)
                self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)

        self.loadingContext = self.loadingContext.copy()
        self.loadingContext.metadata = self.loadingContext.metadata.copy()
        self.loadingContext.metadata["http://commonwl.org/cwltool#original_cwlVersion"] = "v1.0"

        if len(job_res_reqs) == 1:
            # RAM request needs to be at least 128 MiB or the workflow
            # runner itself won't run reliably.
            if job_res_reqs[0].get("ramMin", 1024) < 128:
                job_res_reqs[0]["ramMin"] = 128

        arguments = ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl", "cwl.input.yml"]
        if runtimeContext.debug:
            arguments.insert(0, '--debug')

        wf_runner = cmap({
            "class": "CommandLineTool",
            "baseCommand": "cwltool",
            "inputs": self.tool["inputs"],
            "outputs": self.tool["outputs"],
            "stdout": "cwl.output.json",
            "requirements": self.requirements+job_res_reqs+[
                {"class": "InlineJavascriptRequirement"},
                {
                "class": "InitialWorkDirRequirement",
                "listing": [{
                        "entryname": "workflow.cwl",
                        "entry": '$({"class": "File", "location": "keep:%s/workflow.cwl"})' % self.wf_pdh
                    }, {
                        "entryname": "cwl.input.yml",
                        "entry": json.dumps(joborder_keepmount, indent=2, sort_keys=True, separators=(',',': ')).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
                    }]
            }],
            "hints": self.hints,
            "arguments": arguments,
            "id": "#"
        })
        return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(joborder_resolved, output_callback, runtimeContext)
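
The cwl.input.yml entry above is escaped so that cwltool stages the embedded document literally instead of evaluating CWL parameter references inside it; a toy illustration of that escaping on a made-up input document:

import json

doc = {"msg": "price is $(price) or ${price}", "path": "C:\\tmp"}
entry = (json.dumps(doc, indent=2, sort_keys=True, separators=(',', ': '))
         .replace("\\", "\\\\")   # double backslashes first
         .replace('$(', '\\$(')   # escape CWL expression openers
         .replace('${', '\\${'))
# The escaped "$(" and "${" survive InitialWorkDirRequirement evaluation as
# literal text, so the staged cwl.input.yml ends up matching the original job order.
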
Example #8
    def arvados_job_spec(self, runtimeContext):
        """Create an Arvados container request for this workflow.

        The returned dict can be used to create a container passed as
        the +body+ argument to container_requests().create().
        """

        adjustDirObjs(self.job_order, trim_listing)
        visit_class(self.job_order, ("File", "Directory"),
                    trim_anonymous_location)
        visit_class(self.job_order, ("File", "Directory"),
                    remove_redundant_fields)

        secret_mounts = {}
        for param in sorted(self.job_order.keys()):
            if self.secret_store.has_secret(self.job_order[param]):
                mnt = "/secrets/s%d" % len(secret_mounts)
                secret_mounts[mnt] = {
                    "kind": "text",
                    "content":
                    self.secret_store.retrieve(self.job_order[param])
                }
                self.job_order[param] = {"$include": mnt}

        container_req = {
            "name": self.name,
            "output_path": "/var/spool/cwl",
            "cwd": "/var/spool/cwl",
            "priority": self.priority,
            "state": "Committed",
            "container_image": arvados_jobs_image(self.arvrunner,
                                                  self.jobs_image),
            "mounts": {
                "/var/lib/cwl/cwl.input.json": {
                    "kind": "json",
                    "content": self.job_order
                },
                "stdout": {
                    "kind": "file",
                    "path": "/var/spool/cwl/cwl.output.json"
                },
                "/var/spool/cwl": {
                    "kind": "collection",
                    "writable": True
                }
            },
            "secret_mounts": secret_mounts,
            "runtime_constraints": {
                "vcpus":
                math.ceil(self.submit_runner_cores),
                "ram":
                1024 * 1024 * (math.ceil(self.submit_runner_ram) +
                               math.ceil(self.collection_cache_size)),
                "API":
                True
            },
            "use_existing":
            False,  # Never reuse the runner container - see #15497.
            "properties": {}
        }

        if self.embedded_tool.tool.get("id", "").startswith("keep:"):
            sp = self.embedded_tool.tool["id"].split('/')
            workflowcollection = sp[0][5:]
            workflowname = "/".join(sp[1:])
            workflowpath = "/var/lib/cwl/workflow/%s" % workflowname
            container_req["mounts"]["/var/lib/cwl/workflow"] = {
                "kind": "collection",
                "portable_data_hash": "%s" % workflowcollection
            }
        else:
            packed = packed_workflow(self.arvrunner, self.embedded_tool,
                                     self.merged_map)
            workflowpath = "/var/lib/cwl/workflow.json#main"
            container_req["mounts"]["/var/lib/cwl/workflow.json"] = {
                "kind": "json",
                "content": packed
            }
            if self.embedded_tool.tool.get("id", "").startswith("arvwf:"):
                container_req["properties"][
                    "template_uuid"] = self.embedded_tool.tool["id"][6:33]

        # --local means execute the workflow instead of submitting a container request
        # --api=containers means use the containers API
        # --no-log-timestamps means don't add timestamps (the logging infrastructure does this)
        # --disable-validate because we already validated so don't need to do it again
        # --eval-timeout is the timeout for javascript invocation
        # --parallel-task-count is the number of threads to use for job submission
        # --enable/disable-reuse sets desired job reuse
        # --collection-cache-size sets aside memory to store collections
        command = [
            "arvados-cwl-runner", "--local", "--api=containers",
            "--no-log-timestamps", "--disable-validate", "--disable-color",
            "--eval-timeout=%s" % self.arvrunner.eval_timeout,
            "--thread-count=%s" % self.arvrunner.thread_count,
            "--enable-reuse" if self.enable_reuse else "--disable-reuse",
            "--collection-cache-size=%s" % self.collection_cache_size
        ]

        if self.output_name:
            command.append("--output-name=" + self.output_name)
            container_req["output_name"] = self.output_name

        if self.output_tags:
            command.append("--output-tags=" + self.output_tags)

        if runtimeContext.debug:
            command.append("--debug")

        if runtimeContext.storage_classes != "default":
            command.append("--storage-classes=" +
                           runtimeContext.storage_classes)

        if self.on_error:
            command.append("--on-error=" + self.on_error)

        if self.intermediate_output_ttl:
            command.append("--intermediate-output-ttl=%d" %
                           self.intermediate_output_ttl)

        if self.arvrunner.trash_intermediate:
            command.append("--trash-intermediate")

        if self.arvrunner.project_uuid:
            command.append("--project-uuid=" + self.arvrunner.project_uuid)

        if self.enable_dev:
            command.append("--enable-dev")

        command.extend([workflowpath, "/var/lib/cwl/cwl.input.json"])

        container_req["command"] = command

        return container_req
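
With typical settings, the command assembled above comes out along these lines (every value shown is illustrative):

command = [
    "arvados-cwl-runner", "--local", "--api=containers",
    "--no-log-timestamps", "--disable-validate", "--disable-color",
    "--eval-timeout=20", "--thread-count=4",
    "--enable-reuse", "--collection-cache-size=256",
    "--output-name=My run output", "--debug",
    "/var/lib/cwl/workflow.json#main", "/var/lib/cwl/cwl.input.json",
]
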
Example #9
    def arv_executor(self,
                     updated_tool,
                     job_order,
                     runtimeContext,
                     logger=None):
        self.debug = runtimeContext.debug

        updated_tool.visit(self.check_features)

        self.project_uuid = runtimeContext.project_uuid
        self.pipeline = None
        self.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
        self.secret_store = runtimeContext.secret_store

        self.trash_intermediate = runtimeContext.trash_intermediate
        if self.trash_intermediate and self.work_api != "containers":
            raise Exception(
                "--trash-intermediate is only supported with --api=containers."
            )

        self.intermediate_output_ttl = runtimeContext.intermediate_output_ttl
        if self.intermediate_output_ttl and self.work_api != "containers":
            raise Exception(
                "--intermediate-output-ttl is only supported with --api=containers."
            )
        if self.intermediate_output_ttl < 0:
            raise Exception(
                "Invalid value %d for --intermediate-output-ttl, cannot be less than zero"
                % self.intermediate_output_ttl)

        if runtimeContext.submit_request_uuid and self.work_api != "containers":
            raise Exception(
                "--submit-request-uuid requires containers API, but using '{}' api"
                .format(self.work_api))

        if not runtimeContext.name:
            runtimeContext.name = self.name = updated_tool.tool.get(
                "label") or updated_tool.metadata.get(
                    "label") or os.path.basename(updated_tool.tool["id"])

        # Upload local file references in the job order.
        job_order = upload_job_order(self, "%s input" % runtimeContext.name,
                                     updated_tool, job_order)

        # the last clause means: if it is a command line tool, and we
        # are going to wait for the result, and always_submit_runner
        # is false, then we don't submit a runner process.

        submitting = (runtimeContext.update_workflow
                      or runtimeContext.create_workflow or
                      (runtimeContext.submit
                       and not (updated_tool.tool["class"] == "CommandLineTool"
                                and runtimeContext.wait
                                and not runtimeContext.always_submit_runner)))

        loadingContext = self.loadingContext.copy()
        loadingContext.do_validate = False
        loadingContext.do_update = False
        if submitting:
            # Document may have been auto-updated. Reload the original
            # document with updating disabled because we want to
            # submit the document with its original CWL version, not
            # the auto-updated one.
            tool = load_tool(updated_tool.tool["id"], loadingContext)
        else:
            tool = updated_tool

        # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
        # Also uploads docker images.
        merged_map = upload_workflow_deps(self, tool)

        # Recreate process object (ArvadosWorkflow or
        # ArvadosCommandTool) because tool document may have been
        # updated by upload_workflow_deps in ways that modify
        # inheritance of hints or requirements.
        loadingContext.loader = tool.doc_loader
        loadingContext.avsc_names = tool.doc_schema
        loadingContext.metadata = tool.metadata
        tool = load_tool(tool.tool, loadingContext)

        existing_uuid = runtimeContext.update_workflow
        if existing_uuid or runtimeContext.create_workflow:
            # Create a pipeline template or workflow record and exit.
            if self.work_api == "containers":
                return (upload_workflow(
                    self,
                    tool,
                    job_order,
                    self.project_uuid,
                    uuid=existing_uuid,
                    submit_runner_ram=runtimeContext.submit_runner_ram,
                    name=runtimeContext.name,
                    merged_map=merged_map), "success")

        self.apply_reqs(job_order, tool)

        self.ignore_docker_for_reuse = runtimeContext.ignore_docker_for_reuse
        self.eval_timeout = runtimeContext.eval_timeout

        runtimeContext = runtimeContext.copy()
        runtimeContext.use_container = True
        runtimeContext.tmpdir_prefix = "tmp"
        runtimeContext.work_api = self.work_api

        if self.work_api == "containers":
            if self.ignore_docker_for_reuse:
                raise Exception(
                    "--ignore-docker-for-reuse not supported with containers API."
                )
            runtimeContext.outdir = "/var/spool/cwl"
            runtimeContext.docker_outdir = "/var/spool/cwl"
            runtimeContext.tmpdir = "/tmp"
            runtimeContext.docker_tmpdir = "/tmp"

        if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
            raise Exception("--priority must be in the range 1..1000.")

        if self.should_estimate_cache_size:
            visited = set()
            estimated_size = [0]

            def estimate_collection_cache(obj):
                if obj.get("location", "").startswith("keep:"):
                    m = pdh_size.match(obj["location"][5:])
                    if m and m.group(1) not in visited:
                        visited.add(m.group(1))
                        estimated_size[0] += int(m.group(2))

            visit_class(job_order, ("File", "Directory"),
                        estimate_collection_cache)
            runtimeContext.collection_cache_size = max(
                ((estimated_size[0] * 192) // (1024 * 1024)) + 1, 256)
            self.collection_cache.set_cap(
                runtimeContext.collection_cache_size * 1024 * 1024)

        logger.info("Using collection cache size %s MiB",
                    runtimeContext.collection_cache_size)

        runnerjob = None
        if runtimeContext.submit:
            # Submit a runner job to run the workflow for us.
            if self.work_api == "containers":
                if submitting:
                    tool = RunnerContainer(
                        self,
                        updated_tool,
                        tool,
                        loadingContext,
                        runtimeContext.enable_reuse,
                        self.output_name,
                        self.output_tags,
                        submit_runner_ram=runtimeContext.submit_runner_ram,
                        name=runtimeContext.name,
                        on_error=runtimeContext.on_error,
                        submit_runner_image=runtimeContext.submit_runner_image,
                        intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
                        merged_map=merged_map,
                        priority=runtimeContext.priority,
                        secret_store=self.secret_store,
                        collection_cache_size=runtimeContext.collection_cache_size,
                        collection_cache_is_default=self.should_estimate_cache_size)
                else:
                    runtimeContext.runnerjob = tool.tool["id"]

        if runtimeContext.cwl_runner_job is not None:
            self.uuid = runtimeContext.cwl_runner_job.get('uuid')

        jobiter = tool.job(job_order, self.output_callback, runtimeContext)

        if runtimeContext.submit and not runtimeContext.wait:
            runnerjob = next(jobiter)
            runnerjob.run(runtimeContext)
            return (runnerjob.uuid, "success")

        current_container = arvados_cwl.util.get_current_container(
            self.api, self.num_retries, logger)
        if current_container:
            logger.info("Running inside container %s",
                        current_container.get("uuid"))

        self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
        self.polling_thread = threading.Thread(target=self.poll_states)
        self.polling_thread.start()

        self.task_queue = TaskQueue(self.workflow_eval_lock, self.thread_count)

        try:
            self.workflow_eval_lock.acquire()

            # Holds the lock while this code runs and releases it when
            # it is safe to do so in self.workflow_eval_lock.wait(),
            # at which point on_message can update job state and
            # process output callbacks.

            loopperf = Perf(metrics, "jobiter")
            loopperf.__enter__()
            for runnable in jobiter:
                loopperf.__exit__()

                if self.stop_polling.is_set():
                    break

                if self.task_queue.error is not None:
                    raise self.task_queue.error

                if runnable:
                    with Perf(metrics, "run"):
                        self.start_run(runnable, runtimeContext)
                else:
                    if (self.task_queue.in_flight + len(self.processes)) > 0:
                        self.workflow_eval_lock.wait(3)
                    else:
                        logger.error(
                            "Workflow is deadlocked, no runnable processes and not waiting on any pending processes."
                        )
                        break

                if self.stop_polling.is_set():
                    break

                loopperf.__enter__()
            loopperf.__exit__()

            while (self.task_queue.in_flight + len(self.processes)) > 0:
                if self.task_queue.error is not None:
                    raise self.task_queue.error
                self.workflow_eval_lock.wait(3)

        except UnsupportedRequirement:
            raise
        except:
            if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
                logger.error("Interrupted, workflow will be cancelled")
            elif isinstance(sys.exc_info()[1], WorkflowException):
                logger.error(
                    "Workflow execution failed:\n%s",
                    sys.exc_info()[1],
                    exc_info=(sys.exc_info()[1] if self.debug else False))
            else:
                logger.exception("Workflow execution failed")

            if self.pipeline:
                self.api.pipeline_instances().update(
                    uuid=self.pipeline["uuid"], body={
                        "state": "Failed"
                    }).execute(num_retries=self.num_retries)

            if self.work_api == "containers" and not current_container:
                # Not running in a crunch container, so cancel any outstanding processes.
                for p in self.processes:
                    try:
                        self.api.container_requests().update(
                            uuid=p, body={
                                "priority": "0"
                            }).execute(num_retries=self.num_retries)
                    except Exception:
                        pass
        finally:
            self.workflow_eval_lock.release()
            self.task_queue.drain()
            self.stop_polling.set()
            self.polling_thread.join()
            self.task_queue.join()

        if self.final_status == "UnsupportedRequirement":
            raise UnsupportedRequirement("Check log for details.")

        if self.final_output is None:
            raise WorkflowException("Workflow did not return a result.")

        if runtimeContext.submit and isinstance(tool, Runner):
            logger.info("Final output collection %s", tool.final_output)
        else:
            if self.output_name is None:
                self.output_name = "Output of %s" % (shortname(
                    tool.tool["id"]))
            if self.output_tags is None:
                self.output_tags = ""

            storage_classes = runtimeContext.storage_classes.strip().split(",")
            self.final_output, self.final_output_collection = self.make_output_collection(
                self.output_name, storage_classes, self.output_tags,
                self.final_output)
            self.set_crunch_output()

        if runtimeContext.compute_checksum:
            adjustDirObjs(self.final_output,
                          partial(get_listing, self.fs_access))
            adjustFileObjs(self.final_output,
                           partial(compute_checksums, self.fs_access))

        if self.trash_intermediate and self.final_status == "success":
            self.trash_intermediate_output()

        return (self.final_output, self.final_status)
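
A worked example of the collection cache sizing heuristic in arv_executor above: the numeric suffix of each distinct portable data hash referenced by the job order is summed into estimated_size, scaled by 192, converted to MiB, and floored at 256 MiB.

estimated_size = 5 * 1024 * 1024   # suppose the +size suffixes sum to 5 MiB
cache_mib = max(((estimated_size * 192) // (1024 * 1024)) + 1, 256)
assert cache_mib == 961            # set_cap() then receives 961 * 1024 * 1024
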
Example #10
    def make_output_collection(self, name, storage_classes, tagsString,
                               outputObj):
        outputObj = copy.deepcopy(outputObj)

        files = []

        def capture(fileobj):
            files.append(fileobj)

        adjustDirObjs(outputObj, capture)
        adjustFileObjs(outputObj, capture)

        generatemapper = NoFollowPathMapper(files, "", "", separateDirs=False)

        final = arvados.collection.Collection(api_client=self.api,
                                              keep_client=self.keep_client,
                                              num_retries=self.num_retries)

        for k, v in generatemapper.items():
            if v.type == "Directory" and v.resolved.startswith("_:"):
                continue
            if v.type == "CreateFile" and (k.startswith("_:")
                                           or v.resolved.startswith("_:")):
                with final.open(v.target, "wb") as f:
                    f.write(v.resolved.encode("utf-8"))
                    continue

            if not v.resolved.startswith("keep:"):
                raise Exception("Output source is not in keep or a literal")
            sp = v.resolved.split("/")
            srccollection = sp[0][5:]
            try:
                reader = self.collection_cache.get(srccollection)
                srcpath = "/".join(sp[1:]) if len(sp) > 1 else "."
                final.copy(srcpath,
                           v.target,
                           source_collection=reader,
                           overwrite=False)
            except arvados.errors.ArgumentError as e:
                logger.error("Creating CollectionReader for '%s' '%s': %s", k,
                             v, e)
                raise
            except IOError as e:
                logger.error("While preparing output collection: %s", e)
                raise

        def rewrite(fileobj):
            fileobj["location"] = generatemapper.mapper(
                fileobj["location"]).target
            for k in ("listing", "contents", "nameext", "nameroot", "dirname"):
                if k in fileobj:
                    del fileobj[k]

        adjustDirObjs(outputObj, rewrite)
        adjustFileObjs(outputObj, rewrite)

        with final.open("cwl.output.json", "w") as f:
            res = str(
                json.dumps(outputObj,
                           sort_keys=True,
                           indent=4,
                           separators=(',', ': '),
                           ensure_ascii=False))
            f.write(res)

        final.save_new(name=name,
                       owner_uuid=self.project_uuid,
                       storage_classes=storage_classes,
                       ensure_unique_name=True)

        logger.info("Final output collection %s \"%s\" (%s)",
                    final.portable_data_hash(),
                    final.api_response()["name"], final.manifest_locator())

        final_uuid = final.manifest_locator()
        tags = tagsString.split(',')
        for tag in tags:
            self.api.links().create(body={
                "head_uuid": final_uuid,
                "link_class": "tag",
                "name": tag
            }).execute(num_retries=self.num_retries)

        def finalcollection(fileobj):
            fileobj["location"] = "keep:%s/%s" % (final.portable_data_hash(),
                                                  fileobj["location"])

        adjustDirObjs(outputObj, finalcollection)
        adjustFileObjs(outputObj, finalcollection)

        return (outputObj, final)
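
An illustration of the rewrite step above on a single File object, with a made-up mapper target: the location is replaced by its target inside the new output collection and derived fields are dropped before cwl.output.json is written.

fileobj = {
    "class": "File",
    "location": "keep:fa7aeb5140e2848d39b416daeef4ffc5+45/out.txt",  # hypothetical source
    "nameroot": "out",
    "nameext": ".txt",
    "dirname": "/keep/fa7aeb5140e2848d39b416daeef4ffc5+45",
}
mapped_target = "out.txt"  # hypothetical generatemapper target for this file

fileobj["location"] = mapped_target
for k in ("listing", "contents", "nameext", "nameroot", "dirname"):
    fileobj.pop(k, None)

assert fileobj == {"class": "File", "location": "out.txt"}
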