def make_builder(joborder, hints, requirements, runtimeContext, metadata):
    """Construct a cwltool Builder for evaluating CWL expressions against a job order."""
    return Builder(
        job=joborder,
        files=[],                   # type: List[Dict[Text, Text]]
        bindings=[],                # type: List[Dict[Text, Any]]
        schemaDefs={},              # type: Dict[Text, Dict[Text, Any]]
        names=None,                 # type: Names
        requirements=requirements,  # type: List[Dict[Text, Any]]
        hints=hints,                # type: List[Dict[Text, Any]]
        resources={},               # type: Dict[str, int]
        mutation_manager=None,      # type: Optional[MutationManager]
        formatgraph=None,           # type: Optional[Graph]
        make_fs_access=None,        # type: Type[StdFsAccess]
        fs_access=None,             # type: StdFsAccess
        job_script_provider=runtimeContext.job_script_provider,  # type: Optional[Any]
        timeout=runtimeContext.eval_timeout,                     # type: float
        debug=runtimeContext.debug,                              # type: bool
        js_console=runtimeContext.js_console,                    # type: bool
        force_docker_pull=runtimeContext.force_docker_pull,      # type: bool
        loadListing="",             # type: Text
        outdir="",                  # type: Text
        tmpdir="",                  # type: Text
        stagedir="",                # type: Text
        cwlVersion=metadata.get("http://commonwl.org/cwltool#original_cwlVersion") or metadata.get("cwlVersion"),
        container_engine="docker")
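# A minimal usage sketch (not from the original source). It assumes cwltool's
# RuntimeContext supplies the attributes read above (job_script_provider,
# eval_timeout, debug, js_console, force_docker_pull); the helper name and the
# example values are hypothetical.
def _example_make_builder_usage():
    from cwltool.context import RuntimeContext

    runtime_context = RuntimeContext()
    builder = make_builder({"threads": 2}, hints=[], requirements=[],
                           runtimeContext=runtime_context,
                           metadata={"cwlVersion": "v1.2"})
    # do_eval() resolves CWL parameter references against the job order.
    return builder.do_eval("$(inputs.threads)")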
def _makebuilder(cudaReq: CWLObjectType) -> Builder:
    """Build a minimal Builder that carries a single CUDA requirement."""
    return Builder(
        {},                      # job
        [],                      # files
        [],                      # bindings
        {},                      # schemaDefs
        schema.Names(),          # names
        [cudaReq],               # requirements
        [],                      # hints
        {"cudaDeviceCount": 1},  # resources
        None,                    # mutation_manager
        None,                    # formatgraph
        StdFsAccess,             # make_fs_access
        StdFsAccess(""),         # fs_access
        None,                    # job_script_provider
        0.1,                     # timeout
        False,                   # debug
        False,                   # js_console
        False,                   # force_docker_pull
        "",                      # loadListing
        "",                      # outdir
        "",                      # tmpdir
        "",                      # stagedir
        INTERNAL_VERSION,        # cwlVersion
        "docker",                # container_engine
    )
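# A hedged example (not in the original tests): build a CUDARequirement-style
# mapping and hand it to _makebuilder. The extension field names below follow
# cwltool's http://commonwl.org/cwltool#CUDARequirement and are assumptions
# here, as is the helper name.
def _example_cuda_builder() -> Builder:
    cuda_req: CWLObjectType = {
        "class": "http://commonwl.org/cwltool#CUDARequirement",
        "cudaVersionMin": "11.0",
        "cudaComputeCapability": "3.0",
    }
    builder = _makebuilder(cuda_req)
    # _makebuilder pins a single visible device in the resources mapping.
    assert builder.resources["cudaDeviceCount"] == 1
    return builder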
def job(self, joborder, output_callback, runtimeContext):
    req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
    if req:
        with SourceLine(self.tool, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
            if "id" not in self.tool:
                raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))

        document_loader, workflowobj, uri = (self.doc_loader,
                                             self.doc_loader.fetch(self.tool["id"]),
                                             self.tool["id"])

        discover_secondary_files(self.tool["inputs"], joborder)

        with Perf(metrics, "subworkflow upload_deps"):
            upload_dependencies(self.arvrunner,
                                os.path.basename(joborder.get("id", "#")),
                                document_loader,
                                joborder,
                                joborder.get("id", "#"),
                                False)

            if self.wf_pdh is None:
                workflowobj["requirements"] = dedup_reqs(self.requirements)
                workflowobj["hints"] = dedup_reqs(self.hints)

                packed = pack(document_loader, workflowobj, uri, self.metadata)

                builder = Builder(joborder,
                                  requirements=workflowobj["requirements"],
                                  hints=workflowobj["hints"],
                                  resources={})

                def visit(item):
                    for t in ("hints", "requirements"):
                        if t not in item:
                            continue
                        for req in item[t]:
                            if req["class"] == "ResourceRequirement":
                                dyn = False
                                for k in max_res_pars + sum_res_pars:
                                    if k in req:
                                        if isinstance(req[k], basestring):
                                            if item["id"] == "#main":
                                                # only the top-level requirements/hints may contain expressions
                                                self.dynamic_resource_req.append(req)
                                                dyn = True
                                                break
                                            else:
                                                with SourceLine(req, k, WorkflowException):
                                                    raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
                                if not dyn:
                                    self.static_resource_req.append(req)

                visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)

                if self.static_resource_req:
                    self.static_resource_req = [get_overall_res_req(self.static_resource_req)]

                upload_dependencies(self.arvrunner,
                                    runtimeContext.name,
                                    document_loader,
                                    packed,
                                    uri,
                                    False)

                # Discover files/directories referenced by the
                # workflow (mainly "default" values)
                visit_class(packed, ("File", "Directory"), self.wf_reffiles.append)

        if self.dynamic_resource_req:
            builder = Builder(joborder,
                              requirements=self.requirements,
                              hints=self.hints,
                              resources={})

            # Evaluate dynamic resource requirements using current builder
            rs = copy.copy(self.static_resource_req)
            for dyn_rs in self.dynamic_resource_req:
                eval_req = {"class": "ResourceRequirement"}
                for a in max_res_pars + sum_res_pars:
                    if a in dyn_rs:
                        eval_req[a] = builder.do_eval(dyn_rs[a])
                rs.append(eval_req)
            job_res_reqs = [get_overall_res_req(rs)]
        else:
            job_res_reqs = self.static_resource_req

        with Perf(metrics, "subworkflow adjust"):
            joborder_resolved = copy.deepcopy(joborder)
            joborder_keepmount = copy.deepcopy(joborder)

            reffiles = []
            visit_class(joborder_keepmount, ("File", "Directory"), reffiles.append)

            mapper = ArvPathMapper(self.arvrunner, reffiles + self.wf_reffiles,
                                   runtimeContext.basedir,
                                   "/keep/%s",
                                   "/keep/%s/%s")

            # For containers API, we need to make sure any extra
            # referenced files (ie referenced by the workflow but
            # not in the inputs) are included in the mounts.
            if self.wf_reffiles:
                runtimeContext = runtimeContext.copy()
                runtimeContext.extra_reffiles = copy.deepcopy(self.wf_reffiles)

            def keepmount(obj):
                remove_redundant_fields(obj)
                with SourceLine(obj, None, WorkflowException, logger.isEnabledFor(logging.DEBUG)):
                    if "location" not in obj:
                        raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
                with SourceLine(obj, "location", WorkflowException, logger.isEnabledFor(logging.DEBUG)):
                    if obj["location"].startswith("keep:"):
                        obj["location"] = mapper.mapper(obj["location"]).target
                        if "listing" in obj:
                            del obj["listing"]
                    elif obj["location"].startswith("_:"):
                        del obj["location"]
                    else:
                        raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])

            visit_class(joborder_keepmount, ("File", "Directory"), keepmount)

            def resolved(obj):
                if obj["location"].startswith("keep:"):
                    obj["location"] = mapper.mapper(obj["location"]).resolved

            visit_class(joborder_resolved, ("File", "Directory"), resolved)

            if self.wf_pdh is None:
                adjustFileObjs(packed, keepmount)
                adjustDirObjs(packed, keepmount)
                self.wf_pdh = upload_workflow_collection(self.arvrunner, shortname(self.tool["id"]), packed)

        wf_runner = cmap({
            "class": "CommandLineTool",
            "baseCommand": "cwltool",
            "inputs": self.tool["inputs"],
            "outputs": self.tool["outputs"],
            "stdout": "cwl.output.json",
            "requirements": self.requirements + job_res_reqs + [
                {"class": "InlineJavascriptRequirement"},
                {
                    "class": "InitialWorkDirRequirement",
                    "listing": [{
                        "entryname": "workflow.cwl",
                        "entry": '$({"class": "File", "location": "keep:%s/workflow.cwl"})' % self.wf_pdh
                    }, {
                        "entryname": "cwl.input.yml",
                        "entry": json.dumps(joborder_keepmount,
                                            indent=2,
                                            sort_keys=True,
                                            separators=(',', ': ')).replace("\\", "\\\\").replace('$(', '\\$(').replace('${', '\\${')
                    }]
                }],
            "hints": self.hints,
            "arguments": ["--no-container",
                          "--move-outputs",
                          "--preserve-entire-environment",
                          "workflow.cwl#main",
                          "cwl.input.yml"],
            "id": "#"
        })
        return ArvadosCommandTool(self.arvrunner, wf_runner, self.loadingContext).job(
            joborder_resolved, output_callback, runtimeContext)
    else:
        return super(ArvadosWorkflow, self).job(joborder, output_callback, runtimeContext)
def test_docker_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that DockerCommandLineJob respects temp directory directives."""
    (tmp_path / "3").mkdir()
    tmpdir_prefix = str(tmp_path / "3" / "ttmp")
    runtime_context = RuntimeContext(
        {"tmpdir_prefix": tmpdir_prefix, "user_space_docker_cmd": None}
    )
    builder = Builder(
        {},
        [],
        [],
        {},
        schema.Names(),
        [],
        [],
        {},
        None,
        None,
        StdFsAccess,
        StdFsAccess(""),
        None,
        0.1,
        False,
        False,
        False,
        "",
        runtime_context.get_outdir(),
        runtime_context.get_tmpdir(),
        runtime_context.get_stagedir(),
        INTERNAL_VERSION,
    )
    job = DockerCommandLineJob(builder, {}, PathMapper, [], [], "")
    runtime: List[str] = []

    volume_writable_file = MapperEnt(
        resolved=get_data("tests/2.fastq"), target="foo", type=None, staged=None
    )
    (tmp_path / "1").mkdir()
    job.add_writable_file_volume(
        runtime, volume_writable_file, None, str(tmp_path / "1" / "writable_file")
    )
    children = sorted((tmp_path / "1").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / children[0]
    assert subdir.name.startswith("writable_file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "2.fastq").exists()

    resolved_writable_dir = tmp_path / "data_orig"
    resolved_writable_dir.mkdir(parents=True)
    volume_dir = MapperEnt(
        resolved=str(resolved_writable_dir), target="bar", type=None, staged=None
    )
    (tmp_path / "2").mkdir()
    job.add_writable_directory_volume(
        runtime, volume_dir, None, str(tmp_path / "2" / "dir")
    )
    children = sorted((tmp_path / "2").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / "2" / children[0]
    assert subdir.name.startswith("dir")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "data_orig").exists()

    cidfile = job.create_runtime({}, runtime_context)[1]
    assert cidfile and cidfile.startswith(tmpdir_prefix)

    volume_file = MapperEnt(resolved="Hoopla!", target="baz", type=None, staged=None)
    (tmp_path / "4").mkdir()
    job.create_file_and_add_volume(
        runtime, volume_file, None, None, str(tmp_path / "4" / "file")
    )
    children = sorted((tmp_path / "4").glob("*"))
    assert len(children) == 1
    subdir = tmp_path / "4" / children[0]
    assert subdir.name.startswith("file")
    assert len(sorted(subdir.glob("*"))) == 1
    assert (subdir / "baz").exists()