def run(self, file_store):
    """Resolve the input object, fill in defaults, and schedule the real
    CWLJob as a child, returning a promise for its output."""
    cwljob = resolve_indirect(self.cwljob)
    fill_in_defaults(
        self.cwltool.tool['inputs'], cwljob,
        self.runtime_context.make_fs_access(
            self.runtime_context.basedir or ""))
    realjob = CWLJob(self.cwltool, cwljob, self.runtime_context)
    self.addChild(realjob)
    return realjob.rv()
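# A minimal sketch of the parent/child promise pattern that run() above
# relies on (ParentJob/ChildJob are illustrative names, not project code):
# a parent schedules a child with addChild() and returns child.rv(), a
# promise that Toil resolves to the child's return value once it has run.

from toil.job import Job

class ChildJob(Job):
    def run(self, file_store):
        return "child result"

class ParentJob(Job):
    def run(self, file_store):
        child = ChildJob()
        self.addChild(child)
        return child.rv()  # resolved after ChildJob.run completes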
def run(self, file_store):
    cwljob = resolve_indirect(self.cwljob)
    fill_in_defaults(
        self.step_inputs, cwljob,
        self.runtime_context.make_fs_access(""))
    # Drop any input that is not declared in the tool's input record
    # schema. Iterate over a copy of the keys so we can safely pop from
    # the dict while looping.
    for inp_id in list(cwljob.keys()):
        found = False
        for field in self.cwltool.inputs_record_schema['fields']:
            if field['name'] == inp_id:
                found = True
                break
        if not found:
            cwljob.pop(inp_id)

    # Export the temporary directory for batch systems that reset TMPDIR.
    os.environ["TMPDIR"] = os.path.realpath(
        self.runtime_context.tmpdir or file_store.getLocalTempDir())
    outdir = os.path.join(file_store.getLocalTempDir(), "out")
    os.mkdir(outdir)
    top_tmp_outdir = self.workdir or os.environ["TMPDIR"]
    tmp_outdir_prefix = os.path.join(
        _makeNestedTempDir(top=top_tmp_outdir, seed=outdir, levels=2),
        "out_tmpdir")
    self.openTempDirs.append(top_tmp_outdir)

    index = {}
    existing = {}
    runtime_context = self.runtime_context.copy()
    runtime_context.basedir = os.getcwd()
    runtime_context.outdir = outdir
    runtime_context.tmp_outdir_prefix = tmp_outdir_prefix
    runtime_context.tmpdir_prefix = file_store.getLocalTempDir()
    runtime_context.make_fs_access = functools.partial(
        ToilFsAccess, file_store=file_store)
    runtime_context.toil_get_file = functools.partial(
        toil_get_file, file_store, index, existing)

    # Run the tool.
    (output, status) = cwltool.executors.SingleJobExecutor().execute(
        self.cwltool, cwljob, runtime_context, cwllogger)
    if status != "success":
        raise cwltool.errors.WorkflowException(status)

    adjustDirObjs(output, functools.partial(
        get_listing, cwltool.stdfsaccess.StdFsAccess(outdir),
        recursive=True))
    adjustFileObjs(output, functools.partial(
        uploadFile, functools.partial(writeGlobalFileWrapper, file_store),
        index, existing))
    return output
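# The _makeNestedTempDir helper used above spreads per-job temp dirs across
# a small directory tree so a single flat directory does not accumulate
# thousands of entries. A minimal sketch of the assumed behavior (hash the
# seed, then create `levels` single-character subdirectories under `top`);
# this is an illustration, not necessarily Toil's implementation:

import hashlib
import os

def _make_nested_temp_dir_sketch(top, seed, levels=2):
    digest = hashlib.md5(seed.encode('utf-8')).hexdigest()
    directory = top
    for i in range(min(levels, len(digest))):
        directory = os.path.join(directory, digest[i])
        os.makedirs(directory, exist_ok=True)  # tolerate concurrent creation
    return directory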
def upload_job_order(arvrunner, name, tool, job_order):
    """Upload local files referenced in the input object and return an
    updated input object with 'location' pointing to the proper Keep
    references.
    """

    # Make a copy of the job order and set defaults.
    builder_job_order = copy.copy(job_order)

    # fill_in_defaults throws an error if there are any missing required
    # parameters; we don't want that, so make them all optional.
    inputs_copy = copy.deepcopy(tool.tool["inputs"])
    for i in inputs_copy:
        if "null" not in i["type"]:
            i["type"] = ["null"] + aslist(i["type"])

    fill_in_defaults(inputs_copy, builder_job_order, arvrunner.fs_access)

    # Need to create a builder object to evaluate expressions.
    builder = make_builder(builder_job_order,
                           tool.hints,
                           tool.requirements,
                           ArvRuntimeContext(),
                           tool.metadata)

    # Now update job_order with secondaryFiles.
    discover_secondary_files(arvrunner.fs_access,
                             builder,
                             tool.tool["inputs"],
                             job_order)

    jobmapper = upload_dependencies(arvrunner,
                                    name,
                                    tool.doc_loader,
                                    job_order,
                                    job_order.get("id", "#"),
                                    False)

    if "id" in job_order:
        del job_order["id"]

    # Need to filter this out; it gets added by cwltool when providing
    # parameters on the command line.
    if "job_order" in job_order:
        del job_order["job_order"]

    return job_order
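# A small self-contained demonstration of the optionalization step above
# ("threads" is an illustrative input name, and aslist is inlined here as
# a stand-in for the helper the function above uses):

import copy

def aslist(x):
    return x if isinstance(x, list) else [x]

inputs = [{"id": "threads", "type": "int"}]
inputs_copy = copy.deepcopy(inputs)
for i in inputs_copy:
    if "null" not in i["type"]:
        i["type"] = ["null"] + aslist(i["type"])
assert inputs_copy == [{"id": "threads", "type": ["null", "int"]}]
# With every type unioned with "null", fill_in_defaults simply skips
# missing parameters instead of raising on required ones.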
def main(args=None, stdout=sys.stdout):
    """Main method for toil-cwl-runner."""
    cwllogger.removeHandler(defaultStreamHandler)
    config = Config()
    config.cwl = True
    parser = argparse.ArgumentParser()
    addOptions(parser, config)
    parser.add_argument("cwltool", type=str)
    parser.add_argument("cwljob", nargs=argparse.REMAINDER)

    # Will override the "jobStore" positional argument, enabling the user
    # to select a jobStore or get a default from the logic below.
    parser.add_argument("--jobStore", type=str)
    parser.add_argument("--not-strict", action="store_true")
    parser.add_argument("--quiet", dest="logLevel", action="store_const",
                        const="ERROR")
    parser.add_argument("--basedir", type=str)
    parser.add_argument("--outdir", type=str, default=os.getcwd())
    parser.add_argument("--version", action='version', version=baseVersion)
    dockergroup = parser.add_mutually_exclusive_group()
    dockergroup.add_argument(
        "--user-space-docker-cmd",
        help="(Linux/OS X only) Specify a user-space docker command (like "
        "udocker or dx-docker) that will be used to call 'pull' and 'run'.")
    dockergroup.add_argument(
        "--singularity", action="store_true", default=False,
        help="[experimental] Use the Singularity runtime for running "
        "containers. Requires Singularity v2.3.2+ and Linux with kernel "
        "version v3.18+ or with overlayfs support backported.")
    dockergroup.add_argument(
        "--no-container", action="store_true",
        help="Do not execute jobs in a Docker container, even when "
        "`DockerRequirement` is specified under `hints`.")
    parser.add_argument(
        "--preserve-environment", type=str, nargs='+',
        help="Preserve specified environment variables when running "
        "CommandLineTools.",
        metavar="VAR1 VAR2", default=("PATH",),
        dest="preserve_environment")
    parser.add_argument(
        "--destBucket", type=str,
        help="Specify a cloud bucket endpoint for output files.")
    parser.add_argument(
        "--beta-dependency-resolvers-configuration", default=None)
    parser.add_argument("--beta-dependencies-directory", default=None)
    parser.add_argument(
        "--beta-use-biocontainers", default=None, action="store_true")
    parser.add_argument(
        "--beta-conda-dependencies", default=None, action="store_true")
    parser.add_argument("--tmpdir-prefix", type=Text,
                        help="Path prefix for temporary directories",
                        default="tmp")
    parser.add_argument("--tmp-outdir-prefix", type=Text,
                        help="Path prefix for intermediate output "
                        "directories",
                        default="tmp")
    parser.add_argument(
        "--force-docker-pull", action="store_true", default=False,
        dest="force_docker_pull",
        help="Pull the latest docker image even if it is present locally.")
    parser.add_argument(
        "--no-match-user", action="store_true", default=False,
        help="Disable passing the current uid to `docker run --user`.")

    # mkdtemp actually creates the directory, but Toil requires that the
    # directory not exist, so make it, delete it, and allow Toil to create
    # it again (!)
    workdir = tempfile.mkdtemp()
    os.rmdir(workdir)

    if args is None:
        args = sys.argv[1:]

    # We use workdir as the jobStore:
    options = parser.parse_args([workdir] + args)

    # If tmpdir_prefix is not the default value, set workDir too.
    if options.tmpdir_prefix != 'tmp':
        options.workDir = options.tmpdir_prefix

    if options.provisioner and not options.jobStore:
        raise NoSuchJobStoreException(
            'Please specify a jobstore with the --jobStore option when '
            'specifying a provisioner.')

    use_container = not options.no_container

    if options.logLevel:
        cwllogger.setLevel(options.logLevel)

    outdir = os.path.abspath(options.outdir)
    tmp_outdir_prefix = os.path.abspath(options.tmp_outdir_prefix)
    tmpdir_prefix = os.path.abspath(options.tmpdir_prefix)
    fileindex = {}
    existing = {}
    conf_file = getattr(options,
                        "beta_dependency_resolvers_configuration", None)
    use_conda_dependencies = getattr(options, "beta_conda_dependencies",
                                     None)
    job_script_provider = None
    if conf_file or use_conda_dependencies:
        dependencies_configuration = DependenciesConfiguration(options)
        job_script_provider = dependencies_configuration

    options.default_container = None
    runtime_context = cwltool.context.RuntimeContext(vars(options))
    runtime_context.find_default_container = functools.partial(
        find_default_container, options)
    runtime_context.workdir = workdir
    runtime_context.move_outputs = "leave"
    runtime_context.rm_tmpdir = False
    loading_context = cwltool.context.LoadingContext(vars(options))

    with Toil(options) as toil:
        if options.restart:
            outobj = toil.restart()
        else:
            loading_context.hints = [{
                "class": "ResourceRequirement",
                "coresMin": toil.config.defaultCores,
                "ramMin": toil.config.defaultMemory / (2**20),
                "outdirMin": toil.config.defaultDisk / (2**20),
                "tmpdirMin": 0
            }]
            loading_context.construct_tool_object = toil_make_tool
            loading_context.resolver = cwltool.resolver.tool_resolver
            loading_context.strict = not options.not_strict
            options.workflow = options.cwltool
            options.job_order = options.cwljob
            uri, tool_file_uri = cwltool.load_tool.resolve_tool_uri(
                options.cwltool, loading_context.resolver,
                loading_context.fetcher_constructor)
            options.tool_help = None
            options.debug = options.logLevel == "DEBUG"
            job_order_object, options.basedir, jobloader = \
                cwltool.main.load_job_order(
                    options, sys.stdin,
                    loading_context.fetcher_constructor,
                    loading_context.overrides_list,
                    tool_file_uri)
            document_loader, workflowobj, uri = \
                cwltool.load_tool.fetch_document(
                    uri, loading_context.resolver,
                    loading_context.fetcher_constructor)
            document_loader, avsc_names, processobj, metadata, uri = \
                cwltool.load_tool.validate_document(
                    document_loader, workflowobj, uri,
                    loading_context.enable_dev, loading_context.strict,
                    False, loading_context.fetcher_constructor, False,
                    loading_context.overrides_list,
                    do_validate=loading_context.do_validate)
            loading_context.overrides_list.extend(
                metadata.get("cwltool:overrides", []))
            try:
                tool = cwltool.load_tool.make_tool(
                    document_loader, avsc_names, metadata, uri,
                    loading_context)
            except cwltool.process.UnsupportedRequirement as err:
                logging.error(err)
                return 33
            runtime_context.secret_store = SecretStore()
            initialized_job_order = cwltool.main.init_job_order(
                job_order_object, options, tool, jobloader, sys.stdout,
                secret_store=runtime_context.secret_store)
            fs_access = cwltool.stdfsaccess.StdFsAccess(options.basedir)
            fill_in_defaults(
                tool.tool["inputs"], initialized_job_order, fs_access)

            def path_to_loc(obj):
                if "location" not in obj and "path" in obj:
                    obj["location"] = obj["path"]
                    del obj["path"]

            def import_files(tool):
                visit_class(tool, ("File", "Directory"), path_to_loc)
                visit_class(tool, ("File",), functools.partial(
                    add_sizes, fs_access))
                normalizeFilesDirs(tool)
                adjustDirObjs(tool, functools.partial(
                    get_listing, fs_access, recursive=True))
                adjustFileObjs(tool, functools.partial(
                    uploadFile, toil.importFile, fileindex, existing,
                    skip_broken=True))

            tool.visit(import_files)

            for inp in tool.tool["inputs"]:
                def set_secondary(fileobj):
                    if isinstance(fileobj, Mapping) \
                            and fileobj.get("class") == "File":
                        if "secondaryFiles" not in fileobj:
                            fileobj["secondaryFiles"] = [
                                {"location": cwltool.builder.substitute(
                                    fileobj["location"], sf),
                                 "class": "File"}
                                for sf in inp["secondaryFiles"]]
                    if isinstance(fileobj, MutableSequence):
                        for entry in fileobj:
                            set_secondary(entry)

                if shortname(inp["id"]) in initialized_job_order \
                        and inp.get("secondaryFiles"):
                    set_secondary(
                        initialized_job_order[shortname(inp["id"])])

            import_files(initialized_job_order)
            visitSteps(tool, import_files)

            try:
                runtime_context.use_container = use_container
                runtime_context.tmpdir = os.path.realpath(tmpdir_prefix)
                runtime_context.tmp_outdir_prefix = os.path.realpath(
                    tmp_outdir_prefix)
                runtime_context.job_script_provider = job_script_provider
                runtime_context.force_docker_pull = \
                    options.force_docker_pull
                runtime_context.no_match_user = options.no_match_user
                (wf1, _) = makeJob(tool, {}, None, runtime_context)
            except cwltool.process.UnsupportedRequirement as err:
                logging.error(err)
                return 33

            wf1.cwljob = initialized_job_order
            if isinstance(wf1, CWLJob):
                # Clean up temporary directories only created with CWLJobs.
                wf1.addFollowOnFn(cleanTempDirs, wf1)
            outobj = toil.start(wf1)

        outobj = resolve_indirect(outobj)

        # Stage files. Specify the destination bucket if one was given on
        # the command line; otherwise options.destBucket is None.
        toilStageFiles(
            toil, outobj, outdir, fileindex, existing, export=True,
            destBucket=options.destBucket)

        if not options.destBucket:
            visit_class(outobj, ("File",), functools.partial(
                compute_checksums, cwltool.stdfsaccess.StdFsAccess("")))

        visit_class(outobj, ("File",), MutationManager().unset_generation)
        stdout.write(json.dumps(outobj, indent=4))

    return 0