def test_input_can_be_named_pipe(
    tmp_path: Path, streamable: bool, streaming_allowed: bool, raise_exception: bool
) -> None:
    """Test that input can be a named pipe."""
    tool = CommandLineTool(
        toolpath_object,
        loading_context,
    )
    rt_context = RuntimeContext()
    rt_context.streaming_allowed = streaming_allowed
    fifo_path = tmp_path / "tmp"
    os.mkfifo(fifo_path)
    job_order: CWLObjectType = {
        "inp": {
            "class": "File",
            "location": str(fifo_path),
            "streamable": streamable,
        }
    }
    job = next(tool.job(job_order, None, rt_context))
    assert isinstance(job, JobBase)
    if raise_exception:
        # a FIFO input should be rejected in this configuration
        with pytest.raises(WorkflowException):
            job._setup(rt_context)
    else:
        job._setup(rt_context)
def test_redefined_args(self):
    """Options preset on a RuntimeContext survive Factory construction."""
    context = RuntimeContext()
    context.use_container = False
    context.on_error = "continue"
    factory = cwltool.factory.Factory(runtime_context=context)
    assert factory.runtime_context.use_container is False
    assert factory.runtime_context.on_error == "continue"
def test_runtimeContext_respects_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that RuntimeContext helper methods respects tmpdir_prefix."""
    prefix = str(tmp_path / "foo")
    context = RuntimeContext({"tmpdir_prefix": prefix})
    # every helper that creates/reports a temp location must honor the prefix
    for candidate in (
        context.get_tmpdir(),
        context.get_stagedir(),
        context.create_tmpdir(),
        create_tmp_dir(prefix),
    ):
        assert candidate.startswith(prefix)
def test_factory_redefined_args() -> None:
    """A Factory keeps the RuntimeContext options it was constructed with."""
    context = RuntimeContext()
    context.use_container = False
    context.on_error = "continue"
    made = cwltool.factory.Factory(runtime_context=context)
    assert made.runtime_context.use_container is False
    assert made.runtime_context.on_error == "continue"
def relocate_outputs(workflow, job_data, cwl_args=None):
    """
    Moves or copies filtered outputs to "outputs_folder" depending on
    "runtime_context.move_outputs" value, however "tmp_folder" is not
    going to be deleted as it will be done when DAG finishes running.
    Saves report with relocated outputs as "workflow_report.json"
    to "outputs_folder". Maps outputs from "workflow" back to normal
    (from step_id_step_out to workflow output) and filters "job_data"
    based on them (combining items from "job_data" into a list based
    on "outputSource" if it was a list). "cwl_args" can be used to update
    default parameters used for loading and runtime contexts.

    Returns a tuple (relocated_job_data, workflow_report path).
    """

    cwl_args = {} if cwl_args is None else cwl_args

    default_cwl_args = get_default_cwl_args(cwl_args)

    workflow_tool = fast_cwl_load(workflow=workflow, cwl_args=default_cwl_args)

    # Filter "job_data" to include only items required by workflow outputs.
    # Remap keys to the proper workflow outputs IDs (without step id).
    # If "outputSource" was a list even of len=1, find all correspondent items
    # from the "job_data" and assign them as list of the same size.
    job_data_copy = deepcopy(job_data)
    filtered_job_data = {}
    for output_id, output_data in get_items(workflow_tool["outputs"]):
        collected_job_items = []
        for source_id, _ in get_items(output_data["outputSource"]):
            # step sources use "step/output" form while job_data keys use
            # "step_output" — translate separator before the lookup
            collected_job_items.append(job_data_copy[source_id.replace(
                "/", "_")])
        if isinstance(output_data["outputSource"], list):
            filtered_job_data[output_id] = collected_job_items
        else:
            # scalar outputSource: unwrap the single collected item
            filtered_job_data[output_id] = collected_job_items[0]

    runtime_context = RuntimeContext(default_cwl_args)
    relocated_job_data = relocateOutputs(
        outputObj=filtered_job_data,
        destination_path=job_data_copy["outputs_folder"],
        source_directories=[
            job_data_copy["tmp_folder"]
        ],  # need to set it to tmp_folder otherwise it won't be able to delete tmp data if action is "move"
        action=runtime_context.move_outputs,
        fs_access=runtime_context.make_fs_access(""),
        compute_checksum=runtime_context.compute_checksum,
        path_mapper=runtime_context.path_mapper)

    # Dump report with relocated outputs
    workflow_report = os.path.join(job_data_copy["outputs_folder"],
                                   "workflow_report.json")

    dump_json(relocated_job_data, workflow_report)

    return relocated_job_data, workflow_report
def test_factory_partial_output():
    """With on_error="continue" a failed run still exposes partial outputs."""
    context = RuntimeContext()
    context.on_error = "continue"
    runner = cwltool.factory.Factory(runtime_context=context)
    with pytest.raises(cwltool.factory.WorkflowStatus) as err_info:
        runner.make(get_data("tests/wf/wffail.cwl"))()
    outputs = err_info.value.out
    assert outputs["out1"]["checksum"] == 'sha1$e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e'
    assert outputs["out2"] is None
def test_compute_checksum(self):
    """Outputs carry a sha1 checksum when compute_checksum is enabled."""
    runtime_context = RuntimeContext()
    runtime_context.compute_checksum = True
    runtime_context.use_container = onWindows()
    f = cwltool.factory.Factory(runtime_context=runtime_context)
    echo = f.make(get_data("tests/wf/cat-tool.cwl"))
    output = echo(
        file1={"class": "File", "location": get_data("tests/wf/whale.txt")},
        reverse=False)
    # assertEquals is a deprecated (removed in Py3.12) alias of assertEqual
    self.assertEqual(output['output']["checksum"],
                     "sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376")
def test_compute_checksum():
    """Enabling compute_checksum populates a sha1 digest on the output file."""
    context = RuntimeContext()
    context.compute_checksum = True
    context.use_container = onWindows()
    echo = cwltool.factory.Factory(runtime_context=context).make(
        get_data("tests/wf/cat-tool.cwl"))
    whale = {"class": "File", "location": get_data("tests/wf/whale.txt")}
    output = echo(file1=whale, reverse=False)
    assert output['output']["checksum"] == "sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376"
def test_partial_output(self):
    """A failing workflow with on_error="continue" yields partial outputs."""
    runtime_context = RuntimeContext()
    runtime_context.on_error = "continue"
    f = cwltool.factory.Factory(runtime_context=runtime_context)
    fail = f.make(get_data("tests/wf/wffail.cwl"))
    try:
        fail()
    except cwltool.factory.WorkflowStatus as e:
        # assertEquals is a deprecated (removed in Py3.12) alias of assertEqual
        self.assertEqual('sha1$e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e',
                         e.out["out1"]["checksum"])
        self.assertIsNone(e.out["out2"])
    else:
        self.fail("Should have raised WorkflowStatus")
def test_sequential_workflow(self):
    """Run the count-lines workflow with the multithreaded executor."""
    executor = MultithreadedJobExecutor()
    context = RuntimeContext()
    context.select_resources = executor.select_resources
    factory = get_windows_safe_factory(
        executor=executor, runtime_context=context)
    count_lines = factory.make(get_data("tests/wf/count-lines1-wf.cwl"))
    whale = {"class": "File", "location": get_data("tests/wf/whale.txt")}
    self.assertEqual(count_lines(file1=whale), {"count_output": 16})
def test_factory_partial_output():
    """A failed workflow run still reports the outputs that did succeed."""
    context = RuntimeContext()
    context.on_error = "continue"
    workflow = cwltool.factory.Factory(runtime_context=context).make(
        get_data("tests/wf/wffail.cwl"))
    with pytest.raises(cwltool.factory.WorkflowStatus) as err_info:
        workflow()
    partial = err_info.value.out
    assert partial["out1"][
        "checksum"] == 'sha1$e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e'
    assert partial["out2"] is None
def test_sequential_workflow(tmp_path: Path) -> None:
    """Run the count-lines workflow end to end under MultithreadedJobExecutor."""
    executor = MultithreadedJobExecutor()
    context = RuntimeContext()
    context.outdir = str(tmp_path)
    context.select_resources = executor.select_resources
    factory = get_windows_safe_factory(executor=executor, runtime_context=context)
    count_lines = factory.make(get_data("tests/wf/count-lines1-wf.cwl"))
    whale = {"class": "File", "location": get_data("tests/wf/whale.txt")}
    assert count_lines(file1=whale) == {"count_output": 16}
def test_factory_partial_scatter():
    """With on_error="continue" a failing scatter returns its successful shards."""
    context = RuntimeContext()
    context.on_error = "continue"
    runner = cwltool.factory.Factory(runtime_context=context)
    with pytest.raises(cwltool.factory.WorkflowStatus) as err_info:
        runner.make(get_data("tests/wf/scatterfail.cwl"))()
    shards = err_info.value.out["out"]
    assert shards[0]["checksum"] == 'sha1$e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e'
    assert shards[1] is None
    assert shards[2]["checksum"] == 'sha1$a3db5c13ff90a36963278c6a39e4ee3c22e2a436'
def get_windows_safe_factory(runtime_context=None,  # type: RuntimeContext
                             loading_context=None,  # type: LoadingContext
                             executor=None  # type: Any
                            ):  # type: (...) -> Factory
    """Return a Factory; on Windows, force use of the default container."""
    if onWindows():
        context = runtime_context if runtime_context else RuntimeContext()
        context.find_default_container = functools.partial(
            force_default_container, windows_default_container_id)
        context.use_container = True
        context.default_container = windows_default_container_id
        runtime_context = context
    return Factory(executor, loading_context, runtime_context)
def test_partial_scatter(self):
    """A failing scatter with on_error="continue" reports successful shards."""
    runtime_context = RuntimeContext()
    runtime_context.on_error = "continue"
    f = cwltool.factory.Factory(runtime_context=runtime_context)
    fail = f.make(get_data("tests/wf/scatterfail.cwl"))
    try:
        fail()
    except cwltool.factory.WorkflowStatus as e:
        # assertEquals is a deprecated (removed in Py3.12) alias of assertEqual
        self.assertEqual('sha1$e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e',
                         e.out["out"][0]["checksum"])
        self.assertIsNone(e.out["out"][1])
        self.assertEqual('sha1$a3db5c13ff90a36963278c6a39e4ee3c22e2a436',
                         e.out["out"][2]["checksum"])
    else:
        self.fail("Should have raised WorkflowStatus")
def test_sequential_workflow(self):
    """Count the lines of whale.txt through a multithreaded workflow run."""
    executor = MultithreadedJobExecutor()
    context = RuntimeContext()
    context.select_resources = executor.select_resources
    factory = get_windows_safe_factory(executor=executor,
                                       runtime_context=context)
    count_lines = factory.make(get_data("tests/wf/count-lines1-wf.cwl"))
    input_file = {"class": "File", "location": get_data("tests/wf/whale.txt")}
    self.assertEqual(count_lines(file1=input_file), {"count_output": 16})
def test_sequential_workflow(tmpdir):
    """Run the count-lines workflow with the multithreaded executor."""
    load_tool.loaders = {}  # reset the shared tool-loader cache between tests
    executor = MultithreadedJobExecutor()
    context = RuntimeContext()
    context.outdir = str(tmpdir)
    context.select_resources = executor.select_resources
    factory = get_windows_safe_factory(executor=executor, runtime_context=context)
    count_lines = factory.make(get_data("tests/wf/count-lines1-wf.cwl"))
    whale = {"class": "File", "location": get_data("tests/wf/whale.txt")}
    assert count_lines(file1=whale) == {"count_output": 16}
def test_factory_partial_scatter():
    """Partial scatter results are delivered via WorkflowStatus.out."""
    context = RuntimeContext()
    context.on_error = "continue"
    workflow = cwltool.factory.Factory(runtime_context=context).make(
        get_data("tests/wf/scatterfail.cwl"))
    with pytest.raises(cwltool.factory.WorkflowStatus) as err_info:
        workflow()
    shards = err_info.value.out["out"]
    assert shards[0][
        "checksum"] == 'sha1$e5fa44f2b31c1fb553b6021e7360d07d5d91ff5e'
    assert shards[1] is None
    assert shards[2][
        "checksum"] == 'sha1$a3db5c13ff90a36963278c6a39e4ee3c22e2a436'
def test_compute_checksum() -> None:
    """compute_checksum=True yields a sha1 checksum on the produced File."""
    context = RuntimeContext()
    context.compute_checksum = True
    context.use_container = onWindows()
    echo = cwltool.factory.Factory(runtime_context=context).make(
        get_data("tests/wf/cat-tool.cwl"))
    output = echo(
        file1={"class": "File", "location": get_data("tests/wf/whale.txt")},
        reverse=False,
    )
    assert isinstance(output, dict)
    result = output["output"]
    assert isinstance(result, dict)
    assert result["checksum"] == "sha1$327fc7aedf4f6b69a42a7c8b808dc5a7aff61376"
def test_commandLineTool_job_tmpdir_prefix(tmp_path: Path) -> None:
    """Test that non-docker enabled CommandLineTool respects temp directory directives."""
    loading_context = LoadingContext(
        {
            "metadata": {
                "cwlVersion": INTERNAL_VERSION,
                "http://commonwl.org/cwltool#original_cwlVersion": INTERNAL_VERSION,
            }
        }
    )
    tool_body = cmap(
        {
            "cwlVersion": INTERNAL_VERSION,
            "class": "CommandLineTool",
            "inputs": [],
            "outputs": [],
            "requirements": [],
        }
    )
    clt = CommandLineTool(cast(CommentedMap, tool_body), loading_context)
    tmpdir_prefix = str(tmp_path / "1")
    tmp_outdir_prefix = str(tmp_path / "2")
    runtime_context = RuntimeContext(
        {"tmpdir_prefix": tmpdir_prefix, "tmp_outdir_prefix": tmp_outdir_prefix}
    )
    job = next(clt.job({}, None, runtime_context))
    assert isinstance(job, JobBase)
    # stage/tmp dirs honor tmpdir_prefix; outdir honors tmp_outdir_prefix
    assert job.stagedir and job.stagedir.startswith(tmpdir_prefix)
    assert job.tmpdir and job.tmpdir.startswith(tmpdir_prefix)
    assert job.outdir and job.outdir.startswith(tmp_outdir_prefix)
def test_check_version():
    """
    It is permitted to load without updating, but not execute.

    Attempting to execute without updating to the internal version
    should raise an error.
    """
    joborder = {"inp": "abc"}

    # Updated document: execution succeeds.
    tool = load_tool(get_data("tests/echo.cwl"), LoadingContext({"do_update": True}))
    for _ in tool.job(joborder, None, RuntimeContext()):
        pass

    # Non-updated document: execution must be rejected.
    tool = load_tool(get_data("tests/echo.cwl"), LoadingContext({"do_update": False}))
    with pytest.raises(WorkflowException):
        for _ in tool.job(joborder, None, RuntimeContext()):
            pass
def execute(self) -> Tuple[UUID, Dict, Optional[Exception]]:
    """
    :return: Run ID, dict with new files, exception if there is any
    """
    run_id = uuid4()
    root = self.file_manager.ROOT_DIRECTORY
    context = RuntimeContext()
    context.outdir = root
    context.basedir = root
    context.default_stdin = subprocess.DEVNULL
    context.default_stdout = subprocess.DEVNULL
    context.default_stderr = subprocess.DEVNULL
    os.chdir(root)
    executable = Factory(runtime_context=context).make(self._workflow_path)
    # later data files take precedence over earlier ones
    data: Dict = {}
    for data_file in self._data_paths:
        with open(data_file) as handle:
            new_data = yaml.load(handle, Loader=yaml.Loader)
            data = {**new_data, **data}
    try:
        result: Dict = executable(**data)
    except Exception as err:
        traceback.print_exc(file=sys.stderr)
        return run_id, {}, err
    return run_id, result, None
def test_replace_default_stdout_stderr():
    """Test replacing the default stdout/stderr used by job execution."""
    import sys
    # break stdout & stderr
    original_stdout = sys.stdout
    original_stderr = sys.stderr
    sys.stdout = ''
    sys.stderr = ''
    # restore the real streams even if an assertion fails, otherwise the
    # broken stdout/stderr leaks into every subsequent test
    try:
        runtime_context = RuntimeContext()
        runtime_context.default_stdout = subprocess.DEVNULL
        runtime_context.default_stderr = subprocess.DEVNULL
        factory = get_windows_safe_factory(runtime_context=runtime_context)
        echo = factory.make(get_data("tests/echo.cwl"))
        assert echo(inp="foo") == {"out": "foo\n"}
    finally:
        sys.stdout = original_stdout
        sys.stderr = original_stderr
def test_scatter_output_filenames(tmpdir: py.path.local) -> None:
    """If a scatter step produces identically named output then confirm that the final output is renamed correctly."""
    tmpdir.chdir()
    rtc = RuntimeContext()
    # FIX: py.path.local.chdir() returns the *previous* working directory, so
    # the old `cwd = tmpdir.chdir(); rtc.outdir = str(cwd)` pointed outdir at
    # the prior cwd instead of the temp directory. Use tmpdir itself.
    rtc.outdir = str(tmpdir)
    factory = cwltool.factory.Factory(runtime_context=rtc)
    output_names = ["output.txt", "output.txt_2", "output.txt_3"]
    scatter_workflow = factory.make(get_data("tests/scatter_numbers.cwl"))
    result = scatter_workflow(range=3)
    assert isinstance(result, dict)
    assert "output" in result
    locations = sorted([element["location"] for element in result["output"]])
    assert (
        locations[0].endswith("output.txt")
        and locations[1].endswith("output.txt_2")
        and locations[2].endswith("output.txt_3")
    ), "Locations {} do not end with {}".format(locations, output_names)
def execute_workflow_step(workflow, task_id, job_data, cwl_args=None, executor=None):
    """
    Constructs and executes single step workflow based on the "workflow"
    and "task_id". "cwl_args" can be used to update default parameters
    used for loading and runtime contexts. Exports json file with the
    execution results. Returns (step_outputs, step_report path); raises
    ValueError if the step did not finish with "success" status.
    """

    cwl_args = {} if cwl_args is None else cwl_args
    executor = SingleJobExecutor() if executor is None else executor

    step_tmp_folder, step_cache_folder, step_outputs_folder, step_report = get_temp_folders(
        task_id=task_id, job_data=job_data)

    default_cwl_args = get_default_cwl_args(cwl_args)

    default_cwl_args.update({  # add execution specific parameters
        "tmp_outdir_prefix": step_cache_folder + "/",
        "tmpdir_prefix": step_cache_folder + "/",
        "cidfile_dir": step_tmp_folder,
        "cidfile_prefix": task_id,
        "basedir": os.getcwd(),  # job should already have abs path for inputs, so this is useless
        "outdir": step_outputs_folder
    })

    workflow_step_path = os.path.join(step_tmp_folder,
                                      task_id + "_step_workflow.cwl")

    fast_cwl_step_load(  # will save new workflow to "workflow_step_path"
        workflow=workflow,
        target_id=task_id,
        cwl_args=default_cwl_args,
        location=workflow_step_path)

    _stderr = sys.stderr  # to trick the logger
    sys.stderr = sys.__stderr__
    # FIX: restore stderr even if the executor raises; previously an exception
    # left sys.stderr permanently redirected for the rest of the process
    try:
        step_outputs, step_status = executor(
            slow_cwl_load(workflow=workflow_step_path,
                          cwl_args=default_cwl_args),
            job_data,
            RuntimeContext(default_cwl_args))
    finally:
        sys.stderr = _stderr

    if step_status != "success":
        # include the status so the failure is diagnosable from the traceback
        raise ValueError("Step {} finished with status: {}".format(task_id, step_status))

    # To remove "http://commonwl.org/cwltool#generation": 0 (copied from cwltool)
    visit_class(step_outputs, ("File", ), MutationManager().unset_generation)

    dump_json(step_outputs, step_report)

    return step_outputs, step_report
def main():
    """Parse CLI args, load the requested parsl config, and run cwltool with a parsl-aware RuntimeContext."""
    parser = arg_parser()
    parsed_args = parser.parse_args(sys.argv[1:])

    # Load the requested parsl configuration
    if parsed_args.parsl == 'cori':
        parsl.load(cori_regular_config)
    elif parsed_args.parsl == 'cori-debug':
        parsl.load(cori_debug_config)
    else:
        parsl.load(threads_config)

    # Trigger the argparse message if the cwl file is missing
    # Otherwise cwltool will use the default argparser
    if not parsed_args.workflow:
        if os.path.isfile("CWLFile"):
            setattr(parsed_args, "workflow", "CWLFile")
        else:
            _logger.error("")
            _logger.error("CWL document required, no input file was provided")
            parser.print_help()
            sys.exit(1)
    elif not parsed_args.basedir:
        _logger.error("")
        # FIX: corrected typo "itermediate" in the user-facing message
        _logger.error("Basedir is required for storing intermediate results")
        parser.print_help()
        sys.exit(1)

    rc = RuntimeContext(vars(parsed_args))
    rc.shifter = False
    parsed_args.__dict__['parallel'] = True
    # keep scratch and output staging under the user-supplied basedir
    rc.tmpdir_prefix = rc.basedir + '/tmp/tmp'
    rc.tmp_outdir_prefix = rc.basedir + '/out/out'  # type: Text
    if parsed_args.shifter:
        # paths inside the shifter container differ from the host layout
        rc.shifter = True
        rc.docker_outdir = '/spooldir'
        rc.docker_stagedir = rc.basedir + '/stage'
        rc.docker_tmpdir = '/tmpdir'
    lc = LoadingContext(vars(parsed_args))
    lc.construct_tool_object = customMakeTool
    sys.exit(
        cwltool.main.main(args=parsed_args, loadingContext=lc, runtimeContext=rc))
def test_replace_default_stdout_stderr() -> None:
    """Test our ability to replace the default stdout/err."""
    import sys
    # break stdout & stderr
    original_stdout = sys.stdout
    original_stderr = sys.stderr
    sys.stdout = ""  # type: ignore
    sys.stderr = ""  # type: ignore
    # restore the real streams even if the assertion fails, otherwise the
    # broken stdout/stderr leaks into every subsequent test
    try:
        runtime_context = RuntimeContext()
        runtime_context.default_stdout = subprocess.DEVNULL  # type: ignore
        runtime_context.default_stderr = subprocess.DEVNULL  # type: ignore
        factory = get_windows_safe_factory(runtime_context=runtime_context)
        echo = factory.make(get_data("tests/echo.cwl"))
        assert echo(inp="foo") == {"out": "foo\n"}
    finally:
        sys.stdout = original_stdout
        sys.stderr = original_stderr
def get_windows_safe_factory(
    runtime_context: Optional[RuntimeContext] = None,
    loading_context: Optional[LoadingContext] = None,
    executor: Optional[JobExecutor] = None,
) -> Factory:
    """Build a Factory; on Windows, force use of the default container."""
    if onWindows():
        context = runtime_context if runtime_context else RuntimeContext()
        context.find_default_container = functools.partial(
            force_default_container, windows_default_container_id
        )
        context.use_container = True
        context.default_container = windows_default_container_id
        runtime_context = context
    return Factory(executor, loading_context, runtime_context)
def test_default_docker_warning(self, mock_logger, mock_windows):
    """The default-container warning is emitted by make_job_runner."""

    class TestCommandLineTool(CommandLineTool):
        # minimal stub: deliberately skip the real constructor
        def __init__(self, **kwargs):
            self.requirements = []
            self.hints = []

        def find_default_container(self, args, builder):
            return windows_default_container_id

    stub = TestCommandLineTool()
    stub.make_job_runner(
        RuntimeContext({"find_default_container": lambda x: "frolvlad/alpine-bash"}))
    mock_logger.warning.assert_called_with(
        DEFAULT_CONTAINER_MSG,
        windows_default_container_id,
        windows_default_container_id)
def load_job(workflow, job, cwl_args=None, cwd=None):
    """
    Tries to load json object from "job". If failed, assumes that
    "job" has been already parsed into Object. Inits loaded "job_data"
    based on the "workflow" (mostly for setting defaults from the
    workflow inputs; never fails). "cwl_args" can be used to update
    parameters for loading and runtime contexts.

    If "job" was file, resolves relative paths based on the job file
    location. If "job" was already parsed into Object, resolves relative
    paths based on "cwd". If "cwd" was None uses "inputs_folder" value
    from "cwl_args" or its default value returned from
    "get_default_cwl_args" function.

    Checking links after relative paths are resolved is disabled
    (checklinks is set to False in both places). This will prevent
    raising an exception by schema salad in those cases when an input
    file will be created from the provided content during workflow
    execution.

    Always returns CommentedMap
    """

    cwl_args = {} if cwl_args is None else cwl_args

    default_cwl_args = get_default_cwl_args(cwl_args)

    cwd = default_cwl_args["inputs_folder"] if cwd is None else cwd

    loading_context = setup_loadingContext(
        LoadingContext(default_cwl_args),
        RuntimeContext(default_cwl_args),
        argparse.Namespace(**default_cwl_args))

    job_copy = deepcopy(job)

    try:
        # first attempt: treat "job" as a reference to a job file
        job_data, _ = loading_context.loader.resolve_ref(job_copy,
                                                         checklinks=False)
    except (FileNotFoundError, SchemaSaladException) as err:
        # fallback: "job" is an already-parsed object; resolve relative
        # paths against "cwd" by giving it an explicit file-URI id
        job_data = load_yaml(json.dumps(job_copy))
        job_data["id"] = file_uri(cwd) + "/"
        job_data, metadata = loading_context.loader.resolve_all(
            job_data, job_data["id"], checklinks=False)

    # apply workflow input defaults to the resolved job data
    initialized_job_data = init_job_order(
        job_order_object=job_data,
        args=argparse.Namespace(**default_cwl_args),
        process=slow_cwl_load(workflow=workflow, cwl_args=default_cwl_args),
        loader=loading_context.loader,
        stdout=os.devnull)

    return initialized_job_data
def test_cuda_eval_resource_max() -> None:
    """evalResources reports the maximum cudaDeviceCount from the CUDA extension."""
    with open(get_data("cwltool/extensions-v1.1.yml")) as res:
        use_custom_schema("v1.2", "http://commonwl.org/cwltool", res.read())
    joborder: CWLObjectType = {}
    loadingContext = LoadingContext({"do_update": True})
    tool = load_tool(get_data("tests/wf/nvidia-smi-max.cwl"), loadingContext)
    builder = _makebuilder(tool.requirements[0])
    builder.job = joborder
    resources = tool.evalResources(builder, RuntimeContext({}))
    assert resources["cudaDeviceCount"] == 4
def test_default_docker_warning(mocker: Any) -> None:
    """Check warning when default docker Container is used on Windows."""
    mocker.patch("cwltool.command_line_tool._logger")
    tool = command_line_tool.CommandLineTool(
        cast(CommentedMap, cmap({"inputs": [], "outputs": []})), LoadingContext()
    )
    runtime = RuntimeContext(
        {"find_default_container": lambda x: "frolvlad/alpine-bash"}
    )
    tool.make_job_runner(runtime)
    expected_args = (
        command_line_tool.DEFAULT_CONTAINER_MSG,
        windows_default_container_id,
        windows_default_container_id,
    )
    command_line_tool._logger.warning.assert_called_with(*expected_args)  # type: ignore
def mk_tool(
    schema: Names,
    opts: List[str],
    reqs: Optional[List[CommentedMap]] = None,
    hints: Optional[List[CommentedMap]] = None,
) -> Tuple[LoadingContext, RuntimeContext, CommentedMap]:
    """Build loading/runtime contexts and a tool document for the given CLI options."""
    tool = basetool.copy()
    if reqs is not None:
        tool["requirements"] = CommentedSeq(reqs)
    if hints is not None:
        tool["hints"] = CommentedSeq(hints)
    parsed = cwltool.argparser.arg_parser().parse_args(opts)
    parsed.enable_ext = True
    runtime_ctx = RuntimeContext(vars(parsed))
    loading_ctx = cwltool.main.setup_loadingContext(None, runtime_ctx, parsed)
    loading_ctx.avsc_names = schema
    return loading_ctx, runtime_ctx, tool
def test_regular_file() -> None:
    """Test that regular files do not raise any exception when they are checked in job._setup."""
    tool = CommandLineTool(toolpath_object, loading_context)
    context = RuntimeContext()
    job_order: CWLObjectType = {
        "inp": {"class": "File", "location": get_data("tests/wf/whale.txt")}
    }
    job = next(tool.job(job_order, None, context))
    assert isinstance(job, JobBase)
    # must not raise for an ordinary (non-FIFO) file
    job._setup(context)
def test_default_docker_warning(mocker):
    """The Windows default-container warning is logged by make_job_runner."""
    mocker.patch("cwltool.command_line_tool.onWindows", return_value=True)
    mocker.patch("cwltool.command_line_tool._logger")

    class TestCommandLineTool(command_line_tool.CommandLineTool):
        # bypass the real constructor; only the attributes below are needed
        def __init__(self, **kwargs):
            self.requirements = []
            self.hints = []

        def find_default_container(self, args, builder):
            return windows_default_container_id

    stub = TestCommandLineTool()
    stub.make_job_runner(
        RuntimeContext(
            {"find_default_container": lambda x: "frolvlad/alpine-bash"}))
    command_line_tool._logger.warning.assert_called_with(
        command_line_tool.DEFAULT_CONTAINER_MSG,
        windows_default_container_id,
        windows_default_container_id)