def test_execution_failed_error():
    """
    Check that `str()` of a fully populated ExecutionFailedError matches the
    expected report text.
    """
    error_fields = dict(
        executor="foo",
        target="target",
        status="Failed",
        inputs={"a": 1, "b": "bar"},
        executor_stdout="dis happen",
        executor_stderr="dis an err\noopsy",
        failed_task="baz",
        failed_task_exit_status=1,
        failed_task_stdout="oh oh\nsomeone set us up the bomb",
        failed_task_stderr="beep boop beep\nI am except",
    )
    # The literal below is significant down to its whitespace; it is kept
    # verbatim and must not be re-wrapped.
    expected_message = textwrap.dedent(f""" foo failed with status Failed while running task baz of target: inputs: {{'a': 1, 'b': 'bar'}} executor_stdout: dis happen executor_stderr: dis an err oopsy failed_task_exit_status: 1 failed_task_stdout: oh oh someone set us up the bomb failed_task_stderr: beep boop beep I am except """)

    try:
        raise ExecutionFailedError(**error_fields)
    except ExecutionFailedError as err:
        assert str(err) == expected_message
def _poll_until_terminal(self, run_id: str, target: str,
                         inputs_dict: Optional[dict] = None,
                         timeout: int = DEFAULT_POLLING_TIMEOUT):
    """
    Poll the status endpoint of a submitted run until it reports a status
    that is one of TERMINAL_STATES.

    Args:
        run_id: Id of the run; used to build the
            `<api url>/<run_id>/status` URL.
        target: Name of the target being executed; only used when building
            an error.
        inputs_dict: Inputs of the run; only used when building an error.
        timeout: Passed to `poll` as its `timeout` argument.

    Raises:
        ExecutionFailedError: if `poll` raises PollingException, or if a
            status response cannot be converted by `_resp_to_json`.
    """
    status_url = f"{self._cromwell_api_url}/{run_id}/status"

    def reached_terminal_state(url):
        # True once the status reported by the server is a terminal one.
        with requests.get(url, auth=self._auth) as response:
            status_payload = self._resp_to_json(response, target, inputs_dict)
            return status_payload.get("status") in TERMINAL_STATES

    try:
        poll(
            reached_terminal_state,
            args=(status_url, ),
            step=DEFAULT_POLLING_STEP,
            timeout=timeout,
        )
    except PollingException:
        timeout_message = f"Encountered timeout for run with id {run_id}"
        LOG.exception(timeout_message)
        raise ExecutionFailedError(
            executor="cromwell-server",
            target=target,
            status="Failed",
            inputs=inputs_dict,
            msg=timeout_message,
        )
def _resp_to_json(resp, target=None, inputs_dict=None):
    """
    Return the decoded JSON body of a successful response, or raise.

    Args:
        resp: Response object exposing `ok`, `reason` and `json()`.
        target: Name of the target being executed; only used in the error.
        inputs_dict: Inputs of the run; only used in the error.

    Returns:
        Whatever `resp.json()` returns when `resp.ok` is truthy.

    Raises:
        ExecutionFailedError: if `resp.ok` is falsy. `resp.reason` is used
            as the error message when it is non-empty.
    """
    if not resp.ok:
        failure = {
            "executor": "cromwell-server",
            "target": target,
            "status": "Failed",
            "inputs": inputs_dict,
        }
        if resp.reason:
            failure["msg"] = resp.reason
        raise ExecutionFailedError(**failure)

    return resp.json()
def run_workflow(self, wdl_path: Path, inputs: Optional[dict] = None,
                 expected: Optional[dict] = None, **kwargs) -> dict:
    """
    Execute a WDL workflow with the `cromwell run` command and optionally
    compare its outputs with expected values.

    Args:
        wdl_path: The WDL script to execute.
        inputs: Object that will be serialized to JSON and provided to
            Cromwell as the workflow inputs.
        expected: Dict mapping output parameter names to expected values.
        kwargs: Additional keyword arguments, mostly for debugging:
            * workflow_name: The name of the workflow in the WDL script. If
              None, the name of the WDL script is used (without the .wdl
              extension).
            * inputs_file: Path to the Cromwell inputs file to use. Inputs
              are written to this file only if it doesn't exist.
              NOTE(review): this method only forwards the value to
              `get_target_name` via `**kwargs`; it is not passed to
              `read_write_inputs` here - confirm that is intended.
            * imports_file: Path to the WDL imports file to use. Imports are
              written to this file only if it doesn't exist.
            * java_args: Additional arguments to pass to Java runtime.
            * cromwell_args: Additional arguments to pass to `cromwell run`.

    Returns:
        Dict of outputs.

    Raises:
        ValueError: if the resolved target is a task rather than a workflow.
        ExecutionFailedError: if there was an error executing Cromwell.
        AssertionError: if the actual outputs don't match the expected
            outputs, or the metadata status is not "Succeeded" after a
            successful command.
    """
    target, is_task = get_target_name(
        wdl_path=wdl_path, import_dirs=self._import_dirs, **kwargs)
    if is_task:
        raise ValueError(
            "Cromwell cannot execute tasks independently of a workflow")

    inputs_dict, inputs_file = read_write_inputs(
        inputs_dict=inputs, namespace=target)
    imports_file = self._get_workflow_imports(kwargs.get("imports_file"))

    # Optional pieces of the command line collapse to empty strings.
    inputs_arg = ""
    if inputs_file:
        inputs_arg = f"-i {inputs_file}"
    imports_zip_arg = ""
    if imports_file:
        imports_zip_arg = f"-p {imports_file}"
    java_args = kwargs.get("java_args", self.java_args) or ""
    cromwell_args = kwargs.get("cromwell_args", self._cromwell_args) or ""

    # Cromwell is asked (-m) to write run metadata into the working directory.
    metadata_file = Path.cwd() / "metadata.json"

    command = (
        f"{self.java_bin} {java_args} -jar {self._cromwell_jar_file} run "
        f"-m {metadata_file} {cromwell_args} {inputs_arg} {imports_zip_arg} "
        f"{wdl_path}"
    )
    LOG.info(
        f"Executing cromwell command '{command}' with inputs "
        f"{json.dumps(inputs_dict, default=str)}"
    )
    result = subby.run(command, raise_on_error=False)

    metadata = None
    if metadata_file.exists():
        with open(metadata_file, "rt") as handle:
            metadata = json.load(handle)

    if not result.ok:
        failure_info = {
            "executor": "cromwell",
            "target": target,
            "status": "Failed",
            "inputs": inputs_dict,
            "executor_stdout": result.output,
            "executor_stderr": result.error,
        }
        if not metadata:
            failure_info["msg"] = (
                f"Cromwell command failed but did not generate a metadata "
                f"file at {metadata_file}"
            )
        else:
            failures = self._get_failures(metadata)
            if not failures:
                failure_info["msg"] = f"cromwell failed on workflow {target}"
            else:
                failure_info.update({
                    "failed_task": failures.failed_task,
                    "failed_task_exit_status":
                        failures.failed_task_exit_status,
                    "failed_task_stdout": failures.failed_task_stdout,
                    "failed_task_stderr": failures.failed_task_stderr,
                })
                if failures.num_failed > 1:
                    failure_info["msg"] = (
                        f"cromwell failed on {failures.num_failed} instances of "
                        f"{failures.failed_task} of {target}; only "
                        f"showing output from the first failed task"
                    )
        raise ExecutionFailedError(**failure_info)

    if metadata:
        assert metadata["status"] == "Succeeded"
        outputs = metadata["outputs"]
    else:
        # No metadata file: fall back to extracting outputs from stdout.
        LOG.warning(
            f"Cromwell command completed successfully but did not generate "
            f"a metadata file at {metadata_file}"
        )
        outputs = self._get_cromwell_outputs(result.output)

    if expected:
        self._validate_outputs(outputs, expected, target)

    return outputs
def run_workflow(self, wdl_path: Path, inputs: Optional[dict] = None,
                 expected: Optional[dict] = None, **kwargs) -> dict:
    """
    Execute a WDL workflow (or a single task) locally through the miniwdl
    runtime and optionally compare its outputs with expected values.

    Args:
        wdl_path: The WDL script to execute.
        inputs: Object that will be serialized to JSON and provided as the
            workflow inputs.
        expected: Dict mapping output parameter names to expected values.
        kwargs: Additional keyword arguments, mostly for debugging:
            * workflow_name: Name of the workflow to run.
            * task_name: Name of the task to run if a workflow isn't defined.
            * inputs_file: Path to the inputs file to use. Inputs are written
              to this file only if it doesn't exist.
              NOTE(review): this method only forwards the value to
              `get_target_name` via `**kwargs`; it is not passed to
              `read_write_inputs` here - confirm that is intended.
            * check_quant: Forwarded to `CLI.load`; defaults to True.
            * copy_input_files: Forwarded to the miniwdl entry point;
              defaults to False.

    Returns:
        Dict of outputs.

    Raises:
        ExecutionFailedError: if miniwdl reports a `RunFailed` error.
        AssertionError: if the actual outputs don't match the expected outputs
    """
    import_paths = [str(import_dir) for import_dir in self._import_dirs]
    wdl_doc = CLI.load(
        str(wdl_path),
        path=import_paths,
        check_quant=kwargs.get("check_quant", True),
        read_source=CLI.read_source,
    )

    namespace, is_task = get_target_name(wdl_doc=wdl_doc, **kwargs)

    # Inputs are namespaced only when running a workflow; the target name is
    # handed to miniwdl as `task` only when running a task.
    inputs_namespace = None if is_task else namespace
    task_name = namespace if is_task else None

    inputs_dict, inputs_file = read_write_inputs(
        inputs_dict=inputs,
        namespace=inputs_namespace,
    )

    input_file_arg = str(inputs_file) if inputs_file else None
    target, input_env, input_json = CLI.runner_input(
        doc=wdl_doc,
        inputs=[],
        input_file=input_file_arg,
        empty=[],
        task=task_name,
    )

    logger = logging.getLogger("miniwdl-run")
    logger.setLevel(CLI.NOTICE_LEVEL)
    CLI.install_coloredlogs(logger)

    # initialize Docker
    docker_client = docker.from_env()
    try:
        version_json = json.dumps(docker_client.version())
        logger.debug("dockerd :: " + version_json[1:-1])
        _util.initialize_local_docker(logger, docker_client)
    finally:
        docker_client.close()

    try:
        entrypoint = (
            runtime.run_local_task
            if isinstance(target, Tree.Task)
            else runtime.run_local_workflow
        )
        rundir, output_env = entrypoint(
            target,
            input_env,
            copy_input_files=kwargs.get("copy_input_files", False),
        )
    except Error.EvalError as err:
        # TODO: test errors
        MiniwdlExecutor.log_source(logger, err)
        raise
    except Error.RuntimeError as err:
        MiniwdlExecutor.log_source(logger, err)

        if not isinstance(err, runtime.error.RunFailed):
            raise

        # A RunFailed is either workflow- or task-level, depending on what
        # was executed. For a workflow-level failure, the task-level error
        # that caused it is taken from the exception's cause.
        failure = err.__cause__ if isinstance(err.exe, Tree.Workflow) else err
        task_err = cast(runtime.error.RunFailed, failure)

        exit_status = None
        stderr_text = None
        cause = task_err.__cause__
        if isinstance(cause, runtime.error.CommandFailed):
            # The task's command itself failed: report its exit status and
            # the contents of its stderr file.
            cmd_err = cast(runtime.error.CommandFailed, cause)
            exit_status = cmd_err.exit_status
            stderr_text = MiniwdlExecutor.read_miniwdl_command_std(
                cmd_err.stderr_file)

        raise ExecutionFailedError(
            "miniwdl",
            namespace,
            status="Failed",
            inputs=task_err.exe.inputs,
            failed_task=task_err.exe.name,
            failed_task_exit_status=exit_status,
            failed_task_stderr=stderr_text,
        ) from failure

    outputs = CLI.values_to_json(output_env, namespace=target.name)

    if expected:
        self._validate_outputs(outputs, expected, target.name)

    return outputs
def run_workflow(
    self,
    wdl_path: Path,
    inputs: Optional[dict] = None,
    expected: Optional[dict] = None,
    **kwargs
) -> dict:
    """
    Run a WDL workflow on DNAnexus via dxWDL and optionally check that the
    outputs match given expected values.

    Args:
        wdl_path: The WDL script to execute.
        inputs: Workflow inputs; formatted with `DxInputsFormatter` unless
            usable inputs are loaded from `inputs_file`.
        expected: Dict mapping output parameter names to expected values.
            May be None, in which case no outputs are requested or validated.
        kwargs: Additional keyword arguments:
            * workflow_name: If given (and not None), must equal the name of
              the workflow defined in the WDL document.
            * stage_id: Namespace passed to the inputs formatter; defaults
              to "stage-common".
            * inputs_file: Path to a JSON file with already formatted inputs.
              It is only read if it exists.

    Returns:
        Dict of outputs.

    Raises:
        ValueError: if the document defines no workflow, or `workflow_name`
            does not match the workflow in the document.
        ExecutionFailedError: if the analysis fails.
        ExecutorError: on authentication, missing-resource or permission
            errors raised by dxpy.
        AssertionError: if the actual outputs don't match the expected outputs
    """
    # TODO: handle "task_name" kwarg - run app instead of workflow
    wdl_doc = parse_wdl(wdl_path, self._import_dirs, **kwargs)

    if not wdl_doc.workflow:
        raise ValueError(
            "Currently, the dxWDL executor only supports executing "
            "workflows, not individual tasks"
        )

    workflow_name = wdl_doc.workflow.name

    # The key is "workflow_name"; the previous "workflow-name" lookup raised
    # KeyError whenever the kwarg was supplied. An explicit None is treated
    # the same as the kwarg being absent.
    requested_name = kwargs.get("workflow_name")
    if requested_name is not None and workflow_name != requested_name:
        raise ValueError(
            f"The workflow name '{workflow_name}' does not match the value "
            f"of the 'workflow_name' parameter '{requested_name}'"
        )

    namespace = kwargs.get("stage_id", "stage-common")
    inputs_dict = None

    if "inputs_file" in kwargs:
        inputs_file = ensure_path(kwargs["inputs_file"])

        if inputs_file.exists():
            with open(inputs_file, "rt") as inp:
                inputs_dict = json.load(inp)

    if not inputs_dict:
        workflow_inputs = wdl_doc.workflow.available_inputs

        if workflow_inputs:
            dx_inputs_formatter = DxInputsFormatter(wdl_doc, **kwargs)
            inputs_dict = dx_inputs_formatter.format_inputs(inputs, namespace)
        else:
            inputs_dict = {}

    # `expected` is optional; calling `.keys()` on None raised AttributeError.
    expected_names = (expected or {}).keys()

    try:
        with login():
            workflow = self._resolve_workflow(wdl_path, workflow_name, kwargs)
            analysis = workflow.run(inputs_dict)

            try:
                analysis.wait_on_done()

                outputs = self._get_analysis_outputs(analysis, expected_names)

                if expected:
                    self._validate_outputs(outputs, expected, OUTPUT_STAGE)

                return outputs
            except dxpy.exceptions.DXJobFailureError as jerr:
                raise ExecutionFailedError(
                    "dxWDL",
                    workflow_name,
                    analysis.describe()["state"],
                    inputs_dict,
                    **self._get_failed_task(analysis)
                ) from jerr
            finally:
                if self._cleanup_cache:
                    shutil.rmtree(self._dxwdl_cache_dir)
    except dxpy.exceptions.InvalidAuthentication as ierr:
        raise ExecutorError("dxwdl", "Invalid DNAnexus credentials/token") from ierr
    except dxpy.exceptions.ResourceNotFound as rerr:
        raise ExecutorError("dxwdl", "Required resource was not found") from rerr
    except dxpy.exceptions.PermissionDenied as perr:
        raise ExecutorError(
            "dxwdl", "You must have at least CONTRIBUTE permission"
        ) from perr
def run_workflow(
    self,
    wdl_path: Path,
    inputs: Optional[dict] = None,
    expected: Optional[dict] = None,
    **kwargs,
) -> dict:
    """
    Run a WDL workflow on given inputs by submitting it to a Cromwell
    server, and check that the output matches given expected values.

    Args:
        wdl_path: The WDL script to execute.
        inputs: Object that will be serialized to JSON and provided to
            Cromwell as the workflow inputs.
        expected: Dict mapping output parameter names to expected values.
        kwargs: Additional keyword arguments, mostly for debugging:
            * workflow_name: The name of the workflow in the WDL script. If
              None, the name of the WDL script is used (without the .wdl
              extension).
            * inputs_file: Path to the Cromwell inputs file to use;
              forwarded to `read_write_inputs`.
            * imports_file: Path to the WDL imports file to use; forwarded
              to `_get_workflow_imports`.
            * timeout: Forwarded to `_poll_until_terminal`; defaults to
              DEFAULT_POLLING_TIMEOUT.

    Returns:
        Dict of outputs.

    Raises:
        ValueError: if the resolved target is a task rather than a workflow.
        ExecutionFailedError: if there was an error executing Cromwell
        AssertionError: if the actual outputs don't match the expected outputs
    """
    target, is_task = get_target_name(
        wdl_path=wdl_path, import_dirs=self._import_dirs, **kwargs
    )
    if is_task:
        raise ValueError(
            "Cromwell cannot execute tasks independently of a workflow")

    inputs_dict, _ = read_write_inputs(
        inputs_file=kwargs.get("inputs_file"),
        inputs_dict=inputs,
        namespace=target,
        write_formatted_inputs=False,
    )

    payload = {}
    payload_files = []

    def open_payload_file(path: Path, mode: str = "r") -> IO:
        # Track every opened handle so it can be closed after the request.
        open_file = open(path, mode)
        payload_files.append(open_file)
        return open_file

    try:
        payload["workflowSource"] = open_payload_file(wdl_path)

        if inputs_dict:
            payload["workflowInputs"] = json.dumps(inputs_dict, default=str)

        imports_file = self._get_workflow_imports(
            self._import_dirs, kwargs.get("imports_file"))
        if imports_file:
            payload["workflowDependencies"] = open_payload_file(
                imports_file, "rb")

        options = self._cromwell_config_file
        if options:
            # Dispatch on the options object itself. The previous check
            # inspected `inputs_dict`, so a configured file path was
            # JSON-dumped as a string whenever inputs were a dict.
            if isinstance(options, dict):
                payload["workflowOptions"] = json.dumps(options, default=str)
            else:
                payload["workflowOptions"] = open_payload_file(options)

        LOG.info(
            f"Executing cromwell server '{self._cromwell_api_url}' with inputs "
            f"{json.dumps(inputs_dict, default=str)}")

        with requests.post(
            self._cromwell_api_url, files=payload, auth=self._auth
        ) as resp:
            status_object = self._resp_to_json(resp, target, inputs_dict)
            run_id = status_object["id"]

        LOG.info(
            f"Executing on cromwell with id {run_id}. Waiting until terminal "
            f"state is reached")
    finally:
        for fh in payload_files:
            # Closing is best-effort, but must not swallow KeyboardInterrupt
            # or SystemExit the way a bare `except:` would.
            try:
                fh.close()
            except Exception:
                LOG.exception("Error closing file %s", fh)

    self._poll_until_terminal(
        run_id, target, inputs_dict,
        kwargs.get("timeout", DEFAULT_POLLING_TIMEOUT))

    metadata_url = f"{self._cromwell_api_url}/{run_id}/metadata"
    with requests.get(metadata_url, auth=self._auth) as metadata_response:
        metadata = self._resp_to_json(metadata_response, target, inputs_dict)

    if metadata["status"] != "Succeeded":
        error_kwargs = {
            "executor": "cromwell",
            "target": target,
            "status": "Failed",
            "inputs": inputs_dict,
        }
        # The helper receives the dict and is expected to add failure details.
        self._parse_metadata_errors(
            metadata, target=target, error_kwargs=error_kwargs)
        raise ExecutionFailedError(**error_kwargs)

    outputs = metadata["outputs"]

    if expected:
        self._validate_outputs(outputs, expected, target)

    return outputs