Example #1
import textwrap

from pytest_wdl.executors import ExecutionFailedError  # assumed import path


def test_execution_failed_error():
    try:
        raise ExecutionFailedError(
            executor="foo",
            target="target",
            status="Failed",
            inputs={
                "a": 1,
                "b": "bar"
            },
            executor_stdout="dis happen",
            executor_stderr="dis an err\noopsy",
            failed_task="baz",
            failed_task_exit_status=1,
            failed_task_stdout="oh oh\nsomeone set us up the bomb",
            failed_task_stderr="beep boop beep\nI am except")
    except ExecutionFailedError as err:
        assert str(err) == textwrap.dedent(f"""
        foo failed with status Failed while running task baz of target:
            inputs:
                {{'a': 1, 'b': 'bar'}}
            executor_stdout:
                dis happen
            executor_stderr:
                dis an err
                oopsy
            failed_task_exit_status: 1
            failed_task_stdout:
                oh oh
                someone set us up the bomb
            failed_task_stderr:
                beep boop beep
                I am except
        """)
Example #2
    def _poll_until_terminal(self,
                             run_id: str,
                             target: str,
                             inputs_dict: Optional[dict] = None,
                             timeout: int = DEFAULT_POLLING_TIMEOUT):
        def get_status(status_url):
            with requests.get(status_url, auth=self._auth) as rsp:
                status_dict = self._resp_to_json(rsp, target, inputs_dict)
                return status_dict.get("status") in TERMINAL_STATES

        try:
            poll(get_status,
                 args=(f"{self._cromwell_api_url}/{run_id}/status", ),
                 step=DEFAULT_POLLING_STEP,
                 timeout=timeout)
        except PollingException:
            LOG.exception(f"Encountered timeout for run with id {run_id}")

            error_kwargs = {
                "executor": "cromwell-server",
                "target": target,
                "status": "Failed",
                "inputs": inputs_dict,
                "msg": f"Encountered timeout for run with id {run_id}",
            }

            raise ExecutionFailedError(**error_kwargs)
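The loop above relies on the third-party polling package: poll() re-invokes its target until the return value is truthy, sleeping step seconds between attempts, and raises polling.TimeoutException (a PollingException subclass) once timeout seconds elapse. A self-contained sketch of the same pattern, with a hypothetical readiness check standing in for get_status:

import polling


def server_ready() -> bool:
    # Hypothetical stand-in for get_status above; poll() keeps calling it
    # until it returns a truthy value.
    return False


try:
    polling.poll(server_ready, step=5, timeout=30)
except polling.TimeoutException:
    print("run did not reach a terminal state within 30s")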
Example #3
    @staticmethod
    def _resp_to_json(resp, target=None, inputs_dict=None):
        if resp.ok:
            return resp.json()
        else:
            error_kwargs = {
                "executor": "cromwell-server",
                "target": target,
                "status": "Failed",
                "inputs": inputs_dict,
            }

            if resp.reason:
                error_kwargs["msg"] = resp.reason

            raise ExecutionFailedError(**error_kwargs)
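The branch logic hinges on two requests.Response attributes: resp.ok is True for any status code below 400, and resp.reason carries the HTTP reason phrase (e.g. "Not Found") that becomes the error message. A quick illustration, with a placeholder URL for a Cromwell status endpoint:

import requests

# Placeholder Cromwell server URL; adjust for a real deployment.
resp = requests.get("http://localhost:8000/api/workflows/v1/abc123/status")
if resp.ok:
    print(resp.json())          # parsed status document
else:
    print(resp.status_code, resp.reason)  # e.g. 404 Not Found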
Example #4
    def run_workflow(self,
                     wdl_path: Path,
                     inputs: Optional[dict] = None,
                     expected: Optional[dict] = None,
                     **kwargs) -> dict:
        """
        Run a WDL workflow on given inputs, and check that the output matches
        given expected values.

        Args:
            wdl_path: The WDL script to execute.
            inputs: Object that will be serialized to JSON and provided to Cromwell
                as the workflow inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging:
                * workflow_name: The name of the workflow in the WDL script. If None,
                    the name of the WDL script is used (without the .wdl extension).
                * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                    written to this file only if it doesn't exist.
                * imports_file: Path to the WDL imports file to use. Imports are
                    written to this file only if it doesn't exist.
                * java_args: Additional arguments to pass to Java runtime.
                * cromwell_args: Additional arguments to pass to `cromwell run`.

        Returns:
            Dict of outputs.

        Raises:
            ExecutionFailedError: if there was an error executing Cromwell
            AssertionError: if the actual outputs don't match the expected outputs
        """
        target, is_task = get_target_name(wdl_path=wdl_path,
                                          import_dirs=self._import_dirs,
                                          **kwargs)
        if is_task:
            raise ValueError(
                "Cromwell cannot execute tasks independently of a workflow")

        inputs_dict, inputs_file = read_write_inputs(inputs_dict=inputs,
                                                     namespace=target)

        imports_file = self._get_workflow_imports(kwargs.get("imports_file"))
        inputs_arg = f"-i {inputs_file}" if inputs_file else ""
        imports_zip_arg = f"-p {imports_file}" if imports_file else ""
        java_args = kwargs.get("java_args", self.java_args) or ""
        cromwell_args = kwargs.get("cromwell_args", self._cromwell_args) or ""
        metadata_file = Path.cwd() / "metadata.json"

        cmd = (
            f"{self.java_bin} {java_args} -jar {self._cromwell_jar_file} run "
            f"-m {metadata_file} {cromwell_args} {inputs_arg} {imports_zip_arg} "
            f"{wdl_path}")
        LOG.info(f"Executing cromwell command '{cmd}' with inputs "
                 f"{json.dumps(inputs_dict, default=str)}")

        exe = subby.run(cmd, raise_on_error=False)

        metadata = None
        if metadata_file.exists():
            with open(metadata_file, "rt") as inp:
                metadata = json.load(inp)

        if exe.ok:
            if metadata:
                assert metadata["status"] == "Succeeded"
                outputs = metadata["outputs"]
            else:
                LOG.warning(
                    f"Cromwell command completed successfully but did not generate "
                    f"a metadata file at {metadata_file}")
                outputs = self._get_cromwell_outputs(exe.output)
        else:
            error_kwargs = {
                "executor": "cromwell",
                "target": target,
                "status": "Failed",
                "inputs": inputs_dict,
                "executor_stdout": exe.output,
                "executor_stderr": exe.error,
            }
            if metadata:
                failures = self._get_failures(metadata)
                if failures:
                    error_kwargs.update({
                        "failed_task": failures.failed_task,
                        "failed_task_exit_status": failures.failed_task_exit_status,
                        "failed_task_stdout": failures.failed_task_stdout,
                        "failed_task_stderr": failures.failed_task_stderr,
                    })
                    if failures.num_failed > 1:
                        error_kwargs["msg"] = (
                            f"cromwell failed on {failures.num_failed} instances "
                            f"of {failures.failed_task} of {target}; only "
                            f"showing output from the first failed task")
                else:
                    error_kwargs["msg"] = f"cromwell failed on workflow {target}"
            else:
                error_kwargs["msg"] = (
                    f"Cromwell command failed but did not generate a metadata "
                    f"file at {metadata_file}")

            raise ExecutionFailedError(**error_kwargs)

        if expected:
            self._validate_outputs(outputs, expected, target)

        return outputs
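In a test suite, run_workflow is normally reached through pytest-wdl's workflow_runner fixture rather than called directly. Roughly as follows; the WDL path and data keys are placeholders, not from this repository:

def test_variant_caller(workflow_data, workflow_runner):
    # workflow_data resolves named test files; workflow_runner dispatches to
    # the configured executor's run_workflow and asserts expected outputs.
    inputs = {"bam": workflow_data["input_bam"]}
    expected = {"vcf": workflow_data["expected_vcf"]}
    workflow_runner("variant_caller.wdl", inputs, expected)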
Example #5
    def run_workflow(self,
                     wdl_path: Path,
                     inputs: Optional[dict] = None,
                     expected: Optional[dict] = None,
                     **kwargs) -> dict:
        """
        Run a WDL workflow on given inputs, and check that the output matches
        given expected values.

        Args:
            wdl_path: The WDL script to execute.
            inputs: Object that will be serialized to JSON and provided to
                miniwdl as the workflow inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging:
                * workflow_name: Name of the workflow to run.
                * task_name: Name of the task to run if a workflow isn't defined.
                * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                    written to this file only if it doesn't exist.

        Returns:
            Dict of outputs.

        Raises:
            ExecutionFailedError: if there was an error executing the workflow with miniwdl
            AssertionError: if the actual outputs don't match the expected outputs
        """

        wdl_doc = CLI.load(str(wdl_path),
                           path=[str(path) for path in self._import_dirs],
                           check_quant=kwargs.get("check_quant", True),
                           read_source=CLI.read_source)

        namespace, is_task = get_target_name(wdl_doc=wdl_doc, **kwargs)

        inputs_dict, inputs_file = read_write_inputs(
            inputs_dict=inputs,
            namespace=namespace if not is_task else None,
        )

        target, input_env, input_json = CLI.runner_input(
            doc=wdl_doc,
            inputs=[],
            input_file=str(inputs_file) if inputs_file else None,
            empty=[],
            task=namespace if is_task else None)

        logger = logging.getLogger("miniwdl-run")
        logger.setLevel(CLI.NOTICE_LEVEL)
        CLI.install_coloredlogs(logger)

        # initialize Docker
        client = docker.from_env()
        try:
            logger.debug("dockerd :: " + json.dumps(client.version())[1:-1])
            _util.initialize_local_docker(logger, client)
        finally:
            client.close()

        try:
            if isinstance(target, Tree.Task):
                entrypoint = runtime.run_local_task
            else:
                entrypoint = runtime.run_local_workflow

            rundir, output_env = entrypoint(target,
                                            input_env,
                                            copy_input_files=kwargs.get(
                                                "copy_input_files", False))
        except Error.EvalError as err:  # TODO: test errors
            MiniwdlExecutor.log_source(logger, err)
            raise
        except Error.RuntimeError as err:
            MiniwdlExecutor.log_source(logger, err)

            if isinstance(err, runtime.error.RunFailed):
                # This will be a workflow- or a task-level failure, depending on
                # whether a workflow or task was executed. If it is workflow-level,
                # we need to get the task-level error that caused the workflow to fail.
                if isinstance(err.exe, Tree.Workflow):
                    err = err.__cause__

                task_err = cast(runtime.error.RunFailed, err)
                cause = task_err.__cause__
                failed_task_exit_status = None
                failed_task_stderr = None
                if isinstance(cause, runtime.error.CommandFailed):
                    # If the task failed due to an error in the command, populate the
                    # command exit status and stderr.
                    cmd_err = cast(runtime.error.CommandFailed, cause)
                    failed_task_exit_status = cmd_err.exit_status
                    failed_task_stderr = MiniwdlExecutor.read_miniwdl_command_std(
                        cmd_err.stderr_file)

                raise ExecutionFailedError(
                    "miniwdl",
                    namespace,
                    status="Failed",
                    inputs=task_err.exe.inputs,
                    failed_task=task_err.exe.name,
                    failed_task_exit_status=failed_task_exit_status,
                    failed_task_stderr=failed_task_stderr) from err
            else:
                raise

        outputs = CLI.values_to_json(output_env, namespace=target.name)

        if expected:
            self._validate_outputs(outputs, expected, target.name)

        return outputs
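The unwrapping in the handler above leans on Python's exception chaining: raise X from Y stores Y on X.__cause__, so a workflow-level RunFailed can be peeled back to the task-level failure, and that in turn to the CommandFailed carrying the shell exit status. A minimal standalone illustration with stand-in classes, not miniwdl's:

class CommandFailed(Exception):
    # Stand-in for miniwdl's runtime.error.CommandFailed.
    def __init__(self, exit_status: int):
        super().__init__(f"command exited with {exit_status}")
        self.exit_status = exit_status


class RunFailed(Exception):
    # Stand-in for miniwdl's runtime.error.RunFailed.
    pass


try:
    try:
        raise CommandFailed(exit_status=2)
    except CommandFailed as cmd_err:
        raise RunFailed("task hello failed") from cmd_err
except RunFailed as run_err:
    cause = run_err.__cause__  # the chained CommandFailed
    assert isinstance(cause, CommandFailed)
    print(cause.exit_status)   # -> 2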
Example #6
File: dx.py  Project: ruchim/pytest-wdl
    def run_workflow(
        self,
        wdl_path: Path,
        inputs: Optional[dict] = None,
        expected: Optional[dict] = None,
        **kwargs
    ) -> dict:
        # TODO: handle "task_name" kwarg - run app instead of workflow
        wdl_doc = parse_wdl(wdl_path, self._import_dirs, **kwargs)

        if not wdl_doc.workflow:
            raise ValueError(
                "Currently, the dxWDL executor only supports executing "
                "workflows, not individual tasks"
            )

        workflow_name = wdl_doc.workflow.name

        if (
            "workflow_name" in kwargs
            and workflow_name != kwargs["workflow_name"]
        ):
            raise ValueError(
                f"The workflow name '{workflow_name}' does not match the value "
                f"of the 'workflow_name' parameter '{kwargs['workflow_name']}'"
            )

        namespace = kwargs.get("stage_id", "stage-common")
        inputs_dict = None

        if "inputs_file" in kwargs:
            inputs_file = ensure_path(kwargs["inputs_file"])

            if inputs_file.exists():
                with open(inputs_file, "rt") as inp:
                    inputs_dict = json.load(inp)

        if not inputs_dict:
            workflow_inputs = wdl_doc.workflow.available_inputs

            if workflow_inputs:
                dx_inputs_formatter = DxInputsFormatter(wdl_doc, **kwargs)
                inputs_dict = dx_inputs_formatter.format_inputs(inputs, namespace)
            else:
                inputs_dict = {}

        try:
            with login():
                workflow = self._resolve_workflow(wdl_path, workflow_name, kwargs)
                analysis = workflow.run(inputs_dict)

                try:
                    analysis.wait_on_done()

                    outputs = self._get_analysis_outputs(
                        analysis, expected.keys() if expected else None)

                    if expected:
                        self._validate_outputs(outputs, expected, OUTPUT_STAGE)

                    return outputs
                except dxpy.exceptions.DXJobFailureError:
                    raise ExecutionFailedError(
                        "dxWDL",
                        workflow_name,
                        analysis.describe()["state"],
                        inputs_dict,
                        **self._get_failed_task(analysis)
                    )
                finally:
                    if self._cleanup_cache:
                        shutil.rmtree(self._dxwdl_cache_dir)
        except dxpy.exceptions.InvalidAuthentication as ierr:
            raise ExecutorError("dxwdl", "Invalid DNAnexus credentials/token") from ierr
        except dxpy.exceptions.ResourceNotFound as rerr:
            raise ExecutorError("dxwdl", "Required resource was not found") from rerr
        except dxpy.exceptions.PermissionDenied as perr:
            raise ExecutorError(
                "dxwdl", f"You must have at least CONTRIBUTE permission"
            ) from perr
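The DNAnexus calls above follow dxpy's analysis lifecycle: DXWorkflow.run returns a DXAnalysis, wait_on_done blocks and raises DXJobFailureError if any stage fails, and describe() reports the terminal state. Stripped to its essentials; the workflow ID and input name below are placeholders:

import dxpy

workflow = dxpy.DXWorkflow("workflow-xxxx")  # placeholder workflow ID
analysis = workflow.run({"stage-common.sample_name": "NA12878"})
try:
    analysis.wait_on_done()
    print(analysis.describe()["output"])
except dxpy.exceptions.DXJobFailureError:
    print("analysis state:", analysis.describe()["state"])  # e.g. "failed"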
Example #7
    def run_workflow(
        self,
        wdl_path: Path,
        inputs: Optional[dict] = None,
        expected: Optional[dict] = None,
        **kwargs,
    ) -> dict:
        """
        Run a WDL workflow on given inputs, and check that the output matches
        given expected values.

        Args:
            wdl_path: The WDL script to execute.
            inputs: Object that will be serialized to JSON and provided to Cromwell
                as the workflow inputs.
            expected: Dict mapping output parameter names to expected values.
            kwargs: Additional keyword arguments, mostly for debugging:
                * workflow_name: The name of the workflow in the WDL script. If None,
                    the name of the WDL script is used (without the .wdl extension).
                * inputs_file: Path to the Cromwell inputs file to use. Inputs are
                    written to this file only if it doesn't exist.
                * imports_file: Path to the WDL imports file to use. Imports are
                    written to this file only if it doesn't exist.
                * java_args: Additional arguments to pass to Java runtime.
                * cromwell_args: Additional arguments to pass to `cromwell run`.

        Returns:
            Dict of outputs.

        Raises:
            ExecutionFailedError: if there was an error executing Cromwell
            AssertionError: if the actual outputs don't match the expected outputs
        """
        target, is_task = get_target_name(wdl_path=wdl_path,
                                          import_dirs=self._import_dirs,
                                          **kwargs)

        if is_task:
            raise ValueError(
                "Cromwell cannot execute tasks independently of a workflow")

        inputs_dict, _ = read_write_inputs(
            inputs_file=kwargs.get("inputs_file"),
            inputs_dict=inputs,
            namespace=target,
            write_formatted_inputs=False)

        payload = {}
        payload_files = []

        def open_payload_file(path: Path, mode: str = "r") -> IO:
            open_file = open(path, mode)
            payload_files.append(open_file)
            return open_file

        try:
            payload["workflowSource"] = open_payload_file(wdl_path)

            if inputs_dict:
                payload["workflowInputs"] = json.dumps(inputs_dict,
                                                       default=str)

            imports_file = self._get_workflow_imports(
                self._import_dirs, kwargs.get("imports_file"))

            if imports_file:
                payload["workflowDependencies"] = open_payload_file(
                    imports_file, "rb")

            if self._cromwell_config_file:
                # The options may be given inline as a dict or as a path to a
                # workflow options file.
                if isinstance(self._cromwell_config_file, dict):
                    payload["workflowOptions"] = json.dumps(
                        self._cromwell_config_file, default=str)
                else:
                    payload["workflowOptions"] = open_payload_file(
                        self._cromwell_config_file)

            LOG.info(
                f"Executing cromwell server '{self._cromwell_api_url}' with inputs "
                f"{json.dumps(inputs_dict, default=str)}")

            with requests.post(self._cromwell_api_url,
                               files=payload,
                               auth=self._auth) as resp:
                status_object = self._resp_to_json(resp, target, inputs_dict)
                run_id = status_object["id"]
                LOG.info(
                    f"Executing on cromwell with id {run_id}. Waiting until terminal "
                    f"state is reached")
        finally:
            for fh in payload_files:
                try:
                    fh.close()
                except Exception:
                    LOG.exception("Error closing file %s", fh)

        self._poll_until_terminal(
            run_id, target, inputs_dict,
            kwargs.get("timeout", DEFAULT_POLLING_TIMEOUT))

        metadata_url = f"{self._cromwell_api_url}/{run_id}/metadata"
        outputs = None

        with requests.get(metadata_url, auth=self._auth) as metadata_response:
            metadata = self._resp_to_json(metadata_response, target,
                                          inputs_dict)

            if metadata["status"] == "Succeeded":
                outputs = metadata["outputs"]
            else:
                error_kwargs = {
                    "executor": "cromwell",
                    "target": target,
                    "status": "Failed",
                    "inputs": inputs_dict,
                }
                self._parse_metadata_errors(metadata,
                                            target=target,
                                            error_kwargs=error_kwargs)
                raise ExecutionFailedError(**error_kwargs)

        if expected:
            self._validate_outputs(outputs, expected, target)

        return outputs
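The payload dict built above becomes a multipart/form-data submission to Cromwell's REST API; requests accepts open file handles and plain strings interchangeably as part values. The same call stripped to its essentials, assuming Cromwell's default local endpoint and a placeholder hello.wdl:

import json

import requests

api_url = "http://localhost:8000/api/workflows/v1"  # assumed default endpoint
with open("hello.wdl") as wdl_source:
    resp = requests.post(api_url, files={
        "workflowSource": wdl_source,
        "workflowInputs": json.dumps({"hello.name": "world"}),
    })
run_id = resp.json()["id"]  # used for the /{run_id}/status and /{run_id}/metadata calls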