示例#1
0
def test_map_wps_output_location_duplicate_subdir():
    """
    Validate the mapping between WPS output URL and directory when ``tmp`` fragments are repeated in the locations.

    The mapping is checked in both directions, without requiring that the mapped file actually exists.
    """
    wps_out = "http:///localhost/wps-output/tmp"
    for tmp_dir in ("/tmp/tmp/tmp", "/tmp/tmpdir"):
        settings = {"weaver.wps_output_dir": tmp_dir, "weaver.wps_output_url": wps_out}

        # URL under the WPS output location resolved as local path
        local_path = map_wps_output_location(
            f"{wps_out}/tmp/some-file-tmp.tmp",
            settings,
            exists=False,
        )
        assert local_path == f"{tmp_dir}/tmp/some-file-tmp.tmp"

        # local path under the WPS output directory resolved back as URL
        http_path = map_wps_output_location(
            f"{tmp_dir}/here/some-file-tmp.tmp",
            settings,
            exists=False,
            reverse=True,
        )
        assert http_path == f"{wps_out}/here/some-file-tmp.tmp"
示例#2
0
    def stage_results(self, results, expected_outputs, out_dir):
        # type: (JobResults, CWL_ExpectedOutputs, str) -> None
        """
        Stages the results of the remote :term:`Job` execution locally, under the specified output directory.

        The implementing remote :term:`Process` definition should call this operation after :meth:`execute`.

        .. note::
            The output file(s) must be written where the :term:`CWL` runner expects them according to the
            definition in ``expected_outputs``. That definition can however be a glob pattern that matches
            multiple files and/or nested directories (eg: ``"*.txt"``), so specific file names cannot be
            relied upon for the mapping.

        :param results: Results of the remote execution, each providing an ID and a value (or list of values).
        :param expected_outputs: Outputs to stage. Results with an ID that is not part of it are skipped.
        :param out_dir: Directory where the results are staged (trailing ``/`` are ignored).
        """
        for result in results:
            res_id = get_any_id(result)
            if res_id not in expected_outputs:
                continue

            # single value handled as one-item list, planning ahead for support of multiple output values
            values = get_any_value(result)
            values = values if isinstance(values, list) else [values]
            cwl_out_dir = out_dir.rstrip("/")
            for href in values:
                # only used for logging, the fetch operation receives the directory and resolves the name itself
                dst_path = "/".join((cwl_out_dir, href.rsplit("/", 1)[-1]))
                # performance improvement:
                #   When the reference resolves to a local resource (already fetched or same server), no download
                #   is needed. CWL still looks for the file within 'out_dir', therefore a link is created there,
                #   while the file itself remains in the full job output location (already staged by step).
                local_path = map_wps_output_location(href, self.settings)
                if local_path:
                    LOGGER.info(
                        "Detected result [%s] from [%s] as local reference to this instance. "
                        "Skipping fetch and using local copy in output destination: [%s]",
                        res_id, href, dst_path)
                    LOGGER.debug("Mapped result [%s] to local reference: [%s]", href, local_path)
                else:
                    LOGGER.info(
                        "Fetching result [%s] from [%s] to CWL output destination: [%s]",
                        res_id, href, dst_path)
                fetch_file(
                    local_path if local_path else href,
                    cwl_out_dir,
                    settings=self.settings,
                    link=bool(local_path),
                )
示例#3
0
def test_map_wps_output_location_exists():
    """
    Validate the effect of the ``exists`` flag on the mapping between WPS output URL and directory.

    While the temporary file is present, both directions resolve regardless of ``exists``.
    Once it is removed, only the calls using ``exists=False`` still resolve, the others return ``None``.
    """
    wps_url = "http:///localhost/wps-output/tmp"
    wps_dir = "/tmp/weaver-test/test-outputs"
    settings = {"weaver.wps_output_dir": wps_dir, "weaver.wps_output_url": wps_url}
    try:
        os.makedirs(wps_dir, exist_ok=True)
        with tempfile.NamedTemporaryFile(dir=wps_dir, suffix="test.txt", mode="w") as tmp_file:
            tmp_file.write("fake data")
            tmp_file.flush()
            tmp_file.seek(0)
            tmp_path = tmp_file.name
            tmp_http = f"{wps_url}/{os.path.basename(tmp_path)}"
            assert os.path.isfile(tmp_path), "failed setup test file"

            # file is present: URL -> path resolves with or without the existence check
            for must_exist in (True, False):
                assert map_wps_output_location(tmp_http, settings, exists=must_exist) == tmp_path

            # file is present: path -> URL resolves with or without the existence check
            for must_exist in (True, False):
                found = map_wps_output_location(tmp_path, settings, exists=must_exist, reverse=True)
                assert found == tmp_http

        # leaving the context manager deletes the temporary file
        assert not os.path.isfile(tmp_path), "test file expected to be auto-cleaned"

        # file is gone: only the calls that skip the existence check still resolve
        assert map_wps_output_location(tmp_http, settings, exists=True) is None
        assert map_wps_output_location(tmp_http, settings, exists=False) == tmp_path
        assert map_wps_output_location(tmp_path, settings, exists=True, reverse=True) is None
        assert map_wps_output_location(tmp_path, settings, exists=False, reverse=True) == tmp_http

    except AssertionError:
        # let failed assertions be reported as is
        raise
    except Exception as exc:
        pytest.fail(f"Failed due to unexpected exception: [{exc}]")
    finally:
        shutil.rmtree(wps_dir, ignore_errors=True)
示例#4
0
    def stage_results(self, results, expected_outputs, out_dir):
        # type: (JobResults, CWL_ExpectedOutputs, str) -> None
        """
        Stages the results of the remote :term:`Job` execution locally, under the specified output directory.

        The implementing remote :term:`Process` definition should call this operation after :meth:`execute`.

        .. note::
            The output file(s) must be written where the :term:`CWL` runner expects them according to the
            definition in ``expected_outputs``. That definition can however be a glob pattern that matches
            multiple files and/or nested directories (eg: ``"*.txt"``), so specific file names cannot be
            relied upon for the mapping.

        .. seealso::
            Function :func:`weaver.processes.convert.any2cwl_io` defines a generic glob pattern using the output ID
            and expected file extension based on Content-Type format. Since the remote :term:`WPS` :term:`Process`
            doesn't necessarily produce file names with the output ID as expected to find them (could be anything),
            staging must patch locations to let :term:`CWL` runtime resolve the files according to glob definitions.

        .. warning::
            Only remote :term:`Provider` implementations (which auto-generate a pseudo :term:`CWL` to map components)
            that produce outputs with inconsistent file names as described above should set attribute
            :attr:`WpsProcessInterface.stage_output_id_nested` accordingly. For :term:`Process` that directly provide
            an actual :term:`CWL` :term:`Application Package` definition (e.g.: Docker application), auto-mapping
            of glob patterns should be avoided, as it is expected that the :term:`CWL` contains real mapping to be
            respected for correct execution and retrieval of outputs from the application.

        :param results: Results of the remote execution, each providing an ID and a value (or list of values).
        :param expected_outputs: Outputs to stage. Results with an ID that is not part of it are skipped.
        :param out_dir: Directory where the results are staged (trailing ``/`` are ignored).
        """
        for result in results:
            res_id = get_any_id(result)
            if res_id not in expected_outputs:
                continue

            # single value handled as one-item list, planning ahead for support of multiple output values
            values = get_any_value(result)
            values = values if isinstance(values, list) else [values]

            # when nesting is enabled, each output is staged under a sub-directory named by its ID
            cwl_out_dir = (
                "/".join([out_dir.rstrip("/"), res_id])
                if self.stage_output_id_nested
                else out_dir.rstrip("/")
            )
            os.makedirs(cwl_out_dir, mode=0o700, exist_ok=True)
            for href in values:
                # only used for logging, the fetch operation receives the directory and resolves the name itself
                dst_path = "/".join((cwl_out_dir, href.rsplit("/", 1)[-1]))
                # performance improvement:
                #   When the reference resolves to a local resource (already fetched or same server), no download
                #   is needed. CWL still looks for the file within 'out_dir', therefore a link is created there,
                #   while the file itself remains in the full job output location (already staged by step).
                local_path = map_wps_output_location(href, self.settings)
                if local_path:
                    LOGGER.info(
                        "Detected result [%s] from [%s] as local reference to this instance. "
                        "Skipping fetch and using local copy in output destination: [%s]",
                        res_id, href, dst_path)
                    LOGGER.debug("Mapped result [%s] to local reference: [%s]", href, local_path)
                else:
                    LOGGER.info(
                        "Fetching result [%s] from [%s] to CWL output destination: [%s]",
                        res_id, href, dst_path)
                fetch_file(
                    local_path if local_path else href,
                    cwl_out_dir,
                    settings=self.settings,
                    link=bool(local_path),
                )
示例#5
0
文件: utils.py 项目: crim-ca/weaver
def get_job_results_response(job, container, headers=None):
    # type: (Job, AnySettingsContainer, Optional[AnyHeadersContainer]) -> AnyResponseType
    """
    Generates the :term:`OGC` compliant :term:`Job` results response according to submitted execution parameters.

    Parameters that impact the format of the response are:
        - Amount of outputs to be returned.
        - Parameter ``response: raw|document``
        - Parameter ``transmissionMode: value|reference`` per output if ``response: raw``.

    .. seealso::
        More details available for each combination:
        - https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response
        - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7

    :param job: Job for which to generate the results response.
    :param container: Application settings.
    :param headers:
        Additional headers to provide in the response.
        When a non-empty container is provided, it is updated in place with the ``Location`` header
        of the job status if it does not already define one (regardless of the header name casing).
    :returns:
        - ``HTTPOk`` with the JSON results when the job execution response is not ``raw``.
        - ``HTTPNoContent`` when the ``raw`` response contains no value (everything returned by reference).
        - ``FileResponse`` or plain text ``HTTPOk`` for the single value of a ``raw`` response.
    :raises HTTPNotImplemented: When more than one value must be returned in a ``raw`` response (multipart).
    """
    raise_job_dismissed(job, container)
    raise_job_bad_status(job, container)

    # when 'response=document', ignore 'transmissionMode=value|reference', respect it when 'response=raw'
    # See:
    #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document)
    #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document
    is_raw = job.execution_response == ExecuteResponse.RAW
    results, refs = get_results(
        job,
        container,
        value_key="value",
        schema=JobInputsOutputsSchema.OGC,  # not strict to provide more format details
        link_references=is_raw,
    )
    headers = headers or {}
    # Header names are case-insensitive. A plain 'dict' lookup of "location" would miss a caller-provided
    # "Location" and silently override it, therefore names are also compared regardless of their casing.
    if "location" not in headers and not any(str(name).lower() == "location" for name in headers):
        headers["Location"] = job.status_url(container)

    if not is_raw:
        # note:
        #   Cannot add "links" field in response body because variable Output ID keys are directly at the root
        #   Possible conflict with an output that would be named "links".
        results = sd.Result().deserialize(results)
        return HTTPOk(json=results, headers=headers)

    if not results:  # avoid schema validation error if all by reference
        # Status code 204 for empty body
        # see:
        #   - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref
        refs.extend(headers.items())
        return HTTPNoContent(headers=refs)

    # raw response can be data-only value, link-only or a mix of them
    # (link-only was handled above, at least one value is therefore available from this point)
    # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one
    out_vals = list(results.items())  # type: List[Tuple[str, ExecutionResultValue]]  # noqa
    out_info = out_vals[0][-1]  # type: ExecutionResultValue
    out_type = get_any_value(out_info, key=True)
    out_data = get_any_value(out_info)

    # FIXME: https://github.com/crim-ca/weaver/issues/376
    #  implement multipart, both for multi-output IDs and array-output under same ID
    if len(results) > 1 or (isinstance(out_data, list) and len(out_data) > 1):
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi
        raise HTTPNotImplemented(
            json={
                "code": "NotImplemented",
                "type": "NotImplemented",
                "detail": "Multipart results with 'transmissionMode=value' and 'response=raw' not implemented.",
            }
        )

    # single value only
    out_data = out_data[0] if isinstance(out_data, list) else out_data
    if out_type == "href":
        # value is a file reference: respond with the contents of the corresponding local file
        out_path = map_wps_output_location(out_data, container, exists=True, url=False)
        out_type = out_info.get("type")  # noqa
        out_headers = get_file_headers(
            out_path,
            download_headers=True,
            content_headers=True,
            content_type=out_type,
        )
        resp = FileResponse(out_path)
        resp.headers.update(out_headers)
        resp.headers.update(headers)
    else:
        resp = HTTPOk(
            body=out_data,
            charset="UTF-8",
            content_type=ContentType.TEXT_PLAIN,
            headers=headers,
        )
    if refs:
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref
        # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi
        resp.headerlist.extend(refs)
    return resp