def format_response_details(response, request):
    # type: (Response, Request) -> HTTPException
    """
    Call ``function`` (resolved from the enclosing scope) and attach JSON details to its response when applicable.

    The JSON body is generated when JSON is referenced by either the ``Content-Type`` header of the produced
    response or the ``Accept`` header of the request.

    :param response: reference response providing the message and the status code to compare against.
    :param request: request passed to ``function`` and used to generate the request details.
    :return: response produced by ``function`` when its status code matches the reference response.
    :raises HTTPException: the produced response itself when its status code differs from the reference one.
    """
    http_response = function(request)
    response_ctype = get_header("Content-Type", http_response.headers) or []
    request_accept = get_header("Accept", request.headers) or []
    json_requested = CONTENT_TYPE_APP_JSON in response_ctype or CONTENT_TYPE_APP_JSON in request_accept
    if json_requested:
        details = OWSException.json_formatter(
            http_response.status,
            response.message or "",
            http_response.title,
            request.environ,
        )
        details["detail"] = get_request_info(request)
        http_response._json = details
    if http_response.status_code == response.status_code:
        return http_response
    raise http_response  # re-raise if code was fixed
def get_vault_auth(request):
    # type: (Request) -> Tuple[AnyUUID, Optional[str]]
    """
    Extract the file reference and the access token of a :term:`Vault` request.

    The file reference is read from the ``file_id`` path parameter and validated against the schema.
    The token is the value of the :term:`Vault` authorization header, returned as provided.

    :param request: Request containing reference file UUID and authorization headers.
    :return: Extracted file reference and authentication token.
    :raises HTTPBadRequest: when the file reference does not respect the expected schema.
    """
    try:
        raw_file_id = request.matchdict.get("file_id")
        file_id = sd.VaultFileID().deserialize(raw_file_id)
    except colander.Invalid as ex:
        error_body = {
            "code": "VaultInvalidParameter",
            "description": sd.BadRequestVaultFileAccessResponse.description,
            "error": colander.Invalid.__name__,
            "cause": str(ex),
            # fall back to all path parameters when the schema error does not report the offending value
            "value": repr_json(ex.value or dict(request.matchdict), force_string=False),
        }
        raise HTTPBadRequest(json=error_body)
    token = get_header(sd.XAuthVaultFileHeader.name, request.headers)
    return file_id, token
def get_wps_output_context(request):
    # type: (AnyRequestType) -> Optional[str]
    """
    Obtains and validates allowed values for sub-directory context of WPS outputs in header ``X-WPS-Output-Context``.

    When the header is not provided, the ``weaver.wps_output_context`` setting is used as default value.
    The resolved value (header or default) must be one or more names made of alphanumeric, ``_`` or ``-``
    characters, separated by single ``/``, with an optional trailing ``/`` that is removed from the result.

    :raises HTTPUnprocessableEntity: if the header was provided and contains invalid or illegal value.
    :returns: validated context or None if not specified.
    """
    headers = getattr(request, "headers", {})
    ctx = get_header(sd.WpsOutputContextHeader.name, headers)
    if not ctx:
        settings = get_settings(request)
        ctx_default = settings.get("weaver.wps_output_context", None)
        if not ctx_default:
            return None
        LOGGER.debug("Using default 'weaver.wps_output_context': %s", ctx_default)
        ctx = ctx_default
    # The header value is user-provided: use a pattern without nested quantifiers so that matching stays linear.
    # 'fullmatch' guarantees the whole value is validated (including any trailing newline that '$' would tolerate).
    if re.fullmatch(r"[\w-]+(?:/[\w-]+)*/?", ctx):
        ctx_matched = ctx[:-1] if ctx.endswith("/") else ctx
        LOGGER.debug("Using request 'X-WPS-Output-Context': %s", ctx_matched)
        return ctx_matched
    raise HTTPUnprocessableEntity(
        json={
            "code": "InvalidHeaderValue",
            "name": sd.WpsOutputContextHeader.name,
            "description": "Provided value for 'X-WPS-Output-Context' request header is invalid.",
            "cause": "Value must be an alphanumeric context directory or tree hierarchy of sub-directory names.",
            "value": str(ctx)
        })
def get_wps_client(url, container=None, verify=None, headers=None, language=None):
    # type: (str, Optional[AnySettingsContainer], Optional[bool], Optional[HeadersType], Optional[str]) -> WebProcessingService
    """
    Obtains a :class:`WebProcessingService` with pre-configured request options for the given URL.

    :param url: WPS URL location.
    :param container: request or settings container to retrieve headers and other request options.
    :param verify: flag to enable SSL verification (overrides request options from container).
    :param headers: specific headers to apply (overrides retrieved ones from container).
    :param language: preferred response language if supported by the service.
    :returns: created WPS client object with configured request options.
    """
    if headers is None and hasattr(container, "headers"):
        headers = container.headers
    # remove invalid values that should be recomputed by the client as needed
    # employ the provided headers instead of making new ones in order to forward any language/authorization definition
    # copy to avoid modify original headers for sub-requests for next steps that could use them
    # employ dict() rather than deepcopy since headers that can be an instance of EnvironHeaders cannot be serialized
    # (a plain 'dict' is also required to allow caching keys against values, and tolerates undefined headers)
    headers = dict(headers or {})
    for header in ("Accept", "Content-Length", "Content-Type", "Content-Transfer-Encoding"):
        # drop every spelling variant: original/lower case, with dash or underscore separators
        for name in (header, header.lower()):
            headers.pop(name, None)
            headers.pop(name.replace("-", "_"), None)
    opts = get_request_options("get", url, container)
    if verify is None:
        verify = get_ssl_verify_option("get", url, container, request_options=opts)
    # convert objects to allow caching keys against values (object instances always different)
    language = (
        language
        or getattr(container, "accept_language", None)
        or get_header("Accept-Language", headers)
    )
    if language is not None and not isinstance(language, str):
        language = str(language)
    request_args = (url, headers, verify, language)
    if get_no_cache_option(headers, request_options=opts):
        # explicit no-cache request: reset cached results of the relevant functions for these arguments
        for func in (_get_wps_client_cached, _describe_process_cached):
            caching_args = (func, "request", *request_args)
            invalidate_region(caching_args)
    wps = _get_wps_client_cached(*request_args)
    return wps
def _submit_job(self, wps_request):
    # type: (WPSRequest) -> Union[WPSResponse, HTTPValid, JSON]
    """
    Dispatch operation to WPS-REST endpoint, which in turn should call back the real Celery Worker for execution.

    Returns the status response as is if XML, or convert it to JSON, according to request ``Accept`` header.

    :param wps_request: parsed WPS request providing the HTTP request, process identifier, version and language.
    :returns: JSON response when explicitly requested by ``Accept`` header, or the JSON body of the submission.
    """
    req = wps_request.http_request
    pid = wps_request.identifier
    ctx = get_wps_output_context(req)  # re-validate here in case submitted via WPS endpoint instead of REST-API
    proc = get_process(process_id=pid, settings=self.settings)  # raises if invalid or missing
    wps_process = self.processes.get(pid)

    # create the JSON payload from the XML content and submit job
    is_workflow = proc.type == ProcessType.WORKFLOW
    # ignore empty items, otherwise a missing 'tags' query parameter would register an empty-string tag
    req_tags = [tag for tag in req.args.get("tags", "").split(",") if tag]
    tags = req_tags + ["xml", f"wps-{wps_request.version}"]
    data = wps2json_job_payload(wps_request, wps_process)
    resp = submit_job_handler(
        data, self.settings, proc.processEndpointWPS1,
        process_id=pid, is_local=True, is_workflow=is_workflow, visibility=Visibility.PUBLIC,
        language=wps_request.language, tags=tags, headers=dict(req.headers), context=ctx
    )
    # enforced JSON results with submitted data that includes 'response=document'
    # use 'json_body' to work with any 'response' implementation
    body = resp.json_body

    # if Accept was JSON, provide response content as is
    # if anything else (even */*), return as XML
    # NOTE:
    #   It is very important to respect default XML since 'owslib.wps.WebProcessingService' does not provide any
    #   way to provide explicitly Accept header. Even our Wps1Process as Workflow step depends on this behaviour.
    accept_type = get_header("Accept", req.headers)
    if accept_type == ContentType.APP_JSON:
        resp = get_job_submission_response(body, resp.headers)
        setattr(resp, "_update_status", lambda *_, **__: None)  # patch to avoid pywps server raising
        return resp
    return body
def get_job_submission_response(body, headers, error=False):
    # type: (JSON, AnyHeadersContainer, bool) -> Union[HTTPOk, HTTPCreated, HTTPBadRequest]
    """
    Generates the successful response from contents returned by :term:`Job` submission process.

    If :term:`Job` already finished processing within requested ``Prefer: wait=X`` seconds delay (and if allowed by
    the :term:`Process` ``jobControlOptions``), return the successful status immediately instead of created status.

    Otherwise, return the status monitoring location of the created :term:`Job` to be monitored asynchronously.

    :param body: job status contents, which ``status`` field selects the type of response to generate.
    :param headers: headers to include in the response (any ``Content-Type`` is replaced by the JSON one).
    :param error: when the job is finished, report it with a bad request response instead of a successful one.
    :returns: response of the class matching the job status, with the validated body and provided headers.

    .. seealso::
        :func:`weaver.processes.execution.submit_job`
        :func:`weaver.processes.execution.submit_job_handler`
    """
    # convert headers to pass as list to avoid any duplicate Content-related headers
    # otherwise auto-added by JSON handling when provided by dict-like structure
    # always work on a new list, such that the container provided by the caller is never modified in place
    if hasattr(headers, "items"):
        headers = list(headers.items())
    else:
        headers = list(headers or [])
    get_header("Content-Type", headers, pop=True)
    headers.append(("Content-Type", ContentType.APP_JSON))

    status = map_status(body.get("status"))
    if status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]:
        if error:
            http_class = HTTPBadRequest
            http_desc = sd.FailedSyncJobResponse.description
        else:
            http_class = HTTPOk
            http_desc = sd.CompletedJobResponse.description
        # description is applied after the schema validation for completed jobs
        body = sd.CompletedJobStatusSchema().deserialize(body)
        body["description"] = http_desc
        return http_class(json=body, headerlist=headers)

    # description is applied before the schema validation for created jobs
    body["description"] = sd.CreatedLaunchJobResponse.description
    body = sd.CreatedJobStatusSchema().deserialize(body)
    return HTTPCreated(json=body, headerlist=headers)
def _get_capabilities_redirect(self, wps_request, *_, **__):
    # type: (WPSRequest, Any, Any) -> Optional[Union[WPSResponse, HTTPValid]]
    """
    Redirects to the WPS-REST processes endpoint when the request ``Accept`` header is exactly JSON.

    :param wps_request: parsed WPS request holding the original HTTP request.
    :returns: redirect response toward the processes endpoint, or ``None`` when JSON is not requested.
    """
    http_request = wps_request.http_request
    if get_header("Accept", http_request.headers) != CONTENT_TYPE_APP_JSON:
        return None
    weaver_url = get_weaver_url(self.settings)
    location = "{}{}".format(weaver_url, sd.processes_uri)
    resp = HTTPSeeOther(location=location)  # redirect
    # patch to avoid pywps server raising
    resp._update_status = lambda *_, **__: None
    return resp
def _parse_for_app_req(method, url, **req_kwargs):
    """
    Convert request arguments into the URL, callable and keywords expected by :class:`webTest.TestApp`.

    WebTest application employs ``params`` instead of ``data``/``json``.
    Actual query parameters must be pre-appended to ``url``.

    :param method: HTTP method name, matched in lower case against the methods of ``app``.
    :param url: request URL, overridden by ``base_url`` if provided in the keywords.
    :param req_kwargs: request keywords to convert.
    :returns: tuple of the updated URL, the ``app`` method to call, and the remaining keywords to call it with.
    """
    method = method.lower()
    url = req_kwargs.pop("base_url", url)
    data = req_kwargs.pop("data", None)
    json_body = req_kwargs.pop("json", None)
    query = req_kwargs.pop("query", None)
    params = req_kwargs.pop("params", {})

    # an explicit 'query' has priority over 'params', which can itself be a query string or a mapping
    query_string = None
    if query:
        query_string = query
    elif params and isinstance(params, str):
        query_string = params
    elif params:
        url = get_path_kvp(url, **params)
    if query_string:
        url += ("" if query_string.startswith("?") else "?") + query_string

    content = data or json_body or {}
    req_kwargs["params"] = content

    # remove unsupported parameters that cannot be passed down to TestApp
    for unsupported in ("timeout", "cert", "auth", "ssl_verify", "verify", "language", "stream"):
        req_kwargs.pop(unsupported, None)

    cookies = req_kwargs.pop("cookies", None)
    if cookies:
        # convert in case of list of tuples
        for name, value in dict(cookies).items():
            app.set_cookie(name, value)

    # although headers for JSON content can be set, some methods are not working (eg: PUT)
    # obtain the corresponding '<method>_json' function to have the proper behaviour
    headers = req_kwargs.get("headers", {}) or {}
    is_json = (
        get_header("Content-Type", headers) == CONTENT_TYPE_APP_JSON
        or isinstance(content, (dict, list))
    )
    json_method = method + "_json"
    if is_json and hasattr(app, json_method):
        method = json_method
        if isinstance(content, str):
            req_kwargs["params"] = json.loads(content)
    return url, getattr(app, method), req_kwargs
def _submit_job(self, wps_request):
    # type: (WPSRequest) -> Union[WPSResponse, HTTPValid, JSON]
    """
    Dispatch operation to WPS-REST endpoint, which in turn should call back the real Celery Worker for execution.

    :param wps_request: parsed WPS request providing the HTTP request, process identifier, version and language.
    :returns: job submission response when JSON is requested by ``Accept`` header, or the submission result as is.
    """
    req = wps_request.http_request
    pid = wps_request.identifier
    proc = get_process(process_id=pid, settings=self.settings)  # raises if invalid or missing
    wps_process = self.processes.get(pid)

    # create the JSON payload from the XML content and submit job
    is_workflow = proc.type == PROCESS_WORKFLOW
    # ignore empty items, otherwise a missing 'tags' query parameter would register an empty-string tag
    req_tags = [tag for tag in req.args.get("tags", "").split(",") if tag]
    tags = req_tags + ["xml", "wps-{}".format(wps_request.version)]
    data = wps2json_job_payload(wps_request, wps_process)
    body = submit_job_handler(
        data, self.settings, proc.processEndpointWPS1,
        process_id=pid, is_local=True, is_workflow=is_workflow, visibility=VISIBILITY_PUBLIC,
        language=wps_request.language, tags=tags, auth=dict(req.headers)
    )

    # if Accept was JSON, provide response content as is
    accept_type = get_header("Accept", req.headers)
    if accept_type == CONTENT_TYPE_APP_JSON:
        resp = get_job_submission_response(body)
        setattr(resp, "_update_status", lambda *_, **__: None)  # patch to avoid pywps server raising
        return resp
    return body
def submit_local_job(request):
    # type: (PyramidRequest) -> AnyViewResponse
    """
    Execute a process registered locally.

    Execution location and method is according to deployed Application Package.

    Requests with an XML ``Content-Type`` are handed over to the WPS service, after rewriting the request path and
    query string as a WPS ``Execute`` operation. Any other request is submitted directly as a job.

    :param request: request of the process execution.
    :returns: response generated by the WPS service for XML contents, or by the job submission otherwise.
    """
    process = get_process(request=request)
    ctype = clean_mime_type_format(get_header("content-type", request.headers, default=None), strip_parameters=True)
    if ctype in ContentType.ANY_XML:
        # Send the XML request to the WPS endpoint which knows how to parse it properly.
        # Execution will end up in the same 'submit_job_handler' function as other branch for JSON.
        service = get_pywps_service()
        wps_params = {"version": "1.0.0", "request": "Execute", "service": "WPS", "identifier": process.id}
        request.path_info = get_wps_path(request)
        # first character of the generated KVP is dropped since the query string is stored without separator
        request.query_string = get_path_kvp("", **wps_params)[1:]
        # restore the separator between path and query, otherwise the reported location is not a valid URL
        location = f"{request.application_url}{request.path_info}?{request.query_string}"
        LOGGER.warning("Route redirection [%s] -> [%s] for WPS-XML support.", request.url, location)
        http_request = extend_instance(request, WerkzeugRequest)
        http_request.shallow = False
        return service.call(http_request)
    return submit_job(request, process, tags=["wps-rest"])
def __init__(self, code=200, headers=None, **kwargs):
    # type: (int, Optional[HeadersType], **Any) -> None
    """
    Initialize the response with an empty content while reporting the provided headers.

    :param code: HTTP status code, used to resolve the title and explanation from ``status_map``.
    :param headers: headers to report in the response; they are copied before being modified.
    :param kwargs: additional parameters passed down to the parent class, except content-related ones.
    """
    # drop any content definition (including an explicit 'app_iter' content generator)
    # that would recalculate and reset the content_length
    # an explicit 'app_iter' would also conflict with the empty one enforced in the parent call below
    for content_arg in ("body", "json", "text", "app_iter"):
        kwargs.pop(content_arg, None)

    self.code = code
    http_class = status_map[code]
    self.title = http_class.title
    self.explanation = http_class.explanation

    content_type = None
    if headers:
        # in order to automatically add charset when needed, 'content_type' creates a duplicate
        # remove the original preemptively to avoid errors in parsers receiving the response
        headers = deepcopy(headers)
        content_type = get_header("Content-Type", headers, pop=True)

    super(HTTPHeadFileResponse, self).__init__(
        content_type=content_type,  # don't override content-type
        headerlist=None,            # extend content-type with charset as applicable
        headers=headers,
        app_iter=[b""],             # don't recalculate content-length
        **kwargs
    )
def _describe_process_redirect(self, wps_request, *_, **__):
    # type: (WPSRequest, Any, Any) -> Optional[Union[WPSResponse, HTTPValid]]
    """
    Redirects to the WPS-REST process description when the request ``Accept`` header is exactly JSON.

    :param wps_request: parsed WPS request holding the original HTTP request and the process identifiers.
    :returns: redirect response toward the process description, or ``None`` when JSON is not requested.
    :raises HTTPBadRequest: when JSON is requested without exactly one process identifier.
    """
    http_request = wps_request.http_request
    if get_header("Accept", http_request.headers) != CONTENT_TYPE_APP_JSON:
        return None

    weaver_url = get_weaver_url(self.settings)
    process_ids = wps_request.identifiers
    if not process_ids:
        raise HTTPBadRequest(sd.BadRequestGetProcessInfoResponse.description)
    if len(process_ids) > 1:
        raise HTTPBadRequest("Unsupported multi-process ID for description. Only provide one.")

    process_path = sd.process_uri.format(process_id=process_ids[0])
    location = "{}{}".format(weaver_url, process_path)
    resp = HTTPSeeOther(location=location)  # redirect
    # patch to avoid pywps server raising
    resp._update_status = lambda *_, **__: None
    return resp
def submit_job_handler(
    payload,            # type: JSON
    settings,           # type: SettingsType
    service_url,        # type: str
    provider_id=None,   # type: Optional[str]
    process_id=None,    # type: str
    is_workflow=False,  # type: bool
    is_local=True,      # type: bool
    visibility=None,    # type: Optional[AnyVisibility]
    language=None,      # type: Optional[str]
    headers=None,       # type: Optional[HeaderCookiesType]
    tags=None,          # type: Optional[List[str]]
    user=None,          # type: Optional[int]
    context=None,       # type: Optional[str]
):                      # type: (...) -> AnyResponseType
    """
    Submits the job to the Celery worker with provided parameters.

    Assumes that parameters have been pre-fetched and validated, except for the input payload.

    The execution mode is resolved from the ``Prefer`` header when it can be applied, or from the ``mode`` field
    of the payload otherwise. For synchronous execution, the task is waited for up to the resolved delay.
    If it completes in time, the job results (or the failure status) are returned directly. Otherwise, the
    response falls back to the asynchronous one that provides the job status location.

    :param payload: execution request body to validate.
    :param settings: application settings.
    :param service_url: WPS URL forwarded to the execution task.
    :param provider_id: identifier of the remote provider, if any.
    :param process_id: identifier of the process to execute.
    :param is_workflow: whether the process is a workflow.
    :param is_local: whether the process is local, in which case its job control options are retrieved.
    :param visibility: access visibility of the job.
    :param language: preferred language saved with the job.
    :param headers: request headers; ``Prefer`` is removed from them before they are forwarded to the task.
    :param tags: custom tags saved with the job.
    :param user: identifier of the user submitting the job.
    :param context: output context saved with the job.
    :raises HTTPBadRequest: if the payload does not respect the execution schema.
    :returns: job results or failure status for completed synchronous execution, created job status otherwise.
    """
    try:
        json_body = sd.Execute().deserialize(payload)
    except colander.Invalid as ex:
        raise HTTPBadRequest(f"Invalid schema: [{ex!s}]") from ex

    db = get_db(settings)
    headers = headers or {}
    if is_local:
        proc_store = db.get_store(StoreProcesses)
        process = proc_store.fetch_by_id(process_id)
        job_ctl_opts = process.jobControlOptions
    else:
        job_ctl_opts = ExecuteControlOption.values()
    max_wait = as_int(settings.get("weaver.exec_sync_max_wait"), default=20)
    mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts, max_wait)
    # preference is resolved at this point, remove it from the headers forwarded to the execution task
    get_header("prefer", headers, pop=True)
    if not applied:  # whatever returned is a default, consider 'mode' in body as alternative
        is_execute_async = ExecuteMode.get(json_body.get("mode")) != ExecuteMode.SYNC   # convert auto to async
    else:
        # as per https://datatracker.ietf.org/doc/html/rfc7240#section-2
        # Prefer header not resolved as valid still processes the request
        is_execute_async = mode != ExecuteMode.SYNC
    exec_resp = json_body.get("response")

    notification_email = json_body.get("notification_email")
    encrypted_email = encrypt_email(notification_email, settings) if notification_email else None

    store = db.get_store(StoreJobs)  # type: StoreJobs
    job = store.save_job(task_id=Status.ACCEPTED, process=process_id, service=provider_id,
                         inputs=json_body.get("inputs"), outputs=json_body.get("outputs"),
                         is_local=is_local, is_workflow=is_workflow, access=visibility, user_id=user, context=context,
                         execute_async=is_execute_async, execute_response=exec_resp,
                         custom_tags=tags, notification_email=encrypted_email, accept_language=language)
    job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=Status.ACCEPTED, progress=0)
    job = store.update_job(job)
    location_url = job.status_url(settings)
    resp_headers = {"Location": location_url}
    resp_headers.update(applied)

    wps_url = clean_ows_url(service_url)
    result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers)  # type: CeleryResult
    LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id)
    if not is_execute_async:
        LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait)
        try:
            result.wait(timeout=wait)
        except CeleryTaskTimeoutError:
            pass  # handled below by checking if the task is ready
        if result.ready():
            job = store.fetch_by_id(job.id)
            # when sync is successful, it must return the results direct instead of status info
            # see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response
            if job.status == Status.SUCCEEDED:
                return get_job_results_response(job, settings, headers=resp_headers)
            # otherwise return the error status
            body = job.json(container=settings, self_link="status")
            body["location"] = location_url
            resp = get_job_submission_response(body, resp_headers, error=True)
            return resp
        else:
            LOGGER.debug("Celery task requested as sync took too long to complete (wait=%ss). Continue in async.", wait)
            # sync not respected, therefore must drop it
            # since both could be provided as alternative preferences, drop only async with limited subset
            # the applied preference is held by the response headers (the request ones never define it)
            prefer = get_header("Preference-Applied", resp_headers, pop=True)
            _, _, async_applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteMode.ASYNC])
            if async_applied:
                resp_headers.update(async_applied)

    LOGGER.debug("Celery task submitted to run async.")
    body = {
        "jobID": job.id,
        "processID": job.process,
        "providerID": provider_id,  # dropped by validator if not applicable
        "status": map_status(Status.ACCEPTED),
        "location": location_url
    }
    resp = get_job_submission_response(body, resp_headers)
    return resp
def request_quote(request):
    # type: (PyramidRequest) -> AnyViewResponse
    """
    Request a quotation for a process.

    The quote is saved and its estimation is delegated to a Celery task. When synchronous execution is
    resolved from the request headers and the task completes within the wait delay, the complete quote is
    returned as created. In any other case, the partial quote is returned as accepted with its location.

    :param request: request providing the process reference, quote parameters and execution preferences.
    :raises HTTPBadRequest: if quoting is not supported by the configuration or for the process type.
    :raises ProcessNotFound: if the referenced process does not exist.
    :raises OWSMissingParameterValue: if the quote parameters do not respect the schema.
    :returns: created response with the complete quote, or accepted response with the partial quote.
    """
    settings = get_settings(request)
    weaver_config = get_weaver_configuration(settings)
    if weaver_config not in WeaverFeature.QUOTING:
        raise HTTPBadRequest(f"Unsupported quoting request for configuration '{weaver_config}'.")

    process_id = request.matchdict.get("process_id")
    process_store = get_db(request).get_store(StoreProcesses)
    try:
        process = process_store.fetch_by_id(process_id)  # type: Process
    except ProcessNotFound:
        # raise again with detailed JSON contents
        not_found_body = {
            "title": "NoSuchProcess",
            "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-process",
            "detail": "Process with specified reference identifier does not exist.",
            "status": ProcessNotFound.code,
            "cause": str(process_id)
        }
        raise ProcessNotFound(json=not_found_body)

    # only applications are supported everywhere, workflows additionally need remote capabilities
    quote_supported = (
        process.type in [ProcessType.APPLICATION, ProcessType.WORKFLOW]
        and not (process.type == ProcessType.WORKFLOW and weaver_config not in WeaverFeature.REMOTE)
    )
    if not quote_supported:
        unsupported_body = {
            "title": "UnsupportedOperation",
            "detail": f"Unsupported quoting process type '{process.type}' on '{weaver_config}' instance.",
            "status": HTTPBadRequest.code,
            "instance": process.href(settings)
        }
        raise HTTPBadRequest(json=unsupported_body)

    try:
        process_params = sd.QuoteProcessParametersSchema().deserialize(request.json)
    except colander.Invalid as exc:
        invalid_body = {
            "title": "MissingParameterValue",
            "cause": f"Invalid schema: [{exc.msg!s}]",
            "error": exc.__class__.__name__,
            "value": exc.value
        }
        raise OWSMissingParameterValue(json=invalid_body)

    quote_store = get_db(request).get_store(StoreQuotes)
    quote_user = request.authenticated_userid
    quote = Quote(process=process_id, processParameters=process_params, user=quote_user)
    quote = quote_store.save_quote(quote)
    max_wait = as_int(settings.get("weaver.quote_sync_max_wait"), default=20)
    mode, wait, applied = parse_prefer_header_execute_mode(request.headers, process.jobControlOptions, max_wait)

    result = process_quote_estimator.delay(quote.id)
    LOGGER.debug("Celery pending task [%s] for quote [%s].", result.id, quote.id)
    if mode == ExecuteMode.SYNC and wait:
        LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait)
        try:
            result.wait(timeout=wait)
        except CeleryTaskTimeoutError:
            pass  # handled below by checking if the task is ready
        if result.ready():
            quote = quote_store.fetch_by_id(quote.id)
            quote_body = quote.json()
            quote_body.update(description=sd.CreatedQuoteResponse.description)
            quote_body.update(links=quote.links(settings))
            quote_body = sd.CreatedQuoteResponse().deserialize(quote_body)
            return HTTPCreated(json=quote_body)

        LOGGER.debug("Celery task requested as sync took too long to complete (wait=%ss). Continue in async.", wait)
        # sync not respected, therefore must drop it
        # since both could be provided as alternative preferences, drop only async with limited subset
        prefer = get_header("Preference-Applied", applied, pop=True)
        _, _, applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteMode.ASYNC])

    partial_body = quote.partial()
    partial_body.update(description=sd.AcceptedQuoteResponse.description)
    resp_headers = {"Location": quote.href(settings)}
    resp_headers.update(applied)
    return HTTPAccepted(headers=resp_headers, json=partial_body)
def test_frontpage_format(self):
    """
    Validate the frontpage contents against its schema and the accessibility of every link it references.

    Each link must use a unique relationship, must be resolved with status 200 or 400, and must return contents
    of the media-type that the link advertises.
    """
    resp = self.testapp.get(sd.api_frontpage_service.path, headers=self.json_headers)
    assert resp.status_code == 200
    body = resp.json
    try:
        sd.FrontpageSchema().deserialize(body)
    except colander.Invalid as ex:
        body = json.dumps(body, indent=2, ensure_ascii=False)
        self.fail(f"expected valid response format as defined in schema [{ex!s}] in\n{body}")
    refs = [link["rel"] for link in body["links"]]
    assert len(body["links"]) == len(set(refs)), "Link relationships must all be unique"

    # FIXME: patch broken content-type from reference websites
    #        (see https://github.com/opengeospatial/NamingAuthority/issues/183)
    ctype_header_links = {
        "http://schemas.opengis.net/wps/": ContentType.APP_XML
    }

    for link in body["links"]:
        path = link["href"]
        rtype = link["type"]
        if rtype in ContentType.ANY_XML:
            rtype = ContentType.ANY_XML
        else:
            rtype = [rtype]
        rel = link["rel"]

        # request endpoint to validate it is accessible
        if "localhost" in path:
            resp = self.testapp.get(urlparse(path).path, expect_errors=True)  # allow error for wps without queries
        else:
            resp = request_extra("GET", path, retries=3, retry_after=True, ssl_verify=False, allow_redirects=True)
            # header could be missing from the request, avoid failing the membership check in that case
            user_agent = get_header("user-agent", resp.request.headers) or ""
            if resp.status_code == 403 and "python" in user_agent:
                # some sites will explicitly block bots, retry with mocked user-agent simulating human user access
                resp = request_extra("GET", path, headers={"User-Agent": "Mozilla"},
                                     retries=3, retry_after=True, ssl_verify=False, allow_redirects=True)

        # validate contents and expected media-type
        code = resp.status_code
        test = f"({rel}) [{path}]"
        assert code in [200, 400], f"Reference link expected to be found, got [{code}] for {test}"

        ctype = resp.headers.get("Content-Type", "").split(";")[0].strip()
        if not ctype:
            # use the known media-type of reference websites that omit the header
            for ref_link in ctype_header_links:
                if path.startswith(ref_link):
                    ctype = ctype_header_links[ref_link]
                    break
        assert ctype in rtype, f"Reference link content does not match [{ctype}]!=[{rtype}] for {test}"