def map_locations(job, settings): # type: (Job, SettingsType) -> None """ Maps directory locations between :mod:`pywps` process execution and produced jobs storage. Generates symlink references from the Job UUID to PyWPS UUID results (outputs directory, status and log locations). Update the Job's WPS ID if applicable (job executed locally). Assumes that all results are located under the same reference UUID. """ local_path = get_wps_local_status_location(job.status_location, settings) if not local_path: LOGGER.debug("Not possible to map Job to WPS locations.") return base_dir, status_xml = os.path.split(local_path) job.wps_id = os.path.splitext(status_xml)[0] wps_loc = os.path.join(base_dir, str(job.wps_id)) job_loc = os.path.join(base_dir, str(job.id)) if wps_loc == job_loc: LOGGER.debug("Job already refers to WPS locations.") return for loc_ext in ["", ".log", ".xml"]: wps_ref = wps_loc + loc_ext job_ref = job_loc + loc_ext if os.path.exists( wps_ref ): # possible that there are no results (e.g.: failed job) os.symlink(wps_ref, job_ref)
def __init__(self, url_location, settings): # type: (str, SettingsType) -> None self._url = url_location # location might not exist yet based on worker execution timing self._file = get_wps_local_status_location(url_location, settings, must_exist=False)
def execute(self, identifier, wps_request, uuid): # type: (str, Union[WPSRequest, WorkerRequest], str) -> Union[WPSResponse, HTTPValid] """ Handles the ``Execute`` KVP/XML request submitted on the WPS endpoint. Submit WPS request to corresponding WPS-REST endpoint and convert back for requested ``Accept`` content-type. Overrides the original execute operation, that will instead be handled by :meth:`execute_job` following callback from Celery Worker, which handles process job creation and monitoring. If ``Accept`` is JSON, the result is directly returned from :meth:`_submit_job`. If ``Accept`` is XML or undefined, :class:`WorkerExecuteResponse` converts the received JSON with XML template. """ result = self._submit_job(wps_request) if not isinstance(result, dict): return result # pre-built HTTP response with JSON contents when requested # otherwise, recreate the equivalent content with expected XML template format job_id = result["jobID"] wps_process = self.processes.get(wps_request.identifier) # because we are building the XML response (and JSON not explicitly requested) # caller is probably a WPS-1 client also expecting a status XML file # remap the status location accordingly from the current REST endpoint job_url = result["location"] if urlparse(job_url).path.endswith(f"/jobs/{job_id}"): # file status does not exist yet since client calling this method is waiting for it # pywps will generate it once the WorkerExecuteResponse is returned status_path = get_wps_local_status_location(job_url, self.settings, must_exist=False) wps_dir = get_wps_output_dir(self.settings) wps_url = get_wps_output_url(self.settings) job_url = status_path.replace(wps_dir, wps_url, 1) # when called by the WSGI app, 'WorkerExecuteResponse.__call__' will generate the XML from 'doc' property, # which itself is generated by template substitution of data from above 'json' property try: return WorkerExecuteResponse(wps_request, job_id, wps_process, job_url, settings=self.settings) except Exception as ex: # noqa LOGGER.exception( "Error building XML response by PyWPS Service during WPS Execute result from worker." ) message = f"Failed building XML response from WPS Execute result. Error [{ex!r}]" raise OWSNoApplicableCode(message, locator=job_id)
def execute_process(task, job_id, wps_url, headers=None): # type: (Task, UUID, str, Optional[HeadersType]) -> StatusType """ Celery task that executes the WPS process job monitoring as status updates (local and remote). """ from weaver.wps.service import get_pywps_service LOGGER.debug("Job execute process called.") task_process = get_celery_process() rss_start = task_process.memory_info().rss registry = get_registry( None) # local thread, whether locally or dispatched celery settings = get_settings(registry) db = get_db( registry, reset_connection=True ) # reset the connection because we are in a forked celery process store = db.get_store(StoreJobs) job = store.fetch_by_id(job_id) job.started = now() job.status = Status.STARTED # will be mapped to 'RUNNING' job.status_message = f"Job {Status.STARTED}." # will preserve detail of STARTED vs RUNNING job.save_log(message=job.status_message) task_logger = get_task_logger(__name__) job.save_log(logger=task_logger, message="Job task setup initiated.") load_pywps_config(settings) job.progress = JobProgress.SETUP job.task_id = task.request.id job.save_log(logger=task_logger, message="Job task setup completed.") job = store.update_job(job) # Flag to keep track if job is running in background (remote-WPS, CWL app, etc.). # If terminate signal is sent to worker task via API dismiss request while still running in background, # the raised exception within the task will switch the job to Status.FAILED, but this will not raise an # exception here. Since the task execution 'succeeds' without raising, it skips directly to the last 'finally'. # Patch it back to Status.DISMISSED in this case. task_terminated = True try: job.progress = JobProgress.DESCRIBE job.save_log(logger=task_logger, message=f"Employed WPS URL: [{wps_url!s}]", level=logging.DEBUG) job.save_log( logger=task_logger, message=f"Execute WPS request for process [{job.process!s}]") wps_process = fetch_wps_process(job, wps_url, headers, settings) # prepare inputs job.progress = JobProgress.GET_INPUTS job.save_log(logger=task_logger, message="Fetching job input definitions.") wps_inputs = parse_wps_inputs(wps_process, job) # prepare outputs job.progress = JobProgress.GET_OUTPUTS job.save_log(logger=task_logger, message="Fetching job output definitions.") wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in wps_process.processOutputs] # if process refers to a remote WPS provider, pass it down to avoid unnecessary re-fetch request if job.is_local: process = None # already got all the information needed pre-loaded in PyWPS service else: service = Service(name=job.service, url=wps_url) process = Process.from_ows(wps_process, service, settings) job.progress = JobProgress.EXECUTE_REQUEST job.save_log(logger=task_logger, message="Starting job process execution.") job.save_log( logger=task_logger, message= "Following updates could take a while until the Application Package answers..." ) wps_worker = get_pywps_service(environ=settings, is_worker=True) execution = wps_worker.execute_job(job, wps_inputs=wps_inputs, wps_outputs=wps_outputs, remote_process=process, headers=headers) if not execution.process and execution.errors: raise execution.errors[0] # adjust status location wps_status_path = get_wps_local_status_location( execution.statusLocation, settings) job.progress = JobProgress.EXECUTE_STATUS_LOCATION LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path) if not wps_status_path.startswith("http") and not os.path.isfile( wps_status_path): LOGGER.warning( "WPS status location not resolved to local path: [%s]", wps_status_path) job.save_log( logger=task_logger, level=logging.DEBUG, message=f"Updated job status location: [{wps_status_path}].") job.status = Status.RUNNING job.status_message = execution.statusMessage or f"{job!s} initiation done." job.status_location = wps_status_path job.request = execution.request job.response = execution.response job.progress = JobProgress.EXECUTE_MONITOR_START job.save_log(logger=task_logger, message="Starting monitoring of job execution.") job = store.update_job(job) max_retries = 5 num_retries = 0 run_step = 0 while execution.isNotComplete() or run_step == 0: if num_retries >= max_retries: job.save_log(errors=execution.errors, logger=task_logger) job = store.update_job(job) raise Exception( f"Could not read status document after {max_retries} retries. Giving up." ) try: # NOTE: # Don't actually log anything here until process is completed (success or fail) so that underlying # WPS execution logs can be inserted within the current job log and appear continuously. # Only update internal job fields in case they get referenced elsewhere. progress_min = JobProgress.EXECUTE_MONITOR_LOOP progress_max = JobProgress.EXECUTE_MONITOR_DONE job.progress = progress_min run_delay = wait_secs(run_step) execution = check_wps_status(location=wps_status_path, settings=settings, sleep_secs=run_delay) job_msg = (execution.statusMessage or "").strip() job.response = execution.response job.status = map_status(execution.getStatus()) job_status_msg = job_msg or "n/a" job_percent = execution.percentCompleted job.status_message = f"Job execution monitoring (progress: {job_percent}%, status: {job_status_msg})." if execution.isComplete(): msg_progress = f" (status: {job_msg})" if job_msg else "" if execution.isSucceded(): wps_package.retrieve_package_job_log( execution, job, progress_min, progress_max) job.status = map_status(Status.SUCCEEDED) job.status_message = f"Job succeeded{msg_progress}." job.progress = progress_max job.save_log(logger=task_logger) job_results = [ ows2json_output_data(output, process, settings) for output in execution.processOutputs ] job.results = make_results_relative( job_results, settings) else: task_logger.debug("Job failed.") wps_package.retrieve_package_job_log( execution, job, progress_min, progress_max) job.status_message = f"Job failed{msg_progress}." job.progress = progress_max job.save_log(errors=execution.errors, logger=task_logger) task_logger.debug( "Mapping Job references with generated WPS locations.") map_locations(job, settings) job = store.update_job(job) except Exception as exc: num_retries += 1 task_logger.debug("Exception raised: %s", repr(exc)) job.status_message = f"Could not read status XML document for {job!s}. Trying again..." job.save_log(errors=execution.errors, logger=task_logger) job = store.update_job(job) sleep(1) else: num_retries = 0 run_step += 1 finally: task_terminated = False # reached only if WPS execution completed (worker not terminated beforehand) job = store.update_job(job) except Exception as exc: # if 'execute_job' finishes quickly before even reaching the 'monitoring loop' # consider WPS execution produced an error (therefore Celery worker not terminated) task_terminated = False LOGGER.exception("Failed running [%s]", job) LOGGER.debug("Failed job [%s] raised an exception.", job, exc_info=exc) # note: don't update the progress here to preserve last one that was set job.status = map_status(Status.FAILED) job.status_message = f"Failed to run {job!s}." errors = f"{fully_qualified_name(exc)}: {exc!s}" job.save_log(errors=errors, logger=task_logger) job = store.update_job(job) finally: # if task worker terminated, local 'job' is out of date compared to remote/background runner last update job = store.fetch_by_id(job.id) if task_terminated and map_status(job.status) == Status.FAILED: job.status = Status.DISMISSED task_success = map_status( job.status) not in JOB_STATUS_CATEGORIES[StatusCategory.FAILED] collect_statistics(task_process, settings, job, rss_start) if task_success: job.progress = JobProgress.EXECUTE_MONITOR_END job.status_message = f"Job {job.status}." job.save_log(logger=task_logger) if task_success: job.progress = JobProgress.NOTIFY send_job_complete_notification_email(job, task_logger, settings) if job.status not in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: job.status = Status.SUCCEEDED job.status_message = f"Job {job.status}." job.mark_finished() if task_success: job.progress = JobProgress.DONE job.save_log(logger=task_logger, message="Job task complete.") job = store.update_job(job) return job.status
def execute_process(self, job_id, url, headers=None): from weaver.wps.service import get_pywps_service LOGGER.debug("Job execute process called.") settings = get_settings(app) task_logger = get_task_logger(__name__) load_pywps_config(settings) task_logger.debug("Job task setup.") # reset the connection because we are in a forked celery process db = get_db(app, reset_connection=True) store = db.get_store(StoreJobs) job = store.fetch_by_id(job_id) job.task_id = self.request.id job.progress = JOB_PROGRESS_SETUP job.save_log(logger=task_logger, message="Job task setup completed.") job = store.update_job(job) try: try: job.progress = JOB_PROGRESS_DESCRIBE job.save_log( logger=task_logger, message="Execute WPS request for process [{!s}]".format( job.process)) ssl_verify = get_ssl_verify_option("get", url, settings=settings) wps = WebProcessingService(url=url, headers=get_cookie_headers(headers), verify=ssl_verify) set_wps_language(wps, accept_language=job.accept_language) raise_on_xml_exception(wps._capabilities) # noqa except Exception as ex: raise OWSNoApplicableCode( "Failed to retrieve WPS capabilities. Error: [{}].".format( str(ex))) try: process = wps.describeprocess(job.process) except Exception as ex: raise OWSNoApplicableCode( "Failed to retrieve WPS process description. Error: [{}].". format(str(ex))) # prepare inputs job.progress = JOB_PROGRESS_GET_INPUTS job.save_log(logger=task_logger, message="Fetching job input definitions.") complex_inputs = [] for process_input in process.dataInputs: if WPS_COMPLEX_DATA in process_input.dataType: complex_inputs.append(process_input.identifier) try: wps_inputs = list() for process_input in job.inputs: input_id = get_any_id(process_input) process_value = get_any_value(process_input) # in case of array inputs, must repeat (id,value) input_values = process_value if isinstance( process_value, list) else [process_value] # we need to support file:// scheme but PyWPS doesn't like them so remove the scheme file:// input_values = [ val[7:] if str(val).startswith("file://") else val for val in input_values ] # need to use ComplexDataInput structure for complex input # need to use literal String for anything else than complex # TODO: BoundingBox not supported wps_inputs.extend([ (input_id, ComplexDataInput(input_value) if input_id in complex_inputs else str(input_value)) for input_value in input_values ]) except KeyError: wps_inputs = [] # prepare outputs job.progress = JOB_PROGRESS_GET_OUTPUTS job.save_log(logger=task_logger, message="Fetching job output definitions.") wps_outputs = [(o.identifier, o.dataType == WPS_COMPLEX_DATA) for o in process.processOutputs] mode = EXECUTE_MODE_ASYNC if job.execute_async else EXECUTE_MODE_SYNC job.progress = JOB_PROGRESS_EXECUTE_REQUEST job.save_log(logger=task_logger, message="Starting job process execution.") job.save_log( logger=task_logger, message= "Following updates could take a while until the Application Package answers..." ) wps_worker = get_pywps_service(environ=settings, is_worker=True) execution = wps_worker.execute_job(job.process, wps_inputs=wps_inputs, wps_outputs=wps_outputs, mode=mode, job_uuid=job.id) if not execution.process and execution.errors: raise execution.errors[0] # adjust status location wps_status_path = get_wps_local_status_location( execution.statusLocation, settings) job.progress = JOB_PROGRESS_EXECUTE_STATUS_LOCATION LOGGER.debug("WPS status location that will be queried: [%s]", wps_status_path) if not wps_status_path.startswith("http") and not os.path.isfile( wps_status_path): LOGGER.warning( "WPS status location not resolved to local path: [%s]", wps_status_path) job.save_log(logger=task_logger, level=logging.DEBUG, message="Updated job status location: [{}].".format( wps_status_path)) job.status = map_status(STATUS_STARTED) job.status_message = execution.statusMessage or "{} initiation done.".format( str(job)) job.status_location = wps_status_path job.request = execution.request job.response = execution.response job.progress = JOB_PROGRESS_EXECUTE_MONITOR_START job.save_log(logger=task_logger, message="Starting monitoring of job execution.") job = store.update_job(job) max_retries = 5 num_retries = 0 run_step = 0 while execution.isNotComplete() or run_step == 0: if num_retries >= max_retries: raise Exception( "Could not read status document after {} retries. Giving up." .format(max_retries)) try: # NOTE: # Don't actually log anything here until process is completed (success or fail) so that underlying # WPS execution logs can be inserted within the current job log and appear continuously. # Only update internal job fields in case they get referenced elsewhere. job.progress = JOB_PROGRESS_EXECUTE_MONITOR_LOOP execution = check_wps_status(location=wps_status_path, settings=settings, sleep_secs=wait_secs(run_step)) job_msg = (execution.statusMessage or "").strip() job.response = execution.response job.status = map_status(execution.getStatus()) job.status_message = "Job execution monitoring (progress: {}%, status: {})."\ .format(execution.percentCompleted, job_msg or "n/a") # job.save_log(logger=task_logger) # job = store.update_job(job) if execution.isComplete(): job.mark_finished() job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END msg_progress = " (status: {})".format( job_msg) if job_msg else "" if execution.isSucceded(): job.status = map_status(STATUS_SUCCEEDED) job.status_message = "Job succeeded{}.".format( msg_progress) wps_package.retrieve_package_job_log(execution, job) job.save_log(logger=task_logger) job_results = [ ows2json_output(output, process, settings) for output in execution.processOutputs ] job.results = make_results_relative( job_results, settings) else: task_logger.debug("Job failed.") job.status_message = "Job failed{}.".format( msg_progress) wps_package.retrieve_package_job_log(execution, job) job.save_log(errors=execution.errors, logger=task_logger) task_logger.debug( "Mapping Job references with generated WPS locations.") map_locations(job, settings) except Exception as exc: num_retries += 1 task_logger.debug("Exception raised: %s", repr(exc)) job.status_message = "Could not read status XML document for {!s}. Trying again...".format( job) job.save_log(errors=execution.errors, logger=task_logger) sleep(1) else: # job.status_message = "Update {}...".format(str(job)) # job.save_log(logger=task_logger) num_retries = 0 run_step += 1 finally: job = store.update_job(job) except Exception as exc: LOGGER.exception("Failed running [%s]", job) job.status = map_status(STATUS_FAILED) job.status_message = "Failed to run {!s}.".format(job) job.progress = JOB_PROGRESS_EXECUTE_MONITOR_ERROR exception_class = "{}.{}".format( type(exc).__module__, type(exc).__name__) errors = "{0}: {1!s}".format(exception_class, exc) job.save_log(errors=errors, logger=task_logger) finally: job.progress = JOB_PROGRESS_EXECUTE_MONITOR_END job.status_message = "Job {}.".format(job.status) job.save_log(logger=task_logger) # Send email if requested if job.notification_email is not None: job.progress = JOB_PROGRESS_NOTIFY try: notify_job_complete(job, job.notification_email, settings) message = "Notification email sent successfully." job.save_log(logger=task_logger, message=message) except Exception as exc: exception_class = "{}.{}".format( type(exc).__module__, type(exc).__name__) exception = "{0}: {1!s}".format(exception_class, exc) message = "Couldn't send notification email ({})".format( exception) job.save_log(errors=message, logger=task_logger, message=message) job.progress = JOB_PROGRESS_DONE job.save_log(logger=task_logger, message="Job task complete.") job = store.update_job(job) return job.status